// Metal MCP Server
//
// src/core
import { ResearchSession as IResearchSession, ResearchPlan, ResearchStep, ResearchProgress, ResearchFindings, StepResult, SessionOptions, Evidence } from '../types/session.js';
import { ContentExtractor } from './content-extractor.js';
import { ContentAnalyzer } from './content-analyzer.js';
import { ExtractedContent } from '../types/content.js';
import { ContentAnalysis } from '../types/analysis.js';
import { chromium, Browser, BrowserContext } from 'playwright';
import { parse as parseUrl } from 'url';

/**
 * A single research run over one topic.
 *
 * The session fetches pages with a headless Chromium instance (Playwright), passes the HTML
 * through `ContentExtractor` and `ContentAnalyzer`, and accumulates the resulting topics,
 * insights and sources in {@link ResearchSession.findings}. Call {@link ResearchSession.complete}
 * when done so the browser is shut down.
 */
export class ResearchSession implements IResearchSession {
    /** File extensions treated as non-HTML documents; URLs ending in one of these are never fetched. */
    private static readonly SKIP_EXTENSIONS: readonly string[] = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'];

    /** Upper bound, in milliseconds, for a single page navigation. */
    private static readonly PAGE_LOAD_TIMEOUT_MS = 10000;

    /** Substrings whose presence marks a paragraph as "technical". */
    private static readonly TECHNICAL_INDICATORS: readonly string[] = [
        'implementation',
        'example',
        'usage',
        'code',
        'method',
        'function',
        'class',
        'pattern',
        'practice'
    ];

    public id: string;
    public topic: string;
    public status: 'planning' | 'in_progress' | 'analyzing' | 'synthesizing' | 'completed' | 'failed' | 'cancelled';
    public plan: ResearchPlan;
    public progress: ResearchProgress;
    public findings: ResearchFindings;
    public timestamp: {
        created: string;
        updated: string;
        completed?: string;
    };

    private visitedUrls: Set<string>;
    private contentExtractor: ContentExtractor;
    private contentAnalyzer: ContentAnalyzer;
    private options: Required<SessionOptions>;
    private browser: Browser | null = null;
    private context: BrowserContext | null = null;
    private startTime: number;

    /**
     * Writes a diagnostic message to stderr.
     *
     * stdout is deliberately avoided: if this server is run over the MCP stdio transport,
     * stdout carries the protocol messages and stray log lines would corrupt that stream.
     */
    private log(...args: unknown[]): void {
        console.error(...args);
    }

    /** Milliseconds left before the session exceeds `options.timeout` (zero or negative once expired). */
    private remainingTime(): number {
        return this.options.timeout - (Date.now() - this.startTime);
    }

    /**
     * Enforces the overall session time budget.
     *
     * @throws Error 'Research session timeout' once `options.timeout` ms have elapsed since construction.
     */
    private checkTimeout(): void {
        if (this.remainingTime() <= 0) {
            throw new Error('Research session timeout');
        }
    }

    /**
     * Creates a session in the `planning` state with an empty plan, progress and findings.
     *
     * @param topic   Subject of the research.
     * @param options Optional limits; any field left out falls back to its default.
     */
    constructor(topic: string, options: SessionOptions = {}) {
        // slice(2, 11) drops the leading "0." and keeps up to nine base-36 characters.
        this.id = `research_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
        this.topic = topic;
        this.status = 'planning';
        this.visitedUrls = new Set<string>();
        this.contentExtractor = new ContentExtractor();
        this.contentAnalyzer = new ContentAnalyzer();
        this.startTime = Date.now();

        this.options = {
            maxSteps: options.maxSteps || 10,
            // `??` rather than `||`: 0 is a meaningful depth ("do not follow related URLs").
            maxDepth: options.maxDepth ?? 2,
            maxBranching: options.maxBranching || 3,
            timeout: options.timeout || 55000, // Set below MCP timeout
            // `??` rather than `||`: 0 is a meaningful threshold ("keep every insight").
            minRelevanceScore: options.minRelevanceScore ?? 0.7,
            maxParallelOperations: options.maxParallelOperations || 3
        };

        this.plan = this.createInitialPlan();
        this.progress = this.initializeProgress();
        this.findings = this.initializeFindings();
        const now = new Date().toISOString();
        this.timestamp = {
            created: now,
            updated: now
        };
    }

    /**
     * Lazily launches headless Chromium and opens the shared browser context.
     *
     * The browser and the context are checked separately so that a failed `newContext`
     * is retried on the next call instead of leaving the session without a context.
     */
    private async initializeBrowser(): Promise<void> {
        let browser = this.browser;
        if (!browser) {
            browser = await chromium.launch({ headless: true });
            this.browser = browser;
        }

        if (!this.context) {
            this.context = await browser.newContext({
                userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                viewport: { width: 1280, height: 800 },
                deviceScaleFactor: 1,
                isMobile: false,
                hasTouch: false
            });
        }
    }

    /**
     * Decides whether a URL is worth handing to the browser.
     *
     * @param url Candidate URL.
     * @returns `false` for input without a scheme (not an absolute URL), for input the parser
     *          rejects, and for paths ending in a known document extension; `true` otherwise.
     */
    private isProcessableUrl(url: string): boolean {
        try {
            const parsedUrl = parseUrl(url);

            // The legacy parser accepts almost any string, so a missing scheme is the
            // practical signal for "not an absolute URL". Rejecting here avoids launching
            // a browser for something navigation would refuse anyway.
            if (!parsedUrl.protocol) {
                console.error(`Invalid URL: ${url}`);
                return false;
            }

            const path = parsedUrl.pathname?.toLowerCase() || '';

            // Skip PDFs and other non-HTML content
            if (ResearchSession.SKIP_EXTENSIONS.some(ext => path.endsWith(ext))) {
                console.error(`Skipping non-HTML content: ${url}`);
                return false;
            }

            return true;
        } catch (error) {
            console.error(`Invalid URL: ${url}`);
            return false;
        }
    }

    /**
     * Loads a page and returns its HTML as soon as the DOM content has loaded.
     *
     * @param url Absolute URL to fetch.
     * @returns The serialized HTML of the page.
     * @throws Error if the session budget is exhausted, the URL is not processable,
     *         the browser context is unavailable, or navigation fails.
     */
    private async fetchContent(url: string): Promise<string> {
        this.checkTimeout();

        if (!this.isProcessableUrl(url)) {
            throw new Error(`Cannot process URL: ${url}`);
        }

        await this.initializeBrowser();
        const context = this.context;
        if (!context) throw new Error('Browser context not initialized');

        // Launching the browser may have consumed part of the budget; check again.
        this.checkTimeout();

        const page = await context.newPage();
        try {
            // Never wait longer than what is left of the session budget. The lower bound
            // of 1 ms matters because Playwright treats a timeout of 0 as "no timeout".
            const navigationTimeout = Math.max(
                1,
                Math.min(ResearchSession.PAGE_LOAD_TIMEOUT_MS, this.remainingTime())
            );

            await page.goto(url, {
                waitUntil: 'domcontentloaded',
                timeout: navigationTimeout
            });

            // Get the HTML content immediately without waiting for additional content
            return await page.content();
        } catch (error) {
            console.error(`Error fetching content from ${url}:`, error);
            throw error;
        } finally {
            await page.close();
        }
    }

    /**
     * Fetches, extracts and analyzes one URL and folds the result into the session findings.
     *
     * Failures are logged and reported as an empty result rather than thrown.
     *
     * @param url   URL to process.
     * @param depth Current link depth; related URLs are only followed while below `options.maxDepth`.
     * @returns The search result, extracted content and analysis for the page, or
     *          `{ searchResults: [] }` if the URL was already visited or processing failed.
     */
    public async processUrl(url: string, depth: number = 0): Promise<StepResult> {
        this.log(`Processing URL: ${url} at depth ${depth}`);

        if (this.visitedUrls.has(url)) {
            this.log(`URL already visited: ${url}`);
            return { searchResults: [] };
        }

        try {
            this.log('Fetching content...');
            const htmlContent = await this.fetchContent(url);
            this.log('Content fetched, length:', htmlContent.length);

            this.log('Extracting content...');
            const content = await this.contentExtractor.extract(htmlContent, url);
            this.log('Content extracted, title:', content.title);
            this.visitedUrls.add(url);

            this.log('Analyzing content...');
            const analysis = await this.contentAnalyzer.analyze(content);
            this.log('Analysis complete:', {
                topics: analysis.topics.length,
                keyPoints: analysis.keyPoints.length,
                relevanceScore: analysis.relevanceScore
            });

            // Update progress
            this.progress.processedContent++;
            this.progress.visitedUrls.add(url);
            this.updateTimestamp();

            this.log('Processing findings...');
            await this.processFindings(content, analysis, depth);
            this.log('Findings processed');

            const result = {
                searchResults: [{
                    url,
                    title: content.title,
                    snippet: content.content.substring(0, 200),
                    relevanceScore: analysis.relevanceScore
                }],
                extractedContents: [content],
                analysis
            };

            this.log('URL processing complete:', {
                title: content.title,
                contentLength: content.content.length,
                relevanceScore: analysis.relevanceScore
            });

            return result;
        } catch (error) {
            console.error(`Error processing URL ${url}:`, error);
            return { searchResults: [] };
        }
    }

    /** Builds the empty plan, carrying over the depth and branching limits from the options. */
    private createInitialPlan(): ResearchPlan {
        return {
            steps: [],
            estimatedTime: 0,
            maxDepth: this.options.maxDepth,
            maxBranching: this.options.maxBranching,
            focusAreas: []
        };
    }

    /** Builds the zeroed progress record, stamped with the current time. */
    private initializeProgress(): ResearchProgress {
        return {
            completedSteps: 0,
            totalSteps: 0,
            visitedUrls: new Set<string>(),
            processedContent: 0,
            startTime: new Date().toISOString()
        };
    }

    /** Builds the empty findings container. */
    private initializeFindings(): ResearchFindings {
        return {
            mainTopics: [],
            keyInsights: [],
            sources: []
        };
    }

    /**
     * Merges one analyzed page into `findings` (topics, insights, sources) and, while below
     * the depth limit, hands the page to `processRelatedUrls`.
     *
     * Best-effort: any error is logged and swallowed so a single bad page does not abort
     * the caller.
     *
     * @param content  Extracted page content.
     * @param analysis Analysis of that content.
     * @param depth    Depth at which the page was found.
     */
    private async processFindings(content: ExtractedContent, analysis: ContentAnalysis, depth: number): Promise<void> {
        this.log('Processing findings for:', content.url);

        try {
            // Extract code blocks and technical sections first
            this.log('Extracting code blocks and technical sections...');
            const codeBlocks = this.extractCodeBlocks(content.content);
            const technicalSections = this.extractTechnicalSections(content.content);
            this.log('Found:', {
                codeBlocks: codeBlocks.length,
                technicalSections: technicalSections.length
            });

            // Update main topics with higher weight for technical content
            this.log('Updating topics...');
            this.log('Before update - Topics:', this.findings.mainTopics.length);
            this.updateTopics(analysis, technicalSections);
            this.log('After update - Topics:', this.findings.mainTopics.length);

            // Update key insights with code examples
            this.log('Updating insights...');
            this.log('Before update - Insights:', this.findings.keyInsights.length);
            this.updateInsights(analysis, codeBlocks, technicalSections);
            this.log('After update - Insights:', this.findings.keyInsights.length);

            // Update sources with technical content score
            this.log('Updating sources...');
            this.log('Before update - Sources:', this.findings.sources.length);
            this.updateSources(content, analysis, technicalSections.length > 0);
            this.log('After update - Sources:', this.findings.sources.length);

            // Process related URLs if within depth limit
            if (depth < this.options.maxDepth) {
                this.log(`Processing related URLs at depth ${depth}...`);
                await this.processRelatedUrls(content, depth + 1);
            } else {
                this.log(`Max depth ${this.options.maxDepth} reached, skipping related URLs`);
            }

            this.log('Findings processing complete');
        } catch (error) {
            console.error('Error processing findings:', error);
        }
    }

    /**
     * Collects every fenced (```…```) block and inline (`…`) code span, delimiters included,
     * in order of appearance.
     */
    private extractCodeBlocks(content: string): string[] {
        // Match both fenced code blocks and inline code
        const matches = content.match(/```[\s\S]*?```|`[^`]+`/g);
        return matches ? [...matches] : [];
    }

    /**
     * Returns the paragraphs (split on blank lines) that look technical: they mention one of
     * the indicator words, contain a code fence, or contain an inline code span.
     */
    private extractTechnicalSections(content: string): string[] {
        return content.split(/\n\n+/).filter(paragraph => {
            const lowerParagraph = paragraph.toLowerCase();
            return (
                ResearchSession.TECHNICAL_INDICATORS.some(indicator => lowerParagraph.includes(indicator)) ||
                paragraph.includes('```') ||
                /`[^`]+`/.test(paragraph)
            );
        });
    }

    /**
     * Merges the analysis topics into `findings.mainTopics`.
     *
     * A topic whose name appears in any technical section gets its confidence boosted by 30%
     * (capped at 1). Existing topics keep the highest importance seen so far. The list is
     * re-sorted by descending importance.
     */
    private updateTopics(analysis: ContentAnalysis, technicalSections: string[]): void {
        this.log('Updating topics with analysis:', {
            topicsCount: analysis.topics ? analysis.topics.length : 0,
            technicalSectionsCount: technicalSections.length
        });

        if (!analysis.topics || analysis.topics.length === 0) {
            this.log('No topics found in analysis');
            return;
        }

        // Lower-case the sections once instead of once per topic.
        const lowerSections = technicalSections.map(section => section.toLowerCase());

        for (const topic of analysis.topics) {
            this.log('Processing topic:', {
                name: topic.name,
                confidence: topic.confidence
            });

            const existingTopic = this.findings.mainTopics.find(t => t.name === topic.name);
            const topicNameLower = topic.name.toLowerCase();
            const hasTechnicalContent = lowerSections.some(section => section.includes(topicNameLower));

            const adjustedConfidence = hasTechnicalContent ?
                Math.min(1, topic.confidence * 1.3) :
                topic.confidence;

            this.log('Topic analysis:', {
                hasTechnicalContent,
                originalConfidence: topic.confidence,
                adjustedConfidence
            });

            if (existingTopic) {
                this.log('Updating existing topic:', existingTopic.name);
                existingTopic.importance = Math.max(existingTopic.importance, adjustedConfidence);
            } else {
                this.log('Adding new topic:', topic.name);
                this.findings.mainTopics.push({
                    name: topic.name,
                    importance: adjustedConfidence,
                    relatedTopics: [],
                    evidence: []
                });
            }
        }

        // Sort topics by importance
        this.findings.mainTopics.sort((a, b) => b.importance - a.importance);
        this.log('Updated topics count:', this.findings.mainTopics.length);
    }

    /**
     * Turns the analysis key points into insights.
     *
     * A point's importance is multiplied by 1.2 when related code exists and by 1.1 when a
     * related technical section exists. Points that reach `options.minRelevanceScore` are
     * appended (confidence capped at 1) with the related code and sections attached as
     * evidence. The list is re-sorted by descending confidence.
     */
    private updateInsights(analysis: ContentAnalysis, codeBlocks: string[], technicalSections: string[]): void {
        for (const point of analysis.keyPoints) {
            // Find related code examples
            const relatedCode = codeBlocks.filter(code =>
                this.isCodeRelatedToPoint(code, point.text)
            );

            // Find related technical sections
            const relatedTechnical = technicalSections.filter(section =>
                this.isSectionRelatedToPoint(section, point.text)
            );

            // Adjust confidence based on technical content
            let adjustedConfidence = point.importance;
            if (relatedCode.length > 0) adjustedConfidence *= 1.2;
            if (relatedTechnical.length > 0) adjustedConfidence *= 1.1;

            if (adjustedConfidence < this.options.minRelevanceScore) {
                continue;
            }

            // Convert code blocks and technical sections to Evidence objects
            const evidence: Evidence[] = [
                ...relatedCode.map(code => ({
                    claim: "Code example supporting the insight",
                    sources: [code],
                    confidence: 0.9
                })),
                ...relatedTechnical.map(section => ({
                    claim: "Technical documentation supporting the insight",
                    sources: [section],
                    confidence: 0.8
                }))
            ];

            this.findings.keyInsights.push({
                text: point.text,
                confidence: Math.min(1, adjustedConfidence),
                supportingEvidence: evidence,
                relatedTopics: point.topics
            });
        }

        // Sort insights by confidence
        this.findings.keyInsights.sort((a, b) => b.confidence - a.confidence);
    }

    /**
     * Records the page as a source unless its URL is already listed.
     *
     * @param hasTechnicalContent When true the credibility score is boosted by 20% (capped at 1).
     */
    private updateSources(content: ExtractedContent, analysis: ContentAnalysis, hasTechnicalContent: boolean): void {
        if (this.findings.sources.some(s => s.url === content.url)) {
            return;
        }

        const source = {
            url: content.url,
            title: content.title,
            credibilityScore: hasTechnicalContent ?
                Math.min(1, analysis.quality.credibilityScore * 1.2) :
                analysis.quality.credibilityScore,
            contributedFindings: analysis.keyPoints.map(point => point.text)
        };

        this.findings.sources.push(source);
    }

    /**
     * Splits text into its distinct lower-cased word tokens.
     *
     * Empty tokens (produced by `split` when the text starts or ends with punctuation, as
     * every back-ticked code span does) are dropped so they can never count as a shared term.
     */
    private static tokenize(text: string): Set<string> {
        return new Set(text.toLowerCase().split(/\W+/).filter(term => term.length > 0));
    }

    /** Counts the distinct word tokens of `b` that also occur in `a`. */
    private static countSharedTerms(a: string, b: string): number {
        const termsA = ResearchSession.tokenize(a);
        let shared = 0;
        for (const term of ResearchSession.tokenize(b)) {
            if (termsA.has(term)) shared++;
        }
        return shared;
    }

    /** A code block is related to a point when they share at least two distinct word tokens. */
    private isCodeRelatedToPoint(code: string, point: string): boolean {
        return ResearchSession.countSharedTerms(code, point) >= 2; // At least 2 common terms
    }

    /**
     * A section is related to a point when they share at least three distinct word tokens,
     * or when one (non-blank) text contains the other, compared case-insensitively.
     */
    private isSectionRelatedToPoint(section: string, point: string): boolean {
        if (ResearchSession.countSharedTerms(section, point) >= 3) { // At least 3 common terms
            return true;
        }

        const sectionLower = section.toLowerCase();
        const pointLower = point.toLowerCase();

        // An empty string is a substring of everything, so blank text proves nothing.
        if (sectionLower.trim() === '' || pointLower.trim() === '') {
            return false;
        }

        return sectionLower.includes(pointLower) || // Contains the entire point
               pointLower.includes(sectionLower); // Point contains the section
    }

    /**
     * Placeholder for following links found in a page; currently does nothing.
     *
     * @param content Page whose links would be followed.
     * @param depth   Depth to assign to the followed pages.
     */
    private async processRelatedUrls(content: ExtractedContent, depth: number): Promise<void> {
        // Extract URLs from content and process them
        // This would be implemented to handle actual URL extraction and processing
    }

    /** Stamps `timestamp.updated` with the current time. */
    private updateTimestamp(): void {
        this.timestamp.updated = new Date().toISOString();
    }

    /**
     * Marks the session completed and shuts the browser down.
     *
     * The references are cleared before closing and the browser is closed in a `finally`,
     * so a failing `context.close()` cannot leave a Chromium process behind. Such a failure
     * is still propagated to the caller. Calling this again afterwards closes nothing.
     */
    public async complete(): Promise<void> {
        this.status = 'completed';
        this.timestamp.completed = new Date().toISOString();

        // Cleanup browser
        const context = this.context;
        const browser = this.browser;
        this.context = null;
        this.browser = null;

        try {
            if (context) {
                await context.close();
            }
        } finally {
            if (browser) {
                await browser.close();
            }
        }
    }
}