// Metal MCP Server
//
// src/core
import natural from 'natural';
import { ContentAnalysis, Topic, KeyPoint, Entity, EntityType, EntityMention, Relationship, Citation, ContentQuality, AnalysisOptions } from '../types/analysis.js';
import { ExtractedContent } from '../types/content.js';

export class ContentAnalyzer {
    private tokenizer: natural.WordTokenizer;
    private tfidf: natural.TfIdf;
    private stemmer: typeof natural.PorterStemmerFr;
    private technicalTerms: Set<string>;
    private boilerplatePatterns: RegExp[];

    /**
     * Heuristically decides whether a piece of text reads as technical material.
     *
     * The text qualifies when it contains any word from a fixed indicator list
     * (case-insensitive, plain substring match), a triple-backtick fence, or a
     * backtick-delimited inline span.
     *
     * @param text - Text to inspect.
     * @returns `true` when at least one indicator is present.
     */
    private isTechnicalContent(text: string): boolean {
        const indicatorWords = [
            'example',
            'implementation',
            'usage',
            'api',
            'method',
            'function',
            'parameter',
            'return',
            'class',
            'interface',
            'object',
            'pattern'
        ];

        // Indicator words are matched as substrings of the lower-cased text.
        const haystack = text.toLowerCase();
        for (const word of indicatorWords) {
            if (haystack.includes(word)) {
                return true;
            }
        }

        // Markdown code markers are checked against the untouched text.
        if (text.includes('```')) {
            return true;
        }
        return /`[^`]+`/.test(text);
    }

    /**
     * Collects every word of the text that is a known technical term.
     *
     * The text is lower-cased and split on non-word characters. A word is kept
     * when it is longer than three characters, is in `this.technicalTerms`, and
     * is not rejected by `this.isStopWord` (defined outside this view).
     * Repeated words are returned once per occurrence. Because of the length
     * check, three-letter entries of the term set (e.g. 'api') never pass.
     *
     * @param text - Text to scan.
     * @returns Matching words in order of appearance, duplicates included.
     */
    private extractTechnicalTermsFromText(text: string): string[] {
        const matches: string[] = [];
        for (const candidate of text.toLowerCase().split(/\W+/)) {
            if (candidate.length <= 3) {
                continue;
            }
            if (!this.technicalTerms.has(candidate)) {
                continue;
            }
            if (this.isStopWord(candidate)) {
                continue;
            }
            matches.push(candidate);
        }
        return matches;
    }
    
    /**
     * Sets up the NLP helpers and the static vocabularies used by the analyzer:
     * a word tokenizer, a TF-IDF corpus, a stemmer, the set of recognised
     * technical terms and the regular expressions that flag boilerplate text.
     */
    constructor() {
        this.tokenizer = new natural.WordTokenizer();
        this.tfidf = new natural.TfIdf();
        // Every vocabulary in this class is English, so use the English Porter
        // stemmer. The French variant used previously stems English words with
        // French suffix rules.
        this.stemmer = natural.PorterStemmer;

        // Initialize technical terms focused on API wrappers and programming
        this.technicalTerms = new Set([
            // API and Design Patterns
            'api', 'wrapper', 'client', 'sdk', 'library', 'interface',
            'endpoint', 'request', 'response', 'http', 'rest', 'soap',
            'facade', 'adapter', 'proxy', 'decorator', 'factory',

            // Implementation Concepts
            'implementation', 'method', 'function', 'class', 'object',
            'parameter', 'argument', 'return', 'async', 'await', 'promise',
            'callback', 'error', 'exception', 'handler', 'middleware',

            // Best Practices
            'pattern', 'practice', 'standard', 'convention', 'principle',
            'solid', 'dry', 'separation', 'concern', 'abstraction',
            'encapsulation', 'inheritance', 'polymorphism',

            // Testing and Quality
            'test', 'mock', 'stub', 'assertion', 'coverage', 'unit',
            'integration', 'validation', 'verification', 'documentation',

            // Common Features
            'authentication', 'authorization', 'security', 'cache',
            'rate', 'limit', 'throttle', 'retry', 'timeout', 'logging'
        ]);

        // Initialize boilerplate patterns (site chrome, legal footers, sign-up prompts)
        this.boilerplatePatterns = [
            /copyright/i,
            /all rights reserved/i,
            /terms of service/i,
            /privacy policy/i,
            /cookie policy/i,
            /contact us/i,
            /about us/i,
            /follow us/i,
            /subscribe/i,
            /sign up/i,
            /log in/i,
            /register/i
        ];
    }

    /**
     * Runs the full analysis pipeline over one extracted document.
     *
     * Steps: tokenize the content and register it in the TF-IDF corpus, extract
     * topics, key points and entities, derive entity relationships, sentiment
     * and quality metrics, merge similar topics, then assemble the result
     * (relevance score and citations come from helpers defined outside this view).
     *
     * @param content - Extracted page content; `content.content` is the text analysed.
     * @param options - Optional limits and thresholds forwarded to the extractors.
     * @returns The assembled analysis result.
     */
    public async analyze(content: ExtractedContent, options: AnalysisOptions = {}): Promise<ContentAnalysis> {
        console.log('Starting content analysis for URL:', content.url);
        console.log('Content length:', content.content.length);

        // Start from a fresh corpus on every call. Otherwise documents pile up
        // across calls and document index 0 (the one `listTerms(0)` reads) stays
        // bound to the first content ever analysed by this instance.
        this.tfidf = new natural.TfIdf();

        // Prepare content for analysis
        const tokens = this.tokenizeContent(content.content);
        this.tfidf.addDocument(tokens);
        console.log('Tokenized content length:', tokens.length);

        // Extract topics and calculate relevance
        console.log('Extracting topics...');
        const topics = await this.extractTopics(content, options);
        console.log('Found topics:', topics.length, topics.map(t => t.name));

        console.log('Extracting key points...');
        const keyPoints = this.extractKeyPoints(content, topics, options);
        console.log('Found key points:', keyPoints.length);

        console.log('Extracting entities...');
        const entities = this.extractEntities(content);
        console.log('Found entities:', entities.length);

        const relationships = this.findRelationships(entities, content);
        const sentiment = this.analyzeSentiment(content.content);
        const quality = this.assessQuality(content);

        // Merge similar topics (key points above were scored against the unmerged list)
        console.log('Merging similar topics...');
        const mergedTopics = this.mergeSimilarTopics(topics);
        console.log('After merging:', mergedTopics.length, mergedTopics.map(t => t.name));

        const result = {
            relevanceScore: this.calculateRelevanceScore(content, mergedTopics),
            topics: mergedTopics,
            keyPoints: this.deduplicateKeyPoints(keyPoints),
            entities,
            sentiment,
            relationships,
            citations: this.extractCitations(content),
            quality
        };

        console.log('Analysis complete. Topics:', result.topics.length);
        console.log('Key points:', result.keyPoints.length);
        console.log('Relevance score:', result.relevanceScore);

        return result;
    }

    /**
     * Lower-cases the text and splits it into word tokens with the shared
     * tokenizer.
     *
     * @param text - Raw text.
     * @returns The token list, or an empty array when the tokenizer yields a falsy result.
     */
    private tokenizeContent(text: string): string[] {
        const lowered = text.toLowerCase();
        const tokens = this.tokenizer.tokenize(lowered);
        return tokens ? tokens : [];
    }

    /**
     * Derives candidate topics from the content and scores them.
     *
     * The content is split into sections on blank lines. Each section can
     * contribute topic mentions in three ways:
     *  - a phrase captured by one of the indicator patterns (weighted 1.0 - 1.3),
     *  - each known technical term, when the section looks technical (0.7 each),
     *  - identifiers found by `extractCodeKeywords`, when the section contains a
     *    backtick (0.8 each).
     * Accumulated weight is turned into a confidence in [0, 1], boosted for
     * topics seen in more than one distinct section and for topics with more
     * than two related keywords, filtered by `minConfidence`, sorted
     * descending and truncated to `maxTopics`.
     *
     * @param content - Extracted content; only `content.content` is read.
     * @param options - `maxTopics` (default 8) and `minConfidence` (default 0.15).
     * @returns Topics ordered by descending confidence.
     */
    private async extractTopics(content: ExtractedContent, options: AnalysisOptions): Promise<Topic[]> {
        console.log('Extracting topics from content...');
        // A count of 0 still falls back to the default, as before.
        const maxTopics = options.maxTopics || 8;
        // 0 is a legitimate threshold ("accept everything"), so only fall back
        // to the default when the option is actually absent.
        const minConfidence = options.minConfidence ?? 0.15;

        // Split content into sections
        const sections = content.content.split(/\n\n+/);
        console.log(`Found ${sections.length} sections to analyze`);

        // Initialize topic tracking
        type TopicMention = { count: number; contexts: string[]; keywords: Set<string> };
        const topicMentions = new Map<string, TopicMention>();

        // Adds `weight` to a topic's tally, creating the entry on first sight.
        // A section is recorded as a context once per topic, so repeated hits
        // inside one section cannot trigger the "multiple contexts" boost below.
        const recordMention = (name: string, weight: number, section: string): TopicMention => {
            let entry = topicMentions.get(name);
            if (!entry) {
                entry = { count: 0, contexts: [], keywords: new Set<string>() };
                topicMentions.set(name, entry);
            }
            entry.count += weight;
            if (!entry.contexts.includes(section)) {
                entry.contexts.push(section);
            }
            return entry;
        };

        // Phrase patterns whose first capture group names a topic
        const topicIndicators = [
            // General technical patterns
            { pattern: /(?:using|implementing|creating)\s+(\w+(?:\s+\w+){0,2})\s+(?:pattern|approach|method)/i, weight: 1.2 },
            { pattern: /(?:best\s+practice|recommended)\s+(?:is|for)\s+(\w+(?:\s+\w+){0,2})/i, weight: 1.1 },
            { pattern: /(\w+(?:\s+\w+){0,2})\s+implementation/i, weight: 1.0 },
            { pattern: /(\w+(?:\s+\w+){0,2})\s+(?:wrapper|api|interface)/i, weight: 1.0 },

            // Domain-specific patterns
            { pattern: /(?:quantum)\s+(\w+(?:\s+\w+){0,2})/i, weight: 1.3 },
            { pattern: /(\w+(?:\s+\w+){0,2})\s+(?:qubit|qubits)/i, weight: 1.3 },
            { pattern: /(\w+(?:\s+\w+){0,2})\s+(?:algorithm|computation)/i, weight: 1.2 },
            { pattern: /(?:advances?|developments?|breakthroughs?)\s+in\s+(\w+(?:\s+\w+){0,2})/i, weight: 1.2 }
        ];

        // Analyze each section
        sections.forEach((section, index) => {
            console.log(`Analyzing section ${index + 1}...`);
            const sectionLower = section.toLowerCase();

            // Look for topic indicators (the patterns are non-global, so each
            // contributes at most one match per section)
            for (const { pattern, weight } of topicIndicators) {
                const matches = sectionLower.match(pattern);
                if (matches && matches[1]) {
                    const topic = matches[1].trim();
                    const entry = recordMention(topic, weight, section);

                    // Extract related keywords
                    for (const keyword of this.extractKeywords(section)) {
                        entry.keywords.add(keyword);
                    }

                    console.log(`Found topic: ${topic} (weight: ${weight})`);
                }
            }

            // Look for technical content
            if (this.isTechnicalContent(section)) {
                for (const term of this.extractTechnicalTermsFromText(section)) {
                    recordMention(term, 0.7, section);
                }
            }

            // Look for code examples (any backtick also covers triple-backtick fences)
            if (section.includes('`')) {
                for (const keyword of this.extractCodeKeywords(section)) {
                    recordMention(keyword, 0.8, section);
                    console.log(`Found code keyword: ${keyword}`);
                }
            }
        });

        console.log(`Found ${topicMentions.size} potential topics`);

        // Convert to topics with enhanced scoring
        const topics: Topic[] = Array.from(topicMentions.entries())
            .map(([name, data]) => {
                // Base confidence: three weight units saturate the score
                let confidence = Math.min(1, data.count / 3);

                // Boost confidence for topics with multiple contexts
                if (data.contexts.length > 1) {
                    confidence *= 1.2;
                }

                // Boost confidence for topics with technical keywords
                if (data.keywords.size > 2) {
                    confidence *= 1.1;
                }

                return {
                    name,
                    confidence: Math.min(1, confidence),
                    keywords: Array.from(data.keywords)
                };
            })
            .filter(topic => {
                const meetsThreshold = topic.confidence >= minConfidence;
                console.log(`Topic ${topic.name}: confidence ${topic.confidence} ${meetsThreshold ? 'accepted' : 'rejected'}`);
                return meetsThreshold;
            })
            .sort((a, b) => b.confidence - a.confidence)
            .slice(0, maxTopics);

        console.log(`Extracted ${topics.length} topics above confidence threshold`);
        return topics;
    }

    /**
     * Returns the known technical terms that occur in the text.
     *
     * The filtering rules are exactly those of `extractTechnicalTermsFromText`
     * (lower-cased words longer than three characters that are in the technical
     * term set and are not stop words). This method delegates to it so the two
     * cannot drift apart.
     *
     * @param text - Text to scan.
     * @returns Matching words in order of appearance, duplicates included.
     */
    private extractKeywords(text: string): string[] {
        return this.extractTechnicalTermsFromText(text);
    }

    /**
     * Pulls identifier-like keywords out of code-bearing text.
     *
     * Captures the word following `class`, `function`, `method`, `interface`
     * or `import`, and the quoted module name of a `require`. Results are
     * lower-cased and de-duplicated.
     *
     * @param text - Text that may contain code.
     * @returns Distinct lower-cased keywords in first-seen order.
     */
    private extractCodeKeywords(text: string): string[] {
        // The regexes are created per call, so their `lastIndex` always starts at 0.
        const codePatterns = [
            /class\s+(\w+)/g,
            /function\s+(\w+)/g,
            /method\s+(\w+)/g,
            /interface\s+(\w+)/g,
            /import\s+(\w+)/g,
            // Accepts both `require 'x'` and the call form `require('x')`. The
            // previous pattern insisted on whitespace directly before the quote
            // and therefore never matched the call form.
            /require\s*\(?\s*['"](.+?)['"]/g
        ];

        const keywords = new Set<string>();
        for (const pattern of codePatterns) {
            let match: RegExpExecArray | null;
            while ((match = pattern.exec(text)) !== null) {
                if (match[1]) {
                    keywords.add(match[1].toLowerCase());
                }
            }
        }

        return Array.from(keywords);
    }

    /**
     * Ranks the terms of `text` by TF-IDF weight.
     *
     * The text is tokenized and scored in its own single-document TF-IDF corpus,
     * so the result depends only on the argument. Previously the tokens were
     * computed but unused, and terms were always read from document 0 of the
     * shared corpus regardless of `text`. Terms of two characters or fewer and
     * stop words are dropped; known technical terms get a 1.5x boost.
     *
     * @param text - Text whose terms are ranked.
     * @returns Terms with scores, highest score first; empty when `text` has no tokens.
     */
    private getImportantTerms(text: string): Array<{term: string; score: number}> {
        const tokens = this.tokenizeContent(text);
        if (tokens.length === 0) {
            return [];
        }

        const corpus = new natural.TfIdf();
        corpus.addDocument(tokens);

        const terms: Array<{term: string; score: number}> = [];
        corpus.listTerms(0).forEach(item => {
            const term = item.term;
            if (term.length > 2 && !this.isStopWord(term)) {
                // Boost score for technical terms
                const score = this.technicalTerms.has(term) ? item.tfidf * 1.5 : item.tfidf;
                terms.push({ term, score });
            }
        });

        return terms.sort((a, b) => b.score - a.score);
    }

    /**
     * Greedily groups similar or related topics and collapses each group into one.
     *
     * Topics are visited in input order. Each topic not yet consumed seeds a
     * group made of itself plus every other unconsumed topic that is similar
     * (`areTopicsSimilar`) or related (`areTopicsRelated`) to the seed. The
     * group becomes a single topic carrying the best name, the highest
     * confidence and the union of keywords. Grouping is relative to the seed
     * only; it is not transitive.
     *
     * The seed is included explicitly. The original code relied on a topic
     * always testing as similar to itself, which left its singleton `else`
     * branch unreachable.
     *
     * @param topics - Topics to merge.
     * @returns One merged topic per group, in order of each group's seed.
     */
    private mergeSimilarTopics(topics: Topic[]): Topic[] {
        const merged: Topic[] = [];
        const processed = new Set<string>();

        for (const seed of topics) {
            if (processed.has(seed.name)) continue;

            // The seed plus every still-unclaimed topic that matches it
            const group = topics.filter(candidate =>
                !processed.has(candidate.name) &&
                (candidate === seed ||
                    this.areTopicsSimilar(seed, candidate) ||
                    this.areTopicsRelated(seed, candidate))
            );

            merged.push({
                name: this.selectBestTopicName(group.map(t => t.name)),
                confidence: Math.max(...group.map(t => t.confidence)),
                keywords: Array.from(new Set(group.flatMap(t => t.keywords)))
            });
            group.forEach(t => processed.add(t.name));
        }

        return merged;
    }

    /**
     * Tests whether two topics are near-duplicates.
     *
     * They are similar when their names stem to the same string, or when more
     * than half of the smaller keyword set also appears in the other set.
     *
     * @param topic1 - First topic.
     * @param topic2 - Second topic.
     * @returns `true` when the topics should be treated as the same.
     */
    private areTopicsSimilar(topic1: Topic, topic2: Topic): boolean {
        // Check for stem similarity
        const stem1 = this.stemmer.stem(topic1.name);
        const stem2 = this.stemmer.stem(topic2.name);
        if (stem1 === stem2) return true;

        // Check for keyword overlap
        const keywords1 = new Set(topic1.keywords);
        const keywords2 = new Set(topic2.keywords);
        const smallerSize = Math.min(keywords1.size, keywords2.size);
        // Without keywords there is nothing to compare. Guard explicitly instead
        // of relying on 0 / 0 producing NaN, which happens to compare as false.
        if (smallerSize === 0) return false;

        let overlap = 0;
        for (const keyword of keywords1) {
            if (keywords2.has(keyword)) overlap++;
        }
        return overlap / smallerSize > 0.5;
    }

    /**
     * Tests whether two topic names match one of a fixed list of word pairs.
     *
     * A pair matches when one lower-cased name contains the first word and the
     * other contains the second, in either direction (substring match).
     *
     * @param topic1 - First topic.
     * @param topic2 - Second topic.
     * @returns `true` when some pair links the two names.
     */
    private areTopicsRelated(topic1: Topic, topic2: Topic): boolean {
        const firstName = topic1.name.toLowerCase();
        const secondName = topic2.name.toLowerCase();

        const relatedPairs = [
            ['api', 'wrapper'],
            ['wrapper', 'implementation'],
            ['pattern', 'practice'],
            ['method', 'interface'],
            ['class', 'object'],
            ['error', 'handling'],
            ['authentication', 'security']
        ];

        for (const [left, right] of relatedPairs) {
            const forward = firstName.includes(left) && secondName.includes(right);
            const backward = firstName.includes(right) && secondName.includes(left);
            if (forward || backward) {
                return true;
            }
        }
        return false;
    }

    /**
     * Picks the representative name for a group of merged topics.
     *
     * The first name that is a known technical term (case-insensitive) wins.
     * Otherwise the longest name wins, and the earliest one on a tie. The input
     * array is left untouched; the original sorted it in place.
     *
     * @param names - Candidate names; expected to be non-empty.
     * @returns The chosen name.
     */
    private selectBestTopicName(names: string[]): string {
        // Prefer technical terms
        const technicalName = names.find(name =>
            this.technicalTerms.has(name.toLowerCase())
        );
        if (technicalName !== undefined) {
            return technicalName;
        }

        // Otherwise use the longest name (strict `>` keeps the first on ties)
        let longest = names[0];
        for (const name of names) {
            if (name.length > longest.length) {
                longest = name;
            }
        }
        return longest;
    }

    /**
     * Tests whether two terms are related.
     *
     * They are related when they share a stem, or when one contains the first
     * word and the other the second word of a fixed pair, in either direction.
     * The substring checks are case-sensitive.
     *
     * @param term1 - First term.
     * @param term2 - Second term.
     * @returns `true` when the terms are considered related.
     */
    private areTermsRelated(term1: string, term2: string): boolean {
        // Identical stems settle it immediately
        const firstStem = this.stemmer.stem(term1);
        const secondStem = this.stemmer.stem(term2);
        if (firstStem === secondStem) {
            return true;
        }

        // Otherwise fall back to the table of associated words
        const relatedPairs = [
            ['api', 'wrapper'],
            ['wrapper', 'implementation'],
            ['pattern', 'practice'],
            ['method', 'interface'],
            ['class', 'object'],
            ['error', 'handling'],
            ['authentication', 'security']
        ];

        for (const [left, right] of relatedPairs) {
            if (term1.includes(left) && term2.includes(right)) {
                return true;
            }
            if (term1.includes(right) && term2.includes(left)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Chooses a display name from a main term and its related terms.
     *
     * The first candidate (main term first, then related terms in order) that
     * is a known technical term is used, falling back to the main term. The
     * first character of the result is upper-cased.
     *
     * @param mainTerm - Primary term.
     * @param relatedTerms - Additional candidate terms.
     * @returns The chosen term with an upper-cased first character.
     */
    private selectTopicName(mainTerm: string, relatedTerms: string[]): string {
        const candidates = [mainTerm, ...relatedTerms];
        const preferred = candidates.find(candidate => this.technicalTerms.has(candidate));
        const chosen = preferred !== undefined ? preferred : mainTerm;

        const head = chosen.charAt(0).toUpperCase();
        const tail = chosen.slice(1);
        return head + tail;
    }

    /**
     * Extracts the most important sentences of the content as key points.
     *
     * Paragraphs (split on blank lines) are classified as best-practice
     * sections, implementation sections, or neither; a paragraph may be both.
     * Sentences longer than 20 characters are scored with
     * `calculateSentenceImportance`:
     *  - best-practice statements get a 1.3x boost,
     *  - implementation guidance gets a 1.2x boost and the section's code
     *    examples as extra evidence,
     *  - sentences from other paragraphs are kept only if `isInsightful`.
     * Candidates at or above `minImportance` are ranked, de-duplicated and then
     * truncated. De-duplication now runs before truncation, so near-duplicates
     * (e.g. a sentence from a paragraph that is both best-practice and
     * implementation) no longer use up result slots.
     *
     * @param content - Extracted content.
     * @param topics - Topics used for scoring and tagging.
     * @param options - `minImportance` (default 0.25) and `maxKeyPoints` (default 15).
     * @returns At most `maxKeyPoints` key points, most important first.
     */
    private extractKeyPoints(content: ExtractedContent, topics: Topic[], options: AnalysisOptions): KeyPoint[] {
        // Split content into paragraphs first
        const paragraphs = content.content.split(/\n\n+/);
        const keyPoints: KeyPoint[] = [];
        // 0 is a valid threshold, so only default when the option is absent.
        const minImportance = options.minImportance ?? 0.25;
        // A count of 0 still falls back to the default, as before.
        const maxKeyPoints = options.maxKeyPoints || 15;

        // Sentence splitter shared by all three passes
        const toSentences = (block: string): string[] =>
            block.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 20);

        // Appends a key point when the (already boosted) importance clears the threshold
        const addIfImportant = (sentence: string, importance: number, extraEvidence: string[]): void => {
            if (importance < minImportance) return;
            keyPoints.push({
                text: sentence.trim(),
                importance,
                topics: this.findRelatedTopics(sentence, topics),
                supportingEvidence: [
                    ...this.findSupportingEvidence(sentence, content),
                    ...extraEvidence
                ]
            });
        };

        // First pass: identify best practice and implementation sections
        const bestPracticeSections = paragraphs.filter(p =>
            /best\s+practices?|recommended|should|must|guidelines?/i.test(p)
        );
        const implementationSections = paragraphs.filter(p =>
            /implementation|example|usage|how\s+to|approach/i.test(p) ||
            p.includes('```') ||
            /\b(function|class|method|interface)\b/.test(p)
        );

        // Process best practice sections
        for (const section of bestPracticeSections) {
            for (const sentence of toSentences(section)) {
                if (this.isBestPracticeStatement(sentence)) {
                    // Boost best practices
                    addIfImportant(sentence, this.calculateSentenceImportance(sentence, topics) * 1.3, []);
                }
            }
        }

        // Process implementation sections
        for (const section of implementationSections) {
            for (const sentence of toSentences(section)) {
                if (this.isImplementationGuidance(sentence)) {
                    // Boost implementation guidance
                    const importance = this.calculateSentenceImportance(sentence, topics) * 1.2;
                    if (importance >= minImportance) {
                        addIfImportant(sentence, importance, this.extractCodeExamples(section));
                    }
                }
            }
        }

        // Process remaining paragraphs for other insights
        const bestPracticeSet = new Set(bestPracticeSections);
        const implementationSet = new Set(implementationSections);
        for (const paragraph of paragraphs) {
            if (bestPracticeSet.has(paragraph) || implementationSet.has(paragraph)) continue;
            for (const sentence of toSentences(paragraph)) {
                const importance = this.calculateSentenceImportance(sentence, topics);
                if (importance >= minImportance && this.isInsightful(sentence)) {
                    addIfImportant(sentence, importance, []);
                }
            }
        }

        // Rank, drop near-duplicates, and only then truncate
        const ranked = [...keyPoints].sort((a, b) => b.importance - a.importance);
        return this.deduplicateKeyPoints(ranked).slice(0, maxKeyPoints);
    }

    /**
     * Tests whether a sentence reads like a recommendation or best practice.
     *
     * The lower-cased sentence must contain at least one indicator word
     * (obligation, design vocabulary, improvement, or convention words), and
     * the sentence must not be boilerplate according to `this.isBoilerplate`
     * (defined outside this view).
     *
     * @param sentence - Sentence to classify.
     * @returns `true` for non-boilerplate sentences containing an indicator word.
     */
    private isBestPracticeStatement(sentence: string): boolean {
        const lowered = sentence.toLowerCase();

        const mentionsIndicator =
            /\b(?:should|must|recommend|best|practice|important|key|essential|avoid|ensure)\b/i.test(lowered) ||
            /\b(?:pattern|approach|strategy|technique|principle)\b/i.test(lowered) ||
            /\b(?:better|improve|optimize|enhance)\b/i.test(lowered) ||
            /\b(?:common|typical|standard|conventional)\b/i.test(lowered);

        if (!mentionsIndicator) {
            return false;
        }
        return !this.isBoilerplate(sentence);
    }

    /**
     * Tests whether a sentence gives implementation-level guidance.
     *
     * The lower-cased sentence must contain at least one word from the action,
     * code-structure, signature or example vocabularies, and the sentence must
     * not be boilerplate according to `this.isBoilerplate` (defined outside
     * this view).
     *
     * @param sentence - Sentence to classify.
     * @returns `true` for non-boilerplate sentences containing an indicator word.
     */
    private isImplementationGuidance(sentence: string): boolean {
        const vocabularies = [
            /\b(?:implement|create|build|develop|use|initialize|configure)\b/i,
            /\b(?:method|function|class|interface|object)\b/i,
            /\b(?:parameter|argument|return|value|type)\b/i,
            /\b(?:example|sample|demo|code)\b/i
        ];

        const lowered = sentence.toLowerCase();
        for (const vocabulary of vocabularies) {
            if (vocabulary.test(lowered)) {
                // An indicator was found; boilerplate is the only remaining veto.
                return !this.isBoilerplate(sentence);
            }
        }
        return false;
    }

    /**
     * Tests whether a sentence carries enough technical substance to keep.
     *
     * Requires at least two tokens that are known technical terms, more than
     * 30 characters, a negative boilerplate check, and at least one ASCII letter.
     *
     * @param sentence - Sentence to evaluate.
     * @returns `true` when all four conditions hold.
     */
    private isInsightful(sentence: string): boolean {
        let technicalHits = 0;
        for (const token of this.tokenizeContent(sentence)) {
            if (this.technicalTerms.has(token)) {
                technicalHits += 1;
            }
        }

        // Needs several technical terms
        if (technicalHits < 2) {
            return false;
        }
        // Too short to be informative
        if (sentence.length <= 30) {
            return false;
        }
        if (this.isBoilerplate(sentence)) {
            return false;
        }
        // Reject strings made up solely of non-letters
        return !/^\s*[^a-zA-Z]*\s*$/.test(sentence);
    }

    /**
     * Collects the code examples embedded in Markdown-style text.
     *
     * Fenced blocks (triple backticks) are returned first, followed by inline
     * spans (single backticks). Inline spans are searched only in the text that
     * remains after removing the fenced blocks. Scanning the full text, as the
     * original did, makes the inline pattern match from the last backtick of an
     * opening fence to the first backtick of the closing fence, which reports
     * each block's content a second time.
     *
     * @param text - Text that may contain code.
     * @returns Fenced blocks followed by inline spans, delimiters included.
     */
    private extractCodeExamples(text: string): string[] {
        // Extract code blocks
        const fencedBlocks = text.match(/```[\s\S]*?```/g) || [];

        // Blank out the fenced blocks so their backticks cannot start or end an
        // inline match. A space keeps the surrounding text separated.
        const withoutBlocks = text.replace(/```[\s\S]*?```/g, ' ');

        // Extract inline code
        const inlineSpans = withoutBlocks.match(/`[^`]+`/g) || [];

        return [...fencedBlocks, ...inlineSpans];
    }

    /**
     * Removes key points whose text duplicates or closely resembles an earlier one.
     *
     * Each point's text is normalized, then compared with the normalized texts
     * already accepted. Exact matches and matches flagged by
     * `hasVerySimilarPoint` are dropped. The first occurrence wins and input
     * order is preserved.
     *
     * @param keyPoints - Candidate key points.
     * @returns The retained key points.
     */
    private deduplicateKeyPoints(keyPoints: KeyPoint[]): KeyPoint[] {
        const acceptedTexts = new Set<string>();

        return keyPoints.filter(point => {
            const key = this.normalizeText(point.text);
            const isRepeat = acceptedTexts.has(key) || this.hasVerySimilarPoint(key, acceptedTexts);
            if (isRepeat) {
                return false;
            }
            acceptedTexts.add(key);
            return true;
        });
    }

    /**
     * Canonicalizes text for duplicate detection: lower-case, punctuation
     * removed, whitespace runs collapsed to one space, ends trimmed.
     *
     * Punctuation is stripped before whitespace is collapsed. In the opposite
     * order, removing a free-standing punctuation mark ("a - b") leaves a
     * double space behind, which adds a spurious empty word to the word-set
     * comparison.
     *
     * @param text - Text to normalize.
     * @returns The normalized text.
     */
    private normalizeText(text: string): string {
        return text.toLowerCase()
            .replace(/[^\w\s]/g, '')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Tests whether any already-seen text is nearly identical to `text`.
     *
     * @param text - Normalized candidate text.
     * @param seen - Normalized texts accepted so far.
     * @returns `true` when the similarity to some seen text exceeds 0.8.
     */
    private hasVerySimilarPoint(text: string, seen: Set<string>): boolean {
        return Array.from(seen).some(
            previous => this.calculateTextSimilarity(text, previous) > 0.8
        );
    }

    /**
     * Computes the Jaccard similarity of the two texts' word sets.
     *
     * Words are obtained by splitting on single spaces. The score is the number
     * of distinct shared words divided by the number of distinct words overall.
     *
     * @param text1 - First text.
     * @param text2 - Second text.
     * @returns A value in [0, 1]; 1 means identical word sets.
     */
    private calculateTextSimilarity(text1: string, text2: string): number {
        const leftWords = new Set(text1.split(' '));
        const rightWords = new Set(text2.split(' '));

        let shared = 0;
        leftWords.forEach(word => {
            if (rightWords.has(word)) {
                shared += 1;
            }
        });

        // |A ∪ B| = |A| + |B| - |A ∩ B|
        const unionSize = leftWords.size + rightWords.size - shared;
        return shared / unionSize;
    }

    /**
     * Scores how important a sentence is, capped at 1.
     *
     * The score is the sum of:
     *  - extra weight (0.1 - 0.3) for each technical token containing one of
     *    the emphasised terms,
     *  - `confidence * 0.8` for every topic keyword present among the tokens,
     *  - half the share of tokens that are technical terms,
     *  - 0.3 when the sentence looks like code (backticks or code keywords),
     *  - 0.2 when it contains recommendation or example vocabulary.
     *
     * A sentence with no tokens contributes a density of 0. Previously the
     * 0 / 0 division made the whole score NaN.
     *
     * @param sentence - Sentence to score.
     * @param topics - Topics whose keywords raise the score.
     * @returns Importance in [0, 1].
     */
    private calculateSentenceImportance(sentence: string, topics: Topic[]): number {
        const tokens = this.tokenizeContent(sentence);
        const tokenSet = new Set(tokens);
        const lowerSentence = sentence.toLowerCase();
        let importance = 0;
        let technicalTermCount = 0;

        // Check for code-like content (a single backtick also covers fences)
        const hasCodeExample = sentence.includes('`') ||
                        /\b(function|class|const|let|var|import|export)\b/.test(sentence);

        // Count technical terms with weighted categories
        const termWeights = {
            implementation: 1.2,  // Implementation details
            pattern: 1.2,        // Design patterns
            practice: 1.2,       // Best practices
            test: 1.1,          // Testing related
            error: 1.1,         // Error handling
            api: 1.3,           // API specific
            wrapper: 1.3,       // Wrapper specific
            method: 1.1,        // Method related
            class: 1.1          // Class related
        };

        tokens.forEach(token => {
            if (this.technicalTerms.has(token)) {
                technicalTermCount++;
                // Apply additional weight for key terms
                for (const [term, weight] of Object.entries(termWeights)) {
                    if (token.includes(term)) {
                        importance += weight - 1; // Add the extra weight
                    }
                }
            }
        });

        // Calculate topic relevance: one contribution per matching keyword
        topics.forEach(topic => {
            topic.keywords.forEach(keyword => {
                if (tokenSet.has(keyword.toLowerCase())) {
                    importance += topic.confidence * 0.8; // Reduced weight per topic
                }
            });
        });

        // Boost importance based on technical term density (guarding 0 / 0)
        const technicalDensity = tokens.length > 0 ? technicalTermCount / tokens.length : 0;
        importance += technicalDensity * 0.5; // Reduced multiplier

        // Boost for code examples
        if (hasCodeExample) {
            importance += 0.3;
        }

        // Boost for sentences that look like best practices or implementation guidance
        const guidanceMarkers = ['should', 'best practice', 'recommend', 'pattern', 'example'];
        if (guidanceMarkers.some(marker => lowerSentence.includes(marker))) {
            importance += 0.2;
        }

        return Math.min(importance, 1);
    }

    /**
     * Lists the topics that a sentence touches on.
     *
     * A topic is related when at least one of its keywords (lower-cased)
     * appears among the sentence's tokens.
     *
     * @param sentence - Sentence to tag.
     * @param topics - Candidate topics.
     * @returns Names of the related topics, in input order.
     */
    private findRelatedTopics(sentence: string, topics: Topic[]): string[] {
        const sentenceTokens = new Set(this.tokenizeContent(sentence));
        const related: string[] = [];

        for (const topic of topics) {
            const mentioned = topic.keywords.some(keyword =>
                sentenceTokens.has(keyword.toLowerCase())
            );
            if (mentioned) {
                related.push(topic.name);
            }
        }
        return related;
    }

    /**
     * Finds other sentences in the content that back up the given sentence.
     *
     * The content is split on sentence punctuation. A candidate qualifies when
     * it is not the sentence itself, at least two of the sentence's tokens
     * (counted with repetition) occur in the candidate, and the candidate
     * contains a known technical term.
     *
     * @param sentence - Sentence needing evidence.
     * @param content - Extracted content to search.
     * @returns Qualifying sentences in document order.
     */
    private findSupportingEvidence(sentence: string, content: ExtractedContent): string[] {
        const sourceTokens = this.tokenizeContent(sentence);

        // Candidate sentences: trimmed, non-empty fragments of the content
        const candidates = content.content
            .split(/[.!?]+/)
            .map(fragment => fragment.trim())
            .filter(fragment => fragment.length > 0);

        return candidates.filter(candidate => {
            if (candidate === sentence) {
                return false;
            }

            const candidateTokens = this.tokenizeContent(candidate);

            // Count the source tokens that reappear in the candidate
            let sharedCount = 0;
            for (const token of sourceTokens) {
                if (candidateTokens.includes(token)) {
                    sharedCount += 1;
                }
            }
            if (sharedCount < 2) {
                return false;
            }

            // Evidence must itself mention something technical
            return candidateTokens.some(token => this.technicalTerms.has(token));
        });
    }

    /**
     * Extracts named technical entities from the content: standards
     * identifiers (FIPS / SP / RFC followed by a number) and a fixed list of
     * cryptographic algorithm names, each with all of its mentions.
     *
     * Each distinct name yields exactly one entity. The global regex match
     * returns one entry per occurrence, so the original code emitted a repeated
     * name as several identical entities.
     *
     * @param content - Extracted content; only `content.content` is read.
     * @returns Entities in first-occurrence order, standards before algorithms.
     */
    private extractEntities(content: ExtractedContent): Entity[] {
        const entities: Entity[] = [];
        const text = content.content;

        // Adds one entity per distinct match of `pattern`
        const collect = (pattern: RegExp, type: EntityType): void => {
            const distinctNames = new Set(text.match(pattern) || []);
            distinctNames.forEach(name => {
                entities.push({
                    name,
                    type,
                    mentions: this.findMentions(text, name)
                });
            });
        };

        // Look for standard numbers (e.g., FIPS 203)
        collect(/(?:FIPS|SP|RFC)\s+\d+(?:-\d+)?/g, 'standard' as EntityType);

        // Look for algorithm names
        collect(
            /(?:ML-KEM|ML-DSA|SLH-DSA|CRYSTALS-Kyber|CRYSTALS-Dilithium|SPHINCS\+|FALCON)(?:-\d+)?/g,
            'algorithm' as EntityType
        );

        return entities;
    }

    /**
     * Locates every occurrence of `term` in `text` (case-sensitive, overlapping
     * occurrences included) and records its position and surrounding context.
     *
     * @param text - Text to search.
     * @param term - Exact string to look for.
     * @returns One mention per occurrence, with up to 50 characters of context
     *          on each side; empty when `term` is empty or absent.
     */
    private findMentions(text: string, term: string): EntityMention[] {
        const mentions: EntityMention[] = [];
        // `indexOf('')` never returns -1, so an empty term would loop forever.
        if (term.length === 0) {
            return mentions;
        }

        let pos = text.indexOf(term);
        while (pos !== -1) {
            const start = Math.max(0, pos - 50);
            const end = Math.min(text.length, pos + term.length + 50);
            mentions.push({
                text: term,
                position: {
                    start: pos,
                    end: pos + term.length
                },
                context: text.substring(start, end)
            });
            // Advance by one character so overlapping occurrences are found too
            pos = text.indexOf(term, pos + 1);
        }
        return mentions;
    }

    /**
     * Infers "specifies" relationships between standards and algorithms that
     * are mentioned close together.
     *
     * For every standard/algorithm pair, the minimum distance between their
     * occurrences is measured. Pairs closer than 100 characters produce a
     * relationship whose confidence falls linearly from 1 (distance 0) towards
     * 0 (distance 100). Each (standard, algorithm) name pair is reported once,
     * even if the entity list contains the same name several times.
     *
     * @param entities - Entities found in the content.
     * @param content - Extracted content; only `content.content` is read.
     * @returns Relationships in standard-then-algorithm order.
     */
    private findRelationships(entities: Entity[], content: ExtractedContent): Relationship[] {
        const relationships: Relationship[] = [];
        const text = content.content;
        const reportedPairs = new Set<string>();

        const standards = entities.filter(entity => entity.type === 'standard');
        const algorithms = entities.filter(entity => entity.type === 'algorithm');

        // Look for relationships between standards and algorithms
        for (const standard of standards) {
            for (const algorithm of algorithms) {
                // NUL cannot occur in a matched entity name, so the key is unambiguous
                const pairKey = `${standard.name}\u0000${algorithm.name}`;
                if (reportedPairs.has(pairKey)) continue;
                reportedPairs.add(pairKey);

                // Check if entities appear close to each other
                const distance = this.findMinDistance(text, standard.name, algorithm.name);
                if (distance < 100) { // within 100 characters
                    relationships.push({
                        source: standard.name,
                        target: algorithm.name,
                        type: 'specifies',
                        confidence: 1 - (distance / 100)
                    });
                }
            }
        }

        return relationships;
    }

    /**
     * Returns the smallest distance, in characters, between the start of any
     * occurrence of `term1` and the start of any occurrence of `term2`.
     *
     * Occurrence positions of each term are collected once (ascending), then
     * the two sorted lists are walked in step. This avoids rescanning the text
     * for `term2` at every occurrence of `term1`.
     *
     * @param text - Text to search.
     * @param term1 - First term.
     * @param term2 - Second term.
     * @returns The minimum start-to-start distance, or `Infinity` when either
     *          term is empty or does not occur.
     */
    private findMinDistance(text: string, term1: string, term2: string): number {
        // `indexOf('')` never returns -1, so empty terms would never terminate.
        if (term1.length === 0 || term2.length === 0) {
            return Infinity;
        }

        // Start offsets of every (possibly overlapping) occurrence, ascending
        const positionsOf = (term: string): number[] => {
            const positions: number[] = [];
            for (let pos = text.indexOf(term); pos !== -1; pos = text.indexOf(term, pos + 1)) {
                positions.push(pos);
            }
            return positions;
        };

        const first = positionsOf(term1);
        const second = positionsOf(term2);

        let minDistance = Infinity;
        let i = 0;
        let j = 0;
        while (i < first.length && j < second.length) {
            minDistance = Math.min(minDistance, Math.abs(first[i] - second[j]));
            // Advancing the smaller offset is the only move that can shrink the gap
            if (first[i] < second[j]) {
                i++;
            } else {
                j++;
            }
        }

        return minDistance;
    }

    /**
     * Computes an AFINN-based sentiment score for the text.
     *
     * The analyzer is configured for English with the English Porter stemmer.
     * The French stemmer used previously does not match the 'English' language
     * setting. Empty input, or any non-finite analyzer result, is reported as
     * neutral instead of propagating NaN.
     *
     * @param text - Text to analyse.
     * @returns `score` clamped to [-1, 1], `confidence` as |raw score| / 5
     *          capped at 1, and an empty `aspects` list.
     */
    private analyzeSentiment(text: string) {
        const analyzer = new natural.SentimentAnalyzer(
            'English',
            natural.PorterStemmer,
            'afinn'
        );

        const tokens = this.tokenizeContent(text);
        const rawScore = tokens.length > 0 ? analyzer.getSentiment(tokens) : 0;
        const score = Number.isFinite(rawScore) ? rawScore : 0;

        return {
            score: Math.max(-1, Math.min(1, score)), // Normalize to [-1, 1]
            confidence: Math.min(1, Math.abs(score) / 5), // Simple confidence calculation
            aspects: [] // Could be enhanced with aspect-based sentiment analysis
        };
    }

    /**
     * Bundles the individual quality metrics of the content into one record.
     *
     * @param content - Extracted content to assess.
     * @returns Readability, information density, technical depth, credibility
     *          and freshness scores, each computed by its own helper.
     */
    private assessQuality(content: ExtractedContent): ContentQuality {
        const readability = this.calculateReadabilityScore(content.content);
        const informationDensity = this.calculateInformationDensity(content);
        const technicalDepth = this.calculateTechnicalDepth(content);
        const credibilityScore = this.calculateCredibilityScore(content);
        const freshness = this.calculateFreshnessScore(content);

        return {
            readability,
            informationDensity,
            technicalDepth,
            credibilityScore,
            freshness
        };
    }

    /**
     * Estimates how easy `text` is to read, based on the Flesch-Kincaid Grade
     * Level formula.
     *
     * The grade is mapped linearly onto [0, 1]: grade 0 (or lower) scores 1,
     * grade 10 scores 0.5, and grade 20 (or higher) scores 0.
     *
     * @param text - The text to evaluate.
     * @returns A readability score in [0, 1]; 0 when `text` contains no words.
     */
    private calculateReadabilityScore(text: string): number {
        // `split` yields empty strings for leading/trailing separators; drop
        // them so they are not counted as words.
        const words = text.split(/\s+/).filter(word => word.length > 0);
        if (words.length === 0) {
            return 0; // Nothing to read
        }

        // Likewise, a trailing terminator ("Done.") must not add a phantom
        // sentence. Text without any terminator counts as one sentence.
        const sentenceCount = Math.max(
            1,
            text.split(/[.!?]+/).filter(segment => segment.trim().length > 0).length
        );

        // Re-join the cleaned words so the syllable counter sees exactly the
        // tokens that were counted above.
        const syllableCount = this.countSyllables(words.join(' '));

        // Flesch-Kincaid Grade Level
        const grade =
            0.39 * (words.length / sentenceCount) +
            11.8 * (syllableCount / words.length) -
            15.59;

        return Math.max(0, Math.min(1, 1 - (grade / 20)));
    }

    /**
     * Estimates the total number of syllables in `text` by summing the
     * per-word estimate of every whitespace-separated token.
     *
     * @param text - The text to measure.
     * @returns The sum of `countWordSyllables` over all tokens, including any
     *          empty tokens produced by leading or trailing whitespace.
     */
    private countSyllables(text: string): number {
        let total = 0;
        for (const token of text.split(/\s+/)) {
            total += this.countWordSyllables(token);
        }
        return total;
    }

    /**
     * Estimates the number of syllables in a single word with a vowel-group
     * heuristic.
     *
     * @param word - The word to measure; case is ignored.
     * @returns The estimated syllable count, never less than 1.
     */
    private countWordSyllables(word: string): number {
        const lowered = word.toLowerCase();

        // Very short words are treated as a single syllable.
        if (lowered.length <= 3) {
            return 1;
        }

        // Strip common (mostly silent) endings and a leading "y" so they are
        // not counted as vowel groups.
        const stem = lowered
            .replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '')
            .replace(/^y/, '');

        // Each run of one or two vowels counts as one syllable.
        const vowelGroups = stem.match(/[aeiouy]{1,2}/g);
        return vowelGroups === null ? 1 : vowelGroups.length;
    }

    /**
     * Measures how densely the content uses known technical terms.
     *
     * The score is the share of tokens found in `technicalTerms`, scaled so
     * that a 20% share (or more) yields the maximum of 1.
     *
     * @param content - The extracted content to evaluate.
     * @returns A density score in [0, 1]; 0 when the content has no tokens.
     */
    private calculateInformationDensity(content: ExtractedContent): number {
        const tokens = this.tokenizeContent(content.content);

        // Without this guard an empty token list evaluates 0 / 0 and the NaN
        // survives Math.min, leaking into every score derived from it.
        if (tokens.length === 0) {
            return 0;
        }

        const technicalCount = tokens.filter(token => this.technicalTerms.has(token)).length;
        return Math.min(1, technicalCount / (tokens.length * 0.2));
    }

    /**
     * Measures technical depth as the number of distinct known technical terms
     * appearing in the content, saturating at 20 distinct terms.
     *
     * @param content - The extracted content to evaluate.
     * @returns A depth score in [0, 1].
     */
    private calculateTechnicalDepth(content: ExtractedContent): number {
        const distinctTerms = new Set<string>();

        for (const token of this.tokenizeContent(content.content)) {
            if (this.technicalTerms.has(token)) {
                distinctTerms.add(token);
            }
        }

        return Math.min(1, distinctTerms.size / 20);
    }

    /**
     * Estimates how credible the content is.
     *
     * Starts from a neutral 0.5 and adds:
     * - 0.2 when the URL's host is a `.gov` / `.edu` domain or has a `csrc` or
     *   `nist` label,
     * - 0.1 when the text contains at least one citation,
     * - up to 0.2 in proportion to the share of technical-term tokens.
     *
     * @param content - The extracted content to evaluate.
     * @returns A credibility score capped at 1.
     */
    private calculateCredibilityScore(content: ExtractedContent): number {
        let score = 0.5; // Base score

        // Only the host may earn the trusted-domain bonus. Matching against the
        // whole URL would also reward path or query text such as "?ref=.edu".
        let host: string;
        try {
            host = new URL(content.url).hostname;
        } catch {
            // Not an absolute URL (e.g. no scheme): use the text before the
            // first path, query or fragment delimiter as the host.
            host = content.url.split(/[/?#]/, 1)[0] ?? '';
        }
        host = host.toLowerCase();

        const isTrustedHost =
            /\.(?:gov|edu)(?:\.|$)/.test(host) ||   // e.g. nist.gov, mit.edu, www.gov.uk
            /(?:^|\.)(?:csrc|nist)\./.test(host);   // e.g. csrc.nist.gov
        if (isTrustedHost) {
            score += 0.2;
        }

        // Check for citations
        if (this.extractCitations(content).length > 0) {
            score += 0.1;
        }

        // Check for technical content; skip when there are no tokens so the
        // ratio cannot become NaN (0 / 0).
        const tokens = this.tokenizeContent(content.content);
        if (tokens.length > 0) {
            const technicalCount = tokens.filter(token => this.technicalTerms.has(token)).length;
            score += (technicalCount / tokens.length) * 0.2;
        }

        return Math.min(1, score);
    }

    /**
     * Scores how recent the content is from its `metadata.datePublished`.
     *
     * The score decays linearly from 1 (published now or in the future) to 0
     * (published one year ago or earlier).
     *
     * @param content - The extracted content to evaluate.
     * @returns A freshness score in [0, 1]; the neutral value 0.5 when the
     *          publication date is missing or cannot be parsed.
     */
    private calculateFreshnessScore(content: ExtractedContent): number {
        const datePublished = content.metadata?.datePublished;
        if (!datePublished) return 0.5;

        const publishedAt = new Date(datePublished).getTime();
        // An unparseable date yields NaN, which would pass straight through the
        // clamps below. Treat it like a missing date instead.
        if (Number.isNaN(publishedAt)) return 0.5;

        const millisecondsPerDay = 1000 * 60 * 60 * 24;
        const ageInDays = (Date.now() - publishedAt) / millisecondsPerDay;

        // Linear decay over one year, clamped to [0, 1]
        return Math.max(0, Math.min(1, 1 - (ageInDays / 365)));
    }

    /**
     * Collects citations found in the content text.
     *
     * Two kinds are recognized: references to standards (`FIPS`, `SP` or `RFC`
     * followed by a number, optionally with a `-NNN` suffix) and `http(s)` URLs.
     *
     * @param content - The extracted content whose text is scanned.
     * @returns All standard references in order of appearance, followed by all
     *          URL citations in order of appearance.
     */
    private extractCitations(content: ExtractedContent): Citation[] {
        const body = content.content;

        // Standard references, e.g. "FIPS 140-2" or "RFC 2119"
        const standards = (body.match(/(?:FIPS|SP|RFC)\s+\d+(?:-\d+)?/g) ?? []).map(
            (ref): Citation => ({ text: ref, type: 'standard' })
        );

        // URL citations; the URL doubles as the citation source
        const links = (body.match(/https?:\/\/[^\s)]+/g) ?? []).map(
            (url): Citation => ({ text: url, type: 'url', source: url })
        );

        return [...standards, ...links];
    }

    /**
     * Tells whether `word` is one of natural's stop words.
     *
     * @param word - The word to check; it is lower-cased before the lookup.
     * @returns `true` when the lower-cased word is in `natural.stopwords`.
     */
    private isStopWord(word: string): boolean {
        const normalized = word.toLowerCase();
        return natural.stopwords.includes(normalized);
    }

    /**
     * Computes the overall relevance of the content.
     *
     * The score is a weighted sum: 60% mean topic confidence, 20% technical
     * depth and 20% information density, capped at 1.
     *
     * @param content - The extracted content to evaluate.
     * @param topics - Topics detected in the content; may be empty.
     * @returns A relevance score capped at 1.
     */
    private calculateRelevanceScore(content: ExtractedContent, topics: Topic[]): number {
        // No topics means no topical evidence: contribute 0 instead of dividing by 0.
        const meanTopicConfidence = topics.length > 0
            ? topics.reduce((sum, topic) => sum + topic.confidence, 0) / topics.length
            : 0;

        // Only these two metrics feed the score, so call them directly rather
        // than running the whole quality assessment and discarding the
        // readability, credibility and freshness results.
        const technicalDepth = this.calculateTechnicalDepth(content);
        const informationDensity = this.calculateInformationDensity(content);

        // A non-finite metric (e.g. a 0 / 0 ratio on empty content) must not
        // turn the whole score into NaN.
        const orZero = (value: number): number => (Number.isFinite(value) ? value : 0);

        return Math.min(
            1,
            (meanTopicConfidence * 0.6) +
            (orZero(technicalDepth) * 0.2) +
            (orZero(informationDensity) * 0.2)
        );
    }

    /**
     * Tells whether `text` matches any of the configured boilerplate patterns.
     *
     * @param text - The text to check.
     * @returns `true` as soon as one pattern in `boilerplatePatterns` matches,
     *          `false` when none does.
     */
    private isBoilerplate(text: string): boolean {
        for (const pattern of this.boilerplatePatterns) {
            if (pattern.test(text)) {
                return true;
            }
        }
        return false;
    }
}