RAG MCP Server

Overview Schema Related Servers Score Discussions

ai-segmenter.ts•16.9 KiB

/**
 * AI segmenter producing suggestions for optimal segmentation.
 *
 * This module uses Cline or a lightweight model to analyze content and
 * suggest optimal split points for chunking. When LLM analysis is disabled
 * or fails, it falls back to heuristic, rule-based analyzers.
 */

import { getRagConfigManager } from '../config/rag-config.js';
import { preprocessCode, PreprocessingResult } from './code-preprocessor.js';
import { ContentType, ProgrammingLanguage } from './content-detector.js';
import { getLlmCache } from './llm-cache.js';
import { LlmService } from './llm-service.js';

/** A single suggested segment of the analyzed content. */
export interface SegmentationSuggestion {
  /** 1-based first line of the segment; 0 when the position is unknown (config analysis). */
  startLine: number;
  /** 1-based last line of the segment (inclusive); 0 when the position is unknown. */
  endLine: number;
  /** Kind of structure the segment represents. */
  type: 'function' | 'class' | 'section' | 'paragraph' | 'logical_block';
  /** How much the producer trusts this suggestion. */
  confidence: number;
  /** Human-readable explanation of why the segment was suggested. */
  reason: string;
  /** Short excerpt (at most 100 characters for heuristic producers) of the segment. */
  contentPreview: string;
}

/** Result of analyzing a piece of content for segmentation. */
export interface SegmentationAnalysis {
  suggestions: SegmentationSuggestion[];
  /** Recommended chunk size. NOTE(review): unit (characters vs tokens) is not visible here. */
  optimalChunkSize: number;
  recommendedStrategy: 'structural' | 'semantic' | 'hybrid';
  /** 0-1, higher means more complex. */
  complexityScore: number;
}

/** Every value accepted by `SegmentationSuggestion.type`, used to validate LLM output. */
const SEGMENT_TYPES: ReadonlyArray<SegmentationSuggestion['type']> = [
  'function',
  'class',
  'section',
  'paragraph',
  'logical_block',
];

/**
 * Analyzes content and suggests optimal segmentation points.
 *
 * When LLM analysis is enabled in the RAG configuration, the LLM cache is
 * consulted first and the LLM service is called on a miss. Any error on that
 * path is logged and the heuristic analyzers are used instead.
 *
 * @param content Content to analyze
 * @param filePath Path of the file the content comes from
 * @param contentType Type of content
 * @param language Programming language (when the content is code)
 * @returns Segmentation analysis with suggestions
 */
export async function analyzeSegmentation(
  content: string,
  filePath: string,
  contentType: ContentType,
  language?: ProgrammingLanguage
): Promise<SegmentationAnalysis> {
  const configManager = getRagConfigManager();

  if (configManager.isLlmAnalysisEnabled()) {
    try {
      console.log(`🧠 Analyse LLM activée pour ${filePath} (${contentType})`);

      // Check the cache first.
      const cache = getLlmCache();
      const cachedAnalysis = cache.get(content, filePath, 'suggest_structure', contentType);
      if (cachedAnalysis) {
        console.log(`✅ Utilisation du cache pour ${filePath}`);
        return parseLlmAnalysis(cachedAnalysis, content, contentType, language);
      }

      // Cache miss: ask the LLM service.
      const llmService = new LlmService();
      const llmAnalysis = await llmService.analyzeContent(
        content,
        filePath,
        contentType,
        'suggest_structure'
      );

      cache.set(content, filePath, 'suggest_structure', llmAnalysis, contentType);
      console.log(`💾 Analyse LLM mise en cache pour ${filePath}`);

      return parseLlmAnalysis(llmAnalysis, content, contentType, language);
    } catch (error) {
      // Deliberate best-effort: log and fall through to the heuristic rules.
      console.error(`❌ Analyse LLM échouée, fallback aux règles: ${error}`);
    }
  }

  // Heuristic analysis, selected by content type.
  switch (contentType) {
    case 'code':
      return analyzeCodeSegmentation(content, language, filePath);
    case 'doc':
      return analyzeDocumentationSegmentation(content, filePath);
    case 'config':
      return analyzeConfigSegmentation(content, filePath);
    default:
      return analyzeGenericSegmentation(content, filePath);
  }
}

/**
 * Analyzes segmentation for source code.
 *
 * For JavaScript, TypeScript and Python the code pre-processor extracts
 * functions and classes, each of which becomes a suggestion. Any other
 * language, or a pre-processor failure, falls back to the generic analysis.
 *
 * @param content Source code to analyze
 * @param language Programming language of the content, if known
 * @param filePath Path of the file; 'unknown' is passed to the fallback when omitted
 * @returns Segmentation analysis with suggestions
 */
async function analyzeCodeSegmentation(
  content: string,
  language?: ProgrammingLanguage,
  filePath?: string
): Promise<SegmentationAnalysis> {
  const suggestions: SegmentationSuggestion[] = [];

  try {
    if (language === 'javascript' || language === 'typescript' || language === 'python') {
      const result = preprocessCode(content, language);

      // One suggestion per function.
      for (const func of result.structure.functions) {
        suggestions.push({
          startLine: func.startLine,
          endLine: func.endLine,
          type: 'function',
          confidence: 0.9,
          reason: `Fonction ${func.name} avec ${func.parameters.length} paramètres`,
          contentPreview: func.signature.substring(0, 100),
        });
      }

      // One suggestion per class.
      for (const cls of result.structure.classes) {
        suggestions.push({
          startLine: cls.startLine,
          endLine: cls.endLine,
          type: 'class',
          confidence: 0.8,
          reason: `Classe ${cls.name} avec ${cls.methods.length} méthodes`,
          contentPreview: `class ${cls.name}`,
        });
      }

      const complexityScore = calculateCodeComplexity(result);

      return {
        suggestions,
        optimalChunkSize: complexityScore > 0.7 ? 800 : 1200,
        recommendedStrategy: suggestions.length > 0 ? 'structural' : 'semantic',
        complexityScore,
      };
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`Erreur lors de l'analyse de segmentation du code: ${message}`);
  }

  // Fallback: generic analysis.
  return analyzeGenericSegmentation(content, filePath || 'unknown');
}

/**
 * Computes a code complexity score.
 *
 * The metric is the density of functions and classes per line, scaled by 10
 * and capped at 1.
 *
 * @param result Output of the code pre-processor
 * @returns Score between 0 and 1
 */
function calculateCodeComplexity(result: PreprocessingResult): number {
  const lineCount = result.originalContent.split('\n').length;
  const functions = result.metadata.totalFunctions;
  const classes = result.metadata.totalClasses;

  const structureDensity = (functions + classes) / Math.max(lineCount, 1);

  return Math.min(1, structureDensity * 10);
}

/**
 * Analyzes segmentation for documentation.
 *
 * Markdown headers (`#` to `######`) delimit sections, and blank lines
 * delimit paragraphs. Sections and paragraphs are both reported, so the
 * suggestions may overlap.
 *
 * @param content Documentation text to analyze
 * @param filePath Path of the file (currently unused by this analyzer)
 * @returns Segmentation analysis with suggestions
 */
async function analyzeDocumentationSegmentation(
  content: string,
  filePath: string
): Promise<SegmentationAnalysis> {
  const suggestions: SegmentationSuggestion[] = [];
  const lines = content.split('\n');

  let currentSectionStart = 0;
  let currentSectionLevel = 0;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Markdown header detection.
    const headerMatch = line.match(/^(#{1,6})\s+(.+)/);
    if (headerMatch) {
      const level = headerMatch[1].length;

      // Close the previous section, but only if it spans more than one line.
      if (currentSectionStart < i && i - currentSectionStart > 1) {
        suggestions.push({
          startLine: currentSectionStart + 1,
          endLine: i,
          type: 'section',
          confidence: 0.7,
          reason: `Section niveau ${currentSectionLevel}`,
          contentPreview: lines[currentSectionStart]?.substring(0, 100) || '',
        });
      }

      currentSectionStart = i;
      currentSectionLevel = level;
    }

    // Paragraph detection: a blank line that follows a non-blank line.
    if (line.trim() === '' && i > 0 && lines[i - 1].trim() !== '') {
      const paragraphStart = findParagraphStart(lines, i);
      // Only paragraphs of at least two lines are reported.
      if (paragraphStart < i - 1) {
        suggestions.push({
          startLine: paragraphStart + 1,
          endLine: i,
          type: 'paragraph',
          confidence: 0.6,
          reason: 'Paragraphe cohérent',
          contentPreview: lines[paragraphStart]?.substring(0, 100) || '',
        });
      }
    }
  }

  // Last section.
  if (currentSectionStart < lines.length - 1) {
    suggestions.push({
      startLine: currentSectionStart + 1,
      endLine: lines.length,
      type: 'section',
      confidence: 0.7,
      reason: `Section niveau ${currentSectionLevel}`,
      contentPreview: lines[currentSectionStart]?.substring(0, 100) || '',
    });
  }

  const contentDensity = calculateContentDensity(content);

  return {
    suggestions,
    optimalChunkSize: contentDensity > 0.5 ? 600 : 1000,
    recommendedStrategy: suggestions.length > 2 ? 'structural' : 'semantic',
    complexityScore: contentDensity,
  };
}

/**
 * Finds the start of a paragraph.
 *
 * @param lines All lines of the document
 * @param currentIndex Index to scan backwards from (exclusive)
 * @returns 0-based index of the line following the nearest preceding blank
 *          line, or 0 when there is none
 */
function findParagraphStart(lines: string[], currentIndex: number): number {
  for (let i = currentIndex - 1; i >= 0; i--) {
    if (lines[i].trim() === '') {
      return i + 1;
    }
  }
  return 0;
}

/**
 * Computes content density.
 *
 * The metric combines the ratio of non-empty lines with the average line
 * length divided by 100, capped at 1.
 *
 * @param content Text to measure
 * @returns Score between 0 and 1
 */
function calculateContentDensity(content: string): number {
  const lines = content.split('\n');
  const lineCount = Math.max(lines.length, 1);
  const nonEmptyLines = lines.filter(line => line.trim().length > 0).length;
  const avgLineLength = lines.reduce((sum, line) => sum + line.length, 0) / lineCount;

  return Math.min(1, (nonEmptyLines / lineCount) * (avgLineLength / 100));
}

/**
 * Recursively records large arrays and objects of a parsed JSON value as
 * logical blocks. Arrays are reported but not descended into.
 *
 * @param value Parsed JSON value to inspect
 * @param suggestions Output array that receives the suggestions
 * @param path Dotted property path of `value`, '' at the root
 * @param depth Nesting depth of `value`, 0 at the root
 */
function collectJsonBlocks(
  value: unknown,
  suggestions: SegmentationSuggestion[],
  path = '',
  depth = 0
): void {
  if (Array.isArray(value)) {
    if (value.length > 5 && depth < 3) {
      suggestions.push({
        startLine: 0, // Approximation: line positions are not tracked for JSON.
        endLine: 0,
        type: 'logical_block',
        confidence: 0.7,
        reason: `Tableau avec ${value.length} éléments${path ? ` dans ${path}` : ''}`,
        contentPreview: `[${value.length} éléments]`,
      });
    }
    return;
  }

  if (typeof value !== 'object' || value === null) {
    return;
  }

  const keys = Object.keys(value);
  if (keys.length > 3 && depth < 2) {
    suggestions.push({
      startLine: 0,
      endLine: 0,
      type: 'logical_block',
      confidence: 0.8,
      reason: `Objet avec ${keys.length} propriétés${path ? `: ${path}` : ''}`,
      contentPreview: `{${keys.slice(0, 3).join(', ')}...}`,
    });
  }

  // Descend into nested values.
  for (const [key, child] of Object.entries(value)) {
    collectJsonBlocks(child, suggestions, path ? `${path}.${key}` : key, depth + 1);
  }
}

/**
 * Analyzes segmentation for configuration files.
 *
 * JSON content is parsed and its large arrays/objects are reported; content
 * containing a `---` line separator is treated as YAML and each document is
 * reported. All suggestions carry `startLine`/`endLine` of 0 because line
 * positions are not computed here.
 *
 * @param content Configuration text to analyze
 * @param filePath Path of the file (currently unused by this analyzer)
 * @returns Segmentation analysis with suggestions
 */
async function analyzeConfigSegmentation(
  content: string,
  filePath: string
): Promise<SegmentationAnalysis> {
  const suggestions: SegmentationSuggestion[] = [];
  const trimmed = content.trim();

  // JSON
  if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
    try {
      const parsed: unknown = JSON.parse(content);
      collectJsonBlocks(parsed, suggestions);
    } catch {
      // Invalid JSON: deliberately ignored, no JSON suggestions are produced.
    }
  }

  // YAML
  if (content.includes('---\n')) {
    const yamlDocs = content.split('---\n').filter(doc => doc.trim());
    yamlDocs.forEach((doc, index) => {
      suggestions.push({
        startLine: 0,
        endLine: 0,
        type: 'logical_block',
        confidence: 0.9,
        reason: `Document YAML ${index + 1}/${yamlDocs.length}`,
        contentPreview: doc.split('\n')[0]?.substring(0, 100) || '',
      });
    });
  }

  const structureScore = suggestions.length > 0 ? 0.7 : 0.3;

  return {
    suggestions,
    optimalChunkSize: 800,
    recommendedStrategy: suggestions.length > 0 ? 'structural' : 'semantic',
    complexityScore: structureScore,
  };
}

/**
 * Generic analysis for content with no specific type.
 *
 * Blank lines delimit logical blocks; only blocks of at least two lines are
 * reported. Leading and consecutive blank lines are skipped so that a block
 * never starts on a blank line.
 *
 * @param content Text to analyze
 * @param filePath Path of the file (currently unused by this analyzer)
 * @returns Segmentation analysis with suggestions
 */
async function analyzeGenericSegmentation(
  content: string,
  filePath: string
): Promise<SegmentationAnalysis> {
  const suggestions: SegmentationSuggestion[] = [];
  const lines = content.split('\n');

  let currentBlockStart = 0;

  for (let i = 0; i < lines.length; i++) {
    if (lines[i].trim() !== '') {
      continue;
    }

    if (i - currentBlockStart > 1) {
      suggestions.push({
        startLine: currentBlockStart + 1,
        endLine: i,
        type: 'logical_block',
        confidence: 0.5,
        reason: 'Bloc logique détecté',
        contentPreview: lines[currentBlockStart]?.substring(0, 100) || '',
      });
    }

    // Always advance past the blank line, including when it directly follows
    // another blank line, so the next block starts on real content.
    currentBlockStart = i + 1;
  }

  // Last block.
  if (currentBlockStart < lines.length - 1) {
    suggestions.push({
      startLine: currentBlockStart + 1,
      endLine: lines.length,
      type: 'logical_block',
      confidence: 0.5,
      reason: 'Dernier bloc logique',
      contentPreview: lines[currentBlockStart]?.substring(0, 100) || '',
    });
  }

  const density = calculateContentDensity(content);

  return {
    suggestions,
    optimalChunkSize: density > 0.4 ? 700 : 1200,
    recommendedStrategy: 'semantic',
    complexityScore: density,
  };
}

/**
 * Coerces an untrusted value into a 1-based line number.
 *
 * @param value Number or numeric string taken from LLM output
 * @returns The truncated integer when it is at least 1, otherwise undefined
 */
function toLineNumber(value: unknown): number | undefined {
  const numeric = typeof value === 'string' ? Number.parseInt(value, 10) : value;
  if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
    return undefined;
  }
  const line = Math.trunc(numeric);
  return line >= 1 ? line : undefined;
}

/**
 * Converts one untrusted entry of the LLM `suggestions` array into a
 * well-typed suggestion, filling defaults for missing or invalid fields.
 *
 * @param raw Entry as produced by `JSON.parse`
 * @param lines Lines of the analyzed content, used for defaults and preview
 * @returns The normalized suggestion, or undefined when `raw` is not an object
 */
function normalizeLlmSuggestion(
  raw: unknown,
  lines: string[]
): SegmentationSuggestion | undefined {
  if (typeof raw !== 'object' || raw === null) {
    return undefined;
  }
  const candidate = raw as Record<string, unknown>;

  const startLine = toLineNumber(candidate.startLine) ?? 1;
  const endLine = toLineNumber(candidate.endLine) ?? lines.length;
  const type = SEGMENT_TYPES.find(known => known === candidate.type) ?? 'logical_block';

  // A confidence of 0 is a legitimate value; only default when it is absent or not numeric.
  const confidence =
    typeof candidate.confidence === 'number' && Number.isFinite(candidate.confidence)
      ? candidate.confidence
      : 0.7;

  const reason =
    typeof candidate.reason === 'string' && candidate.reason !== ''
      ? candidate.reason
      : 'Suggestion LLM';

  const contentPreview =
    typeof candidate.contentPreview === 'string' && candidate.contentPreview !== ''
      ? candidate.contentPreview
      : lines.slice(startLine - 1, endLine).join('\n').substring(0, 100);

  return { startLine, endLine, type, confidence, reason, contentPreview };
}

/**
 * Parses the LLM response into segmentation suggestions.
 *
 * Three strategies are tried in order: a JSON object with a `suggestions`
 * array embedded in the response; loosely structured text where lines
 * mentioning "Ligne", "line" or "start" carry one or two numbers; and finally
 * a single generic suggestion covering the first 20 lines at most. A parsing
 * error is logged and leads to whatever was collected so far, or the generic
 * suggestion.
 *
 * @param llmAnalysis Raw text returned by the LLM (or the cache)
 * @param content Content that was analyzed
 * @param contentType Type of content, which determines the complexity score
 * @param language Programming language (currently unused by the parser)
 * @returns Segmentation analysis with suggestions
 */
function parseLlmAnalysis(
  llmAnalysis: string,
  content: string,
  contentType: ContentType,
  language?: ProgrammingLanguage
): SegmentationAnalysis {
  const lines = content.split('\n');
  const suggestions: SegmentationSuggestion[] = [];

  try {
    // Strategy 1: JSON embedded anywhere in the response.
    if (llmAnalysis.includes('{') && llmAnalysis.includes('}')) {
      const jsonMatch = llmAnalysis.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const parsed: unknown = JSON.parse(jsonMatch[0]);
        const rawSuggestions =
          typeof parsed === 'object' && parsed !== null
            ? (parsed as Record<string, unknown>).suggestions
            : undefined;

        if (Array.isArray(rawSuggestions)) {
          for (const raw of rawSuggestions) {
            const suggestion = normalizeLlmSuggestion(raw, lines);
            if (suggestion) {
              suggestions.push(suggestion);
            }
          }
        }
      }
    }

    // Strategy 2: structured text.
    if (suggestions.length === 0) {
      for (const line of llmAnalysis.split('\n')) {
        const mentionsLines =
          line.includes('Ligne') || line.includes('line') || line.includes('start');
        if (!mentionsLines) {
          continue;
        }

        const lineMatch = line.match(/(\d+)[^\d]*(\d+)?/);
        if (!lineMatch) {
          continue;
        }

        const startLine = Number.parseInt(lineMatch[1], 10);
        suggestions.push({
          startLine,
          // Without an explicit end, assume a block of 10 lines after the start.
          endLine: lineMatch[2] ? Number.parseInt(lineMatch[2], 10) : startLine + 10,
          type: 'logical_block',
          confidence: 0.6,
          reason: 'Détection LLM',
          contentPreview: '',
        });
      }
    }
  } catch (error) {
    console.error(`❌ Erreur parsing réponse LLM: ${error}`);
  }

  // Strategy 3: nothing usable came back, emit a generic suggestion.
  if (suggestions.length === 0) {
    suggestions.push({
      startLine: 1,
      endLine: Math.min(20, lines.length),
      type: 'logical_block',
      confidence: 0.5,
      reason: 'Analyse LLM générique',
      contentPreview: lines.slice(0, 3).join('\n').substring(0, 100),
    });
  }

  // Complexity is a fixed value per content type.
  let complexityScore = 0.5;
  if (contentType === 'code') complexityScore = 0.7;
  if (contentType === 'config') complexityScore = 0.6;
  if (contentType === 'doc') complexityScore = 0.4;

  return {
    suggestions,
    optimalChunkSize: complexityScore > 0.6 ? 800 : 1200,
    recommendedStrategy: suggestions.length > 2 ? 'structural' : 'hybrid',
    complexityScore,
  };
}

/**
 * Uses Cline to obtain advanced segmentation suggestions.
 *
 * For now this delegates to `analyzeSegmentation` (without a language) and
 * returns only its suggestions.
 *
 * @param content Content to analyze
 * @param filePath Path of the file
 * @param contentType Type of content
 * @returns Segmentation suggestions
 */
export async function getClineSegmentationSuggestions(
  content: string,
  filePath: string,
  contentType: ContentType
): Promise<SegmentationSuggestion[]> {
  console.log(`🧠 Utilisation du service LLM pour ${filePath}`);

  const analysis = await analyzeSegmentation(content, filePath, contentType);
  return analysis.suggestions;
}

/**
 * Optimizes existing chunks based on AI suggestions.
 *
 * Each suggestion with confidence above 0.6 and a valid multi-line range is
 * cut out of `originalContent`; extracts between 51 and 1999 characters are
 * kept. When at least one extract is kept, the extracts REPLACE the input
 * chunks, so content not covered by a suggestion is not part of the result.
 * Otherwise the input chunks are returned unchanged.
 *
 * @param chunks Existing chunks
 * @param suggestions Segmentation suggestions (1-based, inclusive line ranges)
 * @param originalContent Full content the chunks and suggestions refer to
 * @returns Optimized chunks, or `chunks` when no suggestion is usable
 */
export function optimizeChunksWithSuggestions(
  chunks: string[],
  suggestions: SegmentationSuggestion[],
  originalContent: string
): string[] {
  if (suggestions.length === 0 || chunks.length <= 1) {
    return chunks;
  }

  const lines = originalContent.split('\n');
  const optimizedChunks: string[] = [];

  for (const suggestion of suggestions) {
    const isUsable =
      suggestion.confidence > 0.6 &&
      suggestion.startLine > 0 &&
      suggestion.endLine > suggestion.startLine;
    if (!isUsable) {
      continue;
    }

    const chunkContent = lines.slice(suggestion.startLine - 1, suggestion.endLine).join('\n');
    if (chunkContent.length > 50 && chunkContent.length < 2000) {
      optimizedChunks.push(chunkContent);
    }
  }

  // Prefer the optimized chunks when there are any; otherwise keep the originals.
  return optimizedChunks.length > 0 ? optimizedChunks : chunks;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ali-48/rag-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

ai-segmenter.ts•16.9 KiB