Skip to main content
Glama

documcp

by tosin2013
kg-code-integration.ts18 kB
/** * Knowledge Graph Code Integration Module * Implements Phase 1.2: Documentation Context in Knowledge Graph * * Populates the knowledge graph with code file entities, documentation section entities, * and relationships between code and documentation for drift detection and coverage analysis. */ import { promises as fs } from "fs"; import path from "path"; import crypto from "crypto"; import { GraphNode, GraphEdge } from "./knowledge-graph.js"; import { ExtractedContent } from "../utils/content-extractor.js"; import { getKnowledgeGraph } from "./kg-integration.js"; import { validateAndStoreDocumentationLinks } from "./kg-link-validator.js"; import { ASTAnalyzer } from "../utils/ast-analyzer.js"; /** * Create code file entities from repository source code */ export async function createCodeFileEntities( projectId: string, repoPath: string, ): Promise<GraphNode[]> { const kg = await getKnowledgeGraph(); const codeFiles: GraphNode[] = []; // Directories to scan for code const sourceDirs = ["src", "lib", "app", "packages"]; for (const dir of sourceDirs) { const dirPath = path.join(repoPath, dir); try { await fs.access(dirPath); const files = await walkSourceFiles(dirPath, repoPath); for (const filePath of files) { try { const codeFileNode = await createCodeFileEntity( projectId, filePath, repoPath, ); if (codeFileNode) { kg.addNode(codeFileNode); codeFiles.push(codeFileNode); // Create relationship: project -> code_file kg.addEdge({ source: projectId, target: codeFileNode.id, type: "depends_on", weight: 1.0, confidence: 1.0, properties: { dependencyType: "contains", }, }); } } catch (error) { console.warn(`Failed to process file ${filePath}:`, error); } } } catch { // Directory doesn't exist, skip } } return codeFiles; } /** * Create a single code file entity */ async function createCodeFileEntity( projectId: string, filePath: string, repoPath: string, ): Promise<GraphNode | null> { const content = await fs.readFile(filePath, "utf-8"); const stats = await fs.stat(filePath); const relativePath = path.relative(repoPath, filePath); const ext = path.extname(filePath); const language = getLanguageFromExtension(ext); if (!language) return null; // Calculate content hash for change detection const contentHash = crypto.createHash("sha256").update(content).digest("hex"); // Extract functions and classes using AST parsing const { functions, classes, imports, exports } = await extractCodeStructure( filePath, content, language, ); // Estimate complexity const linesOfCode = content.split("\n").length; const complexity = estimateComplexity(linesOfCode, functions.length); const nodeId = `code_file:${projectId}:${relativePath.replace( /[/\\]/g, ":", )}`; return { id: nodeId, type: "code_file", label: path.basename(filePath), properties: { path: relativePath, language, functions, classes, dependencies: imports, // Now extracted via AST imports, exports, lastModified: stats.mtime.toISOString(), linesOfCode, contentHash, complexity, }, weight: 1.0, lastUpdated: new Date().toISOString(), }; } /** * Create documentation section entities from extracted content */ export async function createDocumentationEntities( projectId: string, extractedContent: ExtractedContent, ): Promise<GraphNode[]> { const kg = await getKnowledgeGraph(); const docSections: GraphNode[] = []; // Process README sections if (extractedContent.readme) { for (const section of extractedContent.readme.sections) { const docNode = createDocSectionEntity( projectId, "README.md", section.title, extractedContent.readme.content.substring(0, 1000), // First 1000 chars "reference", ); kg.addNode(docNode); docSections.push(docNode); // Create relationship: project -> documentation_section kg.addEdge({ source: projectId, target: docNode.id, type: "depends_on", weight: 1.0, confidence: 1.0, properties: { dependencyType: "contains", }, }); } } // Process existing docs for (const doc of extractedContent.existingDocs) { const docNode = createDocSectionEntity( projectId, doc.path, doc.title, doc.content, doc.category, ); kg.addNode(docNode); docSections.push(docNode); // Create relationship: project -> documentation_section kg.addEdge({ source: projectId, target: docNode.id, type: "depends_on", weight: 1.0, confidence: 1.0, properties: { dependencyType: "contains", }, }); // Validate external links in documentation (async, non-blocking) validateAndStoreDocumentationLinks(docNode.id, doc.content).catch((error) => console.warn(`Failed to validate links in ${doc.path}:`, error.message), ); } // Process ADRs for (const adr of extractedContent.adrs) { const docNode = createDocSectionEntity( projectId, `docs/adrs/${adr.number}-${adr.title}.md`, adr.title, adr.content, "explanation", ); kg.addNode(docNode); docSections.push(docNode); kg.addEdge({ source: projectId, target: docNode.id, type: "depends_on", weight: 1.0, confidence: 1.0, properties: { dependencyType: "contains", }, }); } return docSections; } /** * Create a single documentation section entity */ function createDocSectionEntity( projectId: string, filePath: string, sectionTitle: string, content: string, category?: "tutorial" | "how-to" | "reference" | "explanation", ): GraphNode { const contentHash = crypto.createHash("sha256").update(content).digest("hex"); const wordCount = content.split(/\s+/).length; const hasCodeExamples = /```/.test(content); // Extract referenced code files/functions from content const referencedCodeFiles = extractCodeReferences(content); const referencedFunctions = extractFunctionReferences(content); const referencedClasses = extractClassReferences(content); const nodeId = `documentation_section:${projectId}:${filePath.replace( /[/\\]/g, ":", )}:${sectionTitle.replace(/\s+/g, "_")}`; return { id: nodeId, type: "documentation_section", label: sectionTitle, properties: { filePath, sectionTitle, contentHash, referencedCodeFiles, referencedFunctions, referencedClasses, lastUpdated: new Date().toISOString(), category, effectivenessScore: hasCodeExamples ? 0.8 : 0.5, wordCount, hasCodeExamples, }, weight: 1.0, lastUpdated: new Date().toISOString(), }; } /** * Link code files to documentation sections */ export async function linkCodeToDocs( codeFiles: GraphNode[], docSections: GraphNode[], ): Promise<GraphEdge[]> { const kg = await getKnowledgeGraph(); const edges: GraphEdge[] = []; for (const docSection of docSections) { const { referencedCodeFiles, referencedFunctions, referencedClasses } = docSection.properties; // Create "references" edges: documentation_section -> code_file for (const codeFile of codeFiles) { const codeFilePath = codeFile.properties.path; // Check if doc references this code file if ( referencedCodeFiles.includes(codeFilePath) || referencedFunctions.some((fn: string) => codeFile.properties.functions.includes(fn), ) || referencedClasses.some((cls: string) => codeFile.properties.classes.includes(cls), ) ) { const edge = kg.addEdge({ source: docSection.id, target: codeFile.id, type: "references", weight: 1.0, confidence: 0.8, properties: { referenceType: determineReferenceType( docSection.properties.category, ), isAccurate: true, // Assume accurate until drift detected lastVerified: new Date().toISOString(), }, }); edges.push(edge); // Create reverse "documents" edge: code_file -> documentation_section const documentsEdge = kg.addEdge({ source: codeFile.id, target: docSection.id, type: "documents", weight: 1.0, confidence: 0.8, properties: { coverage: determineCoverage( referencedFunctions.length, codeFile.properties.functions.length, ), lastVerified: new Date().toISOString(), quality: "medium", }, }); edges.push(documentsEdge); } } } // Detect outdated documentation for (const docSection of docSections) { for (const edge of edges) { if (edge.source === docSection.id && edge.type === "references") { const codeFile = codeFiles.find((cf) => cf.id === edge.target); if (codeFile) { // Check if code has changed since doc was last updated const docUpdated = new Date(docSection.properties.lastUpdated); const codeUpdated = new Date(codeFile.properties.lastModified); if (codeUpdated > docUpdated) { // Simple heuristic for change type - could be enhanced with drift detector const changeType = "modification"; // AST-based diff available via DriftDetector const outdatedEdge = kg.addEdge({ source: docSection.id, target: codeFile.id, type: "outdated_for", weight: 0.5, confidence: 0.9, properties: { detectedAt: new Date().toISOString(), changeType, // Enhanced from "unknown" - can integrate DriftDetector for precise diff severity: "medium", autoFixable: false, }, }); edges.push(outdatedEdge); } } } } } return edges; } // ============================================================================ // Helper Functions // ============================================================================ async function walkSourceFiles( dir: string, baseDir: string, files: string[] = [], ): Promise<string[]> { try { const entries = await fs.readdir(dir, { withFileTypes: true }); for (const entry of entries) { const fullPath = path.join(dir, entry.name); if ( entry.isDirectory() && !entry.name.startsWith(".") && entry.name !== "node_modules" && entry.name !== "dist" && entry.name !== "build" ) { await walkSourceFiles(fullPath, baseDir, files); } else if (entry.isFile()) { const ext = path.extname(entry.name); if ( [ ".js", ".ts", ".jsx", ".tsx", ".py", ".rb", ".go", ".java", ".rs", ".c", ".cpp", ".cs", ].includes(ext) ) { files.push(fullPath); } } } } catch { // Directory doesn't exist or can't be read } return files; } function getLanguageFromExtension(ext: string): string | null { const languageMap: Record<string, string> = { ".js": "javascript", ".jsx": "javascript", ".ts": "typescript", ".tsx": "typescript", ".py": "python", ".rb": "ruby", ".go": "go", ".java": "java", ".rs": "rust", ".c": "c", ".cpp": "cpp", ".cs": "csharp", ".php": "php", ".swift": "swift", ".kt": "kotlin", ".scala": "scala", }; return languageMap[ext] || null; } /** * Extract code structure using AST parsing (replaces regex-based extraction) * Addresses TODO: Use proper AST parsing instead of basic regex */ async function extractCodeStructure( filePath: string, content: string, language: string, ): Promise<{ functions: string[]; classes: string[]; imports: string[]; exports: string[]; }> { const functions: string[] = []; const classes: string[] = []; const imports: string[] = []; const exports: string[] = []; // Use AST analyzer for TypeScript/JavaScript files if (language === "typescript" || language === "javascript") { try { const analyzer = new ASTAnalyzer(); await analyzer.initialize(); const astResult = await analyzer.analyzeFile(filePath); if (astResult) { // Extract function names functions.push(...astResult.functions.map((f) => f.name)); // Extract class names classes.push(...astResult.classes.map((c) => c.name)); // Note: AST analyzer doesn't currently track dependencies per function/class // We'll extract imports from the code using regex as fallback const importMatches = content.matchAll( /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g, ); for (const match of importMatches) { imports.push(match[1]); } // Extract exports (check isExported flag) const exportedFunctions = astResult.functions .filter((f) => f.isExported) .map((f) => f.name); const exportedClasses = astResult.classes .filter((c) => c.isExported) .map((c) => c.name); exports.push(...exportedFunctions, ...exportedClasses); return { functions, classes, imports, exports }; } } catch (error) { console.warn( `AST parsing failed for ${filePath}, falling back to regex:`, error, ); // Fall through to regex-based extraction } } // Fallback: regex-based extraction for non-TS/JS or if AST fails if (language === "typescript" || language === "javascript") { // Extract function declarations const functionMatches = content.matchAll( /(?:export\s+)?(?:async\s+)?function\s+(\w+)/g, ); for (const match of functionMatches) { functions.push(match[1]); } // Extract arrow functions assigned to const/let const arrowFunctionMatches = content.matchAll( /(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/g, ); for (const match of arrowFunctionMatches) { functions.push(match[1]); } // Extract class declarations const classMatches = content.matchAll(/(?:export\s+)?class\s+(\w+)/g); for (const match of classMatches) { classes.push(match[1]); } // Extract imports const importMatches = content.matchAll( /import\s+.*?\s+from\s+['"]([^'"]+)['"]/g, ); for (const match of importMatches) { imports.push(match[1]); } // Extract exports const exportMatches = content.matchAll( /export\s+(?:function|class|const|let|var)\s+(\w+)/g, ); for (const match of exportMatches) { exports.push(match[1]); } } else if (language === "python") { const functionMatches = content.matchAll(/def\s+(\w+)/g); for (const match of functionMatches) { functions.push(match[1]); } const classMatches = content.matchAll(/class\s+(\w+)/g); for (const match of classMatches) { classes.push(match[1]); } // Extract Python imports const importMatches = content.matchAll( /(?:from\s+(\S+)\s+)?import\s+([^\n]+)/g, ); for (const match of importMatches) { imports.push(match[1] || match[2].trim()); } } return { functions, classes, imports, exports }; } function estimateComplexity( linesOfCode: number, functionCount: number, ): "low" | "medium" | "high" { const score = linesOfCode + functionCount * 10; if (score < 100) return "low"; if (score < 300) return "medium"; return "high"; } function extractCodeReferences(content: string): string[] { const references: string[] = []; // Extract file paths from markdown links and code blocks const filePathMatches = content.matchAll(/`([^`]+\.(ts|js|py|rb|go|java))`/g); for (const match of filePathMatches) { references.push(match[1]); } return references; } function extractFunctionReferences(content: string): string[] { const functions: string[] = []; // Extract function names from code blocks and inline code const functionMatches = content.matchAll(/`(\w+)\(\)`/g); for (const match of functionMatches) { functions.push(match[1]); } return functions; } function extractClassReferences(content: string): string[] { const classes: string[] = []; // Extract class names from code blocks (usually PascalCase) const classMatches = content.matchAll(/`([A-Z][a-zA-Z0-9]+)`/g); for (const match of classMatches) { if (!/\(\)$/.test(match[1])) { // Not a function call classes.push(match[1]); } } return classes; } function determineReferenceType( category?: "tutorial" | "how-to" | "reference" | "explanation", ): "example" | "api-reference" | "tutorial" | "explanation" { switch (category) { case "tutorial": return "tutorial"; case "reference": return "api-reference"; case "how-to": return "example"; case "explanation": return "explanation"; default: return "api-reference"; } } function determineCoverage( referencedCount: number, totalCount: number, ): "partial" | "complete" | "comprehensive" { if (totalCount === 0) return "partial"; const ratio = referencedCount / totalCount; if (ratio >= 0.8) return "comprehensive"; if (ratio >= 0.5) return "complete"; return "partial"; }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/documcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server