Doclea MCP

Official

Overview Schema Related Servers Score Discussions

incremental-scanner.ts•11.1 KiB

import { createHash } from "node:crypto"; import type { CodeChunk } from "../../chunking/code"; import { chunkCode } from "../../chunking/code"; import type { CodeGraphStorage } from "../../database/code-graph"; import type { EmbeddingClient } from "../../embeddings/provider"; import type { VectorStore } from "../../vectors/interface"; import type { ChangeDetector } from "./change-detector"; import { GraphExtractor } from "./graph-extractor"; import type { CodeSummarizer } from "./summarizer"; import type { CodeNode, FileChange, IncrementalScanResult, ScanOptions, ScanStats, } from "./types"; /** * Incremental code scanner that updates both RAG and KAG layers */ export class IncrementalScanner { private graphExtractor: GraphExtractor; constructor( private changeDetector: ChangeDetector, private codeGraph: CodeGraphStorage, private summarizer?: CodeSummarizer, // RAG layer dependencies (optional - scanner works without them for KAG-only mode) private vectorStore?: VectorStore, private embeddings?: EmbeddingClient, ) { this.graphExtractor = new GraphExtractor(); } /** * Scan incrementally, only processing changed files */ async scanIncremental( files: Array<{ path: string; content: string }>, options: ScanOptions = {}, ): Promise<IncrementalScanResult> { // 1. Detect changes const changes = await this.changeDetector.detectChanges(files, { detectDeleted: options.detectDeleted ?? true, }); console.log(`Detected ${changes.length} changes:`, { added: changes.filter((c) => c.type === "added").length, modified: changes.filter((c) => c.type === "modified").length, deleted: changes.filter((c) => c.type === "deleted").length, }); // 2. Initialize stats const stats: ScanStats = { filesScanned: 0, nodesAdded: 0, nodesUpdated: 0, nodesDeleted: 0, edgesAdded: 0, edgesDeleted: 0, documentsUpdated: 0, embeddingsRegenerated: 0, }; // 3. Process each change type for (const change of changes.filter((c) => c.type === "added")) { await this.processAddedFile(change, files, stats); } for (const change of changes.filter((c) => c.type === "modified")) { await this.processModifiedFile(change, files, stats); } for (const change of changes.filter((c) => c.type === "deleted")) { await this.processDeletedFile(change, stats); } // 4. Flush remaining embeddings if (this.pendingEmbeddings.length > 0) { await this.flushEmbeddings(stats); } // 5. Update hashes await this.changeDetector.updateHashes(changes); return { changes, stats }; } /** * Flush pending embeddings in a batch */ private async flushEmbeddings(stats: ScanStats): Promise<void> { if ( !this.vectorStore || !this.embeddings || this.pendingEmbeddings.length === 0 ) { return; } console.log( `[scan] Flushing ${this.pendingEmbeddings.length} embeddings in batch`, ); try { const batch = this.pendingEmbeddings; // Generate embedding texts const embeddingTexts = batch.map(({ node, chunk }) => { const codeSnippet = chunk.content.slice(0, 500); return [ node.name, node.signature || "", node.summary || "", codeSnippet, ] .filter(Boolean) .join("\n"); }); let vectors: number[][] = []; try { vectors = await this.embeddings.embedBatch(embeddingTexts); } catch (error) { console.warn( "[scan] Batch embedding failed, falling back to per-item:", error, ); vectors = await Promise.all( embeddingTexts.map(async (text, index) => { try { return await this.embeddings?.embed(text); } catch (itemError) { const nodeId = batch[index]?.node.id ?? `index_${index}`; console.warn(`[scan] Failed to embed node ${nodeId}:`, itemError); return []; } }), ); } // Upsert all vectors for (let i = 0; i < batch.length; i++) { const { node, chunk, filePath } = batch[i]; const vector = vectors[i]; if (!vector || vector.length === 0) { continue; } const payload = { memoryId: node.id, type: node.type, title: node.name, tags: [ chunk.metadata.language, ...(node.metadata?.isExported ? ["exported"] : []), ], relatedFiles: [filePath], importance: node.type === "function" || node.type === "class" ? 0.7 : 0.5, signature: node.signature, startLine: node.startLine, endLine: node.endLine, }; const vectorId = this.generateVectorId(filePath, node.name); await this.vectorStore.upsert(vectorId, vector, payload); stats.embeddingsRegenerated++; } // Clear pending this.pendingEmbeddings = []; } catch (error) { console.warn("[scan] Failed to flush embeddings:", error); // Clear pending to prevent infinite retry this.pendingEmbeddings = []; } } // Pending embeddings for batch processing private pendingEmbeddings: Array<{ node: CodeNode; chunk: CodeChunk; filePath: string; }> = []; private static readonly EMBEDDING_BATCH_SIZE = 32; /** * Process a newly added file */ private async processAddedFile( change: FileChange, allFiles: Array<{ path: string; content: string }>, stats: ScanStats, ): Promise<void> { const file = allFiles.find((f) => f.path === change.path); if (!file) return; try { // Chunk the file const chunks = await this.chunkFile(file.path, file.content); // Use file-level extraction to get module nodes and import edges const { nodes, edges } = await this.graphExtractor.extractFromFile( file.path, chunks, file.content, ); // Generate summaries and upsert nodes for (const node of nodes) { // Generate summary for function/class nodes if ( this.summarizer && (node.type === "function" || node.type === "class") ) { try { // Find the corresponding chunk for this node const chunk = chunks.find( (c) => c.metadata.name === node.name && c.metadata.startLine === node.startLine, ); if (chunk) { const summaryResult = await this.summarizer.summarize(chunk); node.summary = summaryResult.summary; } } catch (error) { console.warn("Failed to generate summary:", error); } } // KAG: Upsert node await this.codeGraph.upsertNode(node); stats.nodesAdded++; // RAG: Queue embedding for batch processing if ( this.vectorStore && this.embeddings && (node.type === "function" || node.type === "class") ) { // Find the corresponding chunk for this node const chunk = chunks.find( (c) => c.metadata.name === node.name || (c.metadata.isFunction && node.type === "function" && c.metadata.name === node.name) || (c.metadata.isClass && node.type === "class" && c.metadata.name === node.name), ) || chunks.find( (c) => (c.metadata.isFunction && node.type === "function") || (c.metadata.isClass && node.type === "class"), ); if (chunk) { this.pendingEmbeddings.push({ node, chunk, filePath: file.path }); stats.documentsUpdated++; if ( this.pendingEmbeddings.length >= IncrementalScanner.EMBEDDING_BATCH_SIZE ) { await this.flushEmbeddings(stats); } } } } // Upsert edges for (const edge of edges) { await this.codeGraph.upsertEdge(edge); stats.edgesAdded++; } stats.filesScanned++; } catch (error) { console.warn(`Failed to process added file ${file.path}:`, error); } } /** * Process a modified file */ private async processModifiedFile( change: FileChange, allFiles: Array<{ path: string; content: string }>, stats: ScanStats, ): Promise<void> { // 1. Delete old data for this file await this.deleteFileData(change.path, stats); // 2. Re-add with new content await this.processAddedFile(change, allFiles, stats); } /** * Process a deleted file */ private async processDeletedFile( change: FileChange, stats: ScanStats, ): Promise<void> { await this.deleteFileData(change.path, stats); } /** * Delete all data (RAG + KAG) for a file */ private async deleteFileData( filePath: string, stats: ScanStats, ): Promise<void> { // KAG: Delete nodes from this file const nodes = await this.codeGraph.getNodesByPath(filePath); for (const node of nodes) { // RAG: Delete vector embedding for this node if ( this.vectorStore && (node.type === "function" || node.type === "class") ) { try { const vectorId = this.generateVectorId(filePath, node.name); const deleted = await this.vectorStore.delete(vectorId); if (deleted) { stats.embeddingsRegenerated++; } } catch (error) { console.warn(`Failed to delete embedding for ${node.name}:`, error); } } // Delete edges connected to this node const edgesDeleted = await this.codeGraph.deleteEdgesByNode(node.id); stats.edgesDeleted += edgesDeleted; // Delete the node await this.codeGraph.deleteNode(node.id); stats.nodesDeleted++; } } /** * Chunk a file into code chunks */ private async chunkFile( filePath: string, content: string, ): Promise<CodeChunk[]> { // Detect language from file extension const ext = filePath.split(".").pop()?.toLowerCase(); if (!ext) return []; const languageMap: Record<string, any> = { ts: "typescript", tsx: "tsx", js: "javascript", jsx: "jsx", py: "python", go: "go", rs: "rust", }; const language = languageMap[ext]; if (!language) { console.warn(`Unsupported file extension: ${ext}`); return []; } try { return await chunkCode(content, language, { maxTokens: 512, includeImports: false, splitLargeFunctions: true, }); } catch (error) { console.warn(`Failed to chunk file ${filePath}:`, error); return []; } } /** * Generate deterministic vector ID based on file path and node name */ private generateVectorId(filePath: string, nodeName: string): string { const hash = createHash("sha256") .update(`${filePath}:${nodeName}`) .digest("hex") .slice(0, 16); return `code_${hash}`; } }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/docleaai/doclea-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

incremental-scanner.ts•11.1 KiB