Doclea MCP

Official

Overview Schema Related Servers Score Discussions

summarizer.test.ts•21.6 KiB

import { Database } from "bun:sqlite";
import { afterEach, beforeEach, describe, expect, it } from "bun:test";
import type {
  CodeChunk,
  CodeChunkMetadata,
  SupportedLanguage,
} from "../../chunking/code";
import { batchUpdateSummaries } from "../../tools/code/batch-update-summaries";
import { getUnsummarized } from "../../tools/code/get-unsummarized";
import { CodeSummarizer } from "../../tools/code/summarizer";

/**
 * Create CodeChunkMetadata with default values for tests.
 *
 * `nodeType` is derived from the `isFunction` / `isClass` overrides; every
 * other default can be replaced because `overrides` is spread last.
 *
 * @param overrides Fields to set on the metadata; `name` and `language` are required.
 * @returns A complete metadata object suitable for a test chunk.
 */
function createTestMetadata(
  overrides: Partial<CodeChunkMetadata> & {
    name: string | null;
    language: SupportedLanguage;
  },
): CodeChunkMetadata {
  return {
    startLine: 1,
    endLine: 10,
    startByte: 0,
    endByte: 100,
    nodeType: overrides.isFunction
      ? "function_declaration"
      : overrides.isClass
        ? "class_declaration"
        : "unknown",
    parentName: null,
    isFunction: false,
    isClass: false,
    isImport: false,
    ...overrides,
  };
}

/** Schema of the `code_nodes` table used by the database-backed suites. */
const CODE_NODES_SCHEMA = `
  CREATE TABLE IF NOT EXISTS code_nodes (
    id TEXT PRIMARY KEY,
    type TEXT NOT NULL,
    name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    start_line INTEGER,
    end_line INTEGER,
    signature TEXT,
    summary TEXT,
    metadata TEXT DEFAULT '{}',
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
  )
`;

/** Shape of a row seeded into `code_nodes` by the tests. */
interface TestCodeNode {
  id: string;
  type: string;
  name: string;
  filePath: string;
  startLine?: number;
  endLine?: number;
  summary: string | null;
  metadata: Record<string, unknown>;
}

/** Open a fresh in-memory database containing an empty `code_nodes` table. */
function createTestDatabase(): Database {
  const db = new Database(":memory:");
  db.run(CODE_NODES_SCHEMA);
  return db;
}

/** Current time as whole seconds since the Unix epoch. */
function nowInSeconds(): number {
  return Math.floor(Date.now() / 1000);
}

/**
 * Insert one node into `code_nodes`.
 *
 * @param db Database that already contains the `code_nodes` table.
 * @param node Row to insert; omitted line numbers are stored as NULL.
 * @param now Timestamp written to both `created_at` and `updated_at`.
 */
function insertTestNode(db: Database, node: TestCodeNode, now: number): void {
  db.run(
    `INSERT INTO code_nodes
       (id, type, name, file_path, start_line, end_line, summary, metadata, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    [
      node.id,
      node.type,
      node.name,
      node.filePath,
      node.startLine ?? null,
      node.endLine ?? null,
      node.summary,
      JSON.stringify(node.metadata),
      now,
      now,
    ],
  );
}

describe("CodeSummarizer", () => {
  describe("heuristicSummary", () => {
    it("should extract JSDoc for TypeScript", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `/**
 * Process user data and return formatted result
 * @param user The user object
 */
export function processUser(user: User): FormattedUser {
  return { ...user, formatted: true };
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "processUser",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toContain("Process user data");
      expect(result.generatedBy).toBe("docstring");
      expect(result.confidence).toBe(0.9);
      expect(result.needsAiSummary).toBe(false);
    });

    it("should extract Python docstrings", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `def process_data(data):
    """Process incoming data and return transformed result."""
    return transform(data)`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "process_data",
          language: "python",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toContain("Process incoming data");
      expect(result.generatedBy).toBe("docstring");
      expect(result.confidence).toBe(0.9);
    });

    it("should extract Go doc comments", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `// ProcessData handles the incoming data stream
// and returns the transformed result
func ProcessData(data []byte) ([]byte, error) {
    return transform(data)
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "ProcessData",
          language: "go",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toContain("ProcessData handles");
      expect(result.generatedBy).toBe("docstring");
      expect(result.confidence).toBe(0.9);
    });

    it("should extract Rust doc comments", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `/// Process data and return transformed result
/// Takes a byte slice and returns a new vector
pub fn process_data(data: &[u8]) -> Vec<u8> {
    data.to_vec()
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "process_data",
          language: "rust",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toContain("Process data");
      expect(result.generatedBy).toBe("docstring");
      expect(result.confidence).toBe(0.9);
    });

    it("should fall back to first-line comment", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `// Helper to format dates consistently
function formatDate(date: Date): string {
  return date.toISOString();
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "formatDate",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toContain("Helper to format dates");
      expect(result.generatedBy).toBe("comment");
      expect(result.confidence).toBe(0.7);
    });

    it("should fall back to signature-based summary", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `function processData(data: any) {
  return data;
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "processData",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toBe("Function processData");
      expect(result.generatedBy).toBe("signature");
      expect(result.confidence).toBe(0.5);
    });

    it("should handle class types", async () => {
      const summarizer = new CodeSummarizer({ enabled: true });
      const chunk: CodeChunk = {
        content: `class UserService {
  getUser() {}
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "UserService",
          language: "typescript",
          isFunction: false,
          isClass: true,
          isImport: false,
        }),
      };

      const result = await summarizer.heuristicSummary(chunk);

      expect(result.summary).toBe("Class UserService");
      expect(result.generatedBy).toBe("signature");
    });

    it("should return empty when disabled", async () => {
      const summarizer = new CodeSummarizer({ enabled: false });
      const chunk: CodeChunk = {
        content: `/** Some docs */
function test() {}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "test",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      // Goes through summarize() rather than heuristicSummary().
      const result = await summarizer.summarize(chunk);

      expect(result.summary).toBe("");
      expect(result.confidence).toBe(0);
    });
  });

  describe("hybrid mode", () => {
    it("should flag low-confidence summaries for AI", async () => {
      const summarizer = new CodeSummarizer({
        enabled: true,
        strategy: "hybrid",
        minConfidenceThreshold: 0.6,
      });
      const chunk: CodeChunk = {
        content: `function doSomething() {
  return true;
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "doSomething",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.summarize(chunk);

      expect(result.generatedBy).toBe("signature");
      expect(result.confidence).toBe(0.5);
      expect(result.needsAiSummary).toBe(true);
    });

    it("should not flag high-confidence summaries", async () => {
      const summarizer = new CodeSummarizer({
        enabled: true,
        strategy: "hybrid",
        minConfidenceThreshold: 0.6,
      });
      const chunk: CodeChunk = {
        content: `/**
 * Validates user input for correctness
 */
function validateInput(input: string): boolean {
  return input.length > 0;
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "validateInput",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.summarize(chunk);

      expect(result.generatedBy).toBe("docstring");
      expect(result.confidence).toBe(0.9);
      expect(result.needsAiSummary).toBe(false);
    });

    it("should flag exported functions when preferAiForExported is true", async () => {
      const summarizer = new CodeSummarizer({
        enabled: true,
        strategy: "hybrid",
        preferAiForExported: true,
        minConfidenceThreshold: 0.6,
      });
      const chunk: CodeChunk = {
        content: `/**
 * Core API function
 */
export function apiHandler() {}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "apiHandler",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
          isExported: false, // Will be detected from content
        }),
      };

      const result = await summarizer.summarize(chunk);

      // Even with good docs, exported functions should be flagged
      expect(result.needsAiSummary).toBe(true);
    });

    it("should not flag in heuristic-only mode", async () => {
      const summarizer = new CodeSummarizer({
        enabled: true,
        strategy: "heuristic",
        minConfidenceThreshold: 0.6,
      });
      const chunk: CodeChunk = {
        content: `function doSomething() {
  return true;
}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "doSomething",
          language: "typescript",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.summarize(chunk);

      expect(result.needsAiSummary).toBe(false);
    });

    it("should detect public keyword for exported status", async () => {
      const summarizer = new CodeSummarizer({
        enabled: true,
        strategy: "hybrid",
        preferAiForExported: true,
        minConfidenceThreshold: 0.6,
      });
      const chunk: CodeChunk = {
        content: `/**
 * Gets user data
 */
public class UserService {}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "UserService",
          // Using typescript here as the test focuses on public keyword
          // detection, not language-specific parsing
          language: "typescript",
          isFunction: false,
          isClass: true,
          isImport: false,
        }),
      };

      const result = await summarizer.summarize(chunk);

      expect(result.needsAiSummary).toBe(true);
    });

    it("should detect pub keyword for Rust exported status", async () => {
      const summarizer = new CodeSummarizer({
        enabled: true,
        strategy: "hybrid",
        preferAiForExported: true,
        minConfidenceThreshold: 0.6,
      });
      const chunk: CodeChunk = {
        content: `/// Process data
pub fn process() {}`,
        tokenCount: 50,
        metadata: createTestMetadata({
          name: "process",
          language: "rust",
          isFunction: true,
          isClass: false,
          isImport: false,
        }),
      };

      const result = await summarizer.summarize(chunk);

      expect(result.needsAiSummary).toBe(true);
    });
  });
});

describe("getUnsummarized", () => {
  let db: Database;

  beforeEach(() => {
    db = createTestDatabase();
    const now = nowInSeconds();

    // Node with good summary
    insertTestNode(
      db,
      {
        id: "src/utils.ts:function:formatDate",
        type: "function",
        name: "formatDate",
        filePath: "src/utils.ts",
        summary: "Formats a date to ISO string",
        metadata: { summaryGeneratedBy: "ai", summaryConfidence: 0.95 },
      },
      now,
    );

    // Node needing AI (low confidence)
    insertTestNode(
      db,
      {
        id: "src/utils.ts:function:doSomething",
        type: "function",
        name: "doSomething",
        filePath: "src/utils.ts",
        startLine: 1,
        endLine: 10,
        summary: "Function doSomething",
        metadata: {
          summaryGeneratedBy: "signature",
          summaryConfidence: 0.5,
          needsAiSummary: 1,
        },
      },
      now,
    );

    // Node with no summary
    insertTestNode(
      db,
      {
        id: "src/api.ts:function:handleRequest",
        type: "function",
        name: "handleRequest",
        filePath: "src/api.ts",
        startLine: 1,
        endLine: 20,
        summary: null,
        metadata: {},
      },
      now,
    );

    // Node with needsAiSummary flag
    insertTestNode(
      db,
      {
        id: "src/api.ts:class:ApiHandler",
        type: "class",
        name: "ApiHandler",
        filePath: "src/api.ts",
        startLine: 21,
        endLine: 50,
        summary: "Class ApiHandler",
        metadata: {
          summaryGeneratedBy: "signature",
          summaryConfidence: 0.5,
          needsAiSummary: 1,
        },
      },
      now,
    );
  });

  afterEach(() => {
    db.close();
  });

  it("should return nodes needing summaries", async () => {
    const result = await getUnsummarized(
      { limit: 10, includeCode: false, confidenceThreshold: 0.6 },
      db,
    );

    expect(result.total).toBe(3);
    expect(result.nodes.length).toBe(3);
  });

  it("should filter by file path", async () => {
    const result = await getUnsummarized(
      {
        filePath: "src/api.ts",
        limit: 10,
        includeCode: false,
        confidenceThreshold: 0.6,
      },
      db,
    );

    expect(result.total).toBe(2);
    expect(result.nodes.every((n) => n.filePath === "src/api.ts")).toBe(true);
  });

  it("should respect limit parameter", async () => {
    const result = await getUnsummarized(
      { limit: 2, includeCode: false, confidenceThreshold: 0.6 },
      db,
    );

    // `nodes` is capped by the limit while `total` still counts every match.
    expect(result.nodes.length).toBe(2);
    expect(result.total).toBe(3);
  });

  it("should respect confidence threshold", async () => {
    const result = await getUnsummarized(
      { limit: 10, includeCode: false, confidenceThreshold: 0.4 },
      db,
    );

    // Nodes returned are expected to either:
    // - have confidence < threshold, OR
    // - have no summary, OR
    // - have the needsAiSummary flag set
    expect(result.nodes.length).toBeGreaterThan(0);

    // At least one returned node must be below the threshold or lack a
    // summary; the seeded handleRequest node has no summary.
    const lowConfidenceOrNoSummary = result.nodes.filter(
      (n) => n.confidence < 0.4 || !n.currentSummary,
    );
    expect(lowConfidenceOrNoSummary.length).toBeGreaterThanOrEqual(1);
  });

  it("should include metadata in results", async () => {
    const result = await getUnsummarized(
      { limit: 10, includeCode: false, confidenceThreshold: 0.6 },
      db,
    );

    const node = result.nodes.find((n) => n.name === "doSomething");

    expect(node).toBeDefined();
    expect(node?.confidence).toBe(0.5);
    expect(node?.currentSummary).toBe("Function doSomething");
  });

  it("should return empty when no nodes need summaries", async () => {
    // Give every node without a summary a good one...
    db.run(
      `UPDATE code_nodes
         SET summary = 'Good summary', metadata = ?
       WHERE summary IS NULL OR summary = ''`,
      [JSON.stringify({ summaryGeneratedBy: "ai", summaryConfidence: 0.95 })],
    );
    // ...then mark all nodes as high confidence with the AI flag cleared.
    db.run(`UPDATE code_nodes SET metadata = ?`, [
      JSON.stringify({
        summaryGeneratedBy: "ai",
        summaryConfidence: 0.95,
        needsAiSummary: 0,
      }),
    ]);

    const result = await getUnsummarized(
      { limit: 10, includeCode: false, confidenceThreshold: 0.6 },
      db,
    );

    expect(result.total).toBe(0);
    expect(result.message).toContain("No nodes need AI summaries");
  });
});

describe("batchUpdateSummaries", () => {
  let db: Database;

  beforeEach(() => {
    db = createTestDatabase();
    const now = nowInSeconds();

    // Create some nodes that need summaries
    insertTestNode(
      db,
      {
        id: "src/utils.ts:function:doSomething",
        type: "function",
        name: "doSomething",
        filePath: "src/utils.ts",
        summary: "Function doSomething",
        metadata: {
          summaryGeneratedBy: "signature",
          summaryConfidence: 0.5,
          needsAiSummary: 1,
        },
      },
      now,
    );
    insertTestNode(
      db,
      {
        id: "src/api.ts:function:handleRequest",
        type: "function",
        name: "handleRequest",
        filePath: "src/api.ts",
        summary: null,
        metadata: {},
      },
      now,
    );
  });

  afterEach(() => {
    db.close();
  });

  it("should update summaries for multiple nodes", async () => {
    const result = await batchUpdateSummaries(
      {
        summaries: [
          {
            nodeId: "src/utils.ts:function:doSomething",
            summary: "Performs an important operation on the data",
          },
          {
            nodeId: "src/api.ts:function:handleRequest",
            summary:
              "Handles incoming HTTP requests and routes them appropriately",
          },
        ],
      },
      db,
    );

    expect(result.updated).toBe(2);
    expect(result.failed).toBe(0);
    expect(result.message).toContain("Successfully updated 2");
  });

  it("should set metadata correctly", async () => {
    await batchUpdateSummaries(
      {
        summaries: [
          {
            nodeId: "src/utils.ts:function:doSomething",
            summary: "Performs an important operation",
          },
        ],
      },
      db,
    );

    const row = db
      .query("SELECT summary, metadata FROM code_nodes WHERE id = ?")
      .get("src/utils.ts:function:doSomething") as {
      summary: string;
      metadata: string;
    };

    expect(row.summary).toBe("Performs an important operation");

    const metadata = JSON.parse(row.metadata) as Record<string, unknown>;
    expect(metadata.summaryGeneratedBy).toBe("ai");
    expect(metadata.summaryConfidence).toBe(0.95);
    expect(metadata.needsAiSummary).toBe(0);
  });

  it("should handle nonexistent nodes gracefully", async () => {
    const result = await batchUpdateSummaries(
      {
        summaries: [
          {
            nodeId: "src/utils.ts:function:doSomething",
            summary: "Valid update",
          },
          {
            nodeId: "nonexistent:function:fake",
            summary: "This should fail",
          },
        ],
      },
      db,
    );

    expect(result.updated).toBe(1);
    expect(result.failed).toBe(1);
    expect(result.message).toContain("1 failed");
  });

  it("should return all failed when no nodes match", async () => {
    const result = await batchUpdateSummaries(
      {
        summaries: [
          {
            nodeId: "nonexistent:function:fake1",
            summary: "This should fail",
          },
          {
            nodeId: "nonexistent:function:fake2",
            summary: "This should also fail",
          },
        ],
      },
      db,
    );

    expect(result.updated).toBe(0);
    expect(result.failed).toBe(2);
  });

  it("should use transactions for efficiency", async () => {
    // Create many nodes
    const now = nowInSeconds();
    for (let i = 0; i < 20; i++) {
      insertTestNode(
        db,
        {
          id: `src/batch.ts:function:func${i}`,
          type: "function",
          name: `func${i}`,
          filePath: "src/batch.ts",
          summary: null,
          metadata: {},
        },
        now,
      );
    }

    const summaries = Array.from({ length: 20 }, (_, i) => ({
      nodeId: `src/batch.ts:function:func${i}`,
      summary: `Summary for function ${i}`,
    }));

    const result = await batchUpdateSummaries({ summaries }, db);

    expect(result.updated).toBe(20);
    expect(result.failed).toBe(0);

    // Verify all updates were applied
    const count = db
      .query(
        "SELECT COUNT(*) as count FROM code_nodes WHERE summary LIKE 'Summary for function%'",
      )
      .get() as { count: number };
    expect(count.count).toBe(20);
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/docleaai/doclea-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

summarizer.test.ts•21.6 KiB