Skip to main content
Glama

AutoDev Codebase MCP Server

by anrgct
parser.spec.ts (20.7 kB)
// npx vitest services/code-index/processors/__tests__/parser.spec.ts

import { vi, describe, it, expect, beforeEach } from "vitest"
import { CodeParser, codeParser } from "../parser"
import Parser from "web-tree-sitter"
import { loadRequiredLanguageParsers } from "../../../tree-sitter/languageParser"
import { readFile } from "fs/promises"

// Override Jest-based fs/promises mock with vitest-compatible version
vi.mock("fs/promises", () => ({
  default: {
    readFile: vi.fn(),
    writeFile: vi.fn(),
    mkdir: vi.fn(),
    access: vi.fn(),
    rename: vi.fn(),
    constants: {},
  },
  readFile: vi.fn(),
  writeFile: vi.fn(),
  mkdir: vi.fn(),
  access: vi.fn(),
  rename: vi.fn(),
}))

vi.mock("../../../tree-sitter/languageParser")

/** Resolved value type of the (auto-mocked) parser loader. */
type LoadedParsers = Awaited<ReturnType<typeof loadRequiredLanguageParsers>>

/** Typed handle on the auto-mocked loader, mirroring how `readFile` is accessed via `vi.mocked`. */
const mockedLoadParsers = vi.mocked(loadRequiredLanguageParsers)

/** Presents a partial test double as the loader's resolved value. */
const asLoadedParsers = (value: unknown) => value as LoadedParsers

/**
 * Minimal stand-in for a loaded "js" language: `parse` echoes the content back as a
 * childless `program` root node and the query never yields captures.
 */
const mockLanguageParser = {
  js: {
    parser: {
      parse: vi.fn((content: string) => ({
        rootNode: {
          text: content,
          startPosition: { row: 0 },
          endPosition: { row: content.split("\n").length - 1 },
          children: [],
          type: "program",
        },
      })),
    },
    query: {
      captures: vi.fn().mockReturnValue([]),
    },
  },
}

describe("CodeParser", () => {
  let parser: CodeParser

  /**
   * Registers a fake language on `parser` whose parse result is a root node spanning
   * rows 0..`endRow` with text `source`, and whose query returns `captures` verbatim.
   *
   * @param languageKey key under `loadedParsers` (e.g. "js", "json")
   * @param source text exposed as the root node's `text`
   * @param endRow zero-based end row of the root node
   * @param captures capture objects returned by `query.captures`
   */
  const installMockLanguage = (languageKey: string, source: string, endRow: number, captures: unknown[]) => {
    const mockTree = {
      rootNode: {
        text: source,
        startPosition: { row: 0 },
        endPosition: { row: endRow },
      },
    }
    const mockLanguage = {
      parser: { parse: vi.fn().mockReturnValue(mockTree) },
      query: { captures: vi.fn().mockReturnValue(captures) },
    }
    parser["loadedParsers"][languageKey] = mockLanguage as any
  }

  beforeEach(() => {
    vi.clearAllMocks()
    parser = new CodeParser()
    mockedLoadParsers.mockResolvedValue(asLoadedParsers(mockLanguageParser))
    // Set up default fs.readFile mock return value
    vi.mocked(readFile).mockResolvedValue("// default test content")
  })

  describe("parseFile", () => {
    it("should return empty array for unsupported extensions", async () => {
      const result = await parser.parseFile("test.unsupported")
      expect(result).toEqual([])
    })

    it("should use provided content instead of reading file when options.content is provided", async () => {
      const content = `/* This is a long test content string that exceeds 100 characters to properly test the parser's behavior with large inputs. It includes multiple lines and various JavaScript constructs to simulate real-world code. 
const a = 1; const b = 2; function test() { return a + b; } class Example { constructor() { this.value = 42; } } // More comments to pad the length to ensure we hit the minimum character requirement */`

      const result = await parser.parseFile("test.js", { content })

      expect(vi.mocked(readFile)).not.toHaveBeenCalled()
      expect(result.length).toBeGreaterThan(0)
    })

    it("should read file when no content is provided", async () => {
      const testContent = `/* This is a long test content string that exceeds 100 characters to properly test file reading behavior. It includes multiple lines and various JavaScript constructs to simulate real-world code. const x = 10; const y = 20; function calculate() { return x * y; } class Calculator { constructor() { this.history = []; } add(a, b) { return a + b; } } // More comments to pad the length to ensure we hit the minimum character requirement */`

      // Reset the mock and set new return value
      vi.mocked(readFile).mockReset()
      vi.mocked(readFile).mockResolvedValue(testContent)

      const result = await parser.parseFile("test.js")

      expect(vi.mocked(readFile)).toHaveBeenCalledWith("test.js", "utf8")
      expect(result.length).toBeGreaterThan(0)
    })

    it("should handle file read errors gracefully", async () => {
      // Reset the mock and set it to reject
      vi.mocked(readFile).mockReset()
      vi.mocked(readFile).mockRejectedValue(new Error("File not found"))

      const result = await parser.parseFile("test.js")

      expect(result).toEqual([])
    })

    it("should use provided fileHash when available", async () => {
      const content = `/* This is a long test content string that exceeds 100 characters to test fileHash behavior. It includes multiple lines and various JavaScript constructs to simulate real-world code. 
const items = [1, 2, 3]; const sum = items.reduce((a, b) => a + b, 0); function processItems(items) { return items.map(item => item * 2); } // More comments to pad the length to ensure we hit the minimum character requirement */`
      const fileHash = "test-hash"

      const result = await parser.parseFile("test.js", { content, fileHash })

      expect(result[0].fileHash).toBe(fileHash)
    })
  })

  describe("isSupportedLanguage", () => {
    it("should return true for supported extensions", () => {
      expect(parser["isSupportedLanguage"](".js")).toBe(true)
    })

    it("should return false for unsupported extensions", () => {
      expect(parser["isSupportedLanguage"](".unsupported")).toBe(false)
    })
  })

  describe("createFileHash", () => {
    it("should generate consistent hashes for same content", () => {
      const content = "test content"

      const hash1 = parser["createFileHash"](content)
      const hash2 = parser["createFileHash"](content)

      expect(hash1).toBe(hash2)
      expect(hash1).toMatch(/^[a-f0-9]{64}$/) // SHA-256 hex format
    })

    it("should generate different hashes for different content", () => {
      const hash1 = parser["createFileHash"]("content1")
      const hash2 = parser["createFileHash"]("content2")

      expect(hash1).not.toBe(hash2)
    })
  })

  describe("parseContent", () => {
    it("should wait for pending parser loads", async () => {
      // The load only settles after a real 100 ms timer, so the parser must await it.
      const pendingLoad = new Promise((resolve) => setTimeout(() => resolve(mockLanguageParser), 100))
      parser["pendingLoads"].set(".js", pendingLoad as Promise<any>)

      const result = await parser["parseContent"]("test.js", "const test = 123", "hash")

      expect(result).toBeDefined()
    })

    it("should handle parser load errors", async () => {
      mockedLoadParsers.mockRejectedValue(new Error("Load failed"))

      const result = await parser["parseContent"]("test.js", "const test = 123", "hash")

      expect(result).toEqual([])
    })

    it("should return empty array when no parser is available", async () => {
      mockedLoadParsers.mockResolvedValue(asLoadedParsers({}))

      const result = await parser["parseContent"]("test.js", "const test = 123", "hash")

      expect(result).toEqual([])
    })
  })

  describe("_performFallbackChunking", () => {
    it("should chunk content when no captures are found", async () => {
      const content = `/* This is a long test content string that exceeds 100 characters to test fallback chunking behavior. It includes multiple lines and various JavaScript constructs to simulate real-world code. line1: const a = 1; line2: const b = 2; line3: function sum() { return a + b; } line4: class Adder { constructor(x, y) { this.x = x; this.y = y; } } line5: const instance = new Adder(1, 2); line6: console.log(instance.x + instance.y); line7: // More comments to pad the length to ensure we hit the minimum character requirement */`

      const result = await parser["_performFallbackChunking"]("test.js", content, "hash", new Set())

      expect(result.length).toBeGreaterThan(0)
      expect(result[0].type).toBe("fallback_chunk")
    })

    it("should respect MIN_BLOCK_CHARS for fallback chunks", async () => {
      const shortContent = "short"

      const result = await parser["_performFallbackChunking"]("test.js", shortContent, "hash", new Set())

      expect(result).toEqual([])
    })
  })

  describe("_chunkLeafNodeByLines", () => {
    it("should chunk leaf nodes by lines", async () => {
      const mockNode = {
        text: `/* This is a long test content string that exceeds 100 characters to test line chunking behavior. 
line1: const a = 1; line2: const b = 2; line3: function sum() { return a + b; } line4: class Multiplier { constructor(x, y) { this.x = x; this.y = y; } } line5: const instance = new Multiplier(3, 4); line6: console.log(instance.x * instance.y); line7: // More comments to pad the length to ensure we hit the minimum character requirement */`,
        startPosition: { row: 10 },
        endPosition: { row: 12 },
        type: "function",
      } as unknown as Parser.SyntaxNode

      const result = await parser["_chunkLeafNodeByLines"](mockNode, "test.js", "hash", new Set())

      expect(result.length).toBeGreaterThan(0)
      expect(result[0].type).toBe("function")
      expect(result[0].start_line).toBe(11) // 1-based
    })
  })

  describe("_chunkTextByLines", () => {
    it("should handle oversized lines by splitting them", async () => {
      const longLine = "a".repeat(2000)
      const lines = ["normal", longLine, "normal"]

      const result = await parser["_chunkTextByLines"](lines, "test.js", "hash", "test_type", new Set())

      const segments = result.filter((r) => r.type === "test_type_segment")
      expect(segments.length).toBeGreaterThan(1)
    })

    it("should re-balance chunks when remainder is too small", async () => {
      // 100 short lines: "0: line", "1: line", ...
      const lines = Array.from({ length: 100 }, (_, i) => `${i}: line`)

      const result = await parser["_chunkTextByLines"](lines, "test.js", "hash", "test_type", new Set())

      for (const chunk of result) {
        expect(chunk.content.length).toBeGreaterThanOrEqual(100)
        expect(chunk.content.length).toBeLessThanOrEqual(1150)
      }
    })
  })

  describe("singleton instance", () => {
    it("should maintain parser state across calls", async () => {
      const result1 = await codeParser.parseFile("test.js", { content: "const a = 1" })
      const result2 = await codeParser.parseFile("test.js", { content: "const b = 2" })

      expect(result1).toBeDefined()
      expect(result2).toBeDefined()
    })
  })

  describe("deduplication", () => {
    it("should remove contained blocks", () => {
      // Mock a scenario where we have both parent and child blocks
      const mockBlocks = [
        {
          file_path: "test.js",
          identifier: "parentFunction",
          type: "function",
          start_line: 1,
          end_line: 10,
          content: "function parent() {\n function child() {}\n}",
          segmentHash: "parent-hash",
          fileHash: "file-hash",
          chunkSource: "tree-sitter" as const,
        },
        {
          file_path: "test.js",
          identifier: "child",
          type: "function",
          start_line: 2,
          end_line: 3,
          content: "function child() {}",
          segmentHash: "child-hash",
          fileHash: "file-hash",
          chunkSource: "tree-sitter" as const,
        },
      ]

      const result = parser["deduplicateBlocks"](mockBlocks)

      expect(result).toHaveLength(1)
      expect(result[0].identifier).toBe("parentFunction")
    })

    it("should prioritize tree-sitter over fallback", () => {
      // Two blocks over the same lines and content; only the source differs.
      const mockBlocks = [
        {
          file_path: "test.js",
          identifier: null,
          type: "fallback_chunk",
          start_line: 1,
          end_line: 5,
          content: "some code content",
          segmentHash: "fallback-hash",
          fileHash: "file-hash",
          chunkSource: "fallback" as const,
        },
        {
          file_path: "test.js",
          identifier: "myFunction",
          type: "function",
          start_line: 1,
          end_line: 5,
          content: "some code content",
          segmentHash: "tree-hash",
          fileHash: "file-hash",
          chunkSource: "tree-sitter" as const,
        },
      ]

      const result = parser["deduplicateBlocks"](mockBlocks)

      expect(result).toHaveLength(1)
      expect(result[0].chunkSource).toBe("tree-sitter")
      expect(result[0].identifier).toBe("myFunction")
    })

    it("should check containment correctly", () => {
      const parentBlock = {
        file_path: "test.js",
        identifier: "parent",
        type: "function",
        start_line: 1,
        end_line: 10,
        content: "function parent() {\n const x = 1;\n return x;\n}",
        segmentHash: "parent-hash",
        fileHash: "file-hash",
        chunkSource: "tree-sitter" as const,
      }
      const childBlock = {
        file_path: "test.js",
        identifier: null,
        type: "variable",
        start_line: 2,
        end_line: 2,
        content: "const x = 1;",
        segmentHash: "child-hash",
        fileHash: "file-hash",
        chunkSource: "tree-sitter" as const,
      }

      const isContained = parser["isBlockContained"](childBlock, parentBlock)

      expect(isContained).toBe(true)
    })

    it("should extract identifiers from tree-sitter captures", async () => {
      const source = "function testFunction() {\n return 42;\n}"
      // Mock tree-sitter captures with name information
      const mockCaptures = [
        {
          name: "definition.function",
          node: {
            type: "function_declaration",
            text: "function testFunction() {\n return 42;\n}",
            startPosition: { row: 0 },
            endPosition: { row: 2 },
            childForFieldName: vi.fn(),
            children: [],
          },
        },
        {
          name: "name",
          node: {
            text: "testFunction",
            startPosition: { row: 0 },
            endPosition: { row: 0 },
          },
        },
      ]
      // Set up parser with mock language
      installMockLanguage("js", source, 2, mockCaptures)

      const result = await parser["parseContent"]("test.js", source, "hash")

      // Should extract identifier from captures
      // NOTE(review): assertions only run when blocks are returned; an empty result
      // passes vacuously. Confirm the parser's behavior for this input and tighten.
      if (result.length > 0) {
        expect(result[0].identifier).toBe("testFunction")
        expect(result[0].type).toBe("function_declaration")
        expect(result[0].chunkSource).toBe("tree-sitter")
      }
    })

    it("should extract JSON property identifiers correctly", async () => {
      const source = '{"testProperty": {"value": 42}}'
      // Mock JSON property captures
      const mockCaptures = [
        {
          name: "property.definition",
          node: {
            type: "pair",
            text: '"testProperty": {\n "value": 42\n}',
            startPosition: { row: 0 },
            endPosition: { row: 2 },
            childForFieldName: vi.fn(),
            children: [],
          },
        },
        {
          name: "property.name.definition",
          node: {
            text: '"testProperty"',
            startPosition: { row: 0 },
            endPosition: { row: 0 },
          },
        },
      ]
      // Set up parser with mock language for JSON
      installMockLanguage("json", source, 2, mockCaptures)

      const result = await parser["parseContent"]("test.json", source, "hash")

      // Should extract identifier from JSON property captures (without quotes)
      // NOTE(review): guarded assertions; passes vacuously on an empty result.
      if (result.length > 0) {
        expect(result[0].identifier).toBe("testProperty")
        expect(result[0].type).toBe("pair")
        expect(result[0].chunkSource).toBe("tree-sitter")
      }
    })
  })

  describe("hierarchy extraction", () => {
    it("should extract parent hierarchy for nested functions", async () => {
      const classSource = "class UserService {\n validateEmail(email) {\n return email.includes('@');\n }\n}"
      // Mock nested function captures
      const mockCaptures = [
        {
          name: "definition.class",
          node: {
            type: "class_declaration",
            text: classSource,
            startPosition: { row: 0 },
            endPosition: { row: 4 },
            parent: null,
            childForFieldName: vi.fn(),
            children: [],
          },
        },
        {
          name: "name",
          node: {
            text: "UserService",
            startPosition: { row: 0 },
            endPosition: { row: 0 },
          },
        },
        {
          name: "definition.function",
          node: {
            type: "function_declaration",
            text: "validateEmail(email) {\n return email.includes('@');\n }",
            startPosition: { row: 1 },
            endPosition: { row: 3 },
            // The method's parent is the class node; its name lookup yields "UserService".
            parent: {
              type: "class_declaration",
              text: classSource,
              startPosition: { row: 0 },
              endPosition: { row: 4 },
              parent: null,
              childForFieldName: vi.fn(() => ({ text: "UserService" })),
              children: [],
            },
            childForFieldName: vi.fn(),
            children: [],
          },
        },
        {
          name: "name",
          node: {
            text: "validateEmail",
            startPosition: { row: 1 },
            endPosition: { row: 1 },
          },
        },
      ]
      // Set up parser with mock language
      installMockLanguage("js", classSource, 4, mockCaptures)

      const result = await parser["parseContent"]("test.js", classSource, "hash")

      // Should extract parent hierarchy for the method
      // NOTE(review): guarded assertions; passes vacuously if the block is not found.
      const functionBlock = result.find((block) => block.identifier === "validateEmail")
      if (functionBlock) {
        expect(functionBlock.parentChain).toHaveLength(1)
        expect(functionBlock.parentChain[0].identifier).toBe("UserService")
        expect(functionBlock.parentChain[0].type).toBe("class")
        expect(functionBlock.hierarchyDisplay).toBe("class UserService > function validateEmail")
      }
    })

    it("should handle JSON property hierarchy", async () => {
      const jsonSource = '{\n "database": {\n "host": "localhost"\n }\n}'
      // Mock JSON property captures
      const mockCaptures = [
        {
          name: "property.definition",
          node: {
            type: "pair",
            text: '"database": {\n "host": "localhost"\n}',
            startPosition: { row: 1 },
            endPosition: { row: 3 },
            parent: {
              type: "object",
              text: jsonSource,
              startPosition: { row: 0 },
              endPosition: { row: 4 },
              parent: null,
              children: [],
            },
            childForFieldName: vi.fn(),
            children: [{ text: '"database"' }],
          },
        },
        {
          name: "property.name.definition",
          node: {
            text: '"database"',
            startPosition: { row: 1 },
            endPosition: { row: 1 },
          },
        },
      ]
      // Set up parser with mock language for JSON
      installMockLanguage("json", jsonSource, 4, mockCaptures)

      const result = await parser["parseContent"]("test.json", jsonSource, "hash")

      // Should extract JSON property identifier without quotes
      // NOTE(review): guarded assertions; passes vacuously if the block is not found.
      const propertyBlock = result.find((block) => block.identifier === "database")
      if (propertyBlock) {
        expect(propertyBlock.identifier).toBe("database")
        expect(propertyBlock.type).toBe("pair")
        expect(propertyBlock.hierarchyDisplay).toBe("property database")
      }
    })

    it("should handle empty parent chain for top-level functions", async () => {
      const source = "function topLevelFunction() {\n return 42;\n}"
      // Mock top-level function captures
      const mockCaptures = [
        {
          name: "definition.function",
          node: {
            type: "function_declaration",
            text: "function topLevelFunction() {\n return 42;\n}",
            startPosition: { row: 0 },
            endPosition: { row: 2 },
            parent: {
              type: "program",
              startPosition: { row: 0 },
              endPosition: { row: 2 },
              parent: null,
            },
            childForFieldName: vi.fn(),
            children: [],
          },
        },
        {
          name: "name",
          node: {
            text: "topLevelFunction",
            startPosition: { row: 0 },
            endPosition: { row: 0 },
          },
        },
      ]
      // Set up parser with mock language
      installMockLanguage("js", source, 2, mockCaptures)

      const result = await parser["parseContent"]("test.js", source, "hash")

      // Should have empty parent chain for top-level function
      // NOTE(review): guarded assertions; passes vacuously if the block is not found.
      const functionBlock = result.find((block) => block.identifier === "topLevelFunction")
      if (functionBlock) {
        expect(functionBlock.parentChain).toHaveLength(0)
        expect(functionBlock.hierarchyDisplay).toBe("function topLevelFunction")
      }
    })
  })
})

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/anrgct/autodev-codebase'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.