Skip to main content
Glama
treeSitterProcessor.test.ts (12.6 kB)
/**
 * @fileOverview: Unit tests for the TreeSitterProcessor class
 * @module: TreeSitterProcessor Tests
 * @description: Comprehensive test suite for TreeSitterProcessor covering AST parsing, code chunking, symbol extraction, and fallback mechanisms
 */

import { TreeSitterProcessor, CodeChunk, CodeSymbol, CodeXRef } from '../treeSitterProcessor';

// Mock tree-sitter modules so no native grammar has to be loaded by the tests.
jest.mock('tree-sitter', () => ({
  Parser: jest.fn().mockImplementation(() => ({
    setLanguage: jest.fn(),
    parse: jest.fn(),
  })),
}));

jest.mock('tree-sitter-typescript', () => ({
  default: {
    typescript: jest.fn(),
  },
}));

jest.mock('tree-sitter-javascript', () => ({
  default: jest.fn(),
}));

jest.mock('tree-sitter-python', () => ({
  default: jest.fn(),
}));

// Mock logger so warnings emitted by the processor can be asserted on.
jest.mock('../../utils/logger', () => ({
  logger: {
    info: jest.fn(),
    warn: jest.fn(),
    error: jest.fn(),
    debug: jest.fn(),
  },
}));

import { logger } from '../../utils/logger';

describe('TreeSitterProcessor', () => {
  let processor: TreeSitterProcessor;

  /**
   * Calls the processor's non-public `fallbackChunking` method.
   * The `any` cast is confined to this helper so the individual tests work
   * with a typed `CodeChunk[]` (which also gives callback parameters a type).
   */
  const fallbackChunking = (content: string, filePath: string): CodeChunk[] =>
    (processor as any).fallbackChunking(content, filePath);

  /** Calls the processor's non-public `estimateTokens` method. */
  const estimateTokens = (content: string): number => (processor as any).estimateTokens(content);

  beforeEach(() => {
    // Reset call history on the mocked logger/parsers and start from a fresh instance.
    jest.clearAllMocks();
    processor = new TreeSitterProcessor();
  });

  describe('Initialization', () => {
    test('should initialize with empty parsers map', () => {
      expect(processor).toBeDefined();
    });
  });

  describe('parseAndChunk', () => {
    const testContent = 'function test() { return "hello"; }';
    const testFilePath = '/test/file.js';
    const testLanguage = 'javascript';

    test('should handle invalid content input', async () => {
      const result = await processor.parseAndChunk('', testLanguage, testFilePath);

      expect(result.chunks).toBeDefined();
      expect(result.chunks.length).toBeGreaterThan(0);
      expect(result.symbols).toEqual([]);
      expect(result.xrefs).toEqual([]);
      expect(logger.warn).toHaveBeenCalledWith('Invalid content for parsing', expect.any(Object));
    });

    test('should handle invalid language input', async () => {
      const result = await processor.parseAndChunk(testContent, '', testFilePath);

      expect(result.chunks).toBeDefined();
      expect(result.symbols).toEqual([]);
      expect(result.xrefs).toEqual([]);
      expect(logger.warn).toHaveBeenCalledWith('Invalid language for parsing', expect.any(Object));
    });

    test('should handle empty content', async () => {
      const result = await processor.parseAndChunk('', testLanguage, testFilePath);

      expect(result.chunks).toBeDefined();
      expect(result.chunks.length).toBe(1);
      expect(result.chunks[0].content).toBe('');
      expect(result.chunks[0].startLine).toBe(1);
      expect(result.chunks[0].endLine).toBe(1);
      expect(result.chunks[0].tokenEstimate).toBe(0);
    });

    test('should handle content with null bytes', async () => {
      const contentWithNull = 'function test() { return "hello\0world"; }';
      const result = await processor.parseAndChunk(contentWithNull, testLanguage, testFilePath);

      expect(result.chunks).toBeDefined();
      expect(logger.warn).toHaveBeenCalledWith(
        'Problematic content detected, using fallback',
        expect.any(Object)
      );
    });

    test('should handle content with invalid characters', async () => {
      // NOTE(review): '\x00' is the same character as '\0', so this input is
      // identical to the one in the null-byte test above.
      const contentWithInvalidChars = 'function test() { return "hello\x00world"; }';
      const result = await processor.parseAndChunk(
        contentWithInvalidChars,
        testLanguage,
        testFilePath
      );

      expect(result.chunks).toBeDefined();
      expect(logger.warn).toHaveBeenCalledWith(
        'Problematic content detected, using fallback',
        expect.any(Object)
      );
    });

    test('should handle very large content', async () => {
      const largeContent = 'x'.repeat(1024 * 1024 + 1); // Over 1MB
      const result = await processor.parseAndChunk(largeContent, testLanguage, testFilePath);

      expect(result.chunks).toBeDefined();
      expect(logger.warn).toHaveBeenCalledWith(
        'Problematic content detected, using fallback',
        expect.any(Object)
      );
    });
  });

  describe('Fallback Chunking', () => {
    test('should create chunks from empty content', () => {
      const chunks = fallbackChunking('', '/test/empty.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0]).toEqual({
        content: '',
        startLine: 1,
        endLine: 1,
        tokenEstimate: 0,
        symbolName: 'empty_file',
        symbolType: 'fallback',
      });
    });

    test('should create chunks from whitespace-only content', () => {
      const chunks = fallbackChunking(' \n\t \n ', '/test/whitespace.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0]).toEqual({
        content: '',
        startLine: 1,
        endLine: 1,
        tokenEstimate: 0,
        symbolName: 'empty_file',
        symbolType: 'fallback',
      });
    });

    test('should chunk content into 50-line segments', () => {
      const lines = Array.from({ length: 120 }, (_, i) => `line ${i + 1};`);
      const content = lines.join('\n');

      const chunks = fallbackChunking(content, '/test/large.js');

      expect(chunks).toHaveLength(3); // ceil(120 lines / 50 lines per chunk) = 3 chunks

      // First chunk: lines 1-50
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(50);
      expect(chunks[0].content).toContain('line 1;');
      expect(chunks[0].content).toContain('line 50;');

      // Second chunk: lines 51-100
      expect(chunks[1].startLine).toBe(51);
      expect(chunks[1].endLine).toBe(100);

      // Third chunk: lines 101-120
      expect(chunks[2].startLine).toBe(101);
      expect(chunks[2].endLine).toBe(120);
    });

    test('should estimate tokens correctly', () => {
      const testStrings = [
        { content: '', expected: 0 },
        { content: 'abcd', expected: 1 }, // 4 chars / 4 = 1 token
        { content: 'abcdefgh', expected: 2 }, // 8 chars / 4 = 2 tokens
        { content: 'abcdefghi', expected: 3 }, // 9 chars / 4 = 2.25, ceil to 3
      ];

      testStrings.forEach(({ content, expected }) => {
        expect(estimateTokens(content)).toBe(expected);
      });
    });
  });

  describe('Edge Cases', () => {
    test('should handle single line content', () => {
      const content = 'const x = 42;';
      const chunks = fallbackChunking(content, '/test/single.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0].content).toBe(content);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(1);
    });

    test('should handle content ending without newline', () => {
      const content = 'line 1\nline 2\nline 3';
      const chunks = fallbackChunking(content, '/test/no-final-newline.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(3);
    });

    test('should handle content with only newlines', () => {
      const content = '\n\n\n\n';
      const chunks = fallbackChunking(content, '/test/newlines-only.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0].content).toBe(content);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(4);
    });

    test('should handle very long lines', () => {
      const longLine = 'x'.repeat(1000);
      const content = `${longLine}\nnormal line`;
      const chunks = fallbackChunking(content, '/test/long-line.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0].content).toBe(content);
      expect(chunks[0].tokenEstimate).toBe(Math.ceil(content.length / 4));
    });
  });

  describe('Chunking Algorithms', () => {
    test('should maintain line numbering accuracy', () => {
      const content = 'line1\nline2\nline3\nline4\nline5';
      const chunks = fallbackChunking(content, '/test/lines.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(5);
    });

    test('should handle chunking with exact chunk size boundaries', () => {
      const lines = Array.from({ length: 50 }, (_, i) => `line ${i + 1};`);
      const content = lines.join('\n');

      const chunks = fallbackChunking(content, '/test/exact-50.js');

      expect(chunks).toHaveLength(1);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(50);
    });

    test('should handle chunking with partial final chunk', () => {
      const lines = Array.from({ length: 75 }, (_, i) => `line ${i + 1};`);
      const content = lines.join('\n');

      const chunks = fallbackChunking(content, '/test/75-lines.js');

      expect(chunks).toHaveLength(2);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBe(50);
      expect(chunks[1].startLine).toBe(51);
      expect(chunks[1].endLine).toBe(75);
    });

    test('should preserve content integrity across chunks', () => {
      const originalLines = Array.from({ length: 75 }, (_, i) => `unique_line_${i + 1}_content`);
      const content = originalLines.join('\n');

      const chunks = fallbackChunking(content, '/test/integrity.js');

      // Reconstruct content from chunks
      const reconstructed = chunks.map(chunk => chunk.content).join('\n');
      expect(reconstructed).toBe(content);

      // Verify line counts
      const originalLineCount = originalLines.length;
      const totalChunkLines = chunks.reduce(
        (sum, chunk) => sum + (chunk.endLine - chunk.startLine + 1),
        0
      );
      expect(totalChunkLines).toBe(originalLineCount);
    });
  });

  describe('Error Handling', () => {
    test('should handle parser initialization failures gracefully', async () => {
      // Mock parser to be unavailable.
      // NOTE(review): jest.doMock is not hoisted and only applies to modules
      // required after this call; '../treeSitterProcessor' was already imported
      // at the top of the file, so confirm the processor loads 'tree-sitter'
      // lazily, otherwise this mock has no effect. It is also never undone in
      // this file, so it may still be registered when later tests run.
      jest.doMock('tree-sitter', () => {
        throw new Error('Module not found');
      });

      const result = await processor.parseAndChunk('test content', 'javascript', '/test.js');

      expect(result.chunks).toBeDefined();
      expect(result.chunks.length).toBeGreaterThan(0);
      expect(logger.warn).toHaveBeenCalledWith(
        'No parser available for language',
        expect.any(Object)
      );
    });

    test('should handle parsing errors with fallback', async () => {
      // This test would require mocking the actual tree-sitter parsing to fail
      // For now, we test the fallback mechanism through other error paths
      const result = await processor.parseAndChunk(
        'content',
        'nonexistent-language',
        '/test.unknown'
      );

      expect(result.chunks).toBeDefined();
      expect(logger.warn).toHaveBeenCalledWith(
        'No parser available for language',
        expect.any(Object)
      );
    });
  });

  describe('Integration with parseAndChunk', () => {
    test('should return valid chunk structure', async () => {
      const result = await processor.parseAndChunk('function test() {}', 'javascript', '/test.js');

      expect(Array.isArray(result.chunks)).toBe(true);
      expect(Array.isArray(result.symbols)).toBe(true);
      expect(Array.isArray(result.xrefs)).toBe(true);

      if (result.chunks.length > 0) {
        const chunk = result.chunks[0];
        expect(typeof chunk.content).toBe('string');
        expect(typeof chunk.startLine).toBe('number');
        expect(typeof chunk.endLine).toBe('number');
        expect(typeof chunk.tokenEstimate).toBe('number');
        expect(chunk.startLine).toBeGreaterThan(0);
        expect(chunk.endLine).toBeGreaterThanOrEqual(chunk.startLine);
      }
    });

    test('should handle multiline content chunking', async () => {
      // The fixture must really span several lines: the assertion below
      // compares chunk coverage against its '\n'-separated line count.
      const multilineContent = `
function component() {
  const [state, setState] = useState(initialValue);

  useEffect(() => {
    fetchData().then(data => {
      setState(data);
    });
  }, []);

  return (
    <div>
      <h1>Title</h1>
      <p>Content: {state}</p>
    </div>
  );
}

export default component;
      `.trim();

      const result = await processor.parseAndChunk(
        multilineContent,
        'javascript',
        '/test/component.jsx'
      );

      expect(result.chunks.length).toBeGreaterThan(0);

      // Verify that chunks cover the entire content
      const totalLines = multilineContent.split('\n').length;
      const maxEndLine = Math.max(...result.chunks.map(c => c.endLine));
      expect(maxEndLine).toBeGreaterThanOrEqual(totalLines);
    });
  });
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sbarron/AmbianceMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.