Skip to main content
Glama
character-chunker.test.ts4.63 kB
import { beforeEach, describe, expect, it } from "vitest"; import { CharacterChunker } from "../../../src/code/chunker/character-chunker.js"; import type { ChunkerConfig } from "../../../src/code/types.js"; describe("CharacterChunker", () => { let chunker: CharacterChunker; let config: ChunkerConfig; beforeEach(() => { config = { chunkSize: 100, chunkOverlap: 20, maxChunkSize: 200, }; chunker = new CharacterChunker(config); }); describe("chunk", () => { it("should chunk small code into single chunk", async () => { const code = "function hello() {\n console.log('Starting hello function');\n return 'world';\n}"; const chunks = await chunker.chunk(code, "test.ts", "typescript"); expect(chunks).toHaveLength(1); expect(chunks[0].content).toContain("hello"); expect(chunks[0].startLine).toBe(1); expect(chunks[0].metadata.language).toBe("typescript"); }); it("should chunk large code into multiple chunks", async () => { const code = Array(20) .fill("function testFunction() { console.log('This is a test function'); return true; }\n") .join(""); const chunks = await chunker.chunk(code, "test.js", "javascript"); expect(chunks.length).toBeGreaterThan(1); chunks.forEach((chunk) => { expect(chunk.content.length).toBeLessThanOrEqual(config.maxChunkSize); }); }); it("should preserve line numbers", async () => { const code = "This is line 1 with enough content to not be filtered\n" + "This is line 2 with enough content to not be filtered\n" + "This is line 3 with enough content to not be filtered"; const chunks = await chunker.chunk(code, "test.txt", "text"); expect(chunks.length).toBeGreaterThan(0); expect(chunks[0].startLine).toBe(1); expect(chunks[0].endLine).toBeGreaterThan(chunks[0].startLine); }); it("should apply overlap between chunks", async () => { const code = Array(20).fill("const x = 1;\n").join(""); const chunks = await chunker.chunk(code, "test.js", "javascript"); if (chunks.length > 1) { // Check that there's overlap in content expect(chunks.length).toBeGreaterThan(1); } }); it("should find good break points", async () => { const code = `function foo() { return 1; } function bar() { return 2; } function baz() { return 3; }`; const chunks = await chunker.chunk(code, "test.js", "javascript"); // Should try to break at function boundaries chunks.forEach((chunk) => { expect(chunk.content.length).toBeGreaterThan(0); }); }); it("should handle empty code", async () => { const code = ""; const chunks = await chunker.chunk(code, "test.ts", "typescript"); expect(chunks).toHaveLength(0); }); it("should handle code with only whitespace", async () => { const code = " \n\n\n "; const chunks = await chunker.chunk(code, "test.ts", "typescript"); expect(chunks).toHaveLength(0); }); it("should skip very small chunks", async () => { const code = "x"; const chunks = await chunker.chunk(code, "test.ts", "typescript"); expect(chunks).toHaveLength(0); }); }); describe("supportsLanguage", () => { it("should support all languages", () => { expect(chunker.supportsLanguage("typescript")).toBe(true); expect(chunker.supportsLanguage("python")).toBe(true); expect(chunker.supportsLanguage("unknown")).toBe(true); }); }); describe("getStrategyName", () => { it("should return correct strategy name", () => { expect(chunker.getStrategyName()).toBe("character-based"); }); }); describe("metadata", () => { it("should include correct chunk metadata", async () => { const code = "function test() {\n console.log('test function');\n return 1;\n}"; const chunks = await chunker.chunk(code, "/path/to/file.ts", "typescript"); expect(chunks.length).toBeGreaterThan(0); expect(chunks[0].metadata).toEqual({ filePath: "/path/to/file.ts", language: "typescript", chunkIndex: 0, chunkType: "block", }); }); it("should increment chunk index", async () => { const code = Array(20).fill("function test() {}\n").join(""); const chunks = await chunker.chunk(code, "test.ts", "typescript"); if (chunks.length > 1) { expect(chunks[0].metadata.chunkIndex).toBe(0); expect(chunks[1].metadata.chunkIndex).toBe(1); } }); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mhalder/qdrant-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server