Claude Writer's Aid MCP

Overview Schema Related Servers Score Discussions

MarkdownChunker.test.ts•11.3 KiB

/**
 * Unit tests for MarkdownChunker
 */

import { MarkdownChunker } from "../../parsers/MarkdownChunker.js";
import type { MarkdownHeading } from "../../markdown/types.js";

/** File id passed to every `chunk()` call and stamped on every heading fixture. */
const FILE_ID = "file1";

/** Fields a test must supply for a heading; `level` and `parent_id` are optional. */
type HeadingFields = Pick<MarkdownHeading, "id" | "text" | "slug" | "line_number"> &
  Partial<Pick<MarkdownHeading, "level" | "parent_id">>;

/**
 * Joins rows with "\n" so a fixture's line structure is explicit in the source
 * and cannot drift from the `line_number` values declared on its headings.
 */
function lines(...rows: string[]): string {
  return rows.join("\n");
}

/**
 * Builds a heading fixture for FILE_ID.
 *
 * @param fields - Heading fields; `level` defaults to 1 and `parent_id` to null.
 * @returns A complete heading object.
 */
function makeHeading({ level = 1, parent_id = null, ...rest }: HeadingFields): MarkdownHeading {
  return { file_id: FILE_ID, level, parent_id, ...rest };
}

describe("MarkdownChunker", () => {
  describe("Basic Chunking", () => {
    it("should create chunks from content", () => {
      const chunker = new MarkdownChunker();
      const content = "# Introduction\n\nThis is the introduction section.";
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Introduction", slug: "introduction", line_number: 1 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks.length).toBeGreaterThan(0);
      expect(chunks[0].file_id).toBe(FILE_ID);
      expect(chunks[0].chunk_index).toBe(0);
    });

    it("should set chunk indices correctly", () => {
      const chunker = new MarkdownChunker();
      // Headings sit on lines 1, 4 and 7, matching the fixtures below.
      const content = lines(
        "# Section 1",
        "Content 1",
        "",
        "# Section 2",
        "Content 2",
        "",
        "# Section 3",
        "Content 3"
      );
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Section 1", slug: "section-1", line_number: 1 }),
        makeHeading({ id: "h2", text: "Section 2", slug: "section-2", line_number: 4 }),
        makeHeading({ id: "h3", text: "Section 3", slug: "section-3", line_number: 7 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      // Guard the indexing below so a short result fails with a readable message.
      expect(chunks.length).toBeGreaterThanOrEqual(3);
      expect(chunks[0].chunk_index).toBe(0);
      expect(chunks[1].chunk_index).toBe(1);
      expect(chunks[2].chunk_index).toBe(2);
    });
  });

  describe("Heading-Based Chunking", () => {
    it("should split content by headings", () => {
      const chunker = new MarkdownChunker();
      const content = lines(
        "# Chapter 1",
        "This is chapter 1 content.",
        "",
        "# Chapter 2",
        "This is chapter 2 content."
      );
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Chapter 1", slug: "chapter-1", line_number: 1 }),
        makeHeading({ id: "h2", text: "Chapter 2", slug: "chapter-2", line_number: 4 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks).toHaveLength(2);
      expect(chunks[0].content).toContain("Chapter 1");
      expect(chunks[0].content).toContain("chapter 1 content");
      expect(chunks[1].content).toContain("Chapter 2");
      expect(chunks[1].content).toContain("chapter 2 content");
    });

    it("should preserve heading hierarchy in context", () => {
      const chunker = new MarkdownChunker({ preserveContext: true });
      const content = lines(
        "# Chapter 1",
        "## Section 1.1",
        "### Subsection 1.1.1",
        "Content here"
      );
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Chapter 1", slug: "chapter-1", line_number: 1 }),
        makeHeading({
          id: "h2",
          level: 2,
          text: "Section 1.1",
          slug: "section-1-1",
          line_number: 2,
          parent_id: "h1",
        }),
        makeHeading({
          id: "h3",
          level: 3,
          text: "Subsection 1.1.1",
          slug: "subsection-1-1-1",
          line_number: 3,
          parent_id: "h2",
        }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks.length).toBeGreaterThanOrEqual(3);
      expect(chunks[2].heading).toBe("Chapter 1 > Section 1.1 > Subsection 1.1.1");
    });

    it("should handle content before first heading", () => {
      const chunker = new MarkdownChunker();
      const content = lines(
        "This is preamble text before any heading.",
        "",
        "# First Heading",
        "Content after heading."
      );
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "First Heading", slug: "first-heading", line_number: 3 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks).toHaveLength(2);
      expect(chunks[0].heading).toBeNull();
      expect(chunks[0].content).toContain("preamble text");
      expect(chunks[1].heading).toBe("First Heading");
    });
  });

  describe("Size-Based Chunking", () => {
    it("should split large sections into multiple chunks", () => {
      const chunker = new MarkdownChunker({ maxChunkSize: 20, overlapSize: 5 });
      const content = lines("# Long Section", "word ".repeat(100)); // 100 words
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Long Section", slug: "long-section", line_number: 1 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks.length).toBeGreaterThan(1);
      // Each chunk should be around maxChunkSize
      for (const chunk of chunks) {
        expect(chunk.word_count).toBeLessThanOrEqual(25); // Some tolerance
      }
    });

    it("should create overlapping chunks", () => {
      const overlapSize = 3;
      const chunker = new MarkdownChunker({
        maxChunkSize: 10,
        overlapSize,
        splitOnHeadings: false,
      });
      const content = "one two three four five six seven eight nine ten eleven twelve";

      const chunks = chunker.chunk(FILE_ID, content, []);

      expect(chunks.length).toBeGreaterThan(1);
      // Check for overlap: the first of the last `overlapSize` words of chunk 0
      // should appear in chunk 1.
      const [firstOverlapWord] = chunks[0].content.split(/\s+/).slice(-overlapSize);
      expect(chunks[1].content).toContain(firstOverlapWord);
    });

    it("should handle size-based chunking without headings", () => {
      const chunker = new MarkdownChunker({
        maxChunkSize: 20,
        overlapSize: 5,
        splitOnHeadings: false,
      });
      const content = "word ".repeat(50);

      const chunks = chunker.chunk(FILE_ID, content, []);

      expect(chunks.length).toBeGreaterThan(1);
      expect(chunks[0].heading).toBeNull();
    });
  });

  describe("Word and Token Counting", () => {
    it("should count words correctly", () => {
      const chunker = new MarkdownChunker();
      const content = lines("# Test", "This is five words total.");
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Test", slug: "test", line_number: 1 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks.length).toBeGreaterThan(0);
      expect(chunks[0].word_count).toBeGreaterThan(0);
    });

    it("should estimate token count", () => {
      const chunker = new MarkdownChunker();
      const content = lines("# Test", "Some content here.");
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Test", slug: "test", line_number: 1 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks.length).toBeGreaterThan(0);
      // Token count should be roughly 1.3x word count
      expect(chunks[0].token_count).toBeGreaterThan(chunks[0].word_count);
      expect(chunks[0].token_count).toBeLessThan(chunks[0].word_count * 2);
    });
  });

  describe("Configuration", () => {
    it("should use default configuration", () => {
      const chunker = new MarkdownChunker();
      const content = "Test content";

      const chunks = chunker.chunk(FILE_ID, content, []);

      expect(chunks).toBeDefined();
    });

    it("should accept custom configuration", () => {
      const customConfig = {
        maxChunkSize: 100,
        overlapSize: 20,
        splitOnHeadings: false,
        preserveContext: false,
      };
      const chunker = new MarkdownChunker(customConfig);
      const content = "word ".repeat(200);

      const chunks = chunker.chunk(FILE_ID, content, []);

      expect(chunks.length).toBeGreaterThan(0);
    });

    it("should disable context preservation when configured", () => {
      const chunker = new MarkdownChunker({ preserveContext: false });
      const content = lines("# Chapter 1", "## Section 1.1", "Content");
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Chapter 1", slug: "chapter-1", line_number: 1 }),
        makeHeading({
          id: "h2",
          level: 2,
          text: "Section 1.1",
          slug: "section-1-1",
          line_number: 2,
          parent_id: "h1",
        }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks.length).toBeGreaterThanOrEqual(2);
      // Should only have section heading, not full context
      expect(chunks[1].heading).toBe("Section 1.1");
      expect(chunks[1].heading).not.toContain("Chapter 1");
    });
  });

  describe("Edge Cases", () => {
    it("should handle empty content", () => {
      const chunker = new MarkdownChunker();

      const chunks = chunker.chunk(FILE_ID, "", []);

      expect(chunks).toHaveLength(0);
    });

    it("should handle content with no headings", () => {
      const chunker = new MarkdownChunker({ splitOnHeadings: true });
      const content = "Just plain text without any headings.";

      const chunks = chunker.chunk(FILE_ID, content, []);

      // Should fallback to size-based chunking
      expect(chunks).toHaveLength(1);
      expect(chunks[0].heading).toBeNull();
    });

    it("should handle very short content", () => {
      const chunker = new MarkdownChunker({ maxChunkSize: 1000 });
      const content = "# Test\nShort.";
      const headings: MarkdownHeading[] = [
        makeHeading({ id: "h1", text: "Test", slug: "test", line_number: 1 }),
      ];

      const chunks = chunker.chunk(FILE_ID, content, headings);

      expect(chunks).toHaveLength(1);
    });

    it("should handle single word content", () => {
      const chunker = new MarkdownChunker();
      const content = "Test";

      const chunks = chunker.chunk(FILE_ID, content, []);

      expect(chunks).toHaveLength(1);
      expect(chunks[0].word_count).toBe(1);
    });

    it("should prevent infinite loop with invalid overlap config", () => {
      const chunker = new MarkdownChunker({
        maxChunkSize: 10,
        overlapSize: 15, // Overlap > max size
        splitOnHeadings: false,
      });
      const content = "word ".repeat(50);

      const chunks = chunker.chunk(FILE_ID, content, []);

      // Should still produce chunks without infinite loop
      expect(chunks.length).toBeGreaterThan(0);
      expect(chunks.length).toBeLessThan(100); // Sanity check
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/xiaolai/claude-writers-aid-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

MarkdownChunker.test.ts•11.3 KiB