GenAIScript

Official

Overview Schema Related Servers Score Discussions

mdchunk.ts•4.49 KiB

/**
 * Splits markdown into chunks that respect a token budget, cutting only at
 * heading or page-separator boundaries and trying to keep a heading together
 * with the sub-headings that directly follow it.
 *
 * Headings are detected line by line, so a line that looks like a heading
 * inside a fenced code block is treated as a heading as well.
 *
 * A single section larger than `maxTokens` is emitted as its own chunk, and
 * carrying parent headings forward to stay with a child section can also yield
 * a chunk above the budget; sections are never split internally.
 *
 * @param markdown - Markdown text, or a WorkspaceFile whose `content` is used.
 * @param approximateTokens - Estimates the token count of one line of text.
 * @param options - `maxTokens` is the per-chunk budget (default 4096). Lines
 *   starting with `pageSeparator` (default "======") close the current section
 *   and are left out of the chunk content; an empty separator disables this.
 * @returns The chunks in document order. `filename` is the source filename
 *   (empty for plain strings) followed by `#chunk<index>`. `lineStart` is the
 *   0-based index of the chunk's first source line and `lineEnd` is the index
 *   one past its last source line. Empty or missing content yields `[]`.
 * @throws Error when a WorkspaceFile uses base64 encoding.
 */
export async function chunkMarkdown(
    markdown: string | WorkspaceFile,
    approximateTokens: (text: string) => number,
    options?: {
        maxTokens?: number
        pageSeparator?: string
    }
): Promise<TextChunk[]> {
    type Section = {
        heading: string
        lines: string[]
        level: number
        // 0-based index of the section's first line in the source text
        startLine: number
        // cached sum of approximateTokens over `lines`
        tokens: number
    }

    const { maxTokens = 4096, pageSeparator = "======" } = options ?? {}
    if (!markdown) return []

    let filename = ""
    let text: string
    if (typeof markdown === "object") {
        if (markdown.encoding === "base64")
            throw new Error("base64 encoding not supported")
        filename = markdown.filename
        text = markdown.content
    } else {
        text = markdown
    }
    // A file without content has nothing to chunk (and cannot be split).
    if (!text) return []

    // ---- 1. Parse the text into heading-delimited sections ----
    const headingPattern = /^(#{1,6})\s+(.*)/
    const sections: Section[] = []
    let current: Section | undefined
    for (const [index, line] of text.split(/\r?\n/g).entries()) {
        // "".startsWith("") is always true, so an empty separator must be
        // ignored or every line would be swallowed as a page break.
        if (pageSeparator && line.startsWith(pageSeparator)) {
            if (current) sections.push(current)
            current = undefined
            continue
        }
        const match = headingPattern.exec(line)
        if (match) {
            if (current) sections.push(current)
            const [, hashes = "", heading = ""] = match
            current = {
                heading,
                lines: [],
                level: hashes.length,
                startLine: index,
                tokens: 0,
            }
        } else if (!current) {
            // Text before the first heading (or after a page break).
            current = {
                heading: "",
                lines: [],
                level: 0,
                startLine: index,
                tokens: 0,
            }
        }
        current.lines.push(line)
        current.tokens += approximateTokens(line)
    }
    if (current) sections.push(current)

    // ---- 2. Group consecutive sections into chunks ----
    const chunks: TextChunk[] = []
    const emit = (group: Section[]) => {
        const first = group[0]
        const last = group[group.length - 1]
        if (!first || !last) return
        chunks.push({
            filename: filename + `#chunk${chunks.length}`,
            // Ranges come from the source positions, so skipped separator
            // lines do not shift the ranges of later chunks.
            lineStart: first.startLine,
            lineEnd: last.startLine + last.lines.length,
            content: group.map((s) => s.lines.join("\n")).join("\n"),
        })
    }

    let pending: Section[] = []
    let pendingTokens = 0
    for (const section of sections) {
        if (section.tokens > maxTokens) {
            // Oversized section: flush what we have and emit it on its own.
            if (pending.length) {
                emit(pending)
                pending = []
                pendingTokens = 0
            }
            emit([section])
            continue
        }

        if (pendingTokens + section.tokens <= maxTokens) {
            pending.push(section)
            pendingTokens += section.tokens
            continue
        }

        // Over budget: move trailing ancestor headings (each strictly
        // shallower than the section after it) out of the pending chunk so
        // they travel with this section instead of being left behind.
        const carried: Section[] = []
        let child = section
        while (pendingTokens + section.tokens > maxTokens) {
            const parent = pending[pending.length - 1]
            if (!parent || child.level <= parent.level) break
            pending.pop()
            carried.unshift(parent)
            pendingTokens -= parent.tokens
            child = parent
        }
        if (pending.length) emit(pending)
        pending = [...carried, section]
        pendingTokens = pending.reduce((acc, s) => acc + s.tokens, 0)
    }
    if (pending.length) emit(pending)

    return chunks
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/microsoft/genaiscript'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

mdchunk.ts•4.49 KiB