// LocalDocumentResult.ts
import { LocalDocument } from "./LocalDocument"
import { LocalDocumentIndex } from "./LocalDocumentIndex"
import {
    QueryResult,
    DocumentChunkMetadata,
    DocumentTextSection,
    Tokenizer,
} from "./types"
/**
 * Represents a search result for a document stored on disk.
 */
export class LocalDocumentResult extends LocalDocument {
    private readonly _chunks: QueryResult<DocumentChunkMetadata>[]
    private readonly _tokenizer: Tokenizer
    private readonly _score: number

    /**
     * @private
     * Internal constructor for `LocalDocumentResult` instances.
     * @param index Index the document is stored in.
     * @param id ID of the document.
     * @param uri URI of the document.
     * @param chunks Chunks of the document that matched the query.
     * @param tokenizer Tokenizer used to encode/decode the document's text.
     */
    public constructor(
        index: LocalDocumentIndex,
        id: string,
        uri: string,
        chunks: QueryResult<DocumentChunkMetadata>[],
        tokenizer: Tokenizer
    ) {
        super(index, id, uri)
        this._chunks = chunks
        this._tokenizer = tokenizer

        // Compute average chunk score. Guard against an empty chunk list so
        // the score is 0 rather than NaN (0 / 0).
        let score = 0
        this._chunks.forEach((chunk) => (score += chunk.score))
        this._score = this._chunks.length > 0 ? score / this._chunks.length : 0
    }

    /**
     * Returns the chunks of the document that matched the query.
     */
    public get chunks(): QueryResult<DocumentChunkMetadata>[] {
        return this._chunks
    }

    /**
     * Returns the average score of the document result.
     */
    public get score(): number {
        return this._score
    }

    /**
     * Renders all of the result's chunks as spans of text (sections.)
     * @remarks
     * The returned sections will be sorted by document order and limited to maxTokens in length.
     * @param maxTokens Maximum number of tokens per section.
     * @returns Array of rendered text sections.
     */
    public async renderAllSections(
        maxTokens: number
    ): Promise<DocumentTextSection[]> {
        // Load the full document text from disk
        const text = await this.loadText()

        // Add chunks to a temp array, splitting any chunk that encodes to more
        // than maxTokens tokens into multiple pieces.
        const chunks: SectionChunk[] = []
        for (let i = 0; i < this._chunks.length; i++) {
            const chunk = this._chunks[i]
            const startPos = chunk.item.metadata.startPos
            const endPos = chunk.item.metadata.endPos
            const chunkText = text.substring(startPos, endPos + 1)
            const tokens = this._tokenizer.encode(chunkText)
            let offset = 0
            // Track the split position in characters (not tokens) so pieces
            // sort correctly against other chunks' character offsets.
            let charPos = startPos
            while (offset < tokens.length) {
                const chunkLength = Math.min(maxTokens, tokens.length - offset)
                const pieceText = this._tokenizer.decode(
                    tokens.slice(offset, offset + chunkLength)
                )
                chunks.push({
                    text: pieceText,
                    startPos: charPos,
                    endPos: charPos + pieceText.length - 1,
                    score: chunk.score,
                    tokenCount: chunkLength,
                })
                offset += chunkLength
                charPos += pieceText.length
            }
        }

        // Sort chunks by startPos to arrange them in document order
        const sorted = chunks.sort((a, b) => a.startPos - b.startPos)

        // Greedily pack chunks into sections no longer than maxTokens
        const sections: Section[] = []
        for (let i = 0; i < sorted.length; i++) {
            const chunk = sorted[i]
            let section = sections[sections.length - 1]
            if (!section || section.tokenCount + chunk.tokenCount > maxTokens) {
                section = {
                    chunks: [],
                    score: 0,
                    tokenCount: 0,
                }
                sections.push(section)
            }
            section.chunks.push(chunk)
            section.score += chunk.score
            section.tokenCount += chunk.tokenCount
        }

        // Normalize section scores to the average of their chunks' scores
        sections.forEach((section) => (section.score /= section.chunks.length))

        // Return final rendered sections
        return sections.map((section) => {
            let text = ""
            section.chunks.forEach((chunk) => (text += chunk.text))
            return {
                text: text,
                tokenCount: section.tokenCount,
                score: section.score,
            }
        })
    }

    /**
     * Renders the top spans of text (sections) of the document based on the query result.
     * @remarks
     * The returned sections will be sorted by relevance and limited to the top `maxSections`.
     * @param maxTokens Maximum number of tokens per section.
     * @param maxSections Maximum number of sections to return.
     * @param overlappingChunks Optional. If true, overlapping chunks of text will be added to each section until the maxTokens is reached.
     * @returns Array of rendered text sections.
     */
    public async renderSections(
        maxTokens: number,
        maxSections: number,
        overlappingChunks = true
    ): Promise<DocumentTextSection[]> {
        // Load the full document text from disk
        const text = await this.loadText()

        // First check to see if the entire document is shorter than maxTokens
        const length = await this.getLength()
        if (length <= maxTokens) {
            return [
                {
                    text,
                    tokenCount: length,
                    score: 1.0,
                },
            ]
        }

        // Otherwise, we need to split the document into sections:
        // - Add each chunk to a temp array and filter out any chunk that's longer than maxTokens.
        // - Sort the array by startPos to arrange chunks in document order.
        // - Generate a new array of sections by combining chunks until the maxTokens is reached for each section.
        // - Generate an aggregate score for each section by averaging the score of each chunk in the section.
        // - Sort the sections by score and limit to maxSections.
        // - For each remaining section combine adjacent chunks of text.
        // - Dynamically add overlapping chunks of text to each section until the maxTokens is reached.
        const chunks: SectionChunk[] = this._chunks
            .map((chunk) => {
                const startPos = chunk.item.metadata.startPos
                const endPos = chunk.item.metadata.endPos
                const chunkText = text.substring(startPos, endPos + 1)
                return {
                    text: chunkText,
                    startPos,
                    endPos,
                    score: chunk.score,
                    tokenCount: this._tokenizer.encode(chunkText).length,
                }
            })
            .filter((chunk) => chunk.tokenCount <= maxTokens)
            .sort((a, b) => a.startPos - b.startPos)

        // Check for no chunks surviving the maxTokens filter
        if (chunks.length === 0) {
            // Nothing matched the query at all, so there's nothing to render
            if (this._chunks.length === 0) {
                return []
            }

            // Take the top chunk and return a maxTokens sized subset of its text
            const topChunk = this._chunks[0]
            const startPos = topChunk.item.metadata.startPos
            const endPos = topChunk.item.metadata.endPos
            const chunkText = text.substring(startPos, endPos + 1)
            const tokens = this._tokenizer.encode(chunkText)
            // Clamp to the actual token length in case the chunk is shorter
            // than maxTokens (defensive; filtered chunks imply it's longer).
            const tokenCount = Math.min(tokens.length, maxTokens)
            return [
                {
                    text: this._tokenizer.decode(tokens.slice(0, tokenCount)),
                    tokenCount,
                    score: topChunk.score,
                },
            ]
        }

        // Generate sections by greedily packing chunks in document order
        const sections: Section[] = []
        for (let i = 0; i < chunks.length; i++) {
            const chunk = chunks[i]
            let section = sections[sections.length - 1]
            if (!section || section.tokenCount + chunk.tokenCount > maxTokens) {
                section = {
                    chunks: [],
                    score: 0,
                    tokenCount: 0,
                }
                sections.push(section)
            }
            section.chunks.push(chunk)
            section.score += chunk.score
            section.tokenCount += chunk.tokenCount
        }

        // Normalize section scores to the average of their chunks' scores
        sections.forEach((section) => (section.score /= section.chunks.length))

        // Sort sections by score and limit to maxSections
        sections.sort((a, b) => b.score - a.score)
        if (sections.length > maxSections) {
            sections.splice(maxSections, sections.length - maxSections)
        }

        // Combine adjacent chunks of text
        sections.forEach((section) => {
            for (let i = 0; i < section.chunks.length - 1; i++) {
                const chunk = section.chunks[i]
                const nextChunk = section.chunks[i + 1]
                if (chunk.endPos + 1 === nextChunk.startPos) {
                    chunk.text += nextChunk.text
                    chunk.endPos = nextChunk.endPos
                    chunk.tokenCount += nextChunk.tokenCount
                    section.chunks.splice(i + 1, 1)
                    // Re-check the merged chunk against its new neighbor
                    i--
                }
            }
        })

        // Add overlapping chunks of text to each section until the maxTokens is reached
        if (overlappingChunks) {
            const connector: SectionChunk = {
                text: "\n\n...\n\n",
                startPos: -1,
                endPos: -1,
                score: 0,
                tokenCount: this._tokenizer.encode("\n\n...\n\n").length,
            }
            sections.forEach((section) => {
                // Insert connectors between the remaining (non-adjacent) chunks
                if (section.chunks.length > 1) {
                    for (let i = 0; i < section.chunks.length - 1; i++) {
                        section.chunks.splice(i + 1, 0, connector)
                        section.tokenCount += connector.tokenCount
                        // Skip over the connector that was just inserted
                        i++
                    }
                }

                // Add chunks to beginning and end of the section until maxTokens is reached.
                // The `> 40` floor avoids padding with overlaps too small to be useful.
                let budget = maxTokens - section.tokenCount
                if (budget > 40) {
                    const sectionStart = section.chunks[0].startPos
                    const sectionEnd =
                        section.chunks[section.chunks.length - 1].endPos
                    if (sectionStart > 0) {
                        const beforeText = text.substring(
                            0,
                            section.chunks[0].startPos
                        )
                        const beforeTokens = this.encodeBeforeText(
                            beforeText,
                            Math.ceil(budget / 2)
                        )
                        // Give the leading overlap half the budget when text
                        // also follows the section; otherwise all of it.
                        const beforeBudget =
                            sectionEnd < text.length - 1
                                ? Math.min(
                                      beforeTokens.length,
                                      Math.ceil(budget / 2)
                                  )
                                : Math.min(beforeTokens.length, budget)
                        const chunk: SectionChunk = {
                            text: this._tokenizer.decode(
                                beforeTokens.slice(-beforeBudget)
                            ),
                            // NOTE(review): offset is in tokens, not characters,
                            // so these positions are approximate. They are not
                            // read after this point, so left as-is.
                            startPos: sectionStart - beforeBudget,
                            endPos: sectionStart - 1,
                            score: 0,
                            tokenCount: beforeBudget,
                        }
                        section.chunks.unshift(chunk)
                        section.tokenCount += chunk.tokenCount
                        budget -= chunk.tokenCount
                    }
                    if (sectionEnd < text.length - 1) {
                        const afterText = text.substring(sectionEnd + 1)
                        const afterTokens = this.encodeAfterText(
                            afterText,
                            budget
                        )
                        const afterBudget = Math.min(afterTokens.length, budget)
                        const chunk: SectionChunk = {
                            text: this._tokenizer.decode(
                                afterTokens.slice(0, afterBudget)
                            ),
                            startPos: sectionEnd + 1,
                            endPos: sectionEnd + afterBudget,
                            score: 0,
                            tokenCount: afterBudget,
                        }
                        section.chunks.push(chunk)
                        section.tokenCount += chunk.tokenCount
                        budget -= chunk.tokenCount
                    }
                }
            })
        }

        // Return final rendered sections
        return sections.map((section) => {
            let text = ""
            section.chunks.forEach((chunk) => (text += chunk.text))
            return {
                text: text,
                tokenCount: section.tokenCount,
                score: section.score,
            }
        })
    }

    /**
     * Encodes the text immediately preceding a section, capping the input to
     * roughly `budget` tokens worth of characters (8 chars/token heuristic)
     * so very large documents aren't fully re-tokenized.
     * @param text Text that precedes the section.
     * @param budget Maximum number of tokens that will be consumed.
     * @returns Encoded tokens for the tail of `text`.
     */
    private encodeBeforeText(text: string, budget: number): number[] {
        const maxLength = budget * 8
        const substr =
            text.length <= maxLength
                ? text
                : text.substring(text.length - maxLength)
        return this._tokenizer.encode(substr)
    }

    /**
     * Encodes the text immediately following a section, capping the input to
     * roughly `budget` tokens worth of characters (8 chars/token heuristic).
     * @param text Text that follows the section.
     * @param budget Maximum number of tokens that will be consumed.
     * @returns Encoded tokens for the head of `text`.
     */
    private encodeAfterText(text: string, budget: number): number[] {
        const maxLength = budget * 8
        const substr =
            text.length <= maxLength ? text : text.substring(0, maxLength)
        return this._tokenizer.encode(substr)
    }
}
/**
 * A contiguous span of document text tracked while assembling sections.
 */
type SectionChunk = {
    /** Text of the span. */
    text: string
    /** Start character offset of the span within the document. */
    startPos: number
    /** End character offset (inclusive) of the span within the document. */
    endPos: number
    /** Relevance score for the span (0 for connector/overlap filler). */
    score: number
    /** Number of tokens the span's text encodes to. */
    tokenCount: number
}
/**
 * A group of chunks whose combined text forms one rendered section.
 */
type Section = {
    /** Chunks making up the section, in document order. */
    chunks: SectionChunk[]
    /** Aggregate (averaged) score of the section's scored chunks. */
    score: number
    /** Total token count of all chunks in the section. */
    tokenCount: number
}