DevOps AI Toolkit

knowledge.ts•5.34 KiB

/**
 * Knowledge Base Tools
 *
 * Utility tools for knowledge base document processing.
 * These tools handle text chunking and ID generation - no AI/embeddings involved.
 *
 * PRD #356: Knowledge Base System - Milestone 1
 */

import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import { v5 as uuidv5 } from 'uuid';
import { createHash } from 'crypto';
import { ToolDefinition } from '../types';

/**
 * Result returned by knowledge tool handlers.
 *
 * `error` is populated only on failure and `data` only on success;
 * `message` is always present.
 */
export interface KnowledgeToolResult {
  success: boolean;
  data?: unknown;
  error?: string;
  message: string;
}

/**
 * Self-contained knowledge tool definition: the schema advertised to callers
 * plus the handler that executes the tool.
 */
export interface KnowledgeTool {
  definition: ToolDefinition;
  handler: (args: Record<string, unknown>) => Promise<KnowledgeToolResult>;
}

/**
 * UUID v5 namespace for knowledge chunk IDs.
 *
 * NOTE: this value is the RFC 4122 predefined *DNS* namespace
 * (6ba7b810-...), not the URL namespace (6ba7b811-...). It must not be
 * "corrected": changing it would change every chunk ID generated so far and
 * break the determinism guarantee for already-ingested documents.
 */
const KNOWLEDGE_NAMESPACE = '6ba7b810-9dad-11d1-80b4-00c04fd430c8';

/** Chunk size (in characters) used when the caller does not pass `maxChunkSize`. */
const DEFAULT_MAX_CHUNK_SIZE = 1000;

/** Overlap (in characters) used when the caller does not pass `chunkOverlap`. */
const DEFAULT_CHUNK_OVERLAP = 200;

/**
 * Separators tried in order by the splitter, from coarsest (paragraph) to
 * finest (single character), so splits land on the most natural boundary
 * that still satisfies the size limit.
 */
const CHUNK_SEPARATORS = ['\n\n', '\n', '. ', '! ', '? ', '; ', ', ', ' ', ''];

/**
 * Chunk result returned by the chunking tool
 */
interface ChunkResult {
  id: string;
  content: string;
  checksum: string;
  chunkIndex: number;
  totalChunks: number;
}

/**
 * Create a successful tool result
 */
function successResult(data: unknown, message: string): KnowledgeToolResult {
  return { success: true, data, message };
}

/**
 * Create an error tool result
 */
function errorResult(error: string, message: string): KnowledgeToolResult {
  return { success: false, error, message };
}

/**
 * Generate a deterministic chunk ID using UUID v5.
 * The same URI + chunkIndex always produces the same ID.
 */
function generateChunkId(uri: string, chunkIndex: number): string {
  return uuidv5(`${uri}#${chunkIndex}`, KNOWLEDGE_NAMESPACE);
}

/**
 * Generate the hex-encoded SHA-256 checksum of content
 */
function generateChecksum(content: string): string {
  return createHash('sha256').update(content).digest('hex');
}

/**
 * knowledge_chunk - Split document into semantic chunks with deterministic IDs
 *
 * Uses RecursiveCharacterTextSplitter to split text at natural boundaries
 * (paragraphs, sentences, words) while respecting size limits and overlap.
 *
 * The handler never rejects for bad input: every validation failure and any
 * error raised while splitting is reported as a `KnowledgeToolResult` with
 * `success: false`. Missing or whitespace-only content is not an error; it
 * yields a successful result with zero chunks.
 */
export const knowledgeChunk: KnowledgeTool = {
  definition: {
    name: 'knowledge_chunk',
    type: 'agentic',
    description:
      'Split a document into semantic chunks with deterministic IDs. ' +
      'Chunks are split at natural boundaries (paragraphs, sentences, words) ' +
      'with configurable size and overlap. Each chunk gets a deterministic UUID v5 ' +
      'based on the source URI and chunk index.',
    inputSchema: {
      type: 'object',
      properties: {
        content: {
          type: 'string',
          description: 'Document content to split into chunks',
        },
        uri: {
          type: 'string',
          description:
            'Document URI for deterministic ID generation (e.g., https://github.com/org/repo/blob/main/docs/guide.md)',
        },
        maxChunkSize: {
          type: 'number',
          description: 'Maximum chunk size in characters (default: 1000)',
        },
        chunkOverlap: {
          type: 'number',
          description: 'Number of overlapping characters between chunks (default: 200)',
        },
      },
      required: ['content', 'uri'],
    },
  },

  handler: async (args: Record<string, unknown>): Promise<KnowledgeToolResult> => {
    // Arguments arrive untyped; keep them as `unknown` and narrow at runtime
    // instead of trusting a type assertion.
    const content = args.content;
    const uri = args.uri;
    const maxChunkSize = args.maxChunkSize ?? DEFAULT_MAX_CHUNK_SIZE;
    const chunkOverlap = args.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP;

    // Validate required parameters
    if (!uri) {
      return errorResult('Missing required parameter: uri', 'knowledge_chunk requires uri parameter');
    }
    // A non-string URI would be silently coerced (e.g. "[object Object]") and
    // produce colliding IDs across unrelated documents.
    if (typeof uri !== 'string') {
      return errorResult('Invalid uri', 'uri must be a string');
    }

    // Validate chunk parameters
    if (typeof maxChunkSize !== 'number' || !Number.isInteger(maxChunkSize) || maxChunkSize < 1) {
      return errorResult('Invalid maxChunkSize', 'maxChunkSize must be a positive integer');
    }
    if (
      typeof chunkOverlap !== 'number' ||
      !Number.isInteger(chunkOverlap) ||
      chunkOverlap < 0 ||
      chunkOverlap >= maxChunkSize
    ) {
      return errorResult('Invalid chunkOverlap', 'chunkOverlap must be >= 0 and < maxChunkSize');
    }

    const emptyResult = (): KnowledgeToolResult =>
      successResult({ chunks: [], totalChunks: 0, uri }, 'Empty content - no chunks created');

    // Handle empty or missing content
    if (!content) {
      return emptyResult();
    }
    // Truthy non-string content previously crashed on `.trim()` outside the
    // try/catch, rejecting the promise instead of returning an error result.
    if (typeof content !== 'string') {
      return errorResult('Invalid content', 'content must be a string');
    }
    if (!content.trim()) {
      return emptyResult();
    }

    try {
      // Create text splitter with semantic-aware separators
      const splitter = new RecursiveCharacterTextSplitter({
        chunkSize: maxChunkSize,
        chunkOverlap: chunkOverlap,
        separators: CHUNK_SEPARATORS,
      });

      // Split the text
      const textChunks = await splitter.splitText(content);

      // Generate chunk objects with deterministic IDs
      const chunks: ChunkResult[] = textChunks.map((text, index) => ({
        id: generateChunkId(uri, index),
        content: text,
        checksum: generateChecksum(text),
        chunkIndex: index,
        totalChunks: textChunks.length,
      }));

      return successResult(
        { chunks, totalChunks: chunks.length, uri },
        `Split document into ${chunks.length} chunks`
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return errorResult(message, `Failed to chunk document: ${message}`);
    }
  },
};

/**
 * All knowledge tools for registration
 */
export const KNOWLEDGE_TOOLS: KnowledgeTool[] = [knowledgeChunk];

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vfarcic/dot-ai'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

knowledge.ts•5.34 KiB