askDocQuestion.ts
import { readAsset } from 'utils:asset';
import type { AIConfig, ChatCompletionRequestMessage } from '@intlayer/ai';
import { streamText } from '@intlayer/ai';
import { getMarkdownMetadata } from '@intlayer/core';
import { getBlogs, getDocs, getFrequentQuestions } from '@intlayer/docs';
import { OpenAI } from 'openai';

const readEmbeddingsForFile = (fileKey: string): Record<string, number[]> => {
  try {
    return JSON.parse(
      readAsset(`./embeddings/${fileKey.replace('.md', '.json')}`, 'utf-8')
    ) as Record<string, number[]>;
  } catch {
    return {};
  }
};

type VectorStoreEl = {
  fileKey: string;
  chunkNumber: number;
  content: string;
  embedding?: number[];
  docUrl: string;
  docName: string;
};

/**
 * Simple in-memory vector store to hold document embeddings and their content.
 * Each entry contains:
 * - fileKey: A unique key identifying the file
 * - chunkNumber: The number of the chunk within the document
 * - content: The chunk content
 * - embedding: The numerical embedding vector for the chunk
 */
const vectorStore: VectorStoreEl[] = [];

/*
 * Ask question AI configuration
 */
const MAX_RELEVANT_CHUNKS_NB: number = 20; // Maximum number of relevant chunks to attach to the chat context
const MIN_RELEVANT_CHUNKS_SIMILARITY: number = 0.42; // Minimum similarity required for a chunk to be considered relevant

/*
 * Embedding model configuration
 */
const EMBEDDING_MODEL: OpenAI.EmbeddingModel = 'text-embedding-3-large'; // Model to use for embedding generation
const OVERLAP_TOKENS: number = 200; // Number of tokens to overlap between chunks
const MAX_CHUNK_TOKENS: number = 800; // Maximum number of tokens per chunk

const CHAR_BY_TOKEN: number = 4.15; // Pessimistic approximation of the number of characters per token
// Can use `tiktoken` or another tokenizer to calculate it more precisely
const MAX_CHARS: number = MAX_CHUNK_TOKENS * CHAR_BY_TOKEN;
const OVERLAP_CHARS: number = OVERLAP_TOKENS * CHAR_BY_TOKEN;

const skipDocEmbeddingsIndex = process.env.SKIP_DOC_EMBEDDINGS_INDEX === 'true';

/**
 * Splits a given text into overlapping chunks, ensuring each chunk does not exceed MAX_CHARS.
 * @param text - The input text to split.
 * @returns Array of text chunks.
 */
const chunkText = (text: string): string[] => {
  const chunks: string[] = [];
  let start = 0;

  while (start < text.length) {
    let end = Math.min(start + MAX_CHARS, text.length);

    // Ensure we don't cut words in the middle (find the nearest space)
    if (end < text.length) {
      const lastSpace = text.lastIndexOf(' ', end);
      if (lastSpace > start) {
        end = lastSpace;
      }
    }

    chunks.push(text.substring(start, end));

    // Move the start forward, keeping an overlap with the previous chunk
    const nextStart = end - OVERLAP_CHARS;
    if (nextStart <= start) {
      // Prevent an infinite loop if the overlap is too large
      start = end;
    } else {
      start = nextStart;
    }
  }

  return chunks;
};
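// Usage sketch (illustrative, not part of the original file): chunking a long
// string. With the constants above (MAX_CHUNK_TOKENS = 800, CHAR_BY_TOKEN = 4.15),
// each chunk holds at most ~3,320 characters and consecutive chunks share
// ~830 characters of overlap, so context carries across chunk boundaries.
const _chunkTextExample = (): void => {
  const longText = 'lorem ipsum '.repeat(1000); // 12,000 characters
  const chunks = chunkText(longText); // ~6 chunks with these constants
  console.info(`Produced ${chunks.length} chunks`);
  // The tail of each chunk reappears inside the next one (the overlap)
  console.assert(chunks[1].includes(chunks[0].slice(-500)));
};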
/**
 * Generates an embedding for a given text using OpenAI's embedding API.
 *
 * @param text - The input text to generate an embedding for
 * @returns The embedding vector as a number array (empty on failure)
 */
const generateEmbedding = async (text: string): Promise<number[]> => {
  try {
    const openaiClient = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

    const response = await openaiClient.embeddings.create({
      model: EMBEDDING_MODEL,
      input: text,
    });

    return response.data[0].embedding;
  } catch (error) {
    console.error('Error generating embedding:', error);
    return [];
  }
};

/**
 * Calculates the cosine similarity between two vectors.
 * Cosine similarity measures the cosine of the angle between two vectors in an inner product space.
 * Used to determine the similarity between chunks of text.
 *
 * @param vecA - The first vector
 * @param vecB - The second vector
 * @returns The cosine similarity score
 */
const cosineSimilarity = (vecA: number[], vecB: number[]): number => {
  // Calculate the dot product of the two vectors
  const dotProduct = vecA.reduce((sum, a, idx) => sum + a * vecB[idx], 0);

  // Calculate the magnitude (Euclidean norm) of each vector
  const magnitudeA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
  const magnitudeB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));

  // Compute and return the cosine similarity
  return dotProduct / (magnitudeA * magnitudeB);
};
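// Worked example (illustrative, not part of the original file): sanity checks
// for `cosineSimilarity`. Vectors pointing the same way score 1, orthogonal
// vectors score 0, and partial overlap falls in between. The result is NaN
// when either vector is empty (e.g. the `[]` error fallback above); such
// scores never pass the similarity threshold in `searchChunkReference`.
const _cosineSimilarityExample = (): void => {
  console.assert(cosineSimilarity([1, 0], [1, 0]) === 1); // Same direction
  console.assert(cosineSimilarity([1, 0], [0, 1]) === 0); // Orthogonal
  // [1, 1] vs [1, 0] -> 1 / sqrt(2) ≈ 0.707
  console.assert(
    Math.abs(cosineSimilarity([1, 1], [1, 0]) - Math.SQRT1_2) < 1e-12
  );
};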
/**
 * Indexes all Markdown documents by generating embeddings for each chunk and storing them in memory.
 * Reads precomputed per-document embeddings from `embeddings/<fileKey>.json`.
 * Handles cases where files have been updated and chunk counts have changed.
 */
export const loadMarkdownFiles = async (): Promise<void> => {
  // Retrieve documentation and blog posts in English locale
  const frequentQuestions = await getFrequentQuestions();
  const docs = await getDocs();
  const blogs = await getBlogs();

  // Combine docs, blogs, and frequent questions into a single object
  const files = { ...docs, ...blogs, ...frequentQuestions };

  // Iterate over each file key (identifier) in the combined files
  for (const fileKey of Object.keys(files)) {
    // Get the metadata of the file
    const fileMetadata = getMarkdownMetadata(
      files[fileKey as keyof typeof files] as string
    );

    // Split the document into fixed-size, overlapping chunks
    const fileChunks = chunkText(
      files[fileKey as keyof typeof files] as string
    );

    // Read existing embeddings for this file
    const existingEmbeddings = readEmbeddingsForFile(fileKey);

    // Check if the number of chunks has changed for this file
    const existingChunksForFile = Object.keys(existingEmbeddings);
    const currentChunkCount = fileChunks.length;
    const previousChunkCount = existingChunksForFile.length;

    let shouldRegenerateFileEmbeddings = false;

    // If the chunk count differs, the precomputed embeddings are stale for this file
    if (currentChunkCount !== previousChunkCount) {
      console.info(
        `File "${fileKey}" chunk count changed: ${previousChunkCount} -> ${currentChunkCount}. Regenerating embeddings.`
      );
      shouldRegenerateFileEmbeddings = !skipDocEmbeddingsIndex;
    }

    // Iterate over each chunk within the current file
    let resultForFile: Record<string, number[] | undefined> = {};

    for (const [chunkIndex, fileChunk] of fileChunks.entries()) {
      const chunkNumber = chunkIndex + 1; // Chunk number starts at 1
      const chunksNumber = fileChunks.length;
      const chunkKeyName = `chunk_${chunkNumber}`; // Unique key for the chunk within the file

      // Retrieve the precomputed embedding if available and the file hasn't changed
      const docEmbedding = !shouldRegenerateFileEmbeddings
        ? existingEmbeddings[chunkKeyName]
        : undefined;

      // Reuse the precomputed embedding when valid; otherwise generate a new one
      // (unless indexing is explicitly skipped via SKIP_DOC_EMBEDDINGS_INDEX)
      const embedding =
        docEmbedding ??
        (skipDocEmbeddingsIndex ? undefined : await generateEmbedding(fileChunk));

      // Accumulate the file-scoped embeddings (can be persisted back to `embeddings/<fileKey>.json`)
      resultForFile = { ...resultForFile, [chunkKeyName]: embedding };

      // Store the embedding and content in the in-memory vector store
      vectorStore.push({
        fileKey,
        chunkNumber,
        embedding,
        content: fileChunk,
        docUrl: fileMetadata.url,
        docName: fileMetadata.title,
      });

      console.info(`- Loaded: ${fileKey}/${chunkKeyName}/${chunksNumber}`);
    }
  }
};

// Automatically index Markdown files on module load
loadMarkdownFiles();

/**
 * Searches the indexed documents for the most relevant chunks based on a query.
 * Utilizes cosine similarity to find the closest matching embeddings.
 *
 * @param query - The search query provided by the user
 * @returns An array of the top matching document chunks
 */
export const searchChunkReference = async (
  query: string,
  maxResults: number = MAX_RELEVANT_CHUNKS_NB,
  minSimilarity: number = MIN_RELEVANT_CHUNKS_SIMILARITY
): Promise<VectorStoreEl[]> => {
  // Generate an embedding for the user's query
  const queryEmbedding = await generateEmbedding(query);

  // Calculate similarity scores between the query embedding and each chunk's embedding
  const selection = vectorStore
    .filter((chunk) => chunk.embedding) // Skip chunks without an embedding
    .map((chunk) => ({
      ...chunk,
      similarity: cosineSimilarity(queryEmbedding, chunk.embedding!), // Add a similarity score to each chunk
    }))
    .filter((chunk) => chunk.similarity > minSimilarity) // Filter out chunks with low similarity scores
    .sort((a, b) => b.similarity - a.similarity) // Sort chunks by highest similarity first
    .slice(0, maxResults); // Keep the top `maxResults` most similar chunks

  // Group chunks from the selected files first, without mutating the global store
  const orderedDocKeys = new Set(selection.map((chunk) => chunk.fileKey));
  const orderedVectorStore = [...vectorStore].sort(
    (a, b) =>
      Number(orderedDocKeys.has(b.fileKey)) -
      Number(orderedDocKeys.has(a.fileKey))
  );

  const results = orderedVectorStore.filter((chunk) =>
    selection.some(
      (v) => v.fileKey === chunk.fileKey && v.chunkNumber === chunk.chunkNumber
    )
  );

  // Return the top matching chunks, grouped by document
  return results;
};
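// Usage sketch (illustrative, not part of the original file). Assumes
// `loadMarkdownFiles()` has finished populating the vector store before the
// first query arrives; the question string is a placeholder.
const _searchExample = async (): Promise<void> => {
  const chunks = await searchChunkReference('How do I translate my content?');
  for (const chunk of chunks) {
    console.info(`${chunk.docName} (${chunk.docUrl}) - chunk ${chunk.chunkNumber}`);
  }
};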
const CHAT_GPT_PROMPT = readAsset('./PROMPT.md');

// Initial prompt configuration for the chatbot
export const initPrompt: ChatCompletionRequestMessage = {
  role: 'system',
  content: CHAT_GPT_PROMPT,
};

export type AskDocQuestionResult = {
  response: string;
  relatedFiles: string[];
};

export type AskDocQuestionOptions = {
  onMessage?: (chunk: string) => void;
};

/**
 * Handles the "Ask a question" endpoint in an Express.js route.
 * Processes user messages, retrieves relevant documents, and interacts with AI models to generate responses.
 *
 * @param messages - An array of chat messages from the user and assistant
 * @returns The assistant's response and the list of related files
 */
export const askDocQuestion = async (
  messages: ChatCompletionRequestMessage[],
  aiConfig: AIConfig,
  options?: AskDocQuestionOptions
): Promise<AskDocQuestionResult> => {
  // Format the user's question to keep only the relevant keywords
  const query = messages
    .filter((message) => message.role === 'user')
    .map((message) => `- ${message.content}`)
    .join('\n');

  // 1) Find relevant documents based on the user's question
  const relevantFilesReferences = await searchChunkReference(query);

  // 2) Integrate the relevant documents into the initial system prompt
  const systemPrompt = initPrompt.content.replace(
    '{{relevantFilesReferences}}',
    relevantFilesReferences.length === 0
      ? 'No relevant file found related to the question.'
      : relevantFilesReferences
          .map((doc, idx) =>
            [
              '-----',
              '---',
              `chunkId: ${idx}`,
              `docChunk: "${doc.chunkNumber}/${
                vectorStore.filter((chunk) => chunk.fileKey === doc.fileKey)
                  .length
              }"`, // Chunk position out of the document's total chunk count
              `docName: "${doc.docName}"`,
              `docUrl: "${doc.docUrl}"`,
              `---`,
              doc.content,
              `-----`,
            ].join('\n')
          )
          .join('\n\n') // Insert relevant docs into the prompt
  );

  // Format messages for the AI SDK, keeping only the last 8 messages
  const aiMessages = [
    {
      role: 'system' as const,
      content: systemPrompt,
    },
    ...messages.slice(-8),
  ];

  if (!aiConfig) {
    throw new Error('Failed to initialize AI configuration');
  }

  // 3) Use the AI SDK to stream the response
  let fullResponse = '';

  const stream = streamText({
    ...aiConfig,
    messages: aiMessages,
  });

  // Process the stream
  for await (const chunk of stream.textStream) {
    fullResponse += chunk;
    options?.onMessage?.(chunk);
  }

  // 4) Extract unique related files
  const relatedFiles = [
    ...new Set(relevantFilesReferences.map((doc) => doc.fileKey)),
  ];

  // 5) Return the assistant's response to the user
  return {
    response: fullResponse || 'Error: No result found',
    relatedFiles,
  };
};
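// Usage sketch (illustrative, not part of the original file): wiring
// `askDocQuestion` into the Express-style route the doc comment above refers
// to. `app` and `aiConfig` are assumed to exist in the host application.
//
// app.post('/api/ask', async (req, res) => {
//   const { messages } = req.body as { messages: ChatCompletionRequestMessage[] };
//   res.setHeader('Content-Type', 'text/plain; charset=utf-8');
//   const { relatedFiles } = await askDocQuestion(messages, aiConfig, {
//     onMessage: (chunk) => res.write(chunk), // Forward tokens as they stream in
//   });
//   res.write(`\n\nRelated files: ${relatedFiles.join(', ')}`);
//   res.end();
// });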
