import { GoogleGenAI } from "@google/genai";
import { logger } from "../logger.js";
import { LRUCache } from 'lru-cache';
import { createHash } from 'node:crypto';
import { RecursiveCharacterTextSplitter } from './text-splitter.js';
import { getEncoding, type Tiktoken, type TiktokenEncoding } from 'js-tiktoken';
import { cosineSimilarity } from 'ai';
// Environment and client setup
function clampNumber(n: number, min: number, max: number): number {
if (Number.isNaN(n)) {
return min;
}
return Math.max(min, Math.min(max, n));
}
const API_KEY = process.env.GEMINI_API_KEY;
if (!API_KEY) {
throw new Error('Missing GEMINI_API_KEY in environment variables');
}
const client = new GoogleGenAI({
apiKey: API_KEY,
...(process.env.GEMINI_API_ENDPOINT ? { apiEndpoint: process.env.GEMINI_API_ENDPOINT } : {}),
});
const MODEL = process.env.GEMINI_MODEL || 'gemini-2.5-flash';
const MAX_TOKENS = clampNumber(parseInt(process.env.GEMINI_MAX_OUTPUT_TOKENS || '65536', 10), 1024, 65536);
const TEMPERATURE = clampNumber(parseFloat(process.env.GEMINI_TEMPERATURE || '0.4'), 0, 2);
const TOP_P = clampNumber(parseFloat(process.env.GEMINI_TOP_P || '0.9'), 0, 1);
const TOP_K = clampNumber(parseInt(process.env.GEMINI_TOP_K || '40', 10), 1, 1000);
const CANDIDATE_COUNT = clampNumber(parseInt(process.env.GEMINI_CANDIDATE_COUNT || '2', 10), 1, 8);
const THINKING_BUDGET_TOKENS = clampNumber(parseInt(process.env.THINKING_BUDGET_TOKENS || '1500', 10), 0, 8000);
const ENABLE_URL_CONTEXT = (process.env.ENABLE_URL_CONTEXT || 'true').toLowerCase() === 'true';
const ENABLE_GEMINI_GOOGLE_SEARCH = (process.env.ENABLE_GEMINI_GOOGLE_SEARCH || 'true').toLowerCase() === 'true';
const ENABLE_GEMINI_CODE_EXECUTION = (process.env.ENABLE_GEMINI_CODE_EXECUTION || 'false').toLowerCase() === 'true';
const ENABLE_GEMINI_FUNCTIONS = (process.env.ENABLE_GEMINI_FUNCTIONS || 'false').toLowerCase() === 'true';
const ENABLE_PROVIDER_CACHE = (process.env.ENABLE_PROVIDER_CACHE || 'true').toLowerCase() === 'true';
const PROVIDER_CACHE_MAX = clampNumber(parseInt(process.env.PROVIDER_CACHE_MAX || '100', 10), 10, 5000);
const PROVIDER_CACHE_TTL_MS = clampNumber(parseInt(process.env.PROVIDER_CACHE_TTL_MS || '600000', 10), 1000, 86400000);
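// Example environment configuration (illustrative values only; every variable above is optional
// except GEMINI_API_KEY, and the defaults in the parsing code apply when a variable is unset):
//   GEMINI_API_KEY=...              # required
//   GEMINI_MODEL=gemini-2.5-flash
//   GEMINI_MAX_OUTPUT_TOKENS=32768
//   ENABLE_GEMINI_GOOGLE_SEARCH=true
//   ENABLE_PROVIDER_CACHE=true
//   PROVIDER_CACHE_TTL_MS=600000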
// Convenience alias
const ai = client;
// Embeddings model lives here to avoid cycles
const EMBEDDING_MODEL = process.env.GEMINI_EMBEDDING_MODEL || 'text-embedding-004';
// Centralized embeddings generation (Gemini-only)
type EmbedVector = number[];
type EmbedResponse = { embeddings?: Array<{ values?: EmbedVector }> };
export async function generateTextEmbedding(text: string): Promise<number[]> {
const res: EmbedResponse = await ai.models.embedContent({
model: EMBEDDING_MODEL,
contents: [{ role: 'user', parts: [{ text }] }],
}) as unknown as EmbedResponse;
const values = res?.embeddings?.[0]?.values;
return Array.isArray(values) ? values : [];
}
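// Usage sketch (illustrative; the vector length depends on the embedding model, typically 768 for text-embedding-004):
//   const a = await generateTextEmbedding('vector databases');
//   const b = await generateTextEmbedding('approximate nearest neighbour search');
//   const similarity = a.length && b.length ? cosineSimilarity(a, b) : 0;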
// Shared generation config (optionally enforce JSON via responseMimeType)
type Empty = Record<string, never>;
type Tool = { googleSearch: Empty } | { codeExecution: Empty } | { functionDeclarations: object[] };
type GenExtra = Partial<{ responseMimeType: string; responseSchema: object; tools: Tool[] }>;
function baseConfig(extra?: GenExtra) {
return {
temperature: TEMPERATURE,
maxOutputTokens: MAX_TOKENS,
candidateCount: CANDIDATE_COUNT,
topP: TOP_P,
topK: TOP_K,
...(extra?.responseMimeType ? { responseMimeType: extra.responseMimeType } : {}),
...(extra?.responseSchema ? { responseSchema: extra.responseSchema } : {}),
};
}
// Input type kept compatible with the project
type Part = { text?: string };
type ContentMsg = { role: string; parts: Part[] };
type ContentArg = string | { contents: ContentMsg[] };
// Provider-level cache (optional)
type GenerateRaw = { candidates?: Array<{ content?: { parts?: Array<Part> } }> };
type GenerateWrapped = GenerateRaw & { response: { text: () => Promise<string> } };
const providerCache = ENABLE_PROVIDER_CACHE
? new LRUCache<string, GenerateWrapped>({ max: PROVIDER_CACHE_MAX, ttl: PROVIDER_CACHE_TTL_MS })
: null;
function hashKey(obj: unknown): string {
try {
return createHash('sha256').update(JSON.stringify(obj)).digest('hex');
} catch {
// Fallback to toString in pathological cases
return String(obj);
}
}
// Default tool set based on env flags; merged with any extra.tools at call time
function defaultTools(): Tool[] {
const tools: Tool[] = [];
if (ENABLE_GEMINI_GOOGLE_SEARCH) {
tools.push({ googleSearch: {} });
}
if (ENABLE_GEMINI_CODE_EXECUTION) {
tools.push({ codeExecution: {} });
}
// Functions require explicit declarations from caller; gate by flag but do not add empty set
return tools;
}
// Unified raw->text extractor used across helpers
function extractTextFromRaw(r: GenerateRaw): string {
const parts = r.candidates?.[0]?.content?.parts;
const firstText = parts?.find((p) => typeof p.text === 'string')?.text;
return firstText ?? '';
}
// Core generator: normalize ContentArg and call model.generateContent
async function generateContentInternal(prompt: ContentArg, extra?: GenExtra): Promise<GenerateWrapped> {
const contents: ContentMsg[] =
typeof prompt === 'string'
? [{ role: 'user', parts: [{ text: prompt }] }]
: prompt.contents;
const toolsCombined = [
...defaultTools(),
...(ENABLE_GEMINI_FUNCTIONS ? (extra?.tools || []) : []),
];
const configObj = baseConfig(extra);
const cacheKey = ENABLE_PROVIDER_CACHE ? hashKey({ MODEL, contents, config: configObj, tools: toolsCombined }) : '';
if (providerCache && cacheKey) {
const hit = providerCache.get(cacheKey);
if (hit) {
// Basic hit log
logger.info({ key: cacheKey.slice(0,8) }, "[provider-cache] HIT");
return hit;
} else {
logger.info({ key: cacheKey.slice(0,8) }, "[provider-cache] MISS");
}
}
// In @google/genai, tools belong inside the generation config rather than as a top-level request field
const request: { model: string; contents: ContentMsg[]; config: ReturnType<typeof baseConfig> & { tools?: Tool[] } } = {
model: MODEL,
contents,
config: toolsCombined.length > 0 ? { ...configObj, tools: toolsCombined } : configObj,
};
const raw = await ai.models.generateContent(request);
// Back-compat shim: expose response.text() like older code expects
const textVal = extractTextFromRaw(raw as unknown as GenerateRaw);
const wrapped = Object.assign({}, raw, {
response: {
text: async () => textVal,
},
});
if (providerCache) {
providerCache.set(cacheKey, wrapped);
}
return wrapped;
}
// Maintain expected exports
export const o3MiniModel = {
generateContent: (prompt: ContentArg) => generateContentInternal(prompt),
};
export const o3MiniModel2 = {
generateContent: (prompt: ContentArg) => generateContentInternal(prompt),
};
export const researchModel = {
generateContent: (prompt: ContentArg) => generateContentInternal(prompt),
};
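// Back-compat usage sketch: callers written against the older SDK shape can keep awaiting response.text():
//   const result = await researchModel.generateContent('Summarize the history of RISC-V in two sentences.');
//   const text = await result.response.text();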
// Token counting via model.countTokens
export async function countTokens(contents: Array<{ role: string; parts: Array<{ text?: string }> }>) {
const res = await ai.models.countTokens({ model: MODEL, contents });
return res.totalTokens ?? 0;
}
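// Usage sketch (illustrative):
//   const total = await countTokens([{ role: 'user', parts: [{ text: 'How many tokens is this?' }] }]);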
// Trims a prompt based on token count
export async function trimPrompt(prompt: string, maxTokens: number) {
if (!prompt) {
return '';
}
const contents = [{ role: 'user', parts: [{ text: prompt }] }];
const tokenLength = await countTokens(contents);
if (tokenLength <= maxTokens) {
return prompt;
}
// simple resize by characters proportional to overflow
const overflowTokens = tokenLength - maxTokens;
const approxCharsPerToken = Math.max(2, Math.floor(prompt.length / Math.max(1, tokenLength)));
const sliceLen = Math.max(140, prompt.length - overflowTokens * approxCharsPerToken);
return prompt.slice(0, sliceLen);
}
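// Usage sketch (longDocument is a placeholder string; the 4096-token budget is illustrative):
//   const safePrompt = await trimPrompt(longDocument, 4096);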
// Fills {{key}} placeholders in a prompt template with the provided variable values
export function createPrompt(template: string, variables: Record<string, string>): string {
let prompt = template;
for (const key of Object.keys(variables)) {
prompt = prompt.replaceAll(`{{${key}}}`, variables[key] ?? '');
}
return prompt;
}
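// Usage sketch (illustrative):
//   createPrompt('Summarize {{topic}} for a {{audience}} audience.', { topic: 'WebGPU', audience: 'beginner' })
//   // => 'Summarize WebGPU for a beginner audience.'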
// Configurable text call via raw REST fetch; returns a string. When JSON output is requested, the
// instruction is prepended to the prompt rather than enforced via responseMimeType, and the optional
// schema is currently advisory only (it is not sent to the API).
export async function callGeminiProConfigurable(
prompt: string,
opts?: { json?: boolean; schema?: object; tools?: Tool[] }
): Promise<string> {
const apiKey = process.env.GEMINI_API_KEY;
const model = process.env.GEMINI_MODEL || 'gemini-2.5-flash';
// The REST path requires the 'models/' prefix; add it if the env var omits it
const modelPath = model.startsWith('models/') ? model : `models/${model}`;
const url = `https://generativelanguage.googleapis.com/v1beta/${modelPath}:generateContent?key=${apiKey}`;
// Prepend JSON instructions if needed (since we disabled strict mode)
let finalPrompt = prompt;
if (opts?.json) {
finalPrompt = "You must return a valid JSON object. Do not include markdown formatting like ```json. \n\n" + prompt;
}
const requestBody: any = {
contents: [{
role: 'user',
parts: [{ text: finalPrompt }]
}],
generationConfig: {
temperature: 0.7,
maxOutputTokens: 8192
}
};
// Google Search tool injection: only when the caller passes it explicitly; the global
// ENABLE_GEMINI_GOOGLE_SEARCH flag is deliberately not applied here so callers keep explicit control
if (opts?.tools && opts.tools.some(t => 'googleSearch' in t)) {
requestBody.tools = [{ googleSearch: {} }];
}
try {
console.error(`[RawFetch] STARTING REQUEST to ${model}`);
console.error(`[RawFetch] Prompt Length: ${finalPrompt.length}`);
const response = await fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody)
});
if (!response.ok) {
const errText = await response.text();
console.error(`[RawFetch] HTTP ERROR ${response.status}: ${errText}`);
// Allow fallback to empty string so the agent retries or moves on
return '';
}
const data = await response.json();
const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
if (!text) {
console.error('[RawFetch] Empty response content:', JSON.stringify(data));
return '';
}
return text;
} catch (error) {
console.error('[RawFetch] Exception:', error);
return '';
}
}
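// Usage sketch (illustrative; callers should still parse defensively since JSON is requested via prompt only):
//   const raw = await callGeminiProConfigurable('Return {"risks": string[]} describing web-scraping risks.', { json: true });
//   let parsed: unknown = null;
//   try { parsed = JSON.parse(raw); } catch { /* fall back or retry */ }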
// Add type export to align with deep-research.ts
export type ResearchResultOutput = {
content: string;
sources: string[];
methodology: string;
limitations: string;
citations: Array<{ source: string; context: string }>;
learnings: string[];
visitedUrls: string[];
};
// Text splitter utilities retained
const TextProcessingConfig: { MIN_CHUNK_SIZE: number; MAX_CHUNK_SIZE: number; CHUNK_OVERLAP: number } = {
MIN_CHUNK_SIZE: 140,
MAX_CHUNK_SIZE: 8192,
CHUNK_OVERLAP: 20,
};
export function getChunkSize(): number {
const parsed = Number(process.env.CHUNK_SIZE);
return Number.isFinite(parsed) && parsed > 0
? clampNumber(parsed, TextProcessingConfig.MIN_CHUNK_SIZE, TextProcessingConfig.MAX_CHUNK_SIZE)
: TextProcessingConfig.MIN_CHUNK_SIZE;
}
export interface TextSplitter {
splitText(text: string): Promise<string[]>;
chunkSize: number;
}
// Export a real SemanticTextSplitter here (providers owns embeddings)
export class SemanticTextSplitter implements TextSplitter {
chunkSize: number;
chunkOverlap: number;
private sentenceRegex = /(?<=[.!?])\s+/g;
private tokenizer: Tiktoken;
private clean(s: string) { return s.replace(/\s+/g, ' ').trim(); }
constructor(params?: { chunkSize?: number; chunkOverlap?: number }) {
this.chunkSize = params?.chunkSize ?? TextProcessingConfig.MIN_CHUNK_SIZE;
this.chunkOverlap = params?.chunkOverlap ?? TextProcessingConfig.CHUNK_OVERLAP;
// Use OpenAI o200k_base encoding for robust token estimation
const enc: TiktokenEncoding = (process.env.TIKTOKEN_ENCODING as TiktokenEncoding) || 'o200k_base';
try {
this.tokenizer = getEncoding(enc);
} catch {
// last-resort fallback to a naive tokenizer
this.tokenizer = {
encode: (text: string) => Array.from(new TextEncoder().encode(text)),
decode: (tokens: number[]) => new TextDecoder().decode(Uint8Array.from(tokens)),
free: () => {},
} as unknown as Tiktoken;
}
}
private tokenCount(s: string): number {
try {
return this.tokenizer.encode(s).length;
} catch {
// fallback
return Math.ceil(s.length / 4);
}
}
async splitText(text: string): Promise<string[]> {
if (!text?.trim()) {
return [];
}
// Start with sentence segmentation; fallback to recursive splitter if needed
const normalized = text.replace(/\r\n?/g, '\n');
const sentences = normalized
.split(this.sentenceRegex)
.map(s => this.clean(s))
.filter(Boolean);
// If too few sentences, fallback to recursive character splitter
if (sentences.length <= 1) {
const rc = new RecursiveCharacterTextSplitter({ chunkSize: this.chunkSize, chunkOverlap: this.chunkOverlap });
return rc.splitText(normalized);
}
// Compute embeddings for semantic grouping
const embs = await Promise.all(sentences.map(s => generateTextEmbedding(s)));
// Simple greedy grouping by similarity to current centroid until chunk token budget reached
const chunks: string[] = [];
let i = 0;
while (i < sentences.length) {
const start = i;
const first = sentences[i];
if (!first) {
break;
}
let current = first;
let centroid = embs[i] ?? [];
i++;
while (i < sentences.length) {
const next = sentences[i];
if (!next) {
break;
}
const nextEmb = embs[i] ?? [];
const sim = centroid.length && nextEmb.length ? cosineSimilarity(centroid, nextEmb) : 0;
const merged = `${current} ${next}`.trim();
const mergedTokens = this.tokenCount(merged);
const withinBudget = mergedTokens <= this.chunkSize;
// Merge only while the merged chunk stays within the token budget and the next sentence is semantically close
if (withinBudget && sim >= 0.65) {
current = merged;
// update centroid (simple average)
if (centroid.length && nextEmb.length && centroid.length === nextEmb.length) {
centroid = centroid.map((v, k) => (v + (nextEmb[k] as number)) / 2);
}
i++;
} else {
break;
}
}
// Push the finished chunk; when overlap is enabled, the next chunk re-starts at this chunk's last sentence
chunks.push(current);
if (i < sentences.length && this.chunkOverlap > 0) {
// overlap by reusing the last sentence of this chunk as the first of next chunk when possible
i = Math.max(start + 1, i - 1);
}
}
return chunks.map(c => this.clean(c)).filter(Boolean);
}
}
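// Usage sketch (illustrative; chunkSize is a token budget, not a character count):
//   const splitter = new SemanticTextSplitter({ chunkSize: 512, chunkOverlap: 1 });
//   const chunks = await splitter.splitText(articleText); // articleText is a placeholder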
export async function semanticChunking(text: string, splitterOpt?: TextSplitter) {
const effectiveSplitter = splitterOpt || new SemanticTextSplitter({
chunkSize: getChunkSize(),
chunkOverlap: TextProcessingConfig.CHUNK_OVERLAP,
});
try {
return await effectiveSplitter.splitText(text);
} catch (error) {
console.error('Chunking error:', error);
return [text];
}
}
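// Usage sketch (illustrative): the wrapper falls back to returning the whole text as a single chunk on error.
//   const chunks = await semanticChunking(reportText); // reportText is a placeholder
//   const vectors = await Promise.all(chunks.map(c => generateTextEmbedding(c)));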
// Adaptive prompt utilities
export async function adaptivePrompt(
basePrompt: string,
context: string[],
similarityThreshold = 0.8
) {
// Compute embeddings locally using the embeddings helper above
const baseEmbedding = await generateTextEmbedding(basePrompt);
const contextEmbeddings = await Promise.all(context.map(txt => generateTextEmbedding(txt)));
const relevantContext = context.filter((_, i) => {
const emb = contextEmbeddings[i];
return baseEmbedding.length > 0 && Array.isArray(emb) && emb.length > 0 &&
cosineSimilarity(baseEmbedding, emb) > similarityThreshold;
});
return `${basePrompt}\n\nRelevant Context:\n${relevantContext.join('\n')}`;
}
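// Usage sketch (illustrative threshold; lower it to admit more loosely related context):
//   const prompt = await adaptivePrompt('What changed in HTTP/3?', candidateNotes, 0.75); // candidateNotes is a placeholder string[]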
// Helper to extract text from GenerateContentResponse consistently
export function extractText(raw: unknown): string {
return extractTextFromRaw(raw as GenerateRaw);
}
// Convenience helper to call with explicit tools (e.g., function calling)
export async function generateWithTools(prompt: ContentArg, tools: Tool[], extra?: Omit<GenExtra, 'tools'>) {
return generateContentInternal(prompt, { ...(extra || {}), tools });
}
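// Usage sketch (illustrative; the function declaration below is a made-up example, and function tools
// are only forwarded when ENABLE_GEMINI_FUNCTIONS=true):
//   const res = await generateWithTools('Look up the weather in Oslo.', [
//     { functionDeclarations: [{ name: 'getWeather', description: 'Fetch current weather', parameters: { type: 'object', properties: { city: { type: 'string' } }, required: ['city'] } }] },
//   ]);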
// Simple citation extractor for URLs and bracketed references
export function extractCitations(text: string): { urls: string[]; refs: string[] } {
if (!text) {
return { urls: [], refs: [] };
}
const urlRegex = /(https?:\/\/[^\s)]+)(?=[)\s]|$)/g;
const refRegex = /\[\[(\d+)\]\]/g;
const urls = Array.from(new Set(
Array.from(text.matchAll(urlRegex))
.map(m => m[1])
.filter((u): u is string => typeof u === 'string')
));
const refs = Array.from(new Set(Array.from(text.matchAll(refRegex)).map(m => m[0])));
return { urls, refs };
}
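// Usage sketch (illustrative):
//   extractCitations('See [[1]] and https://example.com/paper for details.')
//   // => { urls: ['https://example.com/paper'], refs: ['[[1]]'] }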
// Simple concurrency pool (no external deps)
async function runWithConcurrency<T, R>(items: T[], limit: number, worker: (item: T, idx: number) => Promise<R>): Promise<R[]> {
const results = new Array<R>(items.length);
let inFlight = 0;
let index = 0;
return await new Promise<R[]>((resolve, reject) => {
const next = () => {
while (inFlight < limit && index < items.length) {
const i = index++;
const item = items[i];
if (item === undefined) {
// skip sparse/undefined slots without counting them as in-flight work
continue;
}
inFlight++;
Promise.resolve(worker(item, i))
.then((res) => { results[i] = res; })
.catch(reject)
.finally(() => { inFlight--; next(); });
}
// resolve once every item has been dispatched (or skipped) and all in-flight work has settled
if (index >= items.length && inFlight === 0) {
resolve(results);
}
};
next();
});
}
// Batch generation using existing config/tools/caching
export async function generateBatch(prompts: ContentArg[], extra?: GenExtra, concurrency = clampNumber(parseInt(process.env.CONCURRENCY_LIMIT || '5', 10), 1, 64)) {
return runWithConcurrency<ContentArg, GenerateWrapped>(prompts, concurrency, (p) => generateContentInternal(p, extra));
}
// Batch with explicit tools support (e.g., function calling)
export async function generateBatchWithTools(prompts: ContentArg[], tools: Tool[], extra?: Omit<GenExtra, 'tools'>, concurrency = clampNumber(parseInt(process.env.CONCURRENCY_LIMIT || '5', 10), 1, 64)) {
return runWithConcurrency(prompts, concurrency, (p) => generateContentInternal(p, { ...(extra || {}), tools }));
}
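// Usage sketch (illustrative; concurrency defaults to CONCURRENCY_LIMIT or 5):
//   const wrapped = await generateBatch(['Define latency.', 'Define throughput.']);
//   const texts = await Promise.all(wrapped.map(w => w.response.text()));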
// Structured helpers for analysis → final pipeline with optional URL grounding
export async function generateAnalysisPlan(
userPrompt: string,
context: string[],
urlContext?: { url: string; snippet: string }[],
schema: object = {
type: 'object',
properties: {
plan: { type: 'array', items: { type: 'string' } },
risks: { type: 'array', items: { type: 'string' } },
missingInfo: { type: 'array', items: { type: 'string' } }
},
required: ['plan'],
additionalProperties: false,
}
): Promise<string> {
const urlBlock = ENABLE_URL_CONTEXT && urlContext?.length
? `\n\nURL Context:\n${urlContext.map(u => `- ${u.url}: ${u.snippet}`).join('\n')}`
: '';
const prompt = `You are a precise research strategist. Produce a concise JSON plan for answering the user query.\n` +
`User Query:\n${userPrompt}\n\nContext:\n${context.join('\n')}\n${urlBlock}\n\n` +
`Constraints: Use at most ${THINKING_BUDGET_TOKENS} tokens for analysis. Do not include the final answer.`;
return await callGeminiProConfigurable(prompt, { json: true, schema });
}
export async function generateFinalFromPlan(
planJson: string,
context: string[],
urlContext?: { url: string; snippet: string }[],
schema: object = {
type: 'object',
properties: {
sections: { type: 'array', items: { type: 'string' } },
citations: { type: 'array', items: { type: 'string' } }
},
required: ['sections'],
additionalProperties: false,
}
): Promise<string> {
const urlBlock = ENABLE_URL_CONTEXT && urlContext?.length
? `\n\nURL Context:\n${urlContext.map(u => `- ${u.url}: ${u.snippet}`).join('\n')}`
: '';
const prompt = `Using the provided JSON plan, write the final structured output as JSON (sections and citations).\n` +
`Plan JSON:\n${planJson}\n\nContext:\n${context.join('\n')}\n${urlBlock}`;
return await callGeminiProConfigurable(prompt, { json: true, schema });
}
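// Usage sketch (illustrative two-step pipeline; userQuery, contextSnippets, and urlHits are placeholders):
//   const plan = await generateAnalysisPlan(userQuery, contextSnippets, urlHits);
//   const finalJson = await generateFinalFromPlan(plan, contextSnippets, urlHits);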