import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import type { McpDependencies } from '../server/mcp-server.js';
import { contextPackInput } from './schemas.js';
import { embed, chat } from '../services/ollama-client.js';
import { textSearch } from '../services/text-search.js';
import { SemanticCache } from '../services/semantic-cache.js';
import { dedup } from '../services/dedup.js';
import { estimateTokens } from '../utils/tokens.js';
import { auditToolCall } from '../middleware/audit.js';
import { logger } from '../utils/logger.js';
/**
 * Registers the `context_pack` MCP tool on the given server.
 *
 * The tool performs hybrid retrieval (vector + full-text search) over stored
 * memory, deduplicates and budget-trims the retrieved chunks, then asks the
 * LLM to synthesize a structured JSON context bundle. Results are memoized in
 * `deps.cache` keyed on goal + scope.
 *
 * @param server - MCP server to register the tool on.
 * @param deps - Shared services: db, vector store, semantic cache.
 */
export function registerContextPack(server: McpServer, deps: McpDependencies): void {
  server.registerTool('context_pack', {
    description:
      'Pack optimized context for a given goal. Combines semantic search, text search, deduplication, and synthesis into a structured context bundle.',
    inputSchema: contextPackInput,
  }, async (args, extra) => {
    const start = Date.now();
    const { goal, scope, max_tokens = 4000 } = args;
    try {
      // Serve a memoized pack when the identical goal/scope was synthesized
      // before. Cache hits deliberately skip the audit log (cheap, no LLM call).
      const cacheKey = SemanticCache.keyFromString(`pack:${goal}:${scope ?? ''}`);
      const cached = deps.cache.get(cacheKey);
      if (cached) {
        logger.debug({ goal }, 'context.pack cache hit');
        return { content: [{ type: 'text' as const, text: cached }] };
      }

      // Embed the goal for semantic retrieval. Guard against an empty batch
      // from the embedding service so we fail with a clear message instead of
      // passing `undefined` into the vector store.
      const [goalEmbedding] = await embed(goal);
      if (!goalEmbedding) {
        throw new Error('Embedding service returned no vectors for goal');
      }

      // Hybrid retrieval: semantic (vector) plus lexical (full-text) search.
      const vectorResults = await deps.vectorStore.search(goalEmbedding, {
        topK: 15,
        minScore: 0.25,
        scope,
      });
      const ftsResults = textSearch(deps.db, goal, { limit: 10, scope });

      // Merge both result sets; dedup drops near-duplicates at similarity 0.8.
      const allChunks = [
        ...vectorResults.map((r) => r.content),
        ...ftsResults.map((r) => r.content),
      ];
      const uniqueChunks = dedup(allChunks, 0.8);

      // Trim to ~70% of the token budget, leaving headroom for the synthesis
      // output. Always admit at least one chunk: previously a single chunk
      // larger than the budget broke out of the loop immediately, and the
      // retrieval was misreported as "No relevant context found".
      let contextText = '';
      for (const chunk of uniqueChunks) {
        if (contextText && estimateTokens(contextText + chunk) > max_tokens * 0.7) break;
        contextText += chunk + '\n\n';
      }

      // Nothing retrieved at all: return a structured "empty" bundle.
      // Intentionally NOT cached, so newly upserted memory is found next call.
      if (!contextText.trim()) {
        const result = JSON.stringify({
          summary: 'No relevant context found in memory.',
          facts: [],
          next_actions: ['Use memory.upsert to store relevant information first.'],
        });
        return { content: [{ type: 'text' as const, text: result }] };
      }

      // Synthesize the final bundle with the LLM; the system prompt pins the
      // expected JSON shape and the output token ceiling.
      const synthesized = await chat([
        {
          role: 'system',
          content: `You are a context optimizer. Given retrieved memory chunks and a user goal, produce a structured JSON response with:
- "summary": A concise summary of the relevant context (2-3 sentences)
- "facts": An array of key facts extracted from the context
- "next_actions": Suggested next steps based on the context
Keep total output under ${max_tokens} tokens. Output ONLY valid JSON, no markdown.`,
        },
        {
          role: 'user',
          content: `Goal: ${goal}\n\nRetrieved context:\n${contextText}`,
        },
      ]);

      // Memoize the synthesized pack and record the call for auditing.
      deps.cache.set(cacheKey, synthesized);
      auditToolCall(deps.db, 'context_pack', extra.sessionId, goal, `${estimateTokens(synthesized)} tokens`, Date.now() - start);
      return { content: [{ type: 'text' as const, text: synthesized }] };
    } catch (err) {
      // Normalize unknown throwables, log with context, and audit the failure
      // so operators can see error rates per tool.
      const msg = err instanceof Error ? err.message : String(err);
      logger.error({ err, goal }, 'context.pack failed');
      auditToolCall(deps.db, 'context_pack', extra.sessionId, goal, `error: ${msg}`, Date.now() - start);
      return { content: [{ type: 'text' as const, text: `Error: ${msg}` }], isError: true };
    }
  });
}