Skip to main content
Glama
mcp-tools.test.ts21.4 kB
/** * MCP Tools End-to-End Tests * * Tests the 5 essential user workflows with real PostgreSQL and Ollama dependencies. * These tests verify complete user interaction patterns, not edge cases or load testing. * * Essential Workflows: * 1. Memory CRUD - Store, retrieve, update, delete lifecycle * 2. Memory Search - Store multiple memories and search by text/metadata * 3. Reasoning - Think, assess confidence, detect bias * 4. Problem Decomposition - Breakdown, analyze, evaluate * 5. Tool Chaining - Memory + Reasoning integration * * Data Cleanup Strategy (Requirement 13.3): * - Each test uses unique user IDs and session IDs * - Each test cleans up its own data in afterEach * - Final verification ensures database is in clean state * * Requirements: 13.1, 13.3, 13.7 */ import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest"; import { CognitiveMCPServer } from "../../server/mcp-server.js"; import type { MCPResponse } from "../../server/types.js"; import { cleanupUserData, createE2ETestContext, getDatabaseStats, resetTestCounter, trackMemoryId, verifyCleanState, type E2ETestContext, } from "../utils/e2e-cleanup.js"; import { withTimeout } from "../utils/test-helpers.js"; // Shared test state let server: CognitiveMCPServer; let testContext: E2ETestContext; /** * Helper function to invoke MCP tool */ async function invokeTool(toolName: string, params: Record<string, unknown>): Promise<MCPResponse> { const tool = server.toolRegistry.getTool(toolName); if (!tool) { throw new Error(`Tool not found: ${toolName}`); } return await tool.handler(params); } /** * Helper to get database client for cleanup operations */ async function getDbClient() { if (!server["databaseManager"]) { throw new Error("Database manager not available"); } return await server["databaseManager"].getConnection(); } /** * Ensures the database schema exists. * Creates tables if they don't exist (idempotent operations). * Does NOT drop existing data. */ async function ensureDatabaseSchema( client: Awaited<ReturnType<typeof getDbClient>> ): Promise<void> { const embeddingDimension = parseInt(process.env.EMBEDDING_DIMENSION ?? "768", 10); // Create memories table if not exists await client.query(` CREATE TABLE IF NOT EXISTS memories ( id TEXT PRIMARY KEY, user_id TEXT NOT NULL, session_id TEXT NOT NULL, content TEXT NOT NULL, primary_sector TEXT NOT NULL, strength REAL NOT NULL DEFAULT 1.0, salience REAL NOT NULL DEFAULT 0.5, importance REAL NOT NULL DEFAULT 0.5, decay_rate REAL NOT NULL DEFAULT 0.1, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), last_accessed TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), access_count INTEGER NOT NULL DEFAULT 0, metadata JSONB, search_vector tsvector ) `); // Create function and trigger for search_vector (idempotent) // Note: Using $$ for dollar-quoting in PostgreSQL functions await client.query(` CREATE OR REPLACE FUNCTION memories_search_vector_update() RETURNS trigger AS $$ BEGIN NEW.search_vector := to_tsvector('english', COALESCE(NEW.content, '')); RETURN NEW; END; $$ LANGUAGE plpgsql `); await client.query(`DROP TRIGGER IF EXISTS memories_search_vector_trigger ON memories`); await client.query(` CREATE TRIGGER memories_search_vector_trigger BEFORE INSERT OR UPDATE OF content ON memories FOR EACH ROW EXECUTE FUNCTION memories_search_vector_update() `); // Create GIN index for full-text search await client.query(` CREATE INDEX IF NOT EXISTS idx_memories_search_vector ON memories USING GIN(search_vector) `); // Create memory_embeddings table if not exists await client.query(` CREATE TABLE IF NOT EXISTS memory_embeddings ( memory_id TEXT NOT NULL REFERENCES memories(id) ON DELETE CASCADE, sector TEXT NOT NULL, embedding vector(${embeddingDimension}), dimension INTEGER NOT NULL, model TEXT NOT NULL, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), PRIMARY KEY (memory_id, sector) ) `); // Create memory_connections table if not exists await client.query(` CREATE TABLE IF NOT EXISTS memory_connections ( id TEXT PRIMARY KEY, source_id TEXT NOT NULL REFERENCES memories(id) ON DELETE CASCADE, target_id TEXT NOT NULL REFERENCES memories(id) ON DELETE CASCADE, connection_type TEXT NOT NULL, strength REAL NOT NULL DEFAULT 1.0, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ) `); // Create memory_metadata table if not exists await client.query(` CREATE TABLE IF NOT EXISTS memory_metadata ( memory_id TEXT PRIMARY KEY REFERENCES memories(id) ON DELETE CASCADE, keywords TEXT[] NOT NULL DEFAULT '{}', tags TEXT[] NOT NULL DEFAULT '{}', category TEXT, context TEXT, importance REAL DEFAULT 0.5, is_atomic BOOLEAN DEFAULT TRUE, parent_id TEXT REFERENCES memories(id), CONSTRAINT valid_importance CHECK (importance >= 0 AND importance <= 1) ) `); // Create memory_links table if not exists await client.query(` CREATE TABLE IF NOT EXISTS memory_links ( source_id TEXT NOT NULL REFERENCES memories(id) ON DELETE CASCADE, target_id TEXT NOT NULL REFERENCES memories(id) ON DELETE CASCADE, link_type TEXT NOT NULL, weight REAL DEFAULT 0.5, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, traversal_count INTEGER DEFAULT 0, PRIMARY KEY (source_id, target_id), CONSTRAINT valid_weight CHECK (weight >= 0 AND weight <= 1), CONSTRAINT no_self_links CHECK (source_id != target_id) ) `); } describe("MCP Tools End-to-End Tests", () => { beforeAll(async () => { // Reset test counter for deterministic IDs resetTestCounter(); }); beforeEach(async () => { // Create fresh server instance server = new CognitiveMCPServer(); // Initialize server with timeout await withTimeout(() => server.initialize(), 10000); // Ensure database schema exists (create tables if needed, but don't drop existing data) if (server["databaseManager"]) { const client = await getDbClient(); try { await ensureDatabaseSchema(client); } finally { client.release(); } } // Create unique test context for this test testContext = createE2ETestContext("e2e-test"); }); afterEach(async () => { // Clean up test data for this specific test if (server["databaseManager"] && server.isInitialized) { const client = await getDbClient(); try { // Clean up all data for this test's user ID await cleanupUserData(client, testContext.userId); } catch (error) { console.error("Cleanup error:", error); } finally { client.release(); } } // Shutdown server if (server.isInitialized) { await server.shutdown(); } }); afterAll(async () => { // Final verification that all E2E test data has been cleaned up const verifyServer = new CognitiveMCPServer(); try { await withTimeout(() => verifyServer.initialize(), 10000); if (verifyServer["databaseManager"]) { const client = await verifyServer["databaseManager"].getConnection(); try { const cleanState = await verifyCleanState(client); if (!cleanState.isClean) { console.warn( `⚠️ E2E test data not fully cleaned up: ${cleanState.remainingMemoryCount} memories remaining` ); } const stats = await getDatabaseStats(client); console.log( `📊 Final DB stats: ${stats.totalMemories} memories, ${stats.testMemories} test memories` ); } finally { client.release(); } } await verifyServer.shutdown(); } catch { // Verification is best-effort } }); // ============================================================================ // Workflow 1: Memory CRUD // Purpose: Verify the complete memory lifecycle from creation to deletion // ============================================================================ describe("Workflow 1: Memory CRUD", () => { it( "should handle store → retrieve → update → delete workflow", { timeout: 30000 }, async () => { testContext = createE2ETestContext("memory-crud"); // Step 1: Store memory const storeResponse = await invokeTool("remember", { content: "User prefers dark mode for better readability", userId: testContext.userId, sessionId: testContext.sessionId, primarySector: "semantic", metadata: { keywords: ["dark", "mode", "preference"], tags: ["ui", "settings"], category: "preferences", importance: 0.8, }, }); expect(storeResponse.success).toBe(true); expect(storeResponse.data).toBeDefined(); const memoryId = (storeResponse.data as Record<string, unknown>).memoryId as string; expect(memoryId).toBeDefined(); expect((storeResponse.data as Record<string, unknown>).embeddingsGenerated).toBe(5); // Track memory for cleanup (backup in case test fails before delete) trackMemoryId(testContext, memoryId); // Step 2: Retrieve memory const retrieveResponse = await invokeTool("recall", { userId: testContext.userId, text: "dark mode", limit: 10, }); expect(retrieveResponse.success).toBe(true); const memories = (retrieveResponse.data as Record<string, unknown>).memories as Array< Record<string, unknown> >; expect(memories).toHaveLength(1); expect(memories[0].id).toBe(memoryId); expect(memories[0].content).toContain("dark mode"); // Step 3: Update memory const updateResponse = await invokeTool("update_memory", { memoryId, userId: testContext.userId, content: "User strongly prefers dark mode for all interfaces", strength: 0.95, metadata: { importance: 0.9 }, }); expect(updateResponse.success).toBe(true); expect((updateResponse.data as Record<string, unknown>).embeddingsRegenerated).toBe(true); expect((updateResponse.data as Record<string, unknown>).strength).toBe(0.95); // Step 4: Delete memory (soft delete first, then hard delete) const softDeleteResponse = await invokeTool("forget", { memoryId, userId: testContext.userId, soft: true, }); expect(softDeleteResponse.success).toBe(true); expect((softDeleteResponse.data as Record<string, unknown>).deletionType).toBe("soft"); const hardDeleteResponse = await invokeTool("forget", { memoryId, userId: testContext.userId, soft: false, }); expect(hardDeleteResponse.success).toBe(true); expect((hardDeleteResponse.data as Record<string, unknown>).deletionType).toBe("hard"); } ); }); // ============================================================================ // Workflow 2: Memory Search // Purpose: Verify that multiple memories can be stored and searched effectively // ============================================================================ describe("Workflow 2: Memory Search", () => { it( "should handle store multiple → search by text → search by metadata workflow", { timeout: 30000 }, async () => { testContext = createE2ETestContext("memory-search"); // Step 1: Store multiple memories const memoriesData = [ { content: "Machine learning algorithms for classification", keywords: ["machine", "learning"], tags: ["ai", "ml"], }, { content: "Deep learning neural networks for image recognition", keywords: ["deep", "learning"], tags: ["ai", "deep-learning"], }, { content: "Natural language processing for text analysis", keywords: ["nlp", "text"], tags: ["ai", "nlp"], }, ]; for (const mem of memoriesData) { const response = await invokeTool("remember", { content: mem.content, userId: testContext.userId, sessionId: testContext.sessionId, primarySector: "semantic", metadata: { keywords: mem.keywords, tags: mem.tags, category: "technology" }, }); expect(response.success).toBe(true); trackMemoryId(testContext, (response.data as Record<string, unknown>).memoryId as string); } // Step 2: Search by text const textSearchResponse = await invokeTool("search", { userId: testContext.userId, text: "learning", limit: 10, }); expect(textSearchResponse.success).toBe(true); const textSearchMemories = (textSearchResponse.data as Record<string, unknown>) .memories as Array<Record<string, unknown>>; expect(textSearchMemories.length).toBeGreaterThan(0); // Step 3: Search by metadata const metadataSearchResponse = await invokeTool("search", { userId: testContext.userId, metadata: { tags: ["deep-learning"] }, limit: 10, }); expect(metadataSearchResponse.success).toBe(true); const metadataSearchMemories = (metadataSearchResponse.data as Record<string, unknown>) .memories as Array<Record<string, unknown>>; expect(metadataSearchMemories.length).toBeGreaterThan(0); const firstMemoryMetadata = metadataSearchMemories[0].metadata as Record<string, unknown>; expect(firstMemoryMetadata.tags).toContain("deep-learning"); } ); }); // ============================================================================ // Workflow 3: Reasoning // Purpose: Verify the reasoning workflow including thinking, confidence, and bias // ============================================================================ describe("Workflow 3: Reasoning", () => { it("should handle think → assess confidence → detect bias workflow", async () => { testContext = createE2ETestContext("reasoning"); // Step 1: Think about a problem const thinkResponse = await invokeTool("think", { problem: "Should we migrate our monolithic application to microservices?", mode: "analytical", context: { evidence: [ "Current system handles 10k requests/sec", "Team has 5 developers", "Budget is limited", ], }, }); expect(thinkResponse.success).toBe(true); const thinkData = thinkResponse.data as Record<string, unknown>; expect(thinkData.reasoning).toBeDefined(); expect(thinkData.conclusion).toBeDefined(); const reasoningText = Array.isArray(thinkData.reasoning) ? (thinkData.reasoning as string[]).join(" ") : (thinkData.reasoning as string); // Step 2: Assess confidence const confidenceResponse = await invokeTool("assess_confidence", { reasoning: reasoningText, evidence: [ "Current system handles 10k requests/sec", "Team has 5 developers", "Budget is limited", ], }); expect(confidenceResponse.success).toBe(true); const confidenceData = confidenceResponse.data as Record<string, unknown>; expect(confidenceData.overallConfidence).toBeGreaterThanOrEqual(0); expect(confidenceData.overallConfidence).toBeLessThanOrEqual(1); expect(confidenceData.factors).toBeDefined(); // Step 3: Detect bias const biasResponse = await invokeTool("detect_bias", { reasoning: reasoningText, context: "System migration decision", }); expect(biasResponse.success).toBe(true); const biasData = biasResponse.data as Record<string, unknown>; expect(biasData.biases).toBeDefined(); expect(Array.isArray(biasData.biases)).toBe(true); }); }); // ============================================================================ // Workflow 4: Problem Decomposition // Purpose: Verify the problem decomposition and analysis workflow // ============================================================================ describe("Workflow 4: Problem Decomposition", () => { it("should handle breakdown → analyze → evaluate workflow", async () => { testContext = createE2ETestContext("decomposition"); // Step 1: Decompose problem const breakdownResponse = await invokeTool("breakdown", { problem: "Design a scalable e-commerce platform", maxDepth: 2, }); expect(breakdownResponse.success).toBe(true); const breakdownData = breakdownResponse.data as Record<string, unknown>; expect(breakdownData.subProblems).toBeDefined(); expect((breakdownData.subProblems as unknown[]).length).toBeGreaterThan(0); // Step 2: Analyze systematically const analyzeResponse = await invokeTool("analyze", { problem: "Design a scalable e-commerce platform", context: { constraints: ["Budget: $100k", "Timeline: 6 months"], goals: ["Handle 100k users", "99.9% uptime"], }, }); expect(analyzeResponse.success).toBe(true); const analyzeData = analyzeResponse.data as Record<string, unknown>; expect(analyzeData.framework).toBeDefined(); expect(analyzeData.result).toBeDefined(); // Step 3: Evaluate reasoning quality const resultData = analyzeData.result as Record<string, unknown>; const evaluateResponse = await invokeTool("evaluate", { reasoning: resultData.conclusion as string, context: "E-commerce platform design with constraints", }); expect(evaluateResponse.success).toBe(true); const evaluateData = evaluateResponse.data as Record<string, unknown>; expect(evaluateData.quality).toBeDefined(); expect(evaluateData.strengths).toBeDefined(); expect(evaluateData.weaknesses).toBeDefined(); }); }); // ============================================================================ // Workflow 5: Tool Chaining (Memory + Reasoning Integration) // Purpose: Verify that memory and reasoning tools can be chained together // ============================================================================ describe("Workflow 5: Tool Chaining", () => { it("should chain memory and reasoning tools", { timeout: 30000 }, async () => { testContext = createE2ETestContext("tool-chaining"); // Step 1: Store context memory const storeResponse = await invokeTool("remember", { content: "Company policy: All decisions must consider environmental impact", userId: testContext.userId, sessionId: testContext.sessionId, primarySector: "semantic", metadata: { keywords: ["policy", "environment", "decisions"], tags: ["policy", "sustainability"], category: "company-policy", importance: 0.9, }, }); expect(storeResponse.success).toBe(true); trackMemoryId( testContext, (storeResponse.data as Record<string, unknown>).memoryId as string ); // Step 2: Retrieve relevant context const retrieveResponse = await invokeTool("recall", { userId: testContext.userId, text: "company policy decisions", limit: 5, }); expect(retrieveResponse.success).toBe(true); const memories = (retrieveResponse.data as Record<string, unknown>).memories as Array< Record<string, unknown> >; const context = memories.map((m) => m.content as string); // Step 3: Think with retrieved context const thinkResponse = await invokeTool("think", { problem: "Should we switch to cheaper but less eco-friendly packaging?", mode: "critical", context: { evidence: context, constraints: ["Budget constraints", "Environmental policy"], }, }); expect(thinkResponse.success).toBe(true); const thinkData = thinkResponse.data as Record<string, unknown>; expect(thinkData.reasoning).toBeDefined(); // Step 4: Store the reasoning as a new memory const insightResponse = await invokeTool("remember", { content: `Decision insight: ${thinkData.conclusion as string}`, userId: testContext.userId, sessionId: testContext.sessionId, primarySector: "reflective", metadata: { keywords: ["decision", "packaging", "environment"], tags: ["decision", "insight"], category: "decisions", importance: 0.8, }, }); expect(insightResponse.success).toBe(true); trackMemoryId( testContext, (insightResponse.data as Record<string, unknown>).memoryId as string ); }); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server