Google Calendar MCP

Overview Schema Related Servers Score Discussions

claude-mcp-integration.test.ts•28.7 kB

import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest'; import Anthropic from '@anthropic-ai/sdk'; import { Client } from "@modelcontextprotocol/sdk/client/index.js"; import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; /** * Minimal Claude + MCP Integration Tests * * PURPOSE: Test ONLY what's unique to LLM integration: * 1. Can Claude understand user intent and select appropriate tools? * 2. Can Claude handle multi-step reasoning? * 3. Can Claude handle ambiguous requests appropriately? * * NOT TESTED HERE (covered in direct-integration.test.ts): * - Tool functionality * - Conflict detection * - Calendar operations * - Error handling * - Performance */ interface LLMResponse { content: string; toolCalls: Array<{ name: string; arguments: Record<string, any> }>; executedResults: Array<{ toolCall: { name: string; arguments: Record<string, any> }; result: any; success: boolean; }>; } // Pricing per million tokens (as of 2025) const MODEL_PRICING: Record<string, { input: number; output: number }> = { 'claude-haiku-4-5-20251001': { input: 1.00, output: 5.00 }, 'claude-sonnet-4-5-20250929': { input: 3.00, output: 15.00 }, }; // Default model for tests - using Haiku 4.5 for cost efficiency const DEFAULT_MODEL = 'claude-haiku-4-5-20251001'; interface CostTracker { model: string; totalInputTokens: number; totalOutputTokens: number; totalCacheWriteTokens: number; totalCacheReadTokens: number; requestCount: number; perTestCosts: Array<{ testName: string; inputTokens: number; outputTokens: number; cost: number }>; } class ClaudeMCPClient { private anthropic: Anthropic; private mcpClient: Client; private costTracker: CostTracker; private currentTestName: string = ''; constructor(apiKey: string, mcpClient: Client) { this.anthropic = new Anthropic({ apiKey }); this.mcpClient = mcpClient; const model = process.env.ANTHROPIC_MODEL ?? DEFAULT_MODEL; this.costTracker = { model, totalInputTokens: 0, totalOutputTokens: 0, totalCacheWriteTokens: 0, totalCacheReadTokens: 0, requestCount: 0, perTestCosts: [] }; } setCurrentTest(testName: string) { this.currentTestName = testName; } private calculateCost( inputTokens: number, outputTokens: number, cacheWriteTokens: number = 0, cacheReadTokens: number = 0 ): number { const pricing = MODEL_PRICING[this.costTracker.model] || { input: 3.00, output: 15.00 }; // Cache write costs 25% more, cache read costs 90% less (10% of base) const inputCost = (inputTokens / 1_000_000) * pricing.input; const outputCost = (outputTokens / 1_000_000) * pricing.output; const cacheWriteCost = (cacheWriteTokens / 1_000_000) * pricing.input * 1.25; const cacheReadCost = (cacheReadTokens / 1_000_000) * pricing.input * 0.10; return inputCost + outputCost + cacheWriteCost + cacheReadCost; } async sendMessage(prompt: string): Promise<LLMResponse> { // Get available tools from MCP server const availableTools = await this.mcpClient.listTools(); const model = process.env.ANTHROPIC_MODEL ?? DEFAULT_MODEL; // Convert MCP tools to Claude format with prompt caching // Adding cache_control to the LAST tool caches ALL tools as a prefix const claudeTools = availableTools.tools.map((tool, index, arr) => { const baseTool = { name: tool.name, description: tool.description, input_schema: tool.inputSchema }; // Add cache_control to last tool to cache entire tool schema if (index === arr.length - 1) { return { ...baseTool, cache_control: { type: 'ephemeral' as const } }; } return baseTool; }); // Send to Claude const message = await this.anthropic.messages.create({ model, max_tokens: 2500, tools: claudeTools, messages: [{ role: 'user' as const, content: prompt }] }); // Track token usage (including cache metrics) const usage = message.usage as { input_tokens: number; output_tokens: number; cache_creation_input_tokens?: number; cache_read_input_tokens?: number; }; const inputTokens = usage.input_tokens || 0; const outputTokens = usage.output_tokens || 0; const cacheWriteTokens = usage.cache_creation_input_tokens || 0; const cacheReadTokens = usage.cache_read_input_tokens || 0; this.costTracker.totalInputTokens += inputTokens; this.costTracker.totalOutputTokens += outputTokens; this.costTracker.totalCacheWriteTokens += cacheWriteTokens; this.costTracker.totalCacheReadTokens += cacheReadTokens; this.costTracker.requestCount++; // Track per-test costs if (this.currentTestName) { const cost = this.calculateCost(inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens); const existing = this.costTracker.perTestCosts.find(t => t.testName === this.currentTestName); if (existing) { existing.inputTokens += inputTokens; existing.outputTokens += outputTokens; existing.cost += cost; } else { this.costTracker.perTestCosts.push({ testName: this.currentTestName, inputTokens, outputTokens, cost }); } } // Extract tool calls const toolCalls: Array<{ name: string; arguments: Record<string, any> }> = []; let textContent = ''; message.content.forEach(content => { if (content.type === 'text') { textContent += content.text; } else if (content.type === 'tool_use') { toolCalls.push({ name: content.name, arguments: content.input as Record<string, any> }); } }); // Execute tool calls const executedResults = []; for (const toolCall of toolCalls) { try { const result = await this.mcpClient.callTool({ name: toolCall.name, arguments: toolCall.arguments }); executedResults.push({ toolCall, result, success: true }); } catch (error) { executedResults.push({ toolCall, result: { error: String(error) }, success: false }); } } return { content: textContent, toolCalls, executedResults }; } getCostSummary(): string { const pricing = MODEL_PRICING[this.costTracker.model] || { input: 3.00, output: 15.00 }; const totalCost = this.calculateCost( this.costTracker.totalInputTokens, this.costTracker.totalOutputTokens, this.costTracker.totalCacheWriteTokens, this.costTracker.totalCacheReadTokens ); // Calculate what cost would have been without caching const totalTokensIfNoCaching = this.costTracker.totalInputTokens + this.costTracker.totalCacheWriteTokens + this.costTracker.totalCacheReadTokens; const costWithoutCaching = this.calculateCost(totalTokensIfNoCaching, this.costTracker.totalOutputTokens); const savings = costWithoutCaching - totalCost; const savingsPercent = costWithoutCaching > 0 ? (savings / costWithoutCaching) * 100 : 0; let summary = '\n' + '='.repeat(70) + '\n'; summary += ' CLAUDE API COST SUMMARY\n'; summary += '='.repeat(70) + '\n\n'; summary += `Model: ${this.costTracker.model}\n`; summary += `Pricing: $${pricing.input.toFixed(2)}/MTok input, $${pricing.output.toFixed(2)}/MTok output\n\n`; summary += `Total Requests: ${this.costTracker.requestCount}\n`; summary += `Input Tokens (uncached): ${this.costTracker.totalInputTokens.toLocaleString()}\n`; summary += `Output Tokens: ${this.costTracker.totalOutputTokens.toLocaleString()}\n`; // Cache statistics summary += '\n--- Cache Statistics ---\n'; summary += `Cache Write Tokens: ${this.costTracker.totalCacheWriteTokens.toLocaleString()}\n`; summary += `Cache Read Tokens: ${this.costTracker.totalCacheReadTokens.toLocaleString()}\n`; const cacheHitRate = this.costTracker.totalCacheWriteTokens + this.costTracker.totalCacheReadTokens > 0 ? (this.costTracker.totalCacheReadTokens / (this.costTracker.totalCacheWriteTokens + this.costTracker.totalCacheReadTokens)) * 100 : 0; summary += `Cache Hit Rate: ${cacheHitRate.toFixed(1)}%\n`; summary += '\n' + '-'.repeat(70) + '\n'; summary += 'Per-Test Breakdown:\n'; summary += '-'.repeat(70) + '\n'; for (const test of this.costTracker.perTestCosts) { const shortName = test.testName.length > 50 ? test.testName.substring(0, 47) + '...' : test.testName; summary += `${shortName.padEnd(52)} $${test.cost.toFixed(4)}\n`; } summary += '-'.repeat(70) + '\n'; summary += `${'TOTAL COST'.padEnd(52)} $${totalCost.toFixed(4)}\n`; if (savings > 0) { summary += `${'Cost without caching'.padEnd(52)} $${costWithoutCaching.toFixed(4)}\n`; summary += `${'SAVINGS FROM CACHING'.padEnd(52)} $${savings.toFixed(4)} (${savingsPercent.toFixed(1)}%)\n`; } summary += '='.repeat(70) + '\n'; return summary; } } describe('Claude + MCP Essential Tests', () => { let mcpClient: Client; let claudeClient: ClaudeMCPClient; beforeAll(async () => { // Start MCP server const cleanEnv = Object.fromEntries( Object.entries(process.env).filter(([_, value]) => value !== undefined) ) as Record<string, string>; cleanEnv.NODE_ENV = 'test'; // Create MCP client mcpClient = new Client({ name: "minimal-test-client", version: "1.0.0" }, { capabilities: { tools: {} } }); // Connect to server const transport = new StdioClientTransport({ command: 'node', args: ['build/index.js'], env: cleanEnv }); await mcpClient.connect(transport); // Initialize Claude client const apiKey = process.env.CLAUDE_API_KEY; if (!apiKey) { throw new Error('CLAUDE_API_KEY not set'); } claudeClient = new ClaudeMCPClient(apiKey, mcpClient); // Verify connection const tools = await mcpClient.listTools(); console.log(`Connected to MCP with ${tools.tools.length} tools available`); }, 30000); beforeEach((context) => { if (claudeClient) { claudeClient.setCurrentTest(context.task.name); } }); afterAll(async () => { if (claudeClient) { console.log(claudeClient.getCostSummary()); } if (mcpClient) await mcpClient.close(); }, 10000); describe('Core LLM Capabilities', () => { it('should select appropriate tools for user intent', async () => { // Synonyms to make intent detection more robust against LLM response variation const intentSynonyms: Record<string, string[]> = { 'create': ['create', 'schedule', 'book', 'add', 'set up', 'meeting', 'event'], 'search': ['search', 'find', 'look', 'meetings', 'sarah', 'results'], 'availability': ['availability', 'free', 'busy', 'available', 'afternoon', 'schedule'] }; const testCases = [ { intent: 'create', prompt: 'Schedule a meeting tomorrow at 3 PM', expectedTools: ['create-event', 'get-current-time'] }, { intent: 'search', prompt: 'Find my meetings with Sarah', expectedTools: ['search-events', 'list-events', 'get-current-time'] }, { intent: 'availability', prompt: 'Am I free tomorrow afternoon?', expectedTools: ['get-freebusy', 'list-events', 'get-current-time'] } ]; for (const test of testCases) { const response = await claudeClient.sendMessage(test.prompt); // Check if Claude used one of the expected tools const usedExpectedTool = response.toolCalls.some(tc => test.expectedTools.includes(tc.name) ); // Or at least understood the intent in its response (check synonyms) const responseText = response.content.toLowerCase(); const synonyms = intentSynonyms[test.intent] || [test.intent]; const mentionedIntent = synonyms.some(word => responseText.includes(word)); const understoodIntent = usedExpectedTool || mentionedIntent; expect(understoodIntent).toBe(true); } }, 60000); it('should handle multi-step requests', async () => { const response = await claudeClient.sendMessage( 'What time is it now, and do I have any meetings in the next 2 hours?' ); // This requires multiple tool calls or understanding multiple parts const handledMultiStep = response.toolCalls.length > 1 || // Multiple tools used (response.toolCalls.some(tc => tc.name === 'get-current-time') && response.toolCalls.some(tc => tc.name === 'list-events')) || // Both time and events (response.content.includes('time') && response.content.includes('meeting')); // Understood both parts expect(handledMultiStep).toBe(true); }, 30000); it('should handle ambiguous requests gracefully', async () => { const response = await claudeClient.sendMessage( 'Set up the usual' ); // Claude should either: // 1. Ask for clarification // 2. Make a reasonable attempt with available context // 3. Explain what information is needed const handledGracefully = response.content.toLowerCase().includes('what') || response.content.toLowerCase().includes('specify') || response.content.toLowerCase().includes('usual') || response.content.toLowerCase().includes('more') || response.toolCalls.length > 0; // Or attempts something expect(handledGracefully).toBe(true); }, 30000); }); describe('Tool Selection Accuracy', () => { it('should distinguish between list and search operations', async () => { // Specific search should use search-events const searchResponse = await claudeClient.sendMessage( 'Find meetings about project alpha' ); const usedSearch = searchResponse.toolCalls.some(tc => tc.name === 'search-events') || searchResponse.content.toLowerCase().includes('search'); // General list should use list-events const listResponse = await claudeClient.sendMessage( 'Show me tomorrow\'s schedule' ); const usedList = listResponse.toolCalls.some(tc => tc.name === 'list-events') || listResponse.content.toLowerCase().includes('tomorrow'); // At least one should be correct expect(usedSearch || usedList).toBe(true); }, 30000); it('should understand when NOT to use tools', async () => { const response = await claudeClient.sendMessage( 'How does Google Calendar handle recurring events?' ); // This is a question about calendars, not a calendar operation // Claude should either: // 1. Not use tools and explain // 2. Use minimal tools (like list-calendars) to provide context const appropriateResponse = response.toolCalls.length === 0 || // No tools response.toolCalls.length === 1 && response.toolCalls[0].name === 'list-calendars' || // Just checking calendars response.content.toLowerCase().includes('recurring'); // Explains about recurring events expect(appropriateResponse).toBe(true); }, 30000); }); describe('Context Understanding', () => { it('should understand relative time expressions', async () => { const testPhrases = [ 'tomorrow at 2 PM', 'next Monday', 'in 30 minutes' ]; for (const phrase of testPhrases) { const response = await claudeClient.sendMessage( `Schedule a meeting ${phrase}` ); // Claude should either get current time or attempt to create an event const understoodTime = response.toolCalls.some(tc => tc.name === 'get-current-time' || tc.name === 'create-event' ) || response.content.toLowerCase().includes(phrase.split(' ')[0]); // References the time expect(understoodTime).toBe(true); } }, 60000); }); describe('Multi-Calendar Coordination', () => { // These tests verify Claude's ability to handle realistic multi-calendar scenarios // that require coordinating across multiple calendars and accounts it('should coordinate availability across work and personal calendars', async () => { const response = await claudeClient.sendMessage( `I need to find a 2-hour block tomorrow where I'm free on both my work calendar and my personal calendar. Can you check my availability across both?` ); // Claude should understand this requires checking multiple calendars const understoodMultiCalendar = response.toolCalls.some(tc => tc.name === 'get-freebusy' || tc.name === 'list-events' || tc.name === 'list-calendars' ) || response.content.toLowerCase().includes('calendar') || response.content.toLowerCase().includes('availability'); // Should reference checking multiple sources or ask which calendars const handledMultipleCalendars = understoodMultiCalendar || response.content.toLowerCase().includes('both') || response.content.toLowerCase().includes('work') || response.content.toLowerCase().includes('personal'); expect(handledMultipleCalendars).toBe(true); }, 45000); it('should find mutual availability for group meeting', async () => { const response = await claudeClient.sendMessage( `I need to schedule a team meeting for tomorrow afternoon. Can you find a 1-hour slot where all team calendars are free? Check the primary calendars from both my work and personal accounts.` ); // Claude should use freebusy or list-events to check availability const usedAvailabilityTools = response.toolCalls.some(tc => tc.name === 'get-freebusy' || tc.name === 'list-events' ); // Or understood the multi-person coordination need const understoodCoordination = usedAvailabilityTools || response.content.toLowerCase().includes('availability') || response.content.toLowerCase().includes('free') || response.content.toLowerCase().includes('slot') || response.content.toLowerCase().includes('team'); expect(understoodCoordination).toBe(true); }, 45000); it('should handle cross-calendar conflict detection', async () => { const response = await claudeClient.sendMessage( `I want to schedule a dentist appointment on my personal calendar next Tuesday at 10 AM, but first check if I have any conflicts on my work calendar at that time.` ); // Claude should check for conflicts before creating const toolNames = response.toolCalls.map(tc => tc.name); // Should use conflict detection or availability checking tools const checkedConflicts = toolNames.includes('get-freebusy') || toolNames.includes('list-events') || response.content.toLowerCase().includes('conflict') || response.content.toLowerCase().includes('check'); expect(checkedConflicts).toBe(true); }, 45000); it('should understand calendar-specific event creation', async () => { const response = await claudeClient.sendMessage( `Schedule "Doctor Visit" on my personal calendar (not work) for next Friday at 2 PM. Make it a 1-hour appointment.` ); // Claude should understand to target a specific calendar const attemptedCreate = response.toolCalls.some(tc => tc.name === 'create-event' || tc.name === 'list-calendars' ); // Or asked for clarification about which calendar const understoodCalendarTarget = attemptedCreate || response.content.toLowerCase().includes('personal') || response.content.toLowerCase().includes('calendar') || response.content.toLowerCase().includes('which'); expect(understoodCalendarTarget).toBe(true); }, 45000); it('should plan complex multi-step scheduling workflow', async () => { const response = await claudeClient.sendMessage( `I need to block off time for a trip next week. Please: 1. First show me what meetings I have scheduled next week across all calendars 2. Then identify any meetings I'll need to reschedule 3. Create an "Out of Office" event on my primary calendar for Monday-Wednesday` ); // This requires multi-step reasoning const toolNames = response.toolCalls.map(tc => tc.name); // Should attempt to list events or understand the multi-step nature const handledMultiStep = toolNames.includes('list-events') || toolNames.includes('list-calendars') || response.content.toLowerCase().includes('step') || response.content.toLowerCase().includes('first') || response.content.toLowerCase().includes('next week') || response.toolCalls.length >= 1; expect(handledMultiStep).toBe(true); }, 45000); it('should suggest using account parameter for disambiguation', async () => { const response = await claudeClient.sendMessage( `I have multiple Google accounts connected. Schedule a meeting on the calendar from my work account specifically, not my personal one.` ); // Claude should understand account disambiguation const understoodAccountContext = response.toolCalls.some(tc => tc.arguments?.account !== undefined || tc.name === 'list-calendars' ) || response.content.toLowerCase().includes('account') || response.content.toLowerCase().includes('work') || response.content.toLowerCase().includes('which'); expect(understoodAccountContext).toBe(true); }, 45000); it('should check shared calendar availability', async () => { const response = await claudeClient.sendMessage( `Check if the team shared calendar has any events scheduled for tomorrow afternoon. I want to book the conference room if it's available.` ); // Should query events or freebusy for shared calendar const checkedSharedCalendar = response.toolCalls.some(tc => tc.name === 'list-events' || tc.name === 'get-freebusy' || tc.name === 'list-calendars' ) || response.content.toLowerCase().includes('shared') || response.content.toLowerCase().includes('team') || response.content.toLowerCase().includes('available'); expect(checkedSharedCalendar).toBe(true); }, 45000); it('should handle "block time on all calendars" request', async () => { const response = await claudeClient.sendMessage( `I need to block off 9 AM to 12 PM tomorrow for deep work. Create "Focus Time - Do Not Disturb" events on ALL my calendars so people see I'm busy regardless of which calendar they check.` ); // Should understand the need to create on multiple calendars const understoodMultiCalendarCreate = response.toolCalls.some(tc => tc.name === 'create-event' || tc.name === 'list-calendars' ) || response.content.toLowerCase().includes('all') || response.content.toLowerCase().includes('multiple') || response.content.toLowerCase().includes('calendars'); expect(understoodMultiCalendarCreate).toBe(true); }, 45000); }); describe('Real-World Scheduling Scenarios', () => { // These test realistic prompts that users might actually send it('should handle "when can we meet" style requests', async () => { const response = await claudeClient.sendMessage( `When am I free for a 30-minute call sometime this week? Prefer afternoon slots.` ); const handledAvailabilityRequest = response.toolCalls.some(tc => tc.name === 'get-freebusy' || tc.name === 'list-events' || tc.name === 'get-current-time' ) || response.content.toLowerCase().includes('free') || response.content.toLowerCase().includes('available') || response.content.toLowerCase().includes('afternoon'); expect(handledAvailabilityRequest).toBe(true); }, 45000); it('should handle rescheduling with constraints', async () => { const response = await claudeClient.sendMessage( `I need to move my 2 PM meeting today to sometime tomorrow, but it can't conflict with my existing appointments. Find me an open slot.` ); // Should check availability and understand rescheduling const handledReschedule = response.toolCalls.some(tc => tc.name === 'list-events' || tc.name === 'get-freebusy' || tc.name === 'search-events' || tc.name === 'get-current-time' || tc.name === 'update-event' ) || response.content.toLowerCase().includes('reschedul') || response.content.toLowerCase().includes('move') || response.content.toLowerCase().includes('conflict') || response.content.toLowerCase().includes('meeting') || response.content.toLowerCase().includes('tomorrow') || response.content.toLowerCase().includes('slot'); expect(handledReschedule).toBe(true); }, 45000); it('should understand recurring meeting context', async () => { const response = await claudeClient.sendMessage( `I have a weekly team standup that I need to skip this week only. Can you help me handle just this occurrence without affecting future meetings?` ); // Should understand recurring event modification scope const understoodRecurring = response.toolCalls.some(tc => tc.name === 'search-events' || tc.name === 'list-events' || tc.name === 'update-event' || tc.name === 'delete-event' ) || response.content.toLowerCase().includes('recurring') || response.content.toLowerCase().includes('this week only') || response.content.toLowerCase().includes('occurrence') || response.content.toLowerCase().includes('instance'); expect(understoodRecurring).toBe(true); }, 45000); it('should coordinate external attendee scheduling', async () => { const response = await claudeClient.sendMessage( `Schedule a meeting with an external client next week. I need to check my availability first, then create the meeting with them as an attendee. Their email is client@example.com.` ); // Should handle multi-step external attendee flow const handledExternalAttendee = response.toolCalls.some(tc => tc.name === 'list-events' || tc.name === 'get-freebusy' || tc.name === 'create-event' ) || response.content.toLowerCase().includes('attendee') || response.content.toLowerCase().includes('client') || response.content.toLowerCase().includes('availability'); expect(handledExternalAttendee).toBe(true); }, 45000); }); }); /** * Test Categories: * * Core LLM Capabilities: * ✅ Tool selection for different intents (create, search, availability) * ✅ Multi-step request handling (LLM reasoning) * ✅ Ambiguous request handling (LLM robustness) * ✅ Context understanding - relative time expressions * ✅ Knowing when NOT to use tools (LLM judgment) * * Multi-Calendar Coordination (NEW): * ✅ Cross-calendar availability checking (work + personal) * ✅ Mutual availability for group meetings * ✅ Cross-calendar conflict detection * ✅ Calendar-specific event creation * ✅ Multi-step scheduling workflows * ✅ Account parameter disambiguation * ✅ Shared calendar operations * ✅ Block time across all calendars * * Real-World Scenarios (NEW): * ✅ "When can we meet" style requests * ✅ Rescheduling with constraints * ✅ Recurring meeting modifications (single occurrence) * ✅ External attendee coordination * * What's NOT tested here (covered in direct-integration.test.ts): * ✂️ Tool functionality and response formats * ✂️ API error handling * ✂️ Performance benchmarks * ✂️ Data validation */

Latest Blog Posts

Model Context Protocol Proxies: Enabling Enterprise Control with Virtual MCPs
By Om-Shree-0709 on December 9, 2025.
AI Security
Virtual MCP
Kubernetes Operator
The State of MCP in 2025: Who's Building What and Why It Matters
By punkpeye on December 7, 2025.
mcp
startups
MCP hosting with persistent storage
By punkpeye on December 6, 2025.
changelog

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nspady/google-calendar-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server