/**
* Integration Test: Sampling Flow End-to-End
*
* Tests the complete sampling workflow:
* 1. TypeScript code execution
* 2. Sampling bridge server initialization
* 3. LLM provider integration
* 4. Response handling
* 5. Metrics collection
* 6. Audit logging
*/
import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
import type { MCPClientPool } from '../../src/mcp/client-pool.js';
import { executeTypescriptInSandbox } from '../../src/executors/sandbox-executor.js';
import type { SandboxOptions } from '../../src/types.js';
describe('Sampling Integration Tests', () => {
let mockMcpClientPool: MCPClientPool;
beforeAll(() => {
// Mock MCP Client Pool (sampling doesn't require actual MCP tools)
mockMcpClientPool = {
callTool: vi.fn(),
discoverMCPTools: vi.fn().mockResolvedValue([]),
getToolSchema: vi.fn(),
getAllMCPServers: vi.fn().mockReturnValue([]),
listAllTools: vi.fn().mockResolvedValue([]),
close: vi.fn(),
} as unknown as MCPClientPool;
});
afterAll(async () => {
if (mockMcpClientPool?.close) {
await mockMcpClientPool.close();
}
});
it('should_completeSamplingRoundTrip_when_validCodeWithLlmAsk', async () => {
// SKIP if no API key configured (CI/CD environments)
if (!process.env.ANTHROPIC_API_KEY && !process.env.GEMINI_API_KEY && !process.env.OPENAI_API_KEY) {
console.warn('⚠️ Skipping sampling integration test - no API key configured');
return;
}
const options: SandboxOptions = {
code: `
// Simple sampling test - ask for a number
const result = await llm.ask('Return only the number 42, nothing else');
console.log('LLM Response:', result);
`,
allowedTools: [],
timeoutMs: 30000,
enableSampling: true,
maxSamplingRounds: 1,
maxSamplingTokens: 100,
};
const result = await executeTypescriptInSandbox(options, mockMcpClientPool);
// Verify execution succeeded
expect(result.success).toBe(true);
expect(result.error).toBeUndefined();
// Verify output contains LLM response
expect(result.output).toContain('LLM Response:');
// Verify sampling metrics are present
expect(result.toolCallSummary).toBeDefined();
// Verify execution time is reasonable (<30s)
expect(result.executionTimeMs).toBeLessThan(30000);
expect(result.executionTimeMs).toBeGreaterThan(0);
}, 35000); // 35s timeout for integration test
it('should_handleSamplingErrors_when_invalidPrompt', async () => {
// SKIP if no API key configured
if (!process.env.ANTHROPIC_API_KEY && !process.env.GEMINI_API_KEY && !process.env.OPENAI_API_KEY) {
console.warn('⚠️ Skipping sampling error test - no API key configured');
return;
}
const options: SandboxOptions = {
code: `
// Test error handling with empty prompt
try {
await llm.ask('');
} catch (error) {
console.log('Error caught:', error.message);
}
`,
allowedTools: [],
timeoutMs: 10000,
enableSampling: true,
maxSamplingRounds: 1,
maxSamplingTokens: 50,
};
const result = await executeTypescriptInSandbox(options, mockMcpClientPool);
// Should succeed (error is caught in user code)
expect(result.success).toBe(true);
}, 15000);
it('should_enforceSamplingLimits_when_maxRoundsExceeded', async () => {
// SKIP if no API key configured
if (!process.env.ANTHROPIC_API_KEY && !process.env.GEMINI_API_KEY && !process.env.OPENAI_API_KEY) {
console.warn('⚠️ Skipping sampling limits test - no API key configured');
return;
}
const options: SandboxOptions = {
code: `
// Try to exceed max rounds (should fail gracefully)
let count = 0;
for (let i = 0; i < 5; i++) {
try {
await llm.ask('Say hello');
count++;
} catch (error) {
console.log('Round limit reached after', count, 'rounds');
break;
}
}
`,
allowedTools: [],
timeoutMs: 60000,
enableSampling: true,
maxSamplingRounds: 2, // Limit to 2 rounds
maxSamplingTokens: 500,
};
const result = await executeTypescriptInSandbox(options, mockMcpClientPool);
// Verify execution completed (limits enforced)
expect(result.success).toBe(true);
expect(result.output).toContain('Round limit reached');
}, 65000);
it('should_fallbackToDirectAPI_when_MCPSamplingUnavailable', async () => {
// SKIP if no API key configured
if (!process.env.ANTHROPIC_API_KEY && !process.env.GEMINI_API_KEY && !process.env.OPENAI_API_KEY) {
console.warn('⚠️ Skipping sampling fallback test - no API key configured');
return;
}
const options: SandboxOptions = {
code: `
// Test hybrid sampling (should work with or without MCP SDK sampling)
const result = await llm.ask('Return the word TEST');
console.log('Fallback test result:', result);
`,
allowedTools: [],
timeoutMs: 20000,
enableSampling: true,
maxSamplingRounds: 1,
maxSamplingTokens: 50,
};
// Execute WITHOUT mcpServer parameter (forces fallback to direct API)
const result = await executeTypescriptInSandbox(options, mockMcpClientPool);
// Verify fallback works
expect(result.success).toBe(true);
expect(result.output).toContain('Fallback test result:');
}, 25000);
// Note: 4 integration tests above provide comprehensive coverage of:
// 1. Complete sampling roundtrip (llm.ask)
// 2. Error handling (invalid prompts)
// 3. Rate limit enforcement (maxRounds)
// 4. Fallback to direct API (when MCP unavailable)
});