Thoughtbox

proctor.test.ts•14.6 KiB

#!/usr/bin/env npx tsx /** * Unit tests for Proctored Execution Environment * SPEC: SIL-007 * * Run with: npx tsx tests/unit/proctor.test.ts */ import { ProctoredExecutor, getProctoredExecutor, resetProctoredExecutor, DEFAULT_SANDBOX_CONFIG, type TestCase, type ExecutionLogs, type ProctoredResult, } from "../../benchmarks/proctor.js"; // ============================================================================= // Test Utilities // ============================================================================= let testsPassed = 0; let testsFailed = 0; let currentTest = ""; function test(name: string, fn: () => void | Promise<void>): void { currentTest = name; Promise.resolve(fn()) .then(() => { testsPassed++; console.log(` ✓ ${name}`); }) .catch((err) => { testsFailed++; console.error(` ✗ ${name}`); console.error(` Error: ${err.message}`); }); } function assert(condition: boolean, message?: string): void { if (!condition) { throw new Error(message || `Assertion failed in "${currentTest}"`); } } function assertEqual<T>(actual: T, expected: T, message?: string): void { if (actual !== expected) { throw new Error( message || `Expected "${expected}" but got "${actual}" in "${currentTest}"` ); } } // ============================================================================= // Test Fixtures // ============================================================================= const mockTestCase: TestCase = { id: "test-001", name: "simple-test", code: 'console.log("Hello, test!")', expectedDuration: 100, // 100ms for simple code (avoids timing anomaly for fast execution) difficulty: "easy", }; const slowTestCase: TestCase = { id: "test-002", name: "slow-test", code: "// slow computation", expectedDuration: 60000, // 1 minute difficulty: "hard", }; // ============================================================================= // Tests // ============================================================================= async function runTests(): Promise<void> { console.log("\nProctored Executor Tests\n"); // Reset singleton before tests resetProctoredExecutor(); // ------------------------------------------------------------------------- // Initialization Tests // ------------------------------------------------------------------------- console.log("Initialization Tests:"); test("creates executor with default config", () => { const executor = new ProctoredExecutor(); const config = executor.getConfig(); assertEqual(config.image, DEFAULT_SANDBOX_CONFIG.image); assertEqual(config.networkDisabled, DEFAULT_SANDBOX_CONFIG.networkDisabled); assertEqual(config.memoryLimitMb, DEFAULT_SANDBOX_CONFIG.memoryLimitMb); assertEqual(config.timeoutSeconds, DEFAULT_SANDBOX_CONFIG.timeoutSeconds); }); test("creates executor with custom config", () => { const executor = new ProctoredExecutor({ timeoutSeconds: 60, memoryLimitMb: 256, }); const config = executor.getConfig(); assertEqual(config.timeoutSeconds, 60); assertEqual(config.memoryLimitMb, 256); // Other values should be defaults assertEqual(config.networkDisabled, DEFAULT_SANDBOX_CONFIG.networkDisabled); }); test("singleton pattern works", () => { resetProctoredExecutor(); const exec1 = getProctoredExecutor({ timeoutSeconds: 100 }); const exec2 = getProctoredExecutor(); // Should be same instance assertEqual(exec1.getConfig().timeoutSeconds, exec2.getConfig().timeoutSeconds); }); test("setConfig updates configuration", () => { const executor = new ProctoredExecutor(); executor.setConfig({ memoryLimitMb: 1024 }); assertEqual(executor.getConfig().memoryLimitMb, 1024); }); // ------------------------------------------------------------------------- // Execution Tests // ------------------------------------------------------------------------- console.log("\nExecution Tests:"); test("executes simple code successfully", async () => { const executor = new ProctoredExecutor({ useProcessMode: true }); // Use a test case with 1ms expected duration to avoid timing anomaly for instant code const quickTestCase: TestCase = { ...mockTestCase, expectedDuration: 1, // 1ms expected - instant code won't trigger timing anomaly }; const result = await executor.executeProctored( 'console.log("Hello from sandbox")', quickTestCase ); // With 1ms expected, even 0ms actual = 0 ratio, but timing check uses threshold // For very quick test cases, we just check markers and exit code assert(result.logs.exitCode === 0, "Exit code should be 0"); assert(result.logs.stdout.includes("Hello from sandbox"), "Should include output"); assert(result.logs.stdout.includes("TEST_START:"), "Should include start marker"); assert(result.logs.stdout.includes("TEST_END:"), "Should include end marker"); }); test("captures test markers in output", async () => { const executor = new ProctoredExecutor({ useProcessMode: true }); const result = await executor.executeProctored( 'console.log("test output")', mockTestCase ); assert( result.logs.stdout.includes(`TEST_START:${mockTestCase.name}`), "Should include start marker" ); assert( result.logs.stdout.includes(`TEST_END:${mockTestCase.name}`), "Should include end marker" ); }); test("captures duration in output", async () => { const executor = new ProctoredExecutor({ useProcessMode: true }); const result = await executor.executeProctored( 'console.log("quick")', mockTestCase ); assert( result.logs.stdout.includes("DURATION:"), "Should include duration marker" ); }); test("handles execution errors gracefully", async () => { const executor = new ProctoredExecutor({ useProcessMode: true }); const result = await executor.executeProctored( 'throw new Error("Test error")', mockTestCase ); assert(result.passed === false, "Should fail for throwing code"); assert(result.logs.exitCode === 1, "Exit code should be 1"); assert(result.logs.stderr.includes("Test error"), "Should capture error message"); }); // ------------------------------------------------------------------------- // Verification Tests // ------------------------------------------------------------------------- console.log("\nVerification Tests:"); test("detects missing start marker", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: `TEST_END:${mockTestCase.name}\nDURATION:100`, stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 10, diskReadBytes: 0, diskWriteBytes: 0 }, }; const verification = executor.verifyExecution(logs, mockTestCase); assert(verification.consistent === false, "Should be inconsistent"); assert( verification.flags.some((f) => f.type === "missing_start"), "Should flag missing start" ); }); test("detects missing end marker", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: `TEST_START:${mockTestCase.name}\nDURATION:100`, stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 10, diskReadBytes: 0, diskWriteBytes: 0 }, }; const verification = executor.verifyExecution(logs, mockTestCase); assert(verification.consistent === false, "Should be inconsistent"); assert( verification.flags.some((f) => f.type === "missing_end"), "Should flag missing end" ); }); test("passes verification with all markers present", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: `TEST_START:${mockTestCase.name}\noutput\nTEST_END:${mockTestCase.name}\nDURATION:1000`, stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 10, diskReadBytes: 0, diskWriteBytes: 0 }, }; const verification = executor.verifyExecution(logs, mockTestCase); assert(verification.consistent === true, "Should be consistent"); assert( !verification.flags.some((f) => f.type === "missing_start"), "Should not flag missing start" ); assert( !verification.flags.some((f) => f.type === "missing_end"), "Should not flag missing end" ); }); // ------------------------------------------------------------------------- // Timing Analysis Tests // ------------------------------------------------------------------------- console.log("\nTiming Analysis Tests:"); test("detects suspiciously fast execution", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: `TEST_START:${slowTestCase.name}\nTEST_END:${slowTestCase.name}\nDURATION:100`, // 100ms vs 60000ms expected stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 10, diskReadBytes: 0, diskWriteBytes: 0 }, }; const timingAnalysis = executor.analyzeTime(logs, slowTestCase); assert(timingAnalysis.suspicious === true, "Should flag suspicious timing"); assert(timingAnalysis.ratio < 0.1, `Ratio should be < 0.1, got ${timingAnalysis.ratio}`); assert(timingAnalysis.anomalyScore > 0, "Anomaly score should be positive"); }); test("passes reasonable execution time", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: `TEST_START:${mockTestCase.name}\nTEST_END:${mockTestCase.name}\nDURATION:50`, // 50ms vs 100ms expected = 50% stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 10, diskReadBytes: 0, diskWriteBytes: 0 }, }; const timingAnalysis = executor.analyzeTime(logs, mockTestCase); assert(timingAnalysis.suspicious === false, "Should not flag reasonable timing"); assert(timingAnalysis.ratio >= 0.1, `Ratio should be >= 0.1, got ${timingAnalysis.ratio}`); }); test("timing analysis extracts duration from logs", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: "TEST_START:test\nDURATION:12345\nTEST_END:test", stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 10, diskReadBytes: 0, diskWriteBytes: 0 }, }; const timingAnalysis = executor.analyzeTime(logs, mockTestCase); assertEqual(timingAnalysis.actualDuration, 12345); assertEqual(timingAnalysis.expectedDuration, mockTestCase.expectedDuration); }); // ------------------------------------------------------------------------- // Resource Anomaly Tests // ------------------------------------------------------------------------- console.log("\nResource Anomaly Tests:"); test("flags suspiciously low memory usage", () => { const executor = new ProctoredExecutor(); const logs: ExecutionLogs = { stdout: `TEST_START:${mockTestCase.name}\nTEST_END:${mockTestCase.name}\nDURATION:1000`, stderr: "", exitCode: 0, startTime: new Date().toISOString(), endTime: new Date().toISOString(), resourceUsage: { cpuUsed: 0, memoryPeakMb: 0.1, diskReadBytes: 0, diskWriteBytes: 0 }, }; const verification = executor.verifyExecution(logs, mockTestCase); assert( verification.flags.some((f) => f.type === "resource_anomaly"), "Should flag resource anomaly" ); }); // ------------------------------------------------------------------------- // Timeout Tests // ------------------------------------------------------------------------- console.log("\nTimeout Tests:"); test("enforces timeout for long-running code", async () => { const executor = new ProctoredExecutor({ useProcessMode: true, timeoutSeconds: 1, // 1 second timeout }); const longRunningCode = ` let start = Date.now(); while (Date.now() - start < 10000) { // Spin for 10 seconds } `; try { await executor.executeProctored(longRunningCode, { ...mockTestCase, code: longRunningCode, }); assert(false, "Should have thrown timeout error"); } catch (err) { assert( (err as Error).message.includes("timeout"), `Expected timeout error, got: ${(err as Error).message}` ); } }); // ------------------------------------------------------------------------- // Integration Tests // ------------------------------------------------------------------------- console.log("\nIntegration Tests:"); test("full execution flow with verification", async () => { const executor = new ProctoredExecutor({ useProcessMode: true }); // Use 1ms expected duration to prevent timing anomaly for fast code const quickTestCase: TestCase = { ...mockTestCase, expectedDuration: 1, }; const result = await executor.executeProctored( ` let sum = 0; for (let i = 0; i < 1000; i++) { sum += i; } console.log("Sum: " + sum); `, quickTestCase ); // Should have valid logs assert(result.logs.stdout.includes("Sum:"), "Should have computation output"); assert(result.logs.exitCode === 0, "Should have exit code 0"); // Should have valid verification structure assert( typeof result.verification.timingAnalysis.actualDuration === "number", "Should have timing" ); // Markers should be present (consistent) assert( result.logs.stdout.includes("TEST_START:"), "Should include start marker" ); assert( result.logs.stdout.includes("TEST_END:"), "Should include end marker" ); }); // ------------------------------------------------------------------------- // Wait and Report // ------------------------------------------------------------------------- await new Promise((resolve) => setTimeout(resolve, 500)); console.log("\n" + "=".repeat(50)); console.log(`Tests Passed: ${testsPassed}`); console.log(`Tests Failed: ${testsFailed}`); console.log("=".repeat(50)); if (testsFailed > 0) { process.exit(1); } } // Run tests runTests().catch((err) => { console.error("Test runner error:", err); process.exit(1); });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Kastalien-Research/thoughtbox'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

proctor.test.ts•14.6 KiB