Thoughtbox

Thoughtbox
benchmarks

proctor.ts•11.9 KiB

/**
 * Proctored Execution Environment
 * SPEC: SIL-007
 *
 * Provides sandboxed code execution with:
 * - Docker container isolation
 * - Network isolation
 * - Resource limits (CPU, memory)
 * - Comprehensive execution logging
 * - Log consistency verification
 * - Timing anomaly detection
 *
 * Note: This implementation can run in two modes:
 * 1. Docker mode (production) - Full isolation with containers
 * 2. Process mode (development/testing) - Subprocess execution for testing
 */

import { EventEmitter } from "events";
import { spawn, type ChildProcess } from "child_process";
import { writeFile, unlink, mkdir, rmdir } from "fs/promises";
import { join } from "path";
import { tmpdir } from "os";
import { randomUUID } from "crypto";

// =============================================================================
// Types
// =============================================================================

/**
 * Test case to execute in sandbox
 */
export interface TestCase {
  id: string;
  name: string;
  code: string;
  /** Expected run time, compared against the `DURATION:` value (milliseconds) reported by the runner. */
  expectedDuration: number;
  difficulty: "easy" | "medium" | "hard";
}

/**
 * Resource usage metrics from execution
 */
export interface ResourceMetrics {
  cpuUsed: number;
  memoryPeakMb: number;
  diskReadBytes: number;
  diskWriteBytes: number;
}

/**
 * Execution logs from sandbox
 */
export interface ExecutionLogs {
  stdout: string;
  stderr: string;
  exitCode: number;
  /** ISO-8601 timestamp taken just before the workspace is created. */
  startTime: string;
  /** ISO-8601 timestamp taken right after the subprocess closes. */
  endTime: string;
  resourceUsage: ResourceMetrics;
}

/**
 * Verification flag indicating potential issues
 */
export interface VerificationFlag {
  type:
    | "missing_start"
    | "missing_end"
    | "timing_anomaly"
    | "resource_anomaly"
    | "execution_error";
  message: string;
  severity: "warning" | "critical";
}

/**
 * Timing analysis results
 */
export interface TimingAnalysis {
  actualDuration: number;
  expectedDuration: number;
  /** actualDuration / expectedDuration (1 when expectedDuration is not positive). */
  ratio: number;
  /** 0 = normal, 1 = highly anomalous. */
  anomalyScore: number;
  suspicious: boolean;
}

/**
 * Execution verification results
 */
export interface Verification {
  /** True when no flag has "critical" severity. */
  consistent: boolean;
  flags: VerificationFlag[];
  timingAnalysis: TimingAnalysis;
}

/**
 * Complete proctored execution result
 */
export interface ProctoredResult {
  passed: boolean;
  logs: ExecutionLogs;
  verification: Verification;
}

/**
 * Sandbox configuration
 */
export interface SandboxConfig {
  /** Docker image to use (ignored in process mode) */
  image: string;
  /** Disable network access */
  networkDisabled: boolean;
  /** Memory limit in MB */
  memoryLimitMb: number;
  /** CPU percentage limit (0-100) */
  cpuPercent: number;
  /** Read-only filesystem */
  readonlyRoot: boolean;
  /** Execution timeout in seconds */
  timeoutSeconds: number;
  /** Use process mode instead of Docker */
  useProcessMode: boolean;
}

// =============================================================================
// Default Configuration
// =============================================================================

export const DEFAULT_SANDBOX_CONFIG: SandboxConfig = {
  image: "thoughtbox-sandbox:latest",
  networkDisabled: true,
  memoryLimitMb: 512,
  cpuPercent: 50,
  readonlyRoot: true,
  timeoutSeconds: 300,
  useProcessMode: true, // Default to process mode for easier testing
};

// =============================================================================
// ProctoredExecutor
// =============================================================================

/**
 * Executes code in a proctored sandbox environment.
 *
 * Provides isolation, logging, and verification to detect gaming attempts.
 * Can run in Docker mode (full isolation) or process mode (for testing).
 */
export class ProctoredExecutor extends EventEmitter {
  private config: SandboxConfig;

  /**
   * @param config - Overrides merged on top of {@link DEFAULT_SANDBOX_CONFIG}.
   */
  constructor(config: Partial<SandboxConfig> = {}) {
    super();
    this.config = { ...DEFAULT_SANDBOX_CONFIG, ...config };
  }

  /**
   * Execute code in a proctored sandbox environment.
   *
   * @param code - Code to execute
   * @param testCase - Test case metadata
   * @returns Proctored execution result
   * @throws Error when the subprocess cannot be started, the workspace cannot
   *   be prepared, or execution exceeds `timeoutSeconds`.
   */
  async executeProctored(
    code: string,
    testCase: TestCase
  ): Promise<ProctoredResult> {
    if (this.config.useProcessMode) {
      return this.executeInProcess(code, testCase);
    }
    return this.executeInDocker(code, testCase);
  }

  /**
   * Execute code in a subprocess (for testing without Docker).
   *
   * Writes a runner script into a fresh temp directory, runs it with `node`,
   * then verifies the captured logs. The workspace is removed afterwards on a
   * best-effort basis.
   */
  private async executeInProcess(
    code: string,
    testCase: TestCase
  ): Promise<ProctoredResult> {
    const startTime = new Date().toISOString();
    const workDir = join(tmpdir(), `proctor-${randomUUID()}`);
    const scriptPath = join(workDir, "run-test.js");

    try {
      // Create workspace
      await mkdir(workDir, { recursive: true });

      // Create test runner script
      await writeFile(scriptPath, this.createRunnerScript(code, testCase));

      // Execute with timeout
      const execResult = await this.runProcess(scriptPath, workDir, testCase);
      const endTime = new Date().toISOString();

      // Build logs
      const logs: ExecutionLogs = {
        stdout: execResult.stdout,
        stderr: execResult.stderr,
        exitCode: execResult.exitCode,
        startTime,
        endTime,
        resourceUsage: this.estimateResourceUsage(execResult),
      };

      // Verify execution
      const verification = this.verifyExecution(logs, testCase);

      // Determine pass/fail: `consistent` already means "no critical flags",
      // the explicit flag check is kept as a defensive second gate.
      const passed =
        verification.consistent &&
        logs.exitCode === 0 &&
        !verification.flags.some((f) => f.severity === "critical");

      return { passed, logs, verification };
    } finally {
      // Best-effort cleanup: failures are deliberately ignored so they never
      // mask the execution result or the original error.
      // NOTE(review): rmdir only removes empty directories, so anything else
      // the executed code wrote into the workspace is left behind.
      await unlink(scriptPath).catch(() => {});
      await rmdir(workDir).catch(() => {});
    }
  }

  /**
   * Execute code in Docker container (full isolation).
   *
   * Currently a stub: logs a warning and delegates to process mode.
   */
  private async executeInDocker(
    code: string,
    testCase: TestCase
  ): Promise<ProctoredResult> {
    // Docker implementation would go here
    // For now, fall back to process mode with a warning
    console.warn(
      "[ProctoredExecutor] Docker mode not fully implemented, using process mode"
    );
    return this.executeInProcess(code, testCase);
  }

  /**
   * Create the test runner script with markers.
   *
   * The script prints `TEST_START:<name>`, runs `code` inside a try block,
   * then prints `TEST_END:<name>` and `DURATION:<ms>` and exits 0. If the code
   * throws, it prints `ERROR: <message>` to stderr, the end marker to stdout,
   * and exits 1.
   */
  private createRunnerScript(code: string, testCase: TestCase): string {
    // JSON.stringify produces a fully escaped JS string literal, so a test
    // name containing quotes, backslashes or newlines can neither break the
    // generated script nor inject code into it.
    const startLiteral = JSON.stringify(`TEST_START:${testCase.name}`);
    const endLiteral = JSON.stringify(`TEST_END:${testCase.name}`);

    return `
const startMarker = ${startLiteral};
const endMarker = ${endLiteral};

console.log(startMarker);
const startTime = Date.now();

try {
  ${code}

  const duration = Date.now() - startTime;
  console.log(endMarker);
  console.log('DURATION:' + duration);
  process.exit(0);
} catch (error) {
  console.error('ERROR:', error instanceof Error ? error.message : String(error));
  console.log(endMarker);
  process.exit(1);
}
`.trim();
  }

  /**
   * Translate the exit code reported by the child's "close" event.
   *
   * Node reports `null` when the child was terminated by a signal; that must
   * never be mistaken for a clean exit, so it is mapped to a non-zero code.
   */
  private static exitCodeFrom(exitCode: number | null): number {
    return exitCode ?? 1;
  }

  /**
   * Run subprocess with timeout.
   *
   * Resolves with the captured output once the child closes. Rejects if the
   * child cannot be spawned or if `timeoutSeconds` elapses first (the child is
   * then killed with SIGKILL).
   */
  private runProcess(
    scriptPath: string,
    workDir: string,
    testCase: TestCase
  ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
    return new Promise((resolve, reject) => {
      // Spawn before arming the timer so a synchronous spawn failure cannot
      // leave a live timer behind.
      // NOTE(review): the child inherits the full host environment; consider
      // an allow-list if secrets may be present in process.env.
      const proc: ChildProcess = spawn("node", [scriptPath], {
        cwd: workDir,
        env: {
          ...process.env,
          TEST_NAME: testCase.name,
          TEST_ID: testCase.id,
        },
      });

      let stdout = "";
      let stderr = "";

      // Decode at the stream level so multi-byte UTF-8 characters split
      // across chunk boundaries are reassembled instead of corrupted.
      proc.stdout?.setEncoding("utf8");
      proc.stderr?.setEncoding("utf8");

      proc.stdout?.on("data", (chunk: string) => {
        stdout += chunk;
      });

      proc.stderr?.on("data", (chunk: string) => {
        stderr += chunk;
      });

      const timeout = setTimeout(() => {
        proc.kill("SIGKILL");
        reject(
          new Error(`Execution timeout after ${this.config.timeoutSeconds}s`)
        );
      }, this.config.timeoutSeconds * 1000);

      proc.on("close", (exitCode) => {
        clearTimeout(timeout);
        // After a timeout the promise is already rejected; this resolve is
        // then a no-op.
        resolve({
          stdout,
          stderr,
          exitCode: ProctoredExecutor.exitCodeFrom(exitCode),
        });
      });

      proc.on("error", (err) => {
        clearTimeout(timeout);
        reject(err);
      });
    });
  }

  /**
   * Estimate resource usage from execution (process mode approximation).
   *
   * Only the combined output size is available here, so every figure is a
   * placeholder derived from it rather than a measurement.
   */
  private estimateResourceUsage(execResult: {
    stdout: string;
    stderr: string;
  }): ResourceMetrics {
    // In process mode, we can't get exact metrics
    // Return reasonable estimates based on output size
    const outputSize = execResult.stdout.length + execResult.stderr.length;

    return {
      cpuUsed: 0, // Unknown in process mode
      memoryPeakMb: Math.max(10, outputSize / 1000), // Rough estimate
      diskReadBytes: 0,
      diskWriteBytes: outputSize,
    };
  }

  /**
   * Verify execution logs for consistency and detect gaming.
   *
   * @param logs - Logs captured from the run
   * @param testCase - Test case the logs belong to
   * @returns Flags raised, the timing analysis, and `consistent`, which is
   *   true only when no flag is critical.
   */
  verifyExecution(logs: ExecutionLogs, testCase: TestCase): Verification {
    const flags: VerificationFlag[] = [];

    // Check 1: Test markers present
    const startMarker = `TEST_START:${testCase.name}`;
    const endMarker = `TEST_END:${testCase.name}`;

    if (!logs.stdout.includes(startMarker)) {
      flags.push({
        type: "missing_start",
        message: "Test start marker not found in output",
        severity: "critical",
      });
    }

    if (!logs.stdout.includes(endMarker)) {
      flags.push({
        type: "missing_end",
        message: "Test end marker not found in output",
        severity: "critical",
      });
    }

    // Check 2: Timing analysis
    const timingAnalysis = this.analyzeTime(logs, testCase);
    if (timingAnalysis.suspicious) {
      flags.push({
        type: "timing_anomaly",
        message: `Solve time (${timingAnalysis.actualDuration}ms) suspiciously fast vs expected (${timingAnalysis.expectedDuration}ms)`,
        // Under 5% of the expected time is treated as critical.
        severity: timingAnalysis.ratio < 0.05 ? "critical" : "warning",
      });
    }

    // Check 3: Resource usage anomalies
    if (logs.resourceUsage.memoryPeakMb < 1) {
      flags.push({
        type: "resource_anomaly",
        message:
          "Suspiciously low memory usage - may not have executed properly",
        severity: "warning",
      });
    }

    // Check 4: Execution errors
    if (logs.stderr.includes("ERROR:")) {
      flags.push({
        type: "execution_error",
        message: "Execution error detected in stderr",
        severity: "warning",
      });
    }

    const consistent = !flags.some((f) => f.severity === "critical");

    return { consistent, flags, timingAnalysis };
  }

  /**
   * Analyze execution timing for anomalies.
   *
   * Reads the first `DURATION:<digits>` occurrence in stdout; when none is
   * present the actual duration is taken as 0.
   */
  analyzeTime(logs: ExecutionLogs, testCase: TestCase): TimingAnalysis {
    // Extract duration from logs
    const reported = logs.stdout.match(/DURATION:(\d+)/)?.[1];
    const actualDuration = reported !== undefined ? parseInt(reported, 10) : 0;

    const expectedDuration = testCase.expectedDuration;
    // A non-positive expectation gives nothing to compare against, so the
    // ratio is pinned to 1 (never suspicious).
    const ratio = expectedDuration > 0 ? actualDuration / expectedDuration : 1;

    // Anomaly score: 0 = normal, 1 = highly anomalous
    // Very fast completion is suspicious
    const anomalyScore = ratio < 0.1 ? 1 - ratio * 10 : 0;

    return {
      actualDuration,
      expectedDuration,
      ratio,
      anomalyScore,
      suspicious: ratio < 0.1, // 10% of expected time = suspicious
    };
  }

  /**
   * Get current configuration.
   *
   * @returns A shallow copy, so callers cannot mutate the executor's state.
   */
  getConfig(): SandboxConfig {
    return { ...this.config };
  }

  /**
   * Update configuration.
   *
   * @param config - Fields to overwrite; unspecified fields keep their value.
   */
  setConfig(config: Partial<SandboxConfig>): void {
    this.config = { ...this.config, ...config };
  }
}

// =============================================================================
// Singleton
// =============================================================================

let executorInstance: ProctoredExecutor | null = null;

/**
 * Get singleton ProctoredExecutor instance.
 *
 * @param config - Used only when the instance is first created; it is ignored
 *   on later calls while an instance already exists.
 */
export function getProctoredExecutor(
  config?: Partial<SandboxConfig>
): ProctoredExecutor {
  if (!executorInstance) {
    executorInstance = new ProctoredExecutor(config);
  }
  return executorInstance;
}

/**
 * Reset singleton (for testing).
 */
export function resetProctoredExecutor(): void {
  executorInstance = null;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/glassBead-tc/Thoughtbox'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

proctor.ts•11.9 KiB