/**
* Proctored Execution Environment
* SPEC: SIL-007
*
* Provides sandboxed code execution with:
* - Docker container isolation
* - Network isolation
* - Resource limits (CPU, memory)
* - Comprehensive execution logging
* - Log consistency verification
* - Timing anomaly detection
*
* Note: This implementation can run in two modes:
* 1. Docker mode (production) - Full isolation with containers
* 2. Process mode (development/testing) - Subprocess execution for testing
*/
import { EventEmitter } from "events";
import { spawn, type ChildProcess } from "child_process";
import { writeFile, unlink, mkdir, rmdir } from "fs/promises";
import { join } from "path";
import { tmpdir } from "os";
import { randomUUID } from "crypto";
// =============================================================================
// Types
// =============================================================================
/**
* Test case to execute in sandbox
*/
export interface TestCase {
id: string;
name: string;
code: string;
expectedDuration: number;
difficulty: "easy" | "medium" | "hard";
}
/**
* Resource usage metrics from execution
*/
export interface ResourceMetrics {
cpuUsed: number;
memoryPeakMb: number;
diskReadBytes: number;
diskWriteBytes: number;
}
/**
* Execution logs from sandbox
*/
export interface ExecutionLogs {
stdout: string;
stderr: string;
exitCode: number;
startTime: string;
endTime: string;
resourceUsage: ResourceMetrics;
}
/**
* Verification flag indicating potential issues
*/
export interface VerificationFlag {
type:
| "missing_start"
| "missing_end"
| "timing_anomaly"
| "resource_anomaly"
| "execution_error";
message: string;
severity: "warning" | "critical";
}
/**
* Timing analysis results
*/
export interface TimingAnalysis {
actualDuration: number;
expectedDuration: number;
ratio: number;
anomalyScore: number;
suspicious: boolean;
}
/**
* Execution verification results
*/
export interface Verification {
consistent: boolean;
flags: VerificationFlag[];
timingAnalysis: TimingAnalysis;
}
/**
* Complete proctored execution result
*/
export interface ProctoredResult {
passed: boolean;
logs: ExecutionLogs;
verification: Verification;
}
/**
* Sandbox configuration
*/
export interface SandboxConfig {
/** Docker image to use (ignored in process mode) */
image: string;
/** Disable network access */
networkDisabled: boolean;
/** Memory limit in MB */
memoryLimitMb: number;
/** CPU percentage limit (0-100) */
cpuPercent: number;
/** Read-only filesystem */
readonlyRoot: boolean;
/** Execution timeout in seconds */
timeoutSeconds: number;
/** Use process mode instead of Docker */
useProcessMode: boolean;
}
// =============================================================================
// Default Configuration
// =============================================================================
export const DEFAULT_SANDBOX_CONFIG: SandboxConfig = {
image: "thoughtbox-sandbox:latest",
networkDisabled: true,
memoryLimitMb: 512,
cpuPercent: 50,
readonlyRoot: true,
timeoutSeconds: 300,
useProcessMode: true, // Default to process mode for easier testing
};
// =============================================================================
// ProctoredExecutor
// =============================================================================
/**
* Executes code in a proctored sandbox environment.
*
* Provides isolation, logging, and verification to detect gaming attempts.
* Can run in Docker mode (full isolation) or process mode (for testing).
*/
export class ProctoredExecutor extends EventEmitter {
private config: SandboxConfig;
constructor(config: Partial<SandboxConfig> = {}) {
super();
this.config = { ...DEFAULT_SANDBOX_CONFIG, ...config };
}
/**
* Execute code in a proctored sandbox environment.
*
* @param code - Code to execute
* @param testCase - Test case metadata
* @returns Proctored execution result
*/
async executeProctored(
code: string,
testCase: TestCase
): Promise<ProctoredResult> {
if (this.config.useProcessMode) {
return this.executeInProcess(code, testCase);
}
return this.executeInDocker(code, testCase);
}
/**
* Execute code in a subprocess (for testing without Docker).
*/
private async executeInProcess(
code: string,
testCase: TestCase
): Promise<ProctoredResult> {
const startTime = new Date().toISOString();
const workDir = join(tmpdir(), `proctor-${randomUUID()}`);
try {
// Create workspace
await mkdir(workDir, { recursive: true });
// Create test runner script
const runnerScript = this.createRunnerScript(code, testCase);
const scriptPath = join(workDir, "run-test.js");
await writeFile(scriptPath, runnerScript);
// Execute with timeout
const execResult = await this.runProcess(scriptPath, workDir, testCase);
const endTime = new Date().toISOString();
// Build logs
const logs: ExecutionLogs = {
stdout: execResult.stdout,
stderr: execResult.stderr,
exitCode: execResult.exitCode,
startTime,
endTime,
resourceUsage: this.estimateResourceUsage(execResult),
};
// Verify execution
const verification = this.verifyExecution(logs, testCase);
// Determine pass/fail
const passed =
verification.consistent &&
logs.exitCode === 0 &&
!verification.flags.some((f) => f.severity === "critical");
return { passed, logs, verification };
} finally {
// Cleanup
try {
await unlink(join(workDir, "run-test.js")).catch(() => {});
await rmdir(workDir).catch(() => {});
} catch {
// Best effort cleanup
}
}
}
/**
* Execute code in Docker container (full isolation).
*/
private async executeInDocker(
code: string,
testCase: TestCase
): Promise<ProctoredResult> {
// Docker implementation would go here
// For now, fall back to process mode with a warning
console.warn(
"[ProctoredExecutor] Docker mode not fully implemented, using process mode"
);
return this.executeInProcess(code, testCase);
}
/**
* Create the test runner script with markers.
*/
private createRunnerScript(code: string, testCase: TestCase): string {
return `
const startMarker = 'TEST_START:${testCase.name}';
const endMarker = 'TEST_END:${testCase.name}';
console.log(startMarker);
const startTime = Date.now();
try {
${code}
const duration = Date.now() - startTime;
console.log(endMarker);
console.log('DURATION:' + duration);
process.exit(0);
} catch (error) {
console.error('ERROR:', error.message);
console.log(endMarker);
process.exit(1);
}
`.trim();
}
/**
* Run subprocess with timeout.
*/
private runProcess(
scriptPath: string,
workDir: string,
testCase: TestCase
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
return new Promise((resolve, reject) => {
const timeout = setTimeout(() => {
if (proc) {
proc.kill("SIGKILL");
}
reject(
new Error(
`Execution timeout after ${this.config.timeoutSeconds}s`
)
);
}, this.config.timeoutSeconds * 1000);
let stdout = "";
let stderr = "";
const proc: ChildProcess = spawn("node", [scriptPath], {
cwd: workDir,
env: {
...process.env,
TEST_NAME: testCase.name,
TEST_ID: testCase.id,
},
});
proc.stdout?.on("data", (data) => {
stdout += data.toString();
});
proc.stderr?.on("data", (data) => {
stderr += data.toString();
});
proc.on("close", (exitCode) => {
clearTimeout(timeout);
resolve({
stdout,
stderr,
exitCode: exitCode ?? 0,
});
});
proc.on("error", (err) => {
clearTimeout(timeout);
reject(err);
});
});
}
/**
* Estimate resource usage from execution (process mode approximation).
*/
private estimateResourceUsage(execResult: {
stdout: string;
stderr: string;
}): ResourceMetrics {
// In process mode, we can't get exact metrics
// Return reasonable estimates based on output size
const outputSize = execResult.stdout.length + execResult.stderr.length;
return {
cpuUsed: 0, // Unknown in process mode
memoryPeakMb: Math.max(10, outputSize / 1000), // Rough estimate
diskReadBytes: 0,
diskWriteBytes: outputSize,
};
}
/**
* Verify execution logs for consistency and detect gaming.
*/
verifyExecution(logs: ExecutionLogs, testCase: TestCase): Verification {
const flags: VerificationFlag[] = [];
// Check 1: Test markers present
const startMarker = `TEST_START:${testCase.name}`;
const endMarker = `TEST_END:${testCase.name}`;
if (!logs.stdout.includes(startMarker)) {
flags.push({
type: "missing_start",
message: "Test start marker not found in output",
severity: "critical",
});
}
if (!logs.stdout.includes(endMarker)) {
flags.push({
type: "missing_end",
message: "Test end marker not found in output",
severity: "critical",
});
}
// Check 2: Timing analysis
const timingAnalysis = this.analyzeTime(logs, testCase);
if (timingAnalysis.suspicious) {
flags.push({
type: "timing_anomaly",
message: `Solve time (${timingAnalysis.actualDuration}ms) suspiciously fast vs expected (${timingAnalysis.expectedDuration}ms)`,
severity: timingAnalysis.ratio < 0.05 ? "critical" : "warning",
});
}
// Check 3: Resource usage anomalies
if (logs.resourceUsage.memoryPeakMb < 1) {
flags.push({
type: "resource_anomaly",
message:
"Suspiciously low memory usage - may not have executed properly",
severity: "warning",
});
}
// Check 4: Execution errors
if (logs.stderr.includes("ERROR:")) {
flags.push({
type: "execution_error",
message: "Execution error detected in stderr",
severity: "warning",
});
}
const consistent = !flags.some((f) => f.severity === "critical");
return { consistent, flags, timingAnalysis };
}
/**
* Analyze execution timing for anomalies.
*/
analyzeTime(logs: ExecutionLogs, testCase: TestCase): TimingAnalysis {
// Extract duration from logs
const durationMatch = logs.stdout.match(/DURATION:(\d+)/);
const actualDuration = durationMatch ? parseInt(durationMatch[1], 10) : 0;
const expectedDuration = testCase.expectedDuration;
const ratio =
expectedDuration > 0 ? actualDuration / expectedDuration : 1;
// Anomaly score: 0 = normal, 1 = highly anomalous
// Very fast completion is suspicious
const anomalyScore = ratio < 0.1 ? 1 - ratio * 10 : 0;
return {
actualDuration,
expectedDuration,
ratio,
anomalyScore,
suspicious: ratio < 0.1, // 10% of expected time = suspicious
};
}
/**
* Get current configuration.
*/
getConfig(): SandboxConfig {
return { ...this.config };
}
/**
* Update configuration.
*/
setConfig(config: Partial<SandboxConfig>): void {
this.config = { ...this.config, ...config };
}
}
// =============================================================================
// Singleton
// =============================================================================
let executorInstance: ProctoredExecutor | null = null;
/**
* Get singleton ProctoredExecutor instance.
*/
export function getProctoredExecutor(
config?: Partial<SandboxConfig>
): ProctoredExecutor {
if (!executorInstance) {
executorInstance = new ProctoredExecutor(config);
}
return executorInstance;
}
/**
* Reset singleton (for testing).
*/
export function resetProctoredExecutor(): void {
executorInstance = null;
}