import { z } from "zod";
import * as crypto from "node:crypto";
import * as fs from "node:fs";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import { CodexExecutor } from "../services/codex-executor.js";
import { progressServer } from "../services/progress-server.js";
import { sessionManager } from "../services/session-manager.js";
import { mapCodexLineToProgressEvent } from "../services/event-mapper.js";
import { loadConfig, getToolConfig } from "../config/config.js";
import {
CodexWriteResult,
CodexErrorCode,
CodexErrorInfo,
} from "../types/index.js";
export const CodexTddParamsSchema = z.object({
instruction: z
.string()
.describe(
"Detailed description of the feature or bug fix to implement using TDD"
),
sessionId: z
.string()
.uuid()
.optional()
.describe("Resume an existing TDD session by ID"),
workingDirectory: z
.string()
.optional()
.describe("Working directory path"),
planReference: z
.string()
.optional()
.describe("Plan file path or content summary, used as coding context"),
taskContext: z
.string()
.optional()
.describe(
"Task position and context within the plan " +
"(e.g. 'Task 3 of 5: Implement validation logic. Depends on Task 2 auth module.')"
),
testFramework: z
.string()
.optional()
.describe(
"Test framework hint (e.g. 'jest', 'vitest', 'pytest', 'go test'). " +
"Defaults to auto-detect from project."
),
});
export type CodexTddParams = z.infer<typeof CodexTddParamsSchema>;
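/**
 * Implement a feature or bug fix with Codex using a TDD-focused prompt
 * template, tracking the Codex session so it can be resumed later.
 *
 * Illustrative call (the instruction and paths are hypothetical):
 *
 * ```ts
 * const result = await codexTdd({
 *   instruction: "Add an email validator with tests",
 *   workingDirectory: "/repos/my-app",
 *   testFramework: "vitest",
 * });
 * if (result.status === "needs_review") {
 *   console.log(result.output.filesCreated, result.output.filesModified);
 * }
 * ```
 */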
export async function codexTdd(
params: CodexTddParams,
extra?: { signal?: AbortSignal }
): Promise<CodexWriteResult> {
const config = await loadConfig({
workingDirectory: params.workingDirectory,
});
const toolCfg = getToolConfig(config, "tdd");
const executor = await CodexExecutor.create({
workingDirectory: params.workingDirectory,
});
// Load and fill the TDD template
const template = await loadTddTemplate({
tddTemplate: config.tddTemplate,
workingDirectory: params.workingDirectory,
});
// Build context section: plan reference + task position
let contextValue = "";
if (params.planReference) {
contextValue += params.planReference;
}
if (params.taskContext) {
if (contextValue) contextValue += "\n\n";
contextValue += `**Current Task Position:** ${params.taskContext}`;
}
if (!contextValue) {
contextValue = "(No plan reference provided)";
}
// Strip TEST_FRAMEWORK section from template if not provided
let processedTemplate = template;
if (!params.testFramework) {
// Remove "### Test Framework\n\nUse: {TEST_FRAMEWORK}\n" section
processedTemplate = processedTemplate.replace(
/### Test Framework\s*\n+Use: \{TEST_FRAMEWORK\}\s*\n*/g,
""
);
}
const fullInstruction = fillTemplate(processedTemplate, {
INSTRUCTION: params.instruction,
PLAN_REFERENCE: contextValue,
...(params.testFramework
? { TEST_FRAMEWORK: params.testFramework }
: {}),
});
try {
const operationId = `tdd-${crypto.randomUUID()}`;
progressServer.startOperation(
operationId,
"write",
params.instruction.slice(0, 120)
);
    let result: Awaited<ReturnType<CodexExecutor["executeWrite"]>> | undefined;
    try {
      // Mark session as active before execution (for resume)
      if (params.sessionId) {
        await sessionManager.updateStatus(params.sessionId, "active", {
          workingDirectory: params.workingDirectory,
        });
      }
      result = await executor.executeWrite(fullInstruction, {
        sessionId: params.sessionId,
        workingDirectory: params.workingDirectory,
        model: toolCfg.model,
        sandbox: toolCfg.sandbox,
        timeout: toolCfg.timeout,
        onLine: (line) => {
          const event = mapCodexLineToProgressEvent(line, operationId);
          if (event) progressServer.emit(event);
        },
        signal: extra?.signal,
      });
    } finally {
      // `result` is still undefined if execution threw or was aborted, so the
      // operation is reported as failed in that case. Keeping the session
      // update inside this try ensures the progress operation is always closed.
      progressServer.endOperation(operationId, result?.exitCode === 0);
    }
// Parse output
const parsed = executor.parseOutput(result.stdout);
if (!params.sessionId && !parsed.sessionId) {
throw {
code: CodexErrorCode.CODEX_INVALID_OUTPUT,
message:
"Codex CLI did not emit a session_id/thread_id in --json output. Ensure Codex CLI is up to date and that --json output is enabled.",
recoverable: false,
} satisfies CodexErrorInfo;
}
    // Determine status: a non-zero exit is an error; a clean exit that changed
    // files needs human review; otherwise the run completed with no changes.
let status: CodexWriteResult["status"] = "completed";
if (result.exitCode !== 0) {
status = "error";
} else if (
parsed.filesCreated.length > 0 ||
parsed.filesModified.length > 0
) {
status = "needs_review";
}
// Track session
const sessionId = parsed.sessionId || params.sessionId;
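    // Defensive re-check (the throw above already guarantees one of the two is
    // set); it also narrows `sessionId` to string for the code below.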
if (!sessionId) {
throw {
code: CodexErrorCode.CODEX_INVALID_OUTPUT,
message:
"Missing sessionId (neither parsed from Codex output nor provided via params.sessionId).",
recoverable: false,
} satisfies CodexErrorInfo;
}
const trackedStatus = result.exitCode !== 0 ? "abandoned" : "completed";
if (params.sessionId) {
// Resuming existing session
await sessionManager.markResumed(params.sessionId, {
workingDirectory: params.workingDirectory,
});
await sessionManager.updateStatus(params.sessionId, trackedStatus, {
workingDirectory: params.workingDirectory,
});
} else {
// New session
await sessionManager.track(
{
sessionId,
type: "write",
instruction: params.instruction,
createdAt: new Date().toISOString(),
status: trackedStatus,
},
{ workingDirectory: params.workingDirectory }
);
}
return {
success: result.exitCode === 0,
sessionId,
output: {
summary: parsed.summary,
filesModified: parsed.filesModified,
filesCreated: parsed.filesCreated,
},
status,
};
} catch (error) {
    // Mark the session abandoned if we flipped it to "active" before the failure
if (params.sessionId) {
try {
await sessionManager.updateStatus(params.sessionId, "abandoned", {
workingDirectory: params.workingDirectory,
});
} catch {
/* best effort */
}
}
const errorInfo = error as CodexErrorInfo;
return {
success: false,
sessionId: params.sessionId || "",
output: {
summary: "",
filesModified: [],
filesCreated: [],
},
status: "error",
error: {
code: errorInfo.code || CodexErrorCode.UNKNOWN_ERROR,
message: errorInfo.message || String(error),
recoverable: errorInfo.recoverable ?? false,
suggestion: errorInfo.suggestion,
},
};
}
}
// ── Template loading (mirrors codex-review.ts pattern) ──────────────
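/**
 * Resolve the TDD prompt template, in priority order:
 *   1. the `tddTemplate` config override: a file path (resolved against the
 *      working directory, then the process cwd) or inline template content;
 *   2. the bundled templates/tdd-developer.md;
 *   3. the hardcoded DEFAULT_TDD_TEMPLATE below.
 */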
async function loadTddTemplate(options: {
tddTemplate?: string;
workingDirectory?: string;
}): Promise<string> {
// Priority 1: Config override (file path or inline content)
if (options.tddTemplate) {
const templateOverride = options.tddTemplate;
const candidates: string[] = [];
if (options.workingDirectory) {
candidates.push(
path.resolve(options.workingDirectory, templateOverride)
);
}
candidates.push(path.resolve(templateOverride));
for (const candidate of candidates) {
try {
if (fs.existsSync(candidate)) {
return await fs.promises.readFile(candidate, "utf-8");
}
} catch {
// continue
}
}
    // Heuristic: multi-line content or a leading markdown heading means the
    // override is inline template content rather than a file path.
if (
templateOverride.includes("\n") ||
templateOverride.trimStart().startsWith("#")
) {
return templateOverride;
}
console.error(
`codex-dev: tddTemplate override not found: ${templateOverride}`
);
}
// Priority 2: Bundled template file
const moduleDir = path.dirname(fileURLToPath(import.meta.url));
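  // Walk up from this module to the package root; assumes the compiled file
  // sits two levels below it (e.g. dist/tools/), alongside templates/.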
const templatePath = path.join(
moduleDir,
"..",
"..",
"templates",
"tdd-developer.md"
);
try {
if (fs.existsSync(templatePath)) {
return await fs.promises.readFile(templatePath, "utf-8");
}
} catch {
// Fall through to default
}
// Priority 3: Hardcoded fallback
return DEFAULT_TDD_TEMPLATE;
}
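/**
 * Replace `{KEY}` placeholders in the template with their values, e.g.
 * fillTemplate("Task: {INSTRUCTION}", { INSTRUCTION: "fix parser" })
 * returns "Task: fix parser". Unmatched placeholders are left untouched.
 */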
function fillTemplate(
template: string,
values: Record<string, string>
): string {
let result = template;
for (const [key, value] of Object.entries(values)) {
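    // Use a replacer function so `$` sequences in the value are inserted
    // literally instead of being interpreted as replacement patterns.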
result = result.replace(new RegExp(`\\{${key}\\}`, "g"), () => value);
}
return result;
}
// ── Hardcoded fallback template ─────────────────────────────────────
const DEFAULT_TDD_TEMPLATE = `# TDD Developer Agent
You are a developer who strictly follows Test-Driven Development.
Below are your methodology rules, followed by the specific task to implement.
## The Iron Law
\`\`\`
NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST
\`\`\`
Wrote code before the test? Delete it. Start over. No exceptions.
## Red-Green-Refactor Cycle
For EACH piece of functionality:
### 1. RED — Write Failing Test
Write ONE minimal test showing what should happen.
- Tests one behavior
- Clear name describing the expected behavior
- Uses real code, not mocks (mock only when unavoidable)
### 2. VERIFY RED — Run Tests (MANDATORY)
Run the test. Confirm:
- Test fails (doesn't error out)
- Failure message matches expectation
- Fails because the feature is missing, not because of a typo
### 3. GREEN — Write Minimal Code
Write the simplest code that makes the test pass. Nothing more.
Do NOT add features the test doesn't require.
### 4. VERIFY GREEN — Run ALL Tests (MANDATORY)
Run full test suite. Confirm:
- New test passes
- ALL existing tests still pass
- No errors or warnings
### 5. REFACTOR — Clean Up (Only After Green)
Remove duplication, improve names, extract helpers.
Keep ALL tests green. Do NOT add new behavior.
### 6. REPEAT
Next failing test for the next behavior.
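### Example: One Full Cycle (illustrative; the function name and framework are placeholders)

\`\`\`ts
// slugify.test.ts - RED: one minimal failing test for the next behavior
import { expect, test } from "vitest";
import { slugify } from "./slugify";

test("lowercases and hyphenates words", () => {
  expect(slugify("Hello World")).toBe("hello-world");
});
\`\`\`

\`\`\`ts
// slugify.ts - GREEN: the simplest code that makes the test pass
export function slugify(input: string): string {
  return input.toLowerCase().split(/\s+/).join("-");
}
\`\`\`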
## Anti-Patterns to AVOID
1. Testing mock behavior instead of real code
2. Test-only methods in production classes
3. Mocking without understanding dependencies
4. Incomplete mocks hiding structural assumptions
5. Tests as afterthought
## Red Flags — STOP and Start Over
- Wrote production code before writing a test
- Test passes immediately on first run
- Can't explain why the test failed
- Rationalizing "just this once"
## Bug Fix Process
1. Write failing test reproducing the bug
2. Verify it fails
3. Fix with minimal code
4. Verify all tests pass
## Verification Checklist (Before Completing)
- [ ] Every new function/method has a test written BEFORE implementation
- [ ] Watched each test fail before implementing
- [ ] Each test failed for the expected reason
- [ ] Wrote minimal code to pass each test
- [ ] ALL tests pass
- [ ] Test output is clean
- [ ] Tests use real code (mocks only when unavoidable)
- [ ] Edge cases and error paths are covered
## Process Summary
\`\`\`
For each behavior:
1. Write ONE failing test
2. RUN tests → confirm FAIL (RED)
3. Write MINIMAL production code
4. RUN tests → confirm ALL PASS (GREEN)
5. Refactor (keep green)
6. Goto 1
\`\`\`
---
## Now Apply the Above to Your Task
Use the TDD methodology above to implement the following.
### Task
{INSTRUCTION}
### Context
{PLAN_REFERENCE}
### Test Framework
Use: {TEST_FRAMEWORK}
### After Implementation: Commit and Self-Review
Once all tests pass and you've completed the TDD cycle:
#### 1. Commit your work
Stage and commit with a clear message describing what was implemented.
#### 2. Self-review before reporting back
Review your own work with fresh eyes:
- **Completeness:** Did I fully implement everything the task specifies? Did I miss any requirements or edge cases?
- **Quality:** Are names clear and accurate? Is the code clean and maintainable?
- **Discipline:** Did I avoid overbuilding (YAGNI)? Did I only build what was requested? Did I follow existing patterns in the codebase?
- **Testing:** Do tests verify real behavior (not mock behavior)? Did I follow TDD? Are tests comprehensive?
If you find issues during self-review, fix them now before reporting.
#### 3. Report
When done, report what you implemented, what you tested and the test results, which files changed, and any self-review findings.
`;