# RAW baseline (no UI_SPEC / no Megamind / no refinement) — Jan 2, 2026
#
# Purpose:
# - "Without a pipeline at all" baseline: generate TSX directly from the task prompt.
# - Still runs deterministic gates + broken-page gate + vision judge + section creativity,
#   so we can compare creativity and shippable rates apples-to-apples against the full pipeline.
# - Vertex-only (no OpenRouter).
#
# Run (5-task debug):
# .venv/bin/python -m titan_factory.cli run --max-tasks 5 --run-id jan2-raw-5 -c config/config-raw-baseline-jan2.yaml
#
# Run (10-task batch, 2 concurrency):
# .venv/bin/python -m titan_factory.cli run --max-tasks 10 --run-id jan2-raw-10 -c config/config-raw-baseline-jan2.yaml
models:
  # Planner is unused in raw mode, but must be present to satisfy the config schema.
  planner:
    provider: vertex
    model: deepseek-ai/deepseek-v3.2-maas
    publishable: true
    max_tokens: 2000
    temperature: 0.7
  # Same generators as the pipeline run, for a fair comparison.
  ui_generators:
    - provider: vertex
      model: moonshotai/kimi-k2-thinking-maas
      publishable: true
      variants: 2
      max_tokens: 65000
      temperature: 0.7
    - provider: vertex
      model: minimaxai/minimax-m2-maas
      publishable: true
      variants: 2
      max_tokens: 65000
      temperature: 0.7
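    # Two generator models x two variants each presumably yields four raw candidates per task
    # (assumption: `variants` is the number of samples drawn per model, not verified in code).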
  patcher:
    provider: vertex
    model: qwen/qwen3-coder-480b-a35b-instruct-maas
    publishable: true
    max_tokens: 65000
    temperature: 0.2
  vision_judge:
    provider: gemini
    model: gemini-3.0-flash
    publishable: false
    max_tokens: 65000
    temperature: 0.7
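    # Assumption: force_adc makes the Gemini judge authenticate via Google Application Default
    # Credentials rather than an API key (inferred from the flag name, not verified in the loader).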
    force_adc: true
pipeline:
  # RAW baseline: bypass UI_SPEC planning and generate code directly from the task prompt.
  raw_generation_enabled: true
  skip_judge: false
  vision_score_threshold: 0.0
  # No style routing (keep the baseline truly "raw").
  style_routing_enabled: false
  style_gates_enabled: false
  # No Megamind or refinement loops (this is the baseline).
  megamind_enabled: false
  megamind_v2_enabled: false
  evol_enabled: false
  refinement_enabled: false
  # Creativity selection still applies (for consistent reporting); creativity is what we are measuring.
  creativity_gate:
    enabled: true
    # Same creativity gate as the full pipeline so comparisons remain apples-to-apples.
    min_avg: 0.0
    min_high_sections: 2
    enforce: false
  selection:
    mode: creativity_first
    judge_weight: 0.2
    creativity_weight: 0.8
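    # With these weights the selection score is presumably 0.2 * judge_score + 0.8 * creativity_score;
    # the exact combination is defined by the selection code, not by this config.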
  # Blank/broken-page killer (prevents the "blank shippable" bug in baseline runs too).
  broken_vision_gate_enabled: true
  broken_vision_gate_min_confidence: 0.85
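  # Assumption: a candidate is dropped only when the vision judge flags it as broken with
  # confidence >= 0.85; lower-confidence "broken" calls pass through and are just recorded.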
  # Deterministic gates (record results now; enforce later).
  deterministic_gates_enabled: true
  deterministic_gates_enforce: false
  axe_gate_enabled: true
  axe_fail_impacts: ["critical"]
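  # axe-core reports violations at four impact levels (minor, moderate, serious, critical);
  # given axe_fail_impacts above, only critical violations should fail this gate.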
  axe_timeout_ms: 60000
  lighthouse_gate_enabled: true
  lighthouse_preset: desktop
  lighthouse_timeout_ms: 240000
  lighthouse_min_scores:
    accessibility: 0.80
    performance: 0.00
    best_practices: 0.00
    seo: 0.00
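  # Thresholds use Lighthouse's 0-1 category scale, so accessibility >= 0.80 corresponds to the
  # "80/100" shown in Lighthouse reports; the other categories are recorded but not thresholded.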
  task_prompt_pack: mixed
  generate_edit_tasks: false
  # Use the built-in UIGEN system prompt in raw mode (it already contains anti-monoculture,
  # creative risk, and accessibility rules).
  uigen_prompt_variants:
    - id: builtin_raw
      source: builtin
  input_mode: auto
  max_fix_rounds: 2
  polish_loop_enabled: false
  model_timeout_ms: 150000
  build_timeout_ms: 240000
  render_timeout_ms: 120000
budget:
  task_concurrency: 2
  concurrency_vertex: 8
  concurrency_gemini: 2
  concurrency_build: 2
  concurrency_render: 2
  requests_per_min_vertex: 60
  max_total_tasks: null
  stop_after_usd: null
export:
  holdout_niches: 0
  validation_split: 0.0
  holdout_niche_ids: []
  gcs:
    bucket: null
    prefix: titan-factory-outputs
    upload_interval_tasks: 50
vertex:
  endpoint_template: "https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/endpoints/openapi/chat/completions"
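  # Example expansion with illustrative placeholder values (project "my-gcp-project", region "us-central1"):
  #   https://us-central1-aiplatform.googleapis.com/v1/projects/my-gcp-project/locations/us-central1/endpoints/openapi/chat/completions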