<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Reinforcement Learning Battle Cars</title>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@4.20.0/dist/tf.min.js"></script>
<style>
:root {
color-scheme: dark;
--bg: #0b1021;
--panel: #151b33;
--accent: #67e8f9;
--accent-2: #fda4af;
--grid: #1f2a48;
}
* {
box-sizing: border-box;
}
body {
margin: 0;
font-family: "Inter", system-ui, -apple-system, sans-serif;
background: radial-gradient(circle at 20% 20%, #162042, var(--bg));
color: #e8edf7;
display: grid;
grid-template-columns: 2fr 1fr;
min-height: 100vh;
}
header {
grid-column: 1 / span 2;
padding: 1.5rem 2rem 0.5rem;
display: flex;
justify-content: space-between;
align-items: baseline;
}
h1 {
margin: 0;
font-size: 1.8rem;
letter-spacing: 0.02em;
}
.pill {
background: rgba(255, 255, 255, 0.06);
border: 1px solid rgba(255, 255, 255, 0.08);
padding: 0.35rem 0.75rem;
border-radius: 999px;
font-size: 0.9rem;
color: #c7d2fe;
}
main {
display: grid;
grid-template-columns: 1fr 1fr;
grid-column: 1 / span 2;
gap: 1.2rem;
padding: 0 2rem 2rem;
}
canvas {
width: 100%;
height: 620px;
background: radial-gradient(circle at 50% 50%, #0f162c, #0a0f1f 60%);
border-radius: 14px;
border: 1px solid #1f2a48;
box-shadow: 0 10px 50px rgba(0, 0, 0, 0.45);
}
.panel {
background: var(--panel);
border-radius: 14px;
padding: 1rem 1.25rem;
border: 1px solid #1f2a48;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
}
.panel h2 {
margin-top: 0;
font-size: 1.2rem;
color: #c4d6ff;
}
.stats {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0.75rem;
}
.stat-card {
background: rgba(255, 255, 255, 0.04);
padding: 0.75rem 0.9rem;
border-radius: 10px;
border: 1px solid rgba(255, 255, 255, 0.08);
}
.stat-card strong {
display: block;
color: #94a3b8;
font-size: 0.85rem;
margin-bottom: 0.3rem;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.stat-card span {
font-size: 1.2rem;
color: #e5e7eb;
font-variant-numeric: tabular-nums;
}
.controls {
display: flex;
gap: 0.5rem;
margin-bottom: 1rem;
flex-wrap: wrap;
}
button {
padding: 0.5rem 0.9rem;
background: #1f2a48;
border: 1px solid #2f3a62;
color: #e2e8f0;
border-radius: 8px;
cursor: pointer;
transition: transform 0.08s ease, background 0.2s ease;
}
button:hover {
background: #2b3763;
transform: translateY(-1px);
}
button:active {
transform: translateY(0);
}
.legend {
display: flex;
gap: 1rem;
margin-bottom: 1rem;
flex-wrap: wrap;
}
.legend-item {
display: inline-flex;
align-items: center;
gap: 0.4rem;
font-size: 0.95rem;
}
.chip {
width: 16px;
height: 16px;
border-radius: 4px;
}
ul {
margin: 0;
padding-left: 1.1rem;
color: #cbd5e1;
line-height: 1.5;
}
.hype {
margin: 1rem 0 1.2rem;
}
.hype-label {
color: #cbd5e1;
font-size: 0.95rem;
margin-bottom: 0.4rem;
}
.hype-bar {
width: 100%;
height: 14px;
border-radius: 8px;
background: rgba(255, 255, 255, 0.08);
overflow: hidden;
border: 1px solid rgba(255, 255, 255, 0.08);
}
.hype-fill {
height: 100%;
width: 10%;
border-radius: 8px;
background: linear-gradient(90deg, #22d3ee, #a855f7, #f472b6);
box-shadow: 0 0 12px rgba(244, 114, 182, 0.6);
transition: width 0.1s ease-out, filter 0.2s ease-out;
}
.hype-caption {
margin-top: 0.35rem;
color: #94a3b8;
font-size: 0.9rem;
}
.commentary ul {
list-style: none;
padding-left: 0;
margin-top: 0.4rem;
display: grid;
gap: 0.35rem;
}
.commentary li {
background: rgba(255, 255, 255, 0.04);
border: 1px solid rgba(255, 255, 255, 0.08);
padding: 0.5rem 0.7rem;
border-radius: 10px;
color: #e2e8f0;
font-size: 0.95rem;
}
footer {
grid-column: 1 / span 2;
padding: 0 2rem 1.5rem;
color: #9ca3af;
font-size: 0.9rem;
}
</style>
</head>
<body>
<header>
<h1>Reinforcement Learning Battle Cars</h1>
<div class="pill">Twin neural Q-nets learn to duel entirely in-browser</div>
</header>
<main>
<canvas id="arena" width="900" height="700"></canvas>
<section class="panel">
<h2>Simulation Controls</h2>
<div class="controls">
<button id="toggle">Pause</button>
<button id="reset">Reset Episode</button>
<button id="boost">Give Red a small training boost</button>
</div>
<div class="legend">
<span class="legend-item"><span class="chip" style="background:#60a5fa"></span>Blue agent</span>
<span class="legend-item"><span class="chip" style="background:#f472b6"></span>Red agent</span>
<span class="legend-item"><span class="chip" style="background:#facc15"></span>Impulse damage</span>
</div>
<div class="stats" id="stats"></div>
<div class="hype">
<div class="hype-label">Arena hype</div>
<div class="hype-bar"><div class="hype-fill"></div></div>
<div class="hype-caption">Builds on hard hits, drifts, and close shaves</div>
</div>
<div class="commentary">
<h2>Commentary feed</h2>
<ul id="event-log"></ul>
</div>
<h2>How the duel works</h2>
<ul>
<li>Both cars learn online with compact <strong>neural double Q-networks</strong> (TensorFlow.js) that use a soft-updated target model and an annealed <strong>epsilon-greedy</strong> policy.</li>
<li>The state captures the normalized offsets and distance to the opponent, the sine and cosine of the heading error, both cars' speeds, the closing velocity, and wall proximity, so the agents can reason about closing speed and positioning.</li>
<li>Actions are accelerate, brake, coast, steer left/right, or a <em>look</em> action that turns the nose toward the opponent. Damage comes from car-to-car impacts and wall slaps.</li>
<li>Rewards combine impact damage, survival bonus, penalties for idling, and <strong>dense shaping</strong> for moving toward the opponent.</li>
<li>Episodes end when one hull reaches 0%. The surviving car gets a win bonus and the neural Q-nets are updated from replayed transitions every step.</li>
</ul>
</section>
</main>
<footer>
Watch the Q-values converge as cars learn to circle, flank, and ram each other around the square arena. Training happens entirely in the browser; no back end required.
</footer>
<script>
const canvas = document.getElementById("arena");
const ctx = canvas.getContext("2d");
const statsEl = document.getElementById("stats");
const logEl = document.getElementById("event-log");
const hypeFill = document.querySelector(".hype-fill");
const WORLD = {
w: canvas.width,
h: canvas.height,
margin: 40,
wallDamage: 14,
friction: 0.971,
dt: 1 / 60,
collisionRadius: 18,
};
const ACTIONS = ["accelerate", "brake", "left", "right", "coast", "look"];
const STATE_SIZE = 9;
let hype = 0.1;
function addLog(message) {
const item = document.createElement("li");
item.textContent = message;
logEl.prepend(item);
while (logEl.children.length > 7) {
logEl.removeChild(logEl.lastChild);
}
}
function updateHypeBar(delta = -0.004) {
hype = clamp(hype + delta, 0, 1);
hypeFill.style.width = `${(hype * 100).toFixed(0)}%`;
hypeFill.style.filter = `saturate(${0.8 + hype * 0.6}) drop-shadow(0 0 ${12 * hype}px rgba(244, 114, 182, 0.6))`;
}
function closingVelocity(self, opponent) {
const dx = opponent.x - self.x;
const dy = opponent.y - self.y;
const dist = Math.hypot(dx, dy) + 1e-5;
const selfVx = Math.cos(self.angle) * self.v;
const selfVy = Math.sin(self.angle) * self.v;
const oppVx = Math.cos(opponent.angle) * opponent.v;
const oppVy = Math.sin(opponent.angle) * opponent.v;
// negative dot means closing; flip sign so positive = closing speed
const relV = -(dx * (selfVx - oppVx) + dy * (selfVy - oppVy)) / dist;
return clamp(relV, -12, 12);
}
function headingError(self, opponent) {
const desired = Math.atan2(opponent.y - self.y, opponent.x - self.x);
const diff = ((desired - self.angle + Math.PI * 3) % (Math.PI * 2)) - Math.PI;
return Math.abs(diff);
}
function clamp(val, min, max) {
return Math.max(min, Math.min(max, val));
}
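// NeuralQ: a compact double-DQN learner. The online network is trained every step,
// while a slowly tracking target network scores the bootstrap term, i.e.
// y = r + gamma * (1 - done) * Q_target(s', argmax_a Q_online(s', a)).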
class NeuralQ {
constructor(actionSize, stateSize = STATE_SIZE) {
this.actionSize = actionSize;
this.stateSize = stateSize;
this.online = this.buildModel();
this.target = this.buildModel();
// The target net is never trained by the optimizer; it only receives soft-updated copies.
this.target.trainable = false;
this.optimizer = tf.train.adam(0.0009);
this.updateTarget(1);
}
buildModel() {
const input = tf.input({ shape: [this.stateSize] });
let x = tf.layers.dense({ units: 64, activation: "relu", kernelInitializer: "heNormal" }).apply(input);
x = tf.layers.layerNormalization().apply(x);
x = tf.layers.dense({ units: 64, activation: "relu", kernelInitializer: "heNormal" }).apply(x);
const output = tf.layers.dense({
units: this.actionSize,
activation: "linear",
kernelInitializer: "glorotUniform",
}).apply(x);
return tf.model({ inputs: input, outputs: output });
}
qValues(stateVec) {
return tf.tidy(() => {
const input = tf.tensor2d([stateVec], [1, this.stateSize]);
const out = this.online.predict(input);
const data = out.dataSync();
return Array.from(data);
});
}
updateTarget(tau = 0.02) {
// Polyak soft update: target <- tau * online + (1 - tau) * target.
// setWeights copies the values into the target variables, so the blended
// tensors can be cleaned up by tidy; the models' own weights are never disposed.
tf.tidy(() => {
const onlineWeights = this.online.getWeights();
const targetWeights = this.target.getWeights();
const blended = onlineWeights.map((w, i) => w.mul(tau).add(targetWeights[i].mul(1 - tau)));
this.target.setWeights(blended);
});
}
train(batch, gamma) {
if (!batch.length) return;
const states = batch.map((b) => b.state);
const nextStates = batch.map((b) => b.nextState);
const actions = batch.map((b) => b.actionIdx);
const rewards = batch.map((b) => b.reward);
const dones = batch.map((b) => (b.done ? 1 : 0));
const statesTensor = tf.tensor2d(states, [states.length, this.stateSize]);
const actionMask = tf.oneHot(actions, this.actionSize);
// Double-DQN target: the online net picks the next action, the target net scores it.
// Computed outside minimize() so no gradients flow through the bootstrap term.
const targetQ = tf.tidy(() => {
const nextStatesTensor = tf.tensor2d(nextStates, [nextStates.length, this.stateSize]);
const nextOnline = this.online.predict(nextStatesTensor);
const nextTarget = this.target.predict(nextStatesTensor);
const nextActionMask = tf.oneHot(tf.argMax(nextOnline, 1), this.actionSize);
const nextQ = tf.sum(tf.mul(nextTarget, nextActionMask), 1);
const rewardsTensor = tf.tensor1d(rewards);
const donesTensor = tf.tensor1d(dones);
return rewardsTensor.add(nextQ.mul(tf.sub(1, donesTensor)).mul(gamma));
});
// Only the online network's Q-values are differentiated; Huber loss on the TD error.
this.optimizer.minimize(() => {
const currentQAll = this.online.predict(statesTensor);
const pickedQ = tf.sum(tf.mul(currentQAll, actionMask), 1);
return tf.losses.huberLoss(targetQ, pickedQ);
});
this.updateTarget();
statesTensor.dispose();
actionMask.dispose();
targetQ.dispose();
}
}
const REPLAY_CAPACITY = 16000;
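// ReplayBuffer: fixed-capacity FIFO store of transitions, sampled uniformly at
// random (with replacement) to decorrelate the online updates.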
class ReplayBuffer {
constructor(capacity = REPLAY_CAPACITY) {
this.capacity = capacity;
this.buffer = [];
}
push(transition) {
if (this.buffer.length >= this.capacity) {
this.buffer.shift();
}
this.buffer.push(transition);
}
sample(batchSize) {
const out = [];
for (let i = 0; i < batchSize; i++) {
out.push(this.buffer[Math.floor(Math.random() * this.buffer.length)]);
}
return out;
}
get size() {
return this.buffer.length;
}
}
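// Car: bundles the physics state (position, heading, scalar speed), hit points,
// and a private brain + replay buffer, so the two agents learn independently.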
class Car {
constructor(options) {
Object.assign(this, options);
this.v = 0;
this.health = 100;
this.wins = 0;
this.episodes = 0;
this.color = options.color;
this.brain = new NeuralQ(ACTIONS.length);
this.buffer = new ReplayBuffer();
this.lockTimer = 0;
}
reset(x, y, angle) {
this.x = x;
this.y = y;
this.angle = angle;
this.v = 0;
this.health = 100;
this.lockTimer = 0;
}
stateVector(opponent) {
const dx = opponent.x - this.x;
const dy = opponent.y - this.y;
const dist = Math.hypot(dx, dy);
const relAngle = Math.atan2(dy, dx);
const angleDiff = ((relAngle - this.angle + Math.PI * 3) % (Math.PI * 2)) - Math.PI;
const wallProximity = Math.min(
this.x - WORLD.margin,
WORLD.w - WORLD.margin - this.x,
this.y - WORLD.margin,
WORLD.h - WORLD.margin - this.y
);
return [
dx / WORLD.w,
dy / WORLD.h,
Math.cos(angleDiff),
Math.sin(angleDiff),
dist / Math.hypot(WORLD.w, WORLD.h),
this.v / 9,
opponent.v / 9,
closingVelocity(this, opponent) / 12,
wallProximity / (WORLD.margin + 10),
];
}
chooseAction(stateVec, epsilon) {
const qs = this.brain.qValues(stateVec);
let actionIdx;
if (Math.random() < epsilon) {
actionIdx = Math.floor(Math.random() * ACTIONS.length);
} else {
const max = Math.max(...qs);
const best = [];
qs.forEach((v, i) => v === max && best.push(i));
actionIdx = best[Math.floor(Math.random() * best.length)];
}
return { action: ACTIONS[actionIdx], actionIdx };
}
lookAt(opponent) {
const desired = Math.atan2(opponent.y - this.y, opponent.x - this.x);
const diff = ((desired - this.angle + Math.PI * 3) % (Math.PI * 2)) - Math.PI;
const maxTurn = 0.18;
this.angle += clamp(diff, -maxTurn, maxTurn);
const aligned = Math.abs(diff) < 0.12;
this.lockTimer = aligned ? 60 : Math.max(this.lockTimer, 30);
}
applyAction(action, opponent) {
const accel = 0.55 * (1 + (this.lockTimer > 0 ? 0.2 : 0));
const turn = 0.09;
if (action === "accelerate") this.v += accel;
if (action === "brake") this.v -= accel * 1.2;
if (action === "left") this.angle -= turn;
if (action === "right") this.angle += turn;
if (action === "look") this.lookAt(opponent);
this.v = clamp(this.v, -6, 9);
}
move() {
this.x += Math.cos(this.angle) * this.v;
this.y += Math.sin(this.angle) * this.v;
this.v *= WORLD.friction;
if (this.lockTimer > 0) {
this.v += 0.04;
this.lockTimer -= 1;
}
}
remember(state, actionIdx, reward, nextState, done) {
this.buffer.push({ state, actionIdx, reward, nextState, done });
}
learn(batchSize, gamma) {
if (this.buffer.size === 0) return;
const samples = this.buffer.sample(Math.min(batchSize, this.buffer.size));
this.brain.train(samples, gamma);
}
}
const blue = new Car({ name: "Blue", color: "#60a5fa" });
const red = new Car({ name: "Red", color: "#f472b6" });
function resetEpisode() {
blue.reset(WORLD.margin + 80, WORLD.margin + 80, 0);
red.reset(WORLD.w - WORLD.margin - 80, WORLD.h - WORLD.margin - 80, Math.PI);
blue.episodes += 1;
red.episodes += 1;
}
resetEpisode();
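// Training hyperparameters: epsilon-greedy exploration annealed toward a floor,
// discount factor gamma, and two minibatch updates per simulation step.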
let running = true;
let epsilon = 0.18;
const epsilonFloor = 0.02;
const decay = 0.9985;
const gamma = 0.965;
const batchSize = 16;
const updatesPerStep = 2;
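// Collision handling: cars that leave the inner square bounce off the wall and take
// damage; overlapping cars exchange an impulse, both take impact damage, and the
// slower car takes an extra "attacker bonus" hit from the faster one.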
function handleCollisions() {
let damageBlue = 0;
let damageRed = 0;
let wallHitBlue = false;
let wallHitRed = false;
let impactEvent = null;
// Wall collision
[blue, red].forEach((car) => {
const minX = WORLD.margin;
const maxX = WORLD.w - WORLD.margin;
const minY = WORLD.margin;
const maxY = WORLD.h - WORLD.margin;
if (car.x < minX || car.x > maxX || car.y < minY || car.y > maxY) {
car.v *= -0.3;
car.health -= WORLD.wallDamage * Math.abs(car.v) * WORLD.dt * 3;
car.x = clamp(car.x, minX, maxX);
car.y = clamp(car.y, minY, maxY);
if (car === blue) {
damageBlue += WORLD.wallDamage;
wallHitBlue = true;
} else {
damageRed += WORLD.wallDamage;
wallHitRed = true;
}
}
});
// Car-to-car collision
const dx = blue.x - red.x;
const dy = blue.y - red.y;
const dist = Math.hypot(dx, dy);
if (dist < WORLD.collisionRadius * 2) {
const normalX = dx / (dist || 1);
const normalY = dy / (dist || 1);
const relVel = blue.v - red.v;
const impulse = relVel * 0.6;
blue.v -= impulse * normalX;
red.v += impulse * normalX;
const impact = Math.abs(relVel) * 12;
blue.health -= impact * 0.6;
red.health -= impact * 0.6;
impactEvent = {
x: (blue.x + red.x) / 2,
y: (blue.y + red.y) / 2,
impact,
};
if (impact > 6) {
pulses.push({ x: impactEvent.x, y: impactEvent.y, r: 10, a: 0.8 });
spawnSparks(impactEvent.x, impactEvent.y, impact);
}
// attacker bonus goes to faster car
if (blue.v > red.v) {
red.health -= impact * 0.4;
damageRed += impact;
} else {
blue.health -= impact * 0.4;
damageBlue += impact;
}
}
return { damageBlue, damageRed, wallHitBlue, wallHitRed, impactEvent };
}
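// Reward shaping: damage dealt minus damage taken, plus dense terms for closing the
// gap, a small survival bonus, and penalties for stalling or drifting apart.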
function computeReward({
damageBlue,
damageRed,
prevDist,
nextDist,
blueSpeed,
redSpeed,
closingBlue,
closingRed,
}) {
const distanceGain = clamp(prevDist - nextDist, -40, 40);
const proximityShaping = distanceGain * 0.11;
const closingBonusBlue = clamp(closingBlue, -10, 10) * 0.05;
const closingBonusRed = clamp(closingRed, -10, 10) * 0.05;
const survivalBonus = 0.02;
const driftPenaltyBlue = nextDist > prevDist ? -0.02 : 0;
const driftPenaltyRed = nextDist > prevDist ? -0.02 : 0;
const stallPenaltyBlue = Math.abs(blueSpeed) < 0.35 ? -0.12 : 0;
const stallPenaltyRed = Math.abs(redSpeed) < 0.35 ? -0.12 : 0;
return {
rewardBlue:
damageRed * 0.6 -
damageBlue * 0.7 +
proximityShaping +
closingBonusBlue +
survivalBonus +
stallPenaltyBlue +
driftPenaltyBlue,
rewardRed:
damageBlue * 0.6 -
damageRed * 0.7 -
proximityShaping +
closingBonusRed +
survivalBonus +
stallPenaltyRed +
driftPenaltyRed,
};
}
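// step() advances one simulation tick: read states, pick epsilon-greedy actions,
// run the physics, compute rewards, store the transitions, and run replay updates.
// With trainingOnly = true it also runs headlessly to warm up the buffers.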
function step(trainingOnly = false, epsilonOverride = null) {
if (!trainingOnly && !running) return;
if (epsilonOverride === null) {
epsilon = Math.max(epsilonFloor, epsilon * decay);
}
const prevDist = Math.hypot(blue.x - red.x, blue.y - red.y);
const blueState = blue.stateVector(red);
const redState = red.stateVector(blue);
const explore = epsilonOverride ?? epsilon;
const { action: blueAction, actionIdx: blueIdx } = blue.chooseAction(blueState, explore);
const { action: redAction, actionIdx: redIdx } = red.chooseAction(redState, explore);
blue.applyAction(blueAction, red);
red.applyAction(redAction, blue);
blue.move();
red.move();
const { damageBlue, damageRed, wallHitBlue, wallHitRed, impactEvent } = handleCollisions();
const nextBlue = blue.stateVector(red);
const nextRed = red.stateVector(blue);
const nextDist = Math.hypot(blue.x - red.x, blue.y - red.y);
const headingBlue = headingError(blue, red);
const headingRed = headingError(red, blue);
let { rewardBlue, rewardRed } = computeReward({
damageBlue,
damageRed,
prevDist,
nextDist,
blueSpeed: blue.v,
redSpeed: red.v,
closingBlue: closingVelocity(blue, red),
closingRed: closingVelocity(red, blue),
});
if (!trainingOnly) {
// narrative overlays
if (impactEvent && impactEvent.impact > 8) {
addLog(
impactEvent.impact > 14
? "Massive slam! Both cars reel from the hit."
: "Solid ram lands โ chassis shaking!"
);
updateHypeBar(impactEvent.impact / 300);
} else if (!impactEvent && prevDist < 120 && nextDist > prevDist && Math.random() < 0.25) {
addLog("Near miss โ they drift past with inches to spare.");
updateHypeBar(0.015);
}
if (wallHitBlue || wallHitRed) {
addLog(`${wallHitBlue && wallHitRed ? "Both cars scrape" : wallHitBlue ? "Blue scrapes" : "Red scrapes"} the wall!`);
updateHypeBar(0.01);
}
updateHypeBar();
}
let done = false;
if (blue.health <= 0 || red.health <= 0) {
done = true;
if (blue.health <= 0 && red.health <= 0) {
// double knockout, neutral ending
} else if (blue.health <= 0) {
rewardRed += 12;
red.wins += 1;
} else if (red.health <= 0) {
rewardBlue += 12;
blue.wins += 1;
}
}
blue.remember(blueState, blueIdx, rewardBlue, nextBlue, done);
red.remember(redState, redIdx, rewardRed, nextRed, done);
for (let i = 0; i < updatesPerStep; i++) {
blue.learn(batchSize, gamma);
red.learn(batchSize, gamma);
}
if (done) {
resetEpisode();
}
}
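// Rendering: arena border and grid, both cars with health bars, plus transient
// spark and shockwave effects spawned by hard impacts.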
function drawArena() {
ctx.clearRect(0, 0, canvas.width, canvas.height);
ctx.save();
ctx.strokeStyle = "#1f2a48";
ctx.lineWidth = 3;
ctx.strokeRect(WORLD.margin, WORLD.margin, WORLD.w - WORLD.margin * 2, WORLD.h - WORLD.margin * 2);
for (let i = WORLD.margin; i < WORLD.w - WORLD.margin; i += 60) {
ctx.strokeStyle = "rgba(255,255,255,0.04)";
ctx.beginPath();
ctx.moveTo(i, WORLD.margin);
ctx.lineTo(i, WORLD.h - WORLD.margin);
ctx.stroke();
}
for (let j = WORLD.margin; j < WORLD.h - WORLD.margin; j += 60) {
ctx.strokeStyle = "rgba(255,255,255,0.04)";
ctx.beginPath();
ctx.moveTo(WORLD.margin, j);
ctx.lineTo(WORLD.w - WORLD.margin, j);
ctx.stroke();
}
drawCar(blue);
drawCar(red);
drawDamagePulse();
drawSparks();
ctx.restore();
}
const pulses = [];
const sparks = [];
function spawnSparks(x, y, intensity) {
const count = clamp(Math.floor(intensity / 2), 6, 22);
for (let i = 0; i < count; i++) {
const angle = Math.random() * Math.PI * 2;
const speed = Math.random() * 3 + intensity * 0.05;
sparks.push({
x,
y,
vx: Math.cos(angle) * speed,
vy: Math.sin(angle) * speed,
life: 1,
});
}
}
function drawSparks() {
for (let i = sparks.length - 1; i >= 0; i--) {
const p = sparks[i];
ctx.beginPath();
ctx.strokeStyle = `rgba(250, 204, 21, ${p.life})`;
ctx.lineWidth = 2;
ctx.moveTo(p.x, p.y);
ctx.lineTo(p.x - p.vx * 2, p.y - p.vy * 2);
ctx.stroke();
p.x += p.vx;
p.y += p.vy;
p.vx *= 0.96;
p.vy *= 0.96;
p.life -= 0.02;
if (p.life <= 0) sparks.splice(i, 1);
}
}
function drawDamagePulse() {
pulses.forEach((p) => {
ctx.beginPath();
ctx.strokeStyle = `rgba(250, 204, 21, ${p.a})`;
ctx.lineWidth = 4;
ctx.arc(p.x, p.y, p.r, 0, Math.PI * 2);
ctx.stroke();
});
for (let i = pulses.length - 1; i >= 0; i--) {
pulses[i].r += 2.5;
pulses[i].a -= 0.03;
if (pulses[i].a <= 0) pulses.splice(i, 1);
}
}
function drawCar(car) {
ctx.save();
ctx.translate(car.x, car.y);
ctx.rotate(car.angle);
ctx.fillStyle = car.color;
ctx.strokeStyle = "rgba(255,255,255,0.4)";
ctx.lineWidth = 2;
ctx.beginPath();
ctx.roundRect(-16, -10, 32, 20, 6);
ctx.fill();
ctx.stroke();
ctx.fillStyle = "rgba(255,255,255,0.8)";
ctx.fillRect(0, -6, 10, 12);
ctx.restore();
// health bar
ctx.fillStyle = "rgba(255,255,255,0.15)";
ctx.fillRect(car.x - 20, car.y - 28, 40, 6);
ctx.fillStyle = car.health < 30 ? "#facc15" : car.color;
ctx.fillRect(car.x - 20, car.y - 28, 40 * (clamp(car.health, 0, 100) / 100), 6);
}
function updateStats() {
statsEl.innerHTML = "";
const data = [
{ label: "Blue health", value: blue.health.toFixed(1) + "%" },
{ label: "Red health", value: red.health.toFixed(1) + "%" },
{ label: "Blue wins", value: blue.wins },
{ label: "Red wins", value: red.wins },
{ label: "Episodes", value: Math.max(blue.episodes, red.episodes) },
{ label: "Exploration", value: `ฮต = ${epsilon.toFixed(3)}` },
{ label: "Replay size", value: `${blue.buffer.size} / ${REPLAY_CAPACITY}` },
];
data.forEach((d) => {
const card = document.createElement("div");
card.className = "stat-card";
card.innerHTML = `<strong>${d.label}</strong><span>${d.value}</span>`;
statsEl.appendChild(card);
});
}
function loop() {
step();
drawArena();
updateStats();
requestAnimationFrame(loop);
}
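// warmupReplay(): run a few hundred headless steps with high exploration so both
// replay buffers hold varied transitions before the visible training loop starts.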
function warmupReplay(iterations = 420) {
resetEpisode();
for (let i = 0; i < iterations; i++) {
const annealed = 0.5 - (0.4 * i) / iterations; // start random, taper down
step(true, Math.max(0.08, annealed));
}
epsilon = Math.max(0.05, epsilon * Math.pow(decay, iterations));
resetEpisode();
}
warmupReplay();
loop();
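// UI wiring: pause/resume, manual episode reset, and a demo "boost" button that
// injects one optimistic transition into Red's buffer and trains on it.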
document.getElementById("toggle").addEventListener("click", (e) => {
running = !running;
e.target.textContent = running ? "Pause" : "Resume";
});
document.getElementById("reset").addEventListener("click", () => {
resetEpisode();
});
document.getElementById("boost").addEventListener("click", () => {
pulses.push({ x: red.x, y: red.y, r: 12, a: 0.6 });
const state = red.stateVector(blue);
red.remember(state, 0, 5, state, false);
red.learn(1, gamma);
});
// Slight camera pulse while an impact shockwave is still on screen
setInterval(() => {
const lastPulse = pulses.at(-1);
if (lastPulse) {
canvas.style.transform = "translateZ(0) scale(1.01)";
setTimeout(() => (canvas.style.transform = ""), 80);
}
}, 120);
</script>
</body>
</html>