Skip to main content
Glama
index.html (21.2 kB)
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Experience Replay Learning - AgentDB</title>
  <style>
    * {
      margin: 0;
      padding: 0;
      box-sizing: border-box;
    }

    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      min-height: 100vh;
      padding: 2rem;
    }

    .container {
      max-width: 1400px;
      margin: 0 auto;
    }

    header {
      background: white;
      border-radius: 12px;
      padding: 2rem;
      margin-bottom: 2rem;
      box-shadow: 0 10px 30px rgba(0,0,0,0.2);
    }

    h1 {
      color: #333;
      margin-bottom: 0.5rem;
    }

    .subtitle {
      color: #666;
    }

    .grid {
      display: grid;
      grid-template-columns: 2fr 1fr;
      gap: 2rem;
    }

    .card {
      background: white;
      border-radius: 12px;
      padding: 2rem;
      box-shadow: 0 10px 30px rgba(0,0,0,0.2);
    }

    .card h2 {
      color: #333;
      margin-bottom: 1rem;
    }

    /* Cards that are followed by another card in the same column. */
    .card-spaced {
      margin-bottom: 1rem;
    }

    .hint {
      color: #666;
      margin-bottom: 1rem;
    }

    .empty-note {
      color: #666;
      text-align: center;
    }

    .game-board {
      display: grid;
      grid-template-columns: repeat(5, 1fr);
      gap: 0.5rem;
      margin: 1rem 0;
    }

    .cell {
      aspect-ratio: 1;
      background: #f0f0f0;
      border: 2px solid #ddd;
      border-radius: 6px;
      display: flex;
      align-items: center;
      justify-content: center;
      font-size: 1.5rem;
      cursor: pointer;
      transition: all 0.2s ease;
    }

    .cell:hover {
      background: #e0e0e0;
      transform: scale(1.05);
    }

    .cell.agent {
      background: #667eea;
      color: white;
    }

    .cell.goal {
      background: #28a745;
      color: white;
    }

    .cell.obstacle {
      background: #dc3545;
      color: white;
    }

    .cell.path {
      background: #667eea33;
    }

    .controls {
      display: grid;
      grid-template-columns: repeat(3, 1fr);
      gap: 0.5rem;
      margin: 1rem 0;
    }

    .btn {
      padding: 0.75rem 1rem;
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      color: white;
      border: none;
      border-radius: 6px;
      font-weight: 600;
      cursor: pointer;
      transition: opacity 0.3s ease;
    }

    .btn:hover {
      opacity: 0.9;
    }

    .btn.secondary {
      background: #6c757d;
    }

    /* Full-width button placed under the control grid. */
    .btn-block {
      display: block;
      width: 100%;
      margin-top: 1rem;
      text-align: center;
    }

    .stat-box {
      background: #f8f9fa;
      border-radius: 6px;
      padding: 1rem;
      margin-bottom: 0.5rem;
    }

    .stat-label {
      color: #666;
      font-size: 0.9rem;
    }

    .stat-value {
      color: #333;
      font-size: 1.5rem;
      font-weight: bold;
      margin-top: 0.25rem;
    }

    .progress-bar {
      width: 100%;
      height: 20px;
      background: #e0e0e0;
      border-radius: 10px;
      overflow: hidden;
      margin-top: 0.5rem;
    }

    .progress-fill {
      height: 100%;
      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
      transition: width 0.3s ease;
    }

    .experience-list {
      max-height: 300px;
      overflow-y: auto;
      background: #f8f9fa;
      border-radius: 6px;
      padding: 1rem;
    }

    .experience-item {
      background: white;
      border-radius: 4px;
      padding: 0.75rem;
      margin-bottom: 0.5rem;
      font-size: 0.85rem;
      border-left: 3px solid #667eea;
    }

    .reward-positive {
      border-left-color: #28a745;
    }

    .reward-negative {
      border-left-color: #dc3545;
    }

    .q-values {
      display: grid;
      grid-template-columns: repeat(2, 1fr);
      gap: 0.5rem;
      margin-top: 1rem;
    }

    .q-value {
      background: #f8f9fa;
      padding: 0.75rem;
      border-radius: 6px;
      text-align: center;
    }

    .q-label {
      color: #666;
      font-size: 0.85rem;
    }

    .q-number {
      color: #333;
      font-size: 1.2rem;
      font-weight: bold;
      margin-top: 0.25rem;
    }

    .training-log {
      background: #f8f9fa;
      border-radius: 6px;
      padding: 1rem;
      font-family: 'Courier New', monospace;
      font-size: 0.85rem;
      max-height: 200px;
      overflow-y: auto;
    }

    .log-entry {
      margin-bottom: 0.25rem;
      color: #333;
    }

    .log-success {
      color: #28a745;
    }

    .log-fail {
      color: #dc3545;
    }
  </style>
</head>
<body>
  <div class="container">
    <header>
      <h1>🎮 Experience Replay Learning</h1>
      <p class="subtitle">Q-Learning with Experience Buffer and Neural Pattern Training</p>
    </header>

    <main class="grid">
      <div>
        <div class="card card-spaced">
          <h2>Grid World Environment</h2>
          <p class="hint">
            Navigate to the goal 🎯. Avoid obstacles ⛔. Learn optimal paths!
          </p>
          <div class="game-board" id="gameBoard"></div>
          <div class="controls">
            <button type="button" class="btn" id="upBtn">⬆️ Up</button>
            <button type="button" class="btn" id="trainBtn">🧠 Train Agent</button>
            <button type="button" class="btn" id="downBtn">⬇️ Down</button>
            <button type="button" class="btn" id="leftBtn">⬅️ Left</button>
            <button type="button" class="btn" id="autoBtn">🤖 Auto Play</button>
            <button type="button" class="btn" id="rightBtn">➡️ Right</button>
          </div>
          <!-- A real <button> so the control is focusable and keyboard-activatable. -->
          <button type="button" class="btn btn-block" id="resetBtn">
            🔄 Reset Environment
          </button>
        </div>

        <div class="card">
          <h2>Experience Buffer (Last 10)</h2>
          <div class="experience-list" id="experienceList">
            <p class="empty-note">No experiences yet</p>
          </div>
        </div>
      </div>

      <div>
        <div class="card card-spaced">
          <h2>Learning Progress</h2>
          <div class="stat-box">
            <div class="stat-label">Episodes Completed</div>
            <div class="stat-value" id="episodes">0</div>
          </div>
          <div class="stat-box">
            <div class="stat-label">Success Rate</div>
            <div class="stat-value" id="successRate">0%</div>
            <div class="progress-bar">
              <div class="progress-fill" id="successProgress" style="width: 0%"></div>
            </div>
          </div>
          <div class="stat-box">
            <div class="stat-label">Avg Steps to Goal</div>
            <div class="stat-value" id="avgSteps">-</div>
          </div>
          <div class="stat-box">
            <div class="stat-label">Total Experiences</div>
            <div class="stat-value" id="totalExp">0</div>
          </div>
        </div>

        <div class="card card-spaced">
          <h2>Q-Values (Current State)</h2>
          <div class="q-values">
            <div class="q-value">
              <div class="q-label">Up ⬆️</div>
              <div class="q-number" id="qUp">0.00</div>
            </div>
            <div class="q-value">
              <div class="q-label">Down ⬇️</div>
              <div class="q-number" id="qDown">0.00</div>
            </div>
            <div class="q-value">
              <div class="q-label">Left ⬅️</div>
              <div class="q-number" id="qLeft">0.00</div>
            </div>
            <div class="q-value">
              <div class="q-label">Right ➡️</div>
              <div class="q-number" id="qRight">0.00</div>
            </div>
          </div>
        </div>

        <div class="card">
          <h2>Training Log</h2>
          <div class="training-log" id="trainingLog">
            <div class="log-entry">Ready to start training...</div>
          </div>
        </div>
      </div>
    </main>
  </div>

  <script type="module">
    /**
     * Experience Replay Q-Learning Example
     *
     * Demonstrates:
     * 1. Experience buffer management
     * 2. Q-learning with replay
     * 3. Exploration vs exploitation
     * 4. Policy improvement over time
     */

    const GRID_SIZE = 5;
    const ACTIONS = ['up', 'down', 'left', 'right'];
    const START = { x: 0, y: 0 };

    // Hyperparameters
    const ALPHA = 0.1;   // Learning rate
    const GAMMA = 0.9;   // Discount factor
    const EPSILON = 0.1; // Exploration rate
    const BUFFER_SIZE = 100;
    // Auto-play gives up on an episode after this many steps and counts it as failed.
    const MAX_EPISODE_STEPS = 200;

    let state = { ...START };
    const goal = { x: 4, y: 4 };
    const obstacles = [{ x: 1, y: 1 }, { x: 2, y: 2 }, { x: 3, y: 1 }];
    let experienceBuffer = [];
    let totalExperiences = 0; // Running count; the buffer itself is capped at BUFFER_SIZE.
    let qTable = {};
    let episodes = 0;
    let successes = 0;
    let stepHistory = [];     // Step counts of successful auto-play episodes.
    let episodeSteps = 0;     // Steps taken since the agent was last placed at START.
    let autoPlaying = false;
    let autoPlayRun = 0;      // Token identifying the newest auto-play loop.

    /** Rebuilds the board DOM from the current state and refreshes the Q-value panel. */
    function initGrid() {
      const board = document.getElementById('gameBoard');
      board.innerHTML = '';

      for (let y = 0; y < GRID_SIZE; y++) {
        for (let x = 0; x < GRID_SIZE; x++) {
          const cell = document.createElement('div');
          cell.className = 'cell';
          cell.dataset.x = x;
          cell.dataset.y = y;

          if (x === state.x && y === state.y) {
            cell.classList.add('agent');
            cell.textContent = '🤖';
          } else if (x === goal.x && y === goal.y) {
            cell.classList.add('goal');
            cell.textContent = '🎯';
          } else if (obstacles.some(o => o.x === x && o.y === y)) {
            cell.classList.add('obstacle');
            cell.textContent = '⛔';
          }

          board.appendChild(cell);
        }
      }

      updateQValues();
    }

    /** Puts the agent back on the start cell, clears the step counter and redraws. */
    function startEpisode() {
      state = { ...START };
      episodeSteps = 0;
      initGrid();
    }

    /** Returns the Q-table key ("x,y") for a position object. */
    function getStateKey(s) {
      return `${s.x},${s.y}`;
    }

    /** Returns the Q-table row for a state key, creating an all-zero row on first access. */
    function getQRow(stateKey) {
      if (!qTable[stateKey]) {
        qTable[stateKey] = { up: 0, down: 0, left: 0, right: 0 };
      }
      return qTable[stateKey];
    }

    /** Reads Q(stateKey, action); unseen states read as 0. */
    function getQValue(stateKey, action) {
      return getQRow(stateKey)[action];
    }

    /** Writes Q(stateKey, action). */
    function setQValue(stateKey, action, value) {
      getQRow(stateKey)[action] = value;
    }

    /**
     * Epsilon-greedy policy: a uniformly random action with probability EPSILON,
     * otherwise an action with the highest Q-value for the state.
     */
    function chooseAction(stateKey) {
      if (Math.random() < EPSILON) {
        // Explore: random action
        return ACTIONS[Math.floor(Math.random() * ACTIONS.length)];
      }

      // Exploit: collect every action tied for the best Q-value.
      let maxQ = -Infinity;
      let best = [];
      for (const action of ACTIONS) {
        const q = getQValue(stateKey, action);
        if (q > maxQ) {
          maxQ = q;
          best = [action];
        } else if (q === maxQ) {
          best.push(action);
        }
      }

      // Break ties at random; always taking the first action would pin an
      // untrained agent (all Q-values equal) against the top wall.
      return best[Math.floor(Math.random() * best.length)];
    }

    /**
     * Applies an action to the environment, records the transition in the
     * experience buffer, and redraws the board.
     *
     * @param {string} action One of ACTIONS.
     * @returns {{reward: number, done: boolean}} Reward for the step and whether
     *   the goal was reached.
     */
    function takeAction(action) {
      const newState = { ...state };

      // Moves are clamped to the grid, so walking into a wall leaves the agent in place.
      switch (action) {
        case 'up':
          newState.y = Math.max(0, newState.y - 1);
          break;
        case 'down':
          newState.y = Math.min(GRID_SIZE - 1, newState.y + 1);
          break;
        case 'left':
          newState.x = Math.max(0, newState.x - 1);
          break;
        case 'right':
          newState.x = Math.min(GRID_SIZE - 1, newState.x + 1);
          break;
      }

      // Calculate reward
      let reward = -0.1; // Small penalty for each step
      let done = false;

      if (newState.x === goal.x && newState.y === goal.y) {
        reward = 10; // Big reward for reaching goal
        done = true;
      } else if (obstacles.some(o => o.x === newState.x && o.y === newState.y)) {
        reward = -5; // Penalty for hitting obstacle
        // The agent bounces back to where it was.
        newState.x = state.x;
        newState.y = state.y;
      }

      // Store experience
      addExperience({
        state: getStateKey(state),
        action: action,
        reward: reward,
        nextState: getStateKey(newState),
        done: done,
        timestamp: Date.now()
      });

      state = newState;
      episodeSteps++;
      initGrid();

      return { reward, done };
    }

    /** Pushes a transition onto the front of the buffer, evicting the oldest beyond BUFFER_SIZE. */
    function addExperience(exp) {
      experienceBuffer.unshift(exp);
      if (experienceBuffer.length > BUFFER_SIZE) {
        experienceBuffer.pop();
      }
      totalExperiences++;

      renderExperiences();
      document.getElementById('totalExp').textContent = totalExperiences;
    }

    /**
     * Runs one replay pass: samples batchSize transitions (with replacement) from
     * the buffer and applies the Q-learning update to each.
     * Does nothing except log a notice when the buffer holds fewer than batchSize items.
     */
    function trainFromReplay(batchSize = 10) {
      if (experienceBuffer.length < batchSize) {
        logTraining(
          `Need at least ${batchSize} experiences to train (have ${experienceBuffer.length})`
        );
        return;
      }

      // Sample random batch from buffer
      const batch = [];
      for (let i = 0; i < batchSize; i++) {
        const idx = Math.floor(Math.random() * experienceBuffer.length);
        batch.push(experienceBuffer[idx]);
      }

      // Update Q-values
      for (const exp of batch) {
        let target = exp.reward;

        if (!exp.done) {
          // Q-learning: bootstrap from the best Q-value of the next state.
          const maxNextQ = Math.max(...ACTIONS.map(a => getQValue(exp.nextState, a)));
          target += GAMMA * maxNextQ;
        }

        const currentQ = getQValue(exp.state, exp.action);
        setQValue(exp.state, exp.action, currentQ + ALPHA * (target - currentQ));
      }

      updateQValues();
      logTraining(`Trained on ${batchSize} experiences`);
    }

    /**
     * Toggles auto-play. While on, the agent repeatedly picks an action with
     * chooseAction, and after each finished episode (goal reached or step cap hit)
     * trains from replay and restarts from START.
     */
    async function autoPlay() {
      autoPlaying = !autoPlaying;
      const btn = document.getElementById('autoBtn');

      if (!autoPlaying) {
        btn.textContent = '🤖 Auto Play';
        btn.classList.remove('secondary');
        return;
      }

      btn.textContent = '⏸️ Stop';
      btn.classList.add('secondary');

      // A loop that was stopped while sleeping must not resume if auto-play is
      // restarted in the meantime; only the loop holding the newest token runs.
      const run = ++autoPlayRun;
      const isActive = () => autoPlaying && run === autoPlayRun;

      while (isActive()) {
        const action = chooseAction(getStateKey(state));
        const { done } = takeAction(action);
        const timedOut = !done && episodeSteps >= MAX_EPISODE_STEPS;

        if (done || timedOut) {
          episodes++;
          if (done) {
            successes++;
            // Only successful episodes feed the "Avg Steps to Goal" stat.
            stepHistory.push(episodeSteps);
            logTraining(`✓ Episode ${episodes}: Reached goal`, 'success');
          } else {
            logTraining(`✗ Episode ${episodes}: Failed`, 'fail');
          }

          // Train after each episode
          trainFromReplay(10);
          updateStats();

          // Reset
          startEpisode();
          await sleep(500);
        }

        await sleep(100);
      }
    }

    /** Resolves after ms milliseconds. */
    function sleep(ms) {
      return new Promise(resolve => setTimeout(resolve, ms));
    }

    /** Shows the four Q-values of the agent's current state. */
    function updateQValues() {
      const row = getQRow(getStateKey(state));
      document.getElementById('qUp').textContent = row.up.toFixed(2);
      document.getElementById('qDown').textContent = row.down.toFixed(2);
      document.getElementById('qLeft').textContent = row.left.toFixed(2);
      document.getElementById('qRight').textContent = row.right.toFixed(2);
    }

    /** Renders the ten most recent transitions, or a placeholder when the buffer is empty. */
    function renderExperiences() {
      const list = document.getElementById('experienceList');

      if (experienceBuffer.length === 0) {
        list.innerHTML = '<p class="empty-note">No experiences yet</p>';
        return;
      }

      const recent = experienceBuffer.slice(0, 10);
      list.innerHTML = recent.map(exp => {
        // Ordinary step penalties (-0.1) stay neutral; only obstacle hits are flagged.
        const rewardClass = exp.reward > 0 ? 'reward-positive' :
                            exp.reward < -1 ? 'reward-negative' : '';
        return `
          <div class="experience-item ${rewardClass}">
            State: ${exp.state} | Action: ${exp.action} | Reward: ${exp.reward.toFixed(2)}
          </div>
        `;
      }).join('');
    }

    /** Refreshes the Learning Progress panel, including the zero/empty state after a reset. */
    function updateStats() {
      document.getElementById('episodes').textContent = episodes;

      const rate = episodes > 0 ? (successes / episodes * 100).toFixed(0) : '0';
      document.getElementById('successRate').textContent = `${rate}%`;
      document.getElementById('successProgress').style.width = `${rate}%`;

      let avgText = '-';
      if (stepHistory.length > 0) {
        const avg = stepHistory.reduce((a, b) => a + b, 0) / stepHistory.length;
        avgText = avg.toFixed(1);
      }
      document.getElementById('avgSteps').textContent = avgText;
    }

    /**
     * Prepends a timestamped entry to the training log.
     * @param {string} message Text to log.
     * @param {string} [type] Optional suffix for a "log-<type>" CSS class ('success' or 'fail').
     */
    function logTraining(message, type = '') {
      const log = document.getElementById('trainingLog');
      const entry = document.createElement('div');
      entry.className = `log-entry ${type ? 'log-' + type : ''}`;
      entry.textContent = `[${new Date().toLocaleTimeString()}] ${message}`;
      log.insertBefore(entry, log.firstChild);

      // Keep only last 20 entries
      while (log.children.length > 20) {
        log.removeChild(log.lastChild);
      }
    }

    /** Clears all learned data and statistics and returns the agent to START. */
    function reset() {
      experienceBuffer = [];
      totalExperiences = 0;
      qTable = {};
      episodes = 0;
      successes = 0;
      stepHistory = [];

      startEpisode();
      renderExperiences();
      updateStats();
      document.getElementById('totalExp').textContent = totalExperiences;

      document.getElementById('trainingLog').innerHTML =
        '<div class="log-entry">Environment reset</div>';
    }

    // Event listeners
    const MOVE_BUTTONS = { upBtn: 'up', downBtn: 'down', leftBtn: 'left', rightBtn: 'right' };
    for (const [buttonId, action] of Object.entries(MOVE_BUTTONS)) {
      document.getElementById(buttonId).addEventListener('click', () => {
        const { done } = takeAction(action);
        // Manual play: reaching the goal just sends the agent back to the start.
        if (done) {
          startEpisode();
        }
      });
    }

    document.getElementById('trainBtn').addEventListener('click', () => {
      trainFromReplay(20);
    });

    document.getElementById('autoBtn').addEventListener('click', autoPlay);
    document.getElementById('resetBtn').addEventListener('click', reset);

    // Initialize
    initGrid();
  </script>
</body>
</html>

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/airmcp-com/mcp-standards'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.