<!-- test-summary-template.html•14.4 kB -->
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MongoDB MCP Server - Accuracy Test Summary</title>
<style>
/* Page shell: system sans-serif font stack, light grey page background, dark grey text. */
body {
font-family:
-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
margin: 0;
padding: 20px;
background-color: #f5f5f5;
color: #333;
}
/* Centered white card (at most 1400px wide) with rounded corners and a soft drop shadow. */
.container {
max-width: 1400px;
margin: 0 auto;
background: white;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
padding: 30px;
}
/* Page title: green (#00684a) text with a matching 3px underline. */
h1 {
color: #00684a;
border-bottom: 3px solid #00684a;
padding-bottom: 10px;
margin-bottom: 30px;
}
/* Summary panel: light grey box with a 4px accent bar on the left (green by default). */
.header-info {
background: #f8f9fa;
padding: 20px;
border-radius: 6px;
margin-bottom: 20px;
border-left: 4px solid #00684a;
}
/*
 * Accent overrides are positional: they match a .header-info that is the 3rd
 * (blue) or 4th (lighter green) child of its parent. Inserting, removing or
 * reordering sibling elements changes which panel receives which colour.
 */
.header-info:nth-child(3) {
border-left-color: #007bff;
}
.header-info:nth-child(4) {
border-left-color: #28a745;
}
/* Panel heading; inherits the default green unless a positional override below applies. */
.header-info h2 {
margin-top: 0;
margin-bottom: 15px;
color: #00684a;
font-size: 1.2em;
}
/* Heading colours follow the same positional scheme as the accent bars above. */
.header-info:nth-child(3) h2 {
color: #007bff;
}
.header-info:nth-child(4) h2 {
color: #28a745;
}
/* Responsive grid: fits as many equal-width columns of at least 200px as the panel allows. */
.info-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
margin-top: 15px;
}
/* One label/value card inside the grid. */
.info-item {
background: white;
padding: 15px;
border-radius: 4px;
border: 1px solid #dee2e6;
}
.info-label {
font-weight: bold;
color: #00684a;
margin-bottom: 5px;
}
/* break-all lets long unbroken values wrap at any character instead of overflowing the card. */
.info-value {
color: #666;
word-break: break-all;
}
/*
 * Blue-accented summary box with a grid of statistic cards.
 * NOTE(review): nothing in this template's static markup uses .summary or
 * .stat-*; they may be referenced by generated content or be leftovers.
 * Confirm before removing.
 */
.summary {
background: #f8f9fa;
padding: 20px;
border-radius: 6px;
margin-bottom: 30px;
border-left: 4px solid #007bff;
}
.summary h2 {
margin-top: 0;
color: #007bff;
}
/* Same responsive column rule as .info-grid: columns of at least 200px, as many as fit. */
.stat-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
margin-top: 15px;
}
.stat-item {
background: white;
padding: 15px;
border-radius: 4px;
border: 1px solid #dee2e6;
}
/* Large bold blue figure. */
.stat-value {
font-size: 1.5em;
font-weight: bold;
color: #007bff;
}
/* Small grey caption placed under the figure. */
.stat-label {
font-size: 0.9em;
color: #666;
margin-top: 5px;
}
/* Results table: full width, collapsed borders, 14px text. */
table {
width: 100%;
border-collapse: collapse;
margin-top: 20px;
font-size: 14px;
}
/* Shared cell styling: left-aligned, top-aligned, with a thin separator under each row. */
th,
td {
padding: 12px 8px;
text-align: left;
border-bottom: 1px solid #dee2e6;
vertical-align: top;
}
/* Header cells: white on green, sticky at top: 0 with z-index 10 so they stack above row content. */
th {
background-color: #00684a;
color: white;
font-weight: 600;
position: sticky;
top: 0;
z-index: 10;
}
/* Row with a pointer cursor and hover highlight; presumably the clickable summary row of each result (rows are generated, confirm). */
.test-row {
cursor: pointer;
transition: background-color 0.2s;
}
.test-row:hover {
background-color: #f8f9fa;
}
/* Added by toggleDetails() to the row preceding an open details row. */
.expanded-row {
background-color: #f8f9fa;
}
/* Details rows are hidden until toggleDetails() adds the "visible" class. */
.details-row {
display: none;
background-color: #ffffff;
border-left: 4px solid #00684a;
}
.details-row.visible {
display: table-row;
}
/* Padded grey box; presumably wraps the content of a details row (confirm against generated rows). */
.details-content {
padding: 20px;
background: #f8f9fa;
border-radius: 6px;
margin: 10px 0;
}
.conversation-section {
margin-bottom: 20px;
}
.conversation-section h4 {
color: #00684a;
margin-bottom: 10px;
}
/*
 * Monospace text box: pre-wrap keeps the text's own line breaks while still
 * wrapping long lines; content taller than 400px scrolls vertically.
 */
.conversation-content {
background: white;
padding: 15px;
border-radius: 4px;
border: 1px solid #dee2e6;
white-space: pre-wrap;
font-family: "Monaco", "Menlo", monospace;
font-size: 12px;
max-height: 400px;
max-width: 1300px;
overflow-y: auto;
}
/* Capitalizes the first letter of each word of the status text. */
.run-status {
text-transform: capitalize;
}
/* Small bold pill; combined with one of the colour classes below. */
.chip {
padding: 2px 6px;
border-radius: 3px;
font-weight: bold;
}
/* Colour classes: green (.perfect), yellow (.good), red (.poor). */
/* NOTE(review): the accuracy thresholds that select each class are not defined in this file. */
.perfect {
background-color: #d4edda;
color: #155724;
}
.good {
background-color: #fff3cd;
color: #856404;
}
.poor {
background-color: #f8d7da;
color: #721c24;
}
/* Inline grey tag; the help cursor suggests extra detail on hover (presumably a title tooltip, confirm). */
.tool-call {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
margin: 0 2px 2px 0;
cursor: help;
display: inline-block;
word-break: break-word;
}
/* Light blue tag, also with a help cursor. */
.tokens-usage {
background: #e3f2fd;
padding: 2px 6px;
border-radius: 3px;
cursor: help;
}
/*
 * Column sizing classes for the results table. Each one sets a percentage
 * width plus a pixel minimum. These classes are not used in the static markup
 * of this template; presumably the generated rows apply them (confirm).
 */
/* Widest column (35%, min 350px): monospace text on a light grey background. */
.prompt-cell {
width: 35%;
min-width: 350px;
word-wrap: break-word;
font-family: "Monaco", "Menlo", monospace;
font-size: 12px;
background-color: #f8f9fa;
}
.model-cell {
width: 15%;
min-width: 180px;
word-wrap: break-word;
}
.tool-calls-cell {
width: 12%;
min-width: 120px;
word-wrap: break-word;
white-space: normal;
}
/* Narrow centered columns. */
.accuracy-cell {
width: 8%;
min-width: 80px;
text-align: center;
}
.baseline-accuracy-cell {
width: 8%;
min-width: 80px;
text-align: center;
}
/* Bold grey pill; the modifier classes below recolour it green, red or neutral grey. */
.accuracy-comparison {
background: #e9ecef;
padding: 2px 6px;
border-radius: 3px;
font-weight: bold;
}
.accuracy-improved {
background: #d4edda;
color: #155724;
}
.accuracy-regressed {
background: #f8d7da;
color: #721c24;
}
.accuracy-same {
background: #e2e3e5;
color: #495057;
}
.response-time-cell {
width: 10%;
min-width: 100px;
text-align: center;
}
.tokens-cell {
width: 10%;
min-width: 100px;
text-align: center;
}
/* Bold green marker; presumably styles the arrow element whose text toggleDetails() swaps (confirm against generated rows). */
.expand-indicator {
margin-right: 8px;
font-weight: bold;
color: #00684a;
}
/* Bold status text colours: green (done), red (failed), amber (in progress). */
.status-done {
color: #28a745;
font-weight: bold;
}
.status-failed {
color: #dc3545;
font-weight: bold;
}
.status-in-progress {
color: #ffc107;
font-weight: bold;
}
/*
 * Viewports 768px wide or narrower: tighter container and cell padding,
 * smaller table text, and both card grids collapse to a single column.
 */
@media (max-width: 768px) {
.container {
padding: 15px;
}
table {
font-size: 12px;
}
th,
td {
padding: 8px 4px;
}
.info-grid,
.stat-grid {
grid-template-columns: 1fr;
}
}
</style>
</head>
<body>
<div class="container">
<h1>📊 MongoDB MCP Server - Accuracy Test Summary</h1>
<!--
  Panel 1: metadata of the current accuracy run.
  Each info-value holds a double-brace template token; presumably the report
  generator substitutes it (confirm against the generator).
  This panel is the container's 2nd child (after the h1), so neither
  positional colour override applies and it keeps the default green accent.
-->
<div class="header-info">
<h2>📊 Current Run Information</h2>
<div class="info-grid">
<div class="info-item">
<div class="info-label">Commit SHA</div>
<div class="info-value">{{commitSHA}}</div>
</div>
<div class="info-item">
<div class="info-label">Accuracy Run ID</div>
<div class="info-value">{{accuracyRunId}}</div>
</div>
<div class="info-item">
<div class="info-label">Accuracy Run Status</div>
<div class="info-value">{{accuracyRunStatus}}</div>
</div>
<div class="info-item">
<div class="info-label">Run Created On</div>
<div class="info-value">{{createdOn}}</div>
</div>
<div class="info-item">
<div class="info-label">Report Generated On</div>
<div class="info-value">{{reportGeneratedOn}}</div>
</div>
</div>
</div>
<!--
  Panel 2: aggregate figures for this run.
  Each info-value holds a double-brace template token; presumably the report
  generator substitutes it (confirm against the generator).
  This panel is the container's 3rd child, so the stylesheet's
  .header-info:nth-child(3) rules give it the blue accent. Keep its position
  among the siblings or update those selectors.
-->
<div class="header-info">
<h2>📈 Test Results Summary</h2>
<div class="info-grid">
<div class="info-item">
<div class="info-label">Total Prompts Evaluated</div>
<div class="info-value">{{totalPrompts}}</div>
</div>
<div class="info-item">
<div class="info-label">Models Tested</div>
<div class="info-value">{{totalModels}}</div>
</div>
<div class="info-item">
<div class="info-label">Responses with 0% Accuracy</div>
<div class="info-value">{{responsesWithZeroAccuracy}}</div>
</div>
<div class="info-item">
<div class="info-label">Average Accuracy</div>
<div class="info-value">{{averageAccuracy}}</div>
</div>
</div>
</div>
<!--
  Panel 3: the baseline run and how this run compares with it.
  Each info-value holds a double-brace template token; presumably the report
  generator substitutes it (confirm against the generator).
  This panel is the container's 4th child, so the stylesheet's
  .header-info:nth-child(4) rules give it the lighter green accent. Keep its
  position among the siblings or update those selectors.
-->
<div class="header-info">
<h2>🔄 Baseline Comparison</h2>
<div class="info-grid">
<div class="info-item">
<div class="info-label">Baseline Commit SHA</div>
<div class="info-value">{{baselineCommitSHA}}</div>
</div>
<div class="info-item">
<div class="info-label">Baseline Accuracy Run ID</div>
<div class="info-value">{{baselineAccuracyRunId}}</div>
</div>
<div class="info-item">
<div class="info-label">Baseline Accuracy Run Status</div>
<div class="info-value">{{baselineAccuracyRunStatus}}</div>
</div>
<div class="info-item">
<div class="info-label">Baseline Run Created On</div>
<div class="info-value">{{baselineCreatedOn}}</div>
</div>
<div class="info-item">
<div class="info-label">Responses Improved vs Baseline</div>
<div class="info-value">{{responsesImproved}}</div>
</div>
<div class="info-item">
<div class="info-label">Responses Regressed vs Baseline</div>
<div class="info-value">{{responsesRegressed}}</div>
</div>
</div>
</div>
<!--
  Per-response results table.
  The body rows are injected at the tableRows template token below.
  toggleDetails() in this file looks up elements with the ids "details-N" and
  "indicator-N" and treats the element immediately before each details row as
  its summary row, so injected rows are expected to follow that layout.
  Header cells carry scope="col" so assistive technology associates every
  data cell with its column explicitly.
-->
<table>
  <thead>
    <tr>
      <th scope="col">Prompt</th>
      <th scope="col">Model</th>
      <th scope="col">Expected Tool Calls</th>
      <th scope="col">LLM Tool Calls</th>
      <th scope="col">Accuracy</th>
      <th scope="col">Baseline Accuracy</th>
      <th scope="col">LLM Response Time (ms)</th>
      <th scope="col">Total Tokens Used</th>
    </tr>
  </thead>
  <tbody>
    {{tableRows}}
  </tbody>
</table>
</div>
<script>
/**
 * Expands or collapses the details row that belongs to one result row.
 *
 * Looks up the elements with ids "details-<index>" and "indicator-<index>".
 * Toggling flips the "visible" class on the details row, sets the indicator
 * text to "▼" (expanded) or "▶" (collapsed), and adds or removes the
 * "expanded-row" class on the element immediately preceding the details row.
 *
 * Missing elements are tolerated: without a details row the call is a no-op,
 * and a missing indicator or preceding row is skipped instead of throwing.
 *
 * @param {number|string} index Suffix used to build both element ids.
 * @returns {void}
 */
function toggleDetails(index) {
  const detailsRow = document.getElementById("details-" + index);
  if (!detailsRow) {
    // Nothing to show or hide for this index.
    return;
  }

  const indicator = document.getElementById("indicator-" + index);
  const testRow = detailsRow.previousElementSibling;

  // toggle() returns true when the class is present after the call.
  const isExpanded = detailsRow.classList.toggle("visible");

  if (indicator) {
    indicator.textContent = isExpanded ? "▼" : "▶";
  }
  if (testRow) {
    // Force the summary row's highlight to match the details row's state.
    testRow.classList.toggle("expanded-row", isExpanded);
  }
}
</script>
</body>
</html>