Backlog MCP Server

Overview Schema Related Servers Score Discussions

search-golden.test.ts•18 KiB

/**
 * Golden Search Benchmark Tests
 *
 * These tests document real-world search behavior, exposing:
 * - What WORKS (strengths)
 * - What DOESN'T work (limitations)
 * - Expected ranking behavior
 *
 * When search behavior changes, these tests reveal the impact.
 * Failing tests should prompt discussion: is this a regression or improvement?
 */
import { describe, it, expect, beforeAll } from 'vitest';
import { join } from 'node:path';
import { OramaSearchService } from '../search/orama-search-service.js';
import type { Entity } from '@backlog-mcp/shared';

/** Location of the on-disk search cache handed to the service under test. */
const TEST_CACHE_PATH = join(process.cwd(), 'test-data', '.cache', 'search-golden.json');

/**
 * Builds a fixture entity from a minimal set of fields.
 *
 * Defaults `status` to `'open'` and stamps `created_at` / `updated_at` with the
 * current time; any field present in `overrides` wins over those defaults.
 *
 * @param overrides - Entity fields to set; `id` and `title` are mandatory.
 * @returns The fixture entity, suitable for inclusion in `GOLDEN_TASKS`.
 */
function makeTask(overrides: Partial<Entity> & { id: string; title: string }): Entity {
  // One timestamp for both fields so a fresh fixture never looks "already updated".
  const now = new Date().toISOString();
  return {
    status: 'open',
    created_at: now,
    updated_at: now,
    ...overrides,
  };
}

/**
 * Realistic task dataset simulating actual backlog content
 */
const GOLDEN_TASKS: Entity[] = [
  // Epics
  makeTask({
    id: 'EPIC-0001',
    title: 'backlog-mcp 10x',
    description: 'Transform backlog-mcp from task tracker to agentic work system with keyboard-first UX',
    type: 'epic',
  }),
  makeTask({
    id: 'EPIC-0002',
    title: 'Search & Discovery',
    description: 'Comprehensive search with RAG-ready architecture',
    type: 'epic',
  }),

  // Tasks with various content patterns
  makeTask({
    id: 'TASK-0001',
    title: 'Implement Spotlight-style search UI',
    description: 'Global search modal triggered by Cmd+J with keyboard-first navigation',
    epic_id: 'EPIC-0001',
    references: [
      { url: 'https://docs.orama.com', title: 'Orama documentation' },
      { url: 'file:///path/to/adr-0038.md', title: 'ADR-0038: Search Architecture' },
    ],
  }),
  makeTask({
    id: 'TASK-0002',
    title: 'Fix authentication bug',
    description: 'Users cannot log in with SSO when MFA is enabled',
    status: 'in_progress',
  }),
  makeTask({
    id: 'TASK-0003',
    title: 'Add keyboard shortcuts',
    description: 'Implement Cmd+K for command palette, Cmd+J for search',
    epic_id: 'EPIC-0001',
  }),
  makeTask({
    id: 'TASK-0004',
    title: 'Database schema migration',
    description: 'Migrate from SQLite to PostgreSQL for better concurrency',
    status: 'blocked',
    blocked_reason: ['Waiting for DBA approval', 'Need production backup first'],
  }),
  makeTask({
    id: 'TASK-0005',
    title: 'SearchService abstraction layer',
    description: 'Create pluggable search backend interface for Orama, future RAG',
    epic_id: 'EPIC-0002',
    evidence: ['Implemented SearchService interface', 'Added OramaSearchService'],
  }),
  makeTask({
    id: 'TASK-0006',
    title: 'Fix first-time user onboarding',
    description: 'New users see blank screen on first load',
    status: 'done',
    evidence: ['Fixed in PR #42', 'Added loading state'],
  }),
  makeTask({
    id: 'TASK-0007',
    title: 'API rate limiting',
    description: 'Implement rate-limiting middleware for REST endpoints',
  }),
  makeTask({
    id: 'TASK-0008',
    title: 'Real-time collaboration',
    description: 'WebSocket-based real-time updates for multi-user editing',
  }),

  // CamelCase compound word task (mirrors real TASK-0273 pattern)
  makeTask({
    id: 'TASK-0009',
    title: 'Create YavapaiMFE ownership transfer documentation',
    description: 'Create comprehensive starter doc for new team taking ownership of FeatureStore (YavapaiMFE).\n\nMFE ID: `featurestore`\nFeature flag: `featureStore`\nMain package: RhinestoneMonarchYavapaiMFE',
    status: 'done',
  }),
];

describe('Search Golden Benchmark', () => {
  let service: OramaSearchService;

  // Index the golden dataset once; every test below only reads from the index.
  beforeAll(async () => {
    service = new OramaSearchService({ cachePath: TEST_CACHE_PATH });
    await service.index(GOLDEN_TASKS);
  });

  /**
   * ===========================================
   * STRENGTHS: What search does well
   * ===========================================
   */
  describe('✅ Strengths', () => {
    describe('exact matches', () => {
      it('finds exact title match', async () => {
        const results = await service.search('Spotlight');
        expect(results[0].task.id).toBe('TASK-0001');
      });

      it('finds exact word in description', async () => {
        const results = await service.search('PostgreSQL');
        expect(results[0].task.id).toBe('TASK-0004');
      });

      it('finds task by full ID', async () => {
        const results = await service.search('TASK-0001');
        expect(results[0].task.id).toBe('TASK-0001');
      });
    });

    describe('fuzzy matching (typo tolerance)', () => {
      it('handles common typo in longer word', async () => {
        const results = await service.search('authentcation'); // missing 'i'
        expect(results.some(r => r.task.id === 'TASK-0002')).toBe(true);
      });

      it('handles missing character', async () => {
        const results = await service.search('databse'); // missing 'a'
        expect(results.some(r => r.task.id === 'TASK-0004')).toBe(true);
      });
    });

    describe('multi-word queries', () => {
      it('finds documents matching multiple words', async () => {
        const results = await service.search('keyboard shortcuts');
        expect(results[0].task.id).toBe('TASK-0003');
      });

      it('finds documents matching any word (OR behavior)', async () => {
        const results = await service.search('WebSocket migration');
        const ids = results.map(r => r.task.id);
        expect(ids).toContain('TASK-0008');
        expect(ids).toContain('TASK-0004');
      });
    });

    describe('field searching', () => {
      it('searches blocked_reason field', async () => {
        const results = await service.search('DBA approval');
        expect(results[0].task.id).toBe('TASK-0004');
      });

      it('searches evidence field', async () => {
        const results = await service.search('OramaSearchService');
        expect(results[0].task.id).toBe('TASK-0005');
      });

      it('searches reference URLs', async () => {
        const results = await service.search('docs.orama.com');
        expect(results.some(r => r.task.id === 'TASK-0001')).toBe(true);
      });

      it('searches epic_id field', async () => {
        const results = await service.search('EPIC-0001');
        const ids = results.map(r => r.task.id);
        // Should find tasks with this epic_id
        expect(ids).toContain('TASK-0001');
        expect(ids).toContain('TASK-0003');
      });
    });

    describe('case insensitivity', () => {
      it('matches regardless of case', async () => {
        const lower = await service.search('spotlight');
        const upper = await service.search('SPOTLIGHT');
        const mixed = await service.search('SpOtLiGhT');
        expect(lower[0].task.id).toBe(upper[0].task.id);
        expect(lower[0].task.id).toBe(mixed[0].task.id);
      });
    });

    describe('camelCase compound words', () => {
      it('"feature store" finds task with "FeatureStore" in description', async () => {
        const results = await service.search('feature store');
        expect(results.some(r => r.task.id === 'TASK-0009')).toBe(true);
      });

      it('"featurestore" (no space) still finds the task', async () => {
        const results = await service.search('featurestore');
        expect(results.some(r => r.task.id === 'TASK-0009')).toBe(true);
      });

      it('"feature store mfe" finds the task', async () => {
        const results = await service.search('feature store mfe');
        expect(results.some(r => r.task.id === 'TASK-0009')).toBe(true);
      });

      it('PascalCase in title: "YavapaiMFE" splits into searchable parts', async () => {
        const results = await service.search('Yavapai');
        expect(results.some(r => r.task.id === 'TASK-0009')).toBe(true);
      });
    });
  });

  /**
   * ===========================================
   * LIMITATIONS: Formerly-known issues
   *
   * The section name is historical. Every test below is a plain positive
   * assertion (none is marked with `.fails`), so these act as regression
   * guards for behavior that now works.
   *
   * If a new limitation is recorded here with `it.fails`, that test PASSES
   * while the limitation exists and FAILS once it is fixed (prompting
   * removal of `.fails`).
   * ===========================================
   */
  describe('⚠️ Known Limitations', () => {
    describe('hyphenated words', () => {
      it('"first" matches "keyboard-first"', async () => {
        // Custom tokenizer expands hyphenated words: "keyboard-first" → ["keyboard-first", "keyboard", "first"]
        const results = await service.search('first');
        expect(results.some(r => r.task.id === 'TASK-0001')).toBe(true);
      });

      it('full hyphenated term matches', async () => {
        const results = await service.search('keyboard-first');
        expect(results.some(r => r.task.id === 'TASK-0001')).toBe(true);
      });

      it('first word of hyphenated term matches', async () => {
        const results = await service.search('keyboard');
        expect(results.some(r => r.task.id === 'TASK-0001')).toBe(true);
      });
    });

    describe('numeric-only queries', () => {
      it('numeric query "0001" finds TASK-0001', async () => {
        // Custom tokenizer splits "TASK-0001" → ["task-0001", "task", "0001"]
        const results = await service.search('0001');
        expect(results.length).toBeGreaterThan(0);
      });
    });

    describe('short word fuzzy matching', () => {
      it('typo in short word still matches', async () => {
        // "Spotlght" (8 chars, missing 1) matches "Spotlight" (9 chars) with tolerance=1
        const results = await service.search('Spotlght');
        expect(results.some(r => r.task.id === 'TASK-0001')).toBe(true);
      });
    });
  });

  /**
   * ===========================================
   * EDGE CASES: Boundary behavior
   * ===========================================
   */
  describe('🔬 Edge Cases', () => {
    it('handles empty query', async () => {
      const results = await service.search('');
      expect(results).toEqual([]);
    });

    it('handles whitespace-only query', async () => {
      const results = await service.search(' \t\n ');
      expect(results).toEqual([]);
    });

    it('handles special characters in content', async () => {
      const results = await service.search('Cmd');
      expect(results.length).toBeGreaterThan(0);
    });

    it('handles very long query gracefully', async () => {
      const longQuery = 'implement search feature with keyboard navigation and spotlight style modal';
      const results = await service.search(longQuery);
      // Should not crash, may or may not find results
      expect(Array.isArray(results)).toBe(true);
    });

    it('handles query with no matches', async () => {
      const results = await service.search('xyznonexistent123');
      expect(results).toEqual([]);
    });

    it('handles single character query', async () => {
      const results = await service.search('a');
      // May or may not return results, but should not crash
      expect(Array.isArray(results)).toBe(true);
    });
  });

  /**
   * ===========================================
   * FILTER COMBINATIONS: Search + Filters
   * ===========================================
   */
  describe('🔍 Search + Filters', () => {
    it('search + status filter', async () => {
      const results = await service.search('fix', { filters: { status: ['in_progress'] } });
      expect(results.every(r => r.task.status === 'in_progress')).toBe(true);
    });

    it('search + type filter for epics', async () => {
      const results = await service.search('backlog', { filters: { type: 'epic' } });
      expect(results.every(r => r.task.type === 'epic')).toBe(true);
    });

    it('search + epic_id filter', async () => {
      const results = await service.search('keyboard', { filters: { epic_id: 'EPIC-0001' } });
      expect(results.every(r => r.task.epic_id === 'EPIC-0001')).toBe(true);
    });

    it('search + multiple status filter', async () => {
      const results = await service.search('user', { filters: { status: ['open', 'in_progress'] } });
      expect(results.every(r => ['open', 'in_progress'].includes(r.task.status))).toBe(true);
    });

    it('search + limit', async () => {
      const results = await service.search('task', { limit: 3 });
      expect(results.length).toBeLessThanOrEqual(3);
    });

    it('filter with no search matches returns empty', async () => {
      const results = await service.search('nonexistent', { filters: { status: ['open'] } });
      expect(results).toEqual([]);
    });
  });

  /**
   * ===========================================
   * RANKING: Position-aware assertions (ADR-0081)
   *
   * These test actual ranking order, not just presence.
   * With linear fusion, scores are [0,1]. Tests assert
   * positions and relative ordering — the things that
   * matter for search quality.
   * ===========================================
   */
  describe('📊 Ranking (ADR-0081)', () => {
    // ── Structural invariants ──────────────────────────────────

    it('scores are in [0,1] range (linear fusion property)', async () => {
      for (const q of ['search', 'keyboard', 'fix', 'backlog']) {
        const results = await service.search(q);
        for (const r of results) {
          expect(r.score).toBeGreaterThanOrEqual(0);
          expect(r.score).toBeLessThanOrEqual(1.0);
        }
      }
    });

    it('scores are in descending order', async () => {
      const results = await service.search('search');
      for (let i = 1; i < results.length; i++) {
        expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
      }
    });

    // ── Position assertions: exact title matches rank #1 ──────

    it('"feature store" → TASK-0009 (FeatureStore) ranks #1', async () => {
      // THE critical test. This is the failure that motivated TASK-0302.
      // Previously ranked 18th due to shadow scoring double-boosting title matches.
      const results = await service.search('feature store');
      expect(results[0].task.id).toBe('TASK-0009');
    });

    it('"keyboard shortcuts" → TASK-0003 ranks #1', async () => {
      const results = await service.search('keyboard shortcuts');
      expect(results[0].task.id).toBe('TASK-0003');
    });

    it('"Spotlight search UI" → TASK-0001 ranks #1', async () => {
      const results = await service.search('Spotlight search UI');
      expect(results[0].task.id).toBe('TASK-0001');
    });

    it('"database migration" → TASK-0004 ranks #1', async () => {
      const results = await service.search('database migration');
      expect(results[0].task.id).toBe('TASK-0004');
    });

    it('"SearchService abstraction layer" → TASK-0005 ranks #1', async () => {
      const results = await service.search('SearchService abstraction layer');
      expect(results[0].task.id).toBe('TASK-0005');
    });

    it('"authentication" → TASK-0002 ranks #1', async () => {
      const results = await service.search('authentication');
      expect(results[0].task.id).toBe('TASK-0002');
    });

    it('"backlog" → EPIC-0001 ranks #1', async () => {
      const results = await service.search('backlog');
      expect(results[0].task.id).toBe('EPIC-0001');
    });

    it('"backlog mcp" → EPIC-0001 ranks #1 (title match beats body-only mentions)', async () => {
      // Both terms appear in EPIC-0001's title "backlog-mcp 10x".
      // Other tasks may mention "backlog" and "mcp" in description/references
      // but title coordination should push the exact title match to #1.
      const results = await service.search('backlog mcp');
      expect(results[0].task.id).toBe('EPIC-0001');
    });

    it('"DBA approval" → TASK-0004 ranks #1', async () => {
      const results = await service.search('DBA approval');
      expect(results[0].task.id).toBe('TASK-0004');
    });

    // ── Relative ordering assertions ──────────────────────────

    it('"search" → EPIC-0002 ranks above TASK-0005', async () => {
      // Both have "search" in title. EPIC-0002 has shorter title → higher BM25 term density.
      const results = await service.search('search');
      const epicIdx = results.findIndex(r => r.task.id === 'EPIC-0002');
      const taskIdx = results.findIndex(r => r.task.id === 'TASK-0005');
      expect(epicIdx).toBeGreaterThanOrEqual(0);
      expect(taskIdx).toBeGreaterThanOrEqual(0);
      expect(epicIdx).toBeLessThan(taskIdx);
    });

    it('"search" → title matches rank above description-only matches', async () => {
      // TASK-0001 has "search" in title. TASK-0003 has "search" only in description.
      const results = await service.search('search');
      const titleMatch = results.findIndex(r => r.task.id === 'TASK-0001');
      const descMatch = results.findIndex(r => r.task.id === 'TASK-0003');
      // NOTE(review): the ordering is only checked when both tasks are returned;
      // if either is absent this test passes without asserting anything.
      if (titleMatch >= 0 && descMatch >= 0) {
        expect(titleMatch).toBeLessThan(descMatch);
      }
    });

    it('"Spotlight search" → multi-field match ranks above single-field match', async () => {
      // TASK-0001 has "Spotlight" in title AND "search" in title → both terms match
      // EPIC-0002 has "Search" in title but not "Spotlight"
      const results = await service.search('Spotlight search');
      expect(results[0].task.id).toBe('TASK-0001');
    });

    // ── Top-N assertions (looser, for queries with ambiguous ranking) ──

    it('"feature store" → TASK-0009 in top 1 (not buried at 18th)', async () => {
      // Regression guard: the original bug had TASK-0009 at position 18.
      const results = await service.search('feature store');
      const idx = results.findIndex(r => r.task.id === 'TASK-0009');
      expect(idx).toBe(0);
    });

    it('"keyboard" → TASK-0003 in top 2', async () => {
      // TASK-0003 "Add keyboard shortcuts" has "keyboard" in title
      const results = await service.search('keyboard');
      const idx = results.findIndex(r => r.task.id === 'TASK-0003');
      // findIndex yields -1 when the task is missing, and -1 < 2 would pass
      // vacuously, so require presence before checking the position.
      expect(idx).toBeGreaterThanOrEqual(0);
      expect(idx).toBeLessThan(2);
    });

    it('"fix" → both fix tasks in top 3', async () => {
      const results = await service.search('fix');
      const ids = results.slice(0, 3).map(r => r.task.id);
      expect(ids).toContain('TASK-0002'); // "Fix authentication bug"
      expect(ids).toContain('TASK-0006'); // "Fix first-time user onboarding"
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gkoreli/backlog-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

search-golden.test.ts•18 KiB