Vector Memory MCP Server

squad.ts•4.91 KiB

/**
 * SQuAD Data Source
 *
 * Stanford Question Answering Dataset - Wikipedia passages with Q&A pairs.
 * Fetches from HuggingFace Datasets API.
 *
 * @see https://huggingface.co/datasets/rajpurkar/squad
 * @license CC BY-SA 4.0
 */

import type { BenchmarkDataset, GroundTruthMemory, GroundTruthQuery } from "../../types";
import { Sampler } from "../sampler";
import type { DataSource, FetchOptions, RawSample, ConvertOptions } from "../types";
import { registerSource } from "../index";

const HUGGINGFACE_API = "https://datasets-server.huggingface.co";
const DATASET_NAME = "rajpurkar/squad";
const CONFIG = "plain_text";

/** SQuAD training set size; random window offsets are drawn below this bound. */
const TRAIN_SPLIT_ROWS = 87599;

/** Upper bound on the total number of rows requested by a single `fetch()` call. */
const MAX_ROWS_PER_FETCH = 500;

/** Number of random windows the row budget is spread across. */
const WINDOW_COUNT = 5;

/**
 * SQuAD row structure from HuggingFace API.
 */
interface SquadRow {
  row_idx: number;
  row: {
    id: string;
    title: string;
    context: string;
    question: string;
    answers: {
      text: string[];
      answer_start: number[];
    };
  };
}

/**
 * SQuAD API response structure.
 */
interface SquadApiResponse {
  features: Array<{ feature_idx: number; name: string; type: { dtype: string } }>;
  rows: SquadRow[];
  num_rows_total: number;
  num_rows_per_page: number;
}

/**
 * SQuAD data source implementation.
 */
export class SquadSource implements DataSource {
  readonly name = "squad";
  readonly description = "Stanford Question Answering Dataset - Wikipedia passages with Q&A";
  readonly category = "factual" as const;
  readonly license = "CC BY-SA 4.0";

  /**
   * Fetch samples from SQuAD dataset.
   *
   * Requests `WINDOW_COUNT` windows of rows at seeded random offsets, groups
   * the rows by passage, and samples up to `limit` distinct passages. Each
   * returned sample carries every fetched question that belongs to its passage.
   *
   * @param options - `limit` is the number of passages wanted; `seed`
   *   (default 42) drives both the window offsets and the final sampling.
   * @returns One raw sample per sampled passage; empty when `limit` is not a
   *   positive number.
   * @throws Error when none of the window requests yields any row.
   */
  async fetch(options: FetchOptions): Promise<RawSample[]> {
    const { limit, seed = 42 } = options;

    // Nothing was asked for (or limit is NaN): skip the network entirely
    // instead of issuing zero-length requests and failing afterwards.
    if (!(limit > 0)) {
      return [];
    }

    const sampler = new Sampler(seed);

    // Fetch more rows than needed to allow for sampling; several questions
    // usually share one passage, so rows outnumber unique contexts.
    const fetchLimit = Math.min(limit * 3, MAX_ROWS_PER_FETCH);
    const windowLength = Math.ceil(fetchLimit / WINDOW_COUNT);

    // Random offsets so the windows come from different parts of the dataset.
    const offsets = Array.from({ length: WINDOW_COUNT }, () =>
      sampler.nextInt(TRAIN_SPLIT_ROWS - fetchLimit),
    );

    // The windows are independent, so request them concurrently. Promise.all
    // keeps results in offset order, which keeps seeded runs reproducible.
    const pages = await Promise.all(
      offsets.map((offset) => this.fetchWindow(offset, windowLength)),
    );

    // Offsets are drawn independently and may overlap; drop rows that were
    // already seen so a question is never attached to its passage twice.
    const seenRowIndexes = new Set<number>();
    const allRows: SquadRow[] = [];
    for (const page of pages) {
      for (const row of page) {
        if (seenRowIndexes.has(row.row_idx)) {
          continue;
        }
        seenRowIndexes.add(row.row_idx);
        allRows.push(row);
      }
    }

    if (allRows.length === 0) {
      throw new Error("Failed to fetch any rows from SQuAD dataset");
    }

    // Group by context to avoid duplicate passages
    const contextMap = new Map<string, SquadRow[]>();
    for (const row of allRows) {
      const context = row.row.context;
      const group = contextMap.get(context);
      if (group) {
        group.push(row);
      } else {
        contextMap.set(context, [row]);
      }
    }

    // Sample unique contexts
    const uniqueContexts = Array.from(contextMap.entries());
    const sampledContexts = sampler.sample(uniqueContexts, limit);

    // Convert to RawSample format
    return sampledContexts.map(([context, rows]) => {
      // Every group holds at least one row; the first one names the sample.
      const firstRow = rows[0].row;
      return {
        id: `squad-${firstRow.id}`,
        content: context,
        metadata: {
          title: firstRow.title,
          source: "squad",
        },
        queries: rows.map((r) => ({
          query: r.row.question,
          answer: r.row.answers.text[0],
          relevance: "high" as const,
        })),
      };
    });
  }

  /**
   * Request one window of rows from the HuggingFace rows endpoint.
   *
   * Best-effort: an HTTP error status, a payload without a `rows` array, or a
   * thrown network/parse error is logged with `console.warn` and reported as
   * an empty window so the remaining windows can still be used.
   *
   * @param offset - Index of the first row of the window.
   * @param length - Number of rows to request.
   * @returns The rows of the window, or an empty array on any failure.
   */
  private async fetchWindow(offset: number, length: number): Promise<SquadRow[]> {
    try {
      const url = `${HUGGINGFACE_API}/rows?dataset=${DATASET_NAME}&config=${CONFIG}&split=train&offset=${offset}&length=${length}`;
      const response = await fetch(url);

      if (!response.ok) {
        console.warn(`SQuAD fetch failed at offset ${offset}: ${response.status}`);
        return [];
      }

      const data: SquadApiResponse = await response.json();
      if (!Array.isArray(data.rows)) {
        console.warn(`SQuAD fetch returned no rows array at offset ${offset}`);
        return [];
      }
      return data.rows;
    } catch (error) {
      console.warn(`SQuAD fetch error at offset ${offset}:`, error);
      return [];
    }
  }

  /**
   * Convert raw samples to benchmark dataset format.
   *
   * Each sample becomes one memory; each of its Q&A pairs becomes one query
   * whose only relevant memory is that sample's memory.
   *
   * @param samples - Raw samples, e.g. as produced by `fetch()`.
   * @param options - `idPrefix` (default "squad") is prepended to every
   *   memory id and used to name the dataset.
   * @returns Dataset with the collected memories and queries.
   */
  toDataset(samples: RawSample[], options: ConvertOptions = {}): BenchmarkDataset {
    const { idPrefix = "squad" } = options;

    const memories: GroundTruthMemory[] = [];
    const queries: GroundTruthQuery[] = [];

    for (const sample of samples) {
      const memoryId = `${idPrefix}-${sample.id}`;

      // Create memory from context
      memories.push({
        id: memoryId,
        content: sample.content,
        metadata: sample.metadata,
        domain: "factual",
      });

      // Create queries from Q&A pairs
      if (sample.queries) {
        for (let i = 0; i < sample.queries.length; i++) {
          const q = sample.queries[i];
          queries.push({
            id: `${memoryId}-q${i}`,
            query: q.query,
            relevantMemoryIds: [memoryId],
            partiallyRelevantIds: [],
            category: "exact_match", // SQuAD questions are designed to be answerable
          });
        }
      }
    }

    return {
      name: `${idPrefix}-dataset`,
      description: `SQuAD dataset samples (${memories.length} passages, ${queries.length} questions)`,
      memories,
      queries,
    };
  }
}

// Create and register the source
export const squadSource = new SquadSource();
registerSource(squadSource);

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AerionDyseti/vector-memory-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

squad.ts•4.91 KiB