NGSS MCP Server

NGSS-MCP
src
extraction

topic-extractor.ts•3.46 KiB

/** * Topic Extractor - Detect topic boundaries and page ranges */ import type { TopicPageRange } from '../types/ngss.js'; import { PDFReader, parsePageContent } from './pdf-reader.js'; import { PatternExtractor } from './pattern-extractor.js'; export class TopicExtractor { private pdfReader: PDFReader; private patternExtractor: PatternExtractor; constructor() { this.pdfReader = new PDFReader(); this.patternExtractor = new PatternExtractor(); } async extractTopicPages( pdfPath: string, topicPattern: string ): Promise<TopicPageRange | null> { // Get all content to scan for topics const content = await this.pdfReader.extractAll(pdfPath); const pages = parsePageContent(content, 'all'); // Find pages matching topic pattern const matchingPages: number[] = []; for (const page of pages) { if (this.matchesTopic(page.content, topicPattern)) { matchingPages.push(page.pageNumber); } } if (matchingPages.length === 0) { return null; } // Determine page range const start_page = Math.min(...matchingPages); const end_page = Math.max(...matchingPages); // Extract standard codes from this range const codes = await this.patternExtractor.extractStandardCodes( pdfPath, `${start_page}-${end_page}` ); return { topic: topicPattern, start_page, end_page, standard_codes: codes.map((c) => c.code) }; } private matchesTopic(content: string, pattern: string): boolean { // Case-insensitive fuzzy matching const normalizedContent = content.toLowerCase(); const normalizedPattern = pattern.toLowerCase(); // Direct match if (normalizedContent.includes(normalizedPattern)) { return true; } // Header pattern: "MS.Topic" const headerRegex = new RegExp(`MS\\.${pattern}`, 'i'); if (headerRegex.test(content)) { return true; } // Word-based similarity (simple) const patternWords = normalizedPattern.split(/\s+/); const matchCount = patternWords.filter((word) => normalizedContent.includes(word) ).length; // At least 70% of pattern words must match return matchCount / patternWords.length >= 0.7; } async listAllTopics(pdfPath: string): Promise<TopicPageRange[]> { const content = await this.pdfReader.extractAll(pdfPath); const pages = parsePageContent(content, 'all'); const topics = new Map<string, number[]>(); for (const page of pages) { // Look for topic headers like "MS.Chemical Reactions" // Fixed regex to capture full topic name including capital letters const headerMatch = page.content.match(/MS\.([A-Z][A-Za-z\s&-]+)/); if (headerMatch && headerMatch[1]) { const topic = headerMatch[1].trim(); const existing = topics.get(topic) || []; existing.push(page.pageNumber); topics.set(topic, existing); } } // Convert to TopicPageRange array const results: TopicPageRange[] = []; for (const [topic, pages] of topics.entries()) { const start_page = Math.min(...pages); const end_page = Math.max(...pages); const codes = await this.patternExtractor.extractStandardCodes( pdfPath, `${start_page}-${end_page}` ); results.push({ topic, start_page, end_page, standard_codes: codes.map((c) => c.code) }); } return results.sort((a, b) => a.start_page - b.start_page); } }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Sallvainian/NGSS-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

topic-extractor.ts•3.46 KiB