NGSS MCP Server

NGSS-MCP
src
extraction

structured-extractor.ts•5.47 KiB

/**
 * Structured Extractor - Parse full NGSS standards with automatic section detection
 */

import type { Standard, SEP, DCI, CCC, LessonScope } from '../types/ngss.js';
import { PDFReader, parsePageContent } from './pdf-reader.js';
import { PatternExtractor } from './pattern-extractor.js';

/**
 * Maps the alphabetic prefix of a standard's domain segment (e.g. `PS` from
 * `MS-PS1-1`) to a human-readable domain name.
 *
 * A `Map` is used instead of a plain object so that prefixes such as
 * `constructor` cannot resolve to inherited `Object.prototype` members.
 *
 * NOTE(review): prefixes not listed here (for example `ETS`) resolve to
 * `'Unknown'` — confirm whether that is intended.
 */
const DOMAIN_NAMES: ReadonlyMap<string, string> = new Map([
  ['LS', 'Life Science'],
  ['PS', 'Physical Science'],
  ['ESS', 'Earth and Space Science'],
]);

/** Words that are never reported as keywords. */
const KEYWORD_STOP_WORDS: ReadonlySet<string> = new Set([
  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
]);

/** Maximum number of keywords returned by `extractKeywords`. */
const MAX_KEYWORDS = 8;

/** Number of leading characters of a section used as its description. */
const DESCRIPTION_PREVIEW_LENGTH = 200;

/** Trims `text` and collapses every run of whitespace (including newlines) to one space. */
function collapseWhitespace(text: string): string {
  return text.trim().replace(/\s+/g, ' ');
}

/** Escapes regular-expression metacharacters so `literal` can be embedded in a pattern verbatim. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Returns the whitespace-normalised first `DESCRIPTION_PREVIEW_LENGTH` characters of a section. */
function describeSection(sectionText: string): string {
  return collapseWhitespace(sectionText.slice(0, DESCRIPTION_PREVIEW_LENGTH));
}

/**
 * Extracts a single NGSS standard from a PDF and parses its page text into a
 * structured `Standard` (performance expectation, SEP, DCI, CCC, topic,
 * keywords and lesson scope).
 */
export class StructuredExtractor {
  private readonly pdfReader: PDFReader;
  private readonly patternExtractor: PatternExtractor;

  constructor() {
    this.pdfReader = new PDFReader();
    this.patternExtractor = new PatternExtractor();
  }

  /**
   * Locates `standardCode` among the codes found in the PDF and parses the
   * page it appears on.
   *
   * @param pdfPath - Path of the PDF passed through to the pattern extractor and PDF reader.
   * @param standardCode - Standard code to look up; compared with `===` against extracted codes.
   * @returns The parsed standard, or `null` when the code is not among the extracted codes.
   *   When the page yields no content, a `Standard` built from an empty string
   *   (placeholder values throughout) is returned rather than `null`.
   */
  async extractStructuredStandard(
    pdfPath: string,
    standardCode: string
  ): Promise<Standard | null> {
    // Find the page containing this standard
    const allCodes = await this.patternExtractor.extractStandardCodes(pdfPath);
    const targetCode = allCodes.find((c) => c.code === standardCode);

    if (!targetCode) {
      return null;
    }

    // Extract that page's content
    const pageRange = targetCode.page.toString();
    const pageContent = await this.pdfReader.extractPages(pdfPath, pageRange);
    const pages = parsePageContent(pageContent, pageRange);
    const content = pages[0]?.content ?? '';

    // Parse the standard
    return this.parseStandardFromContent(content, standardCode);
  }

  /**
   * Builds a `Standard` from the raw text of the page on which the standard appears.
   *
   * @param content - Page text to parse.
   * @param standardCode - Code of the standard, e.g. `MS-PS1-1`.
   * @returns A fully populated `Standard`; fields that cannot be found fall back
   *   to empty strings or the placeholder values of the individual parsers.
   */
  private parseStandardFromContent(
    content: string,
    standardCode: string
  ): Standard {
    // Grade level and domain are the first two dash-separated segments of the code
    const [gradeLevel, domainCode] = standardCode.split('-');

    // Strip the trailing digits ("PS1" -> "PS") before looking up the domain name
    const domainPrefix = domainCode?.replace(/\d+$/, '') ?? '';
    const domain = DOMAIN_NAMES.get(domainPrefix) ?? 'Unknown';

    // Performance expectation: text following the code, up to the first "[".
    // The code is escaped so that any regex metacharacter in it is matched literally.
    const pePattern = new RegExp(`${escapeRegExp(standardCode)}\\.?\\s+([^\\[]+)`);
    const peMatch = content.match(pePattern);
    const performance_expectation = peMatch?.[1] ? collapseWhitespace(peMatch[1]) : '';

    // Parse 3D components
    const sep = this.parseSEP(content);
    const dci = this.parseDCI(content);
    const ccc = this.parseCCC(content);

    // Topic: the remainder of the first line that starts with "MS." followed by
    // a capital letter. Taking the rest of the line (instead of a restricted
    // character class containing \s) keeps commas and apostrophes in topic
    // names and prevents the capture from spilling into the following lines.
    const topicMatch = content.match(/^MS\.([A-Z][^\r\n]*)/m);
    const topic = topicMatch?.[1] ? collapseWhitespace(topicMatch[1]) : '';

    // Extract keywords
    const keywords = this.extractKeywords(performance_expectation, topic);

    // Create lesson scope
    const lesson_scope = this.createLessonScope(content, performance_expectation);

    return {
      code: standardCode,
      // `||` (not `??`) so that an empty first segment also falls back to 'MS'
      grade_level: gradeLevel || 'MS',
      domain,
      topic,
      performance_expectation,
      sep,
      dci,
      ccc,
      keywords,
      lesson_scope
    };
  }

  /**
   * Parses the "Science and Engineering Practices" section, which runs up to
   * "Disciplinary Core Ideas" or the end of the text.
   *
   * The practice name is the text between the first "▪" bullet and the next "(".
   * The code is always the placeholder `'SEP-1'`.
   */
  private parseSEP(content: string): SEP {
    const sepSection = content.match(
      /Science and Engineering Practices[\s\S]*?(?=Disciplinary Core Ideas|$)/i
    );

    if (!sepSection) {
      return { code: 'SEP-1', name: 'Unknown', description: '' };
    }

    const text = sepSection[0];
    const practiceMatch = text.match(/▪\s+([^(]+)\(/);
    // Clean newlines and extra whitespace from the name
    const name = practiceMatch?.[1]
      ? collapseWhitespace(practiceMatch[1])
      : 'Unknown Practice';

    return { code: 'SEP-1', name, description: describeSection(text) };
  }

  /**
   * Parses the "Disciplinary Core Ideas" section, which runs up to
   * "Crosscutting Concepts" or the end of the text.
   *
   * Code and name come from the first `XX1.A: Name` style entry; when none is
   * found the placeholder code `'PS1.A'` and name `'Unknown'` are used.
   */
  private parseDCI(content: string): DCI {
    const dciSection = content.match(
      /Disciplinary Core Ideas[\s\S]*?(?=Crosscutting Concepts|$)/i
    );

    if (!dciSection) {
      return { code: 'PS1.A', name: 'Unknown', description: '' };
    }

    const text = dciSection[0];
    const description = describeSection(text);
    const codeMatch = text.match(/([A-Z]{2,3}\d+\.[A-Z]):\s+([^\n]+)/);

    if (codeMatch && codeMatch[1] && codeMatch[2]) {
      return { code: codeMatch[1], name: codeMatch[2].trim(), description };
    }

    return { code: 'PS1.A', name: 'Unknown', description };
  }

  /**
   * Parses the "Crosscutting Concepts" section, which runs up to "Connections"
   * or the end of the text.
   *
   * The concept name is the text from the first "▪" bullet through the first
   * period, possibly spanning several lines. The code is always the
   * placeholder `'CCC-1'`.
   */
  private parseCCC(content: string): CCC {
    const cccSection = content.match(
      /Crosscutting Concepts[\s\S]*?(?=Connections|$)/i
    );

    if (!cccSection) {
      return { code: 'CCC-1', name: 'Unknown', description: '' };
    }

    const text = cccSection[0];
    // Capture across multiple lines until the first period
    const conceptMatch = text.match(/▪\s+([\s\S]+?\.)/);
    // Clean newlines and extra whitespace from the name
    const name = conceptMatch?.[1]
      ? collapseWhitespace(conceptMatch[1])
      : 'Unknown Concept';

    return { code: 'CCC-1', name, description: describeSection(text) };
  }

  /**
   * Derives up to `MAX_KEYWORDS` distinct lowercase keywords from the
   * performance expectation and topic, in order of first appearance.
   *
   * Words of three characters or fewer and stop words are dropped.
   */
  private extractKeywords(pe: string, topic: string): string[] {
    const words = (pe + ' ' + topic)
      .toLowerCase()
      .split(/\W+/)
      .filter((w) => w.length > 3 && !KEYWORD_STOP_WORDS.has(w));

    return [...new Set(words)].slice(0, MAX_KEYWORDS);
  }

  /**
   * Creates the lesson scope for a standard. Only `key_concepts` is populated
   * (keywords of the performance expectation); every other list is empty.
   *
   * @param _content - Page text; currently unused.
   * @param pe - Performance expectation text.
   */
  private createLessonScope(_content: string, pe: string): LessonScope {
    return {
      key_concepts: this.extractKeywords(pe, ''),
      prerequisite_knowledge: [],
      common_misconceptions: [],
      depth_boundaries: {
        include: [],
        exclude: []
      }
    };
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Sallvainian/NGSS-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

structured-extractor.ts•5.47 KiB