CIViC MCP Server

SchemaParser.ts•20.1 KiB

import type {
  GraphQLSchemaInfo,
  GraphQLTypeInfo,
  GraphQLFieldInfo,
  FieldChunkingRule,
} from "./ChunkingEngine.js";

export interface EntityRelationshipInfo {
  fromType: string;
  toType: string;
  fieldName: string;
  cardinality: 'one-to-one' | 'one-to-many' | 'many-to-many';
  isEntityList: boolean;
}

export interface FieldExtractionRule {
  fieldName: string;
  typeName: string;
  shouldExtractEntities: boolean;
  targetEntityType?: string;
  isListField: boolean;
}

/** Relationship record as stored on `GraphQLSchemaInfo.relationships`. */
type SchemaRelationship = {
  fromType: string;
  toType: string;
  fieldName: string;
  cardinality: string;
};

/**
 * Parses GraphQL schema files and extracts chunking-relevant information.
 *
 * Two entry points exist:
 *  - the static `parseSchema()` (regex over the whole SDL, object types only), and
 *  - the instance `parseSchemaContent()` (line-oriented, also records extraction
 *    rules and relationships on the instance).
 *
 * Both are lightweight text scanners, not a full GraphQL parser: braces or
 * parentheses that appear inside descriptions or string default values can
 * still confuse them.
 */
export class SchemaParser {
  /** Type names treated as scalars by both parsers (never entities/relationships). */
  private static readonly SCALAR_TYPES: ReadonlySet<string> = new Set([
    'String', 'Int', 'Float', 'Boolean', 'ID', 'JSON', 'DateTime', 'ISO8601DateTime',
  ]);

  /** Type-name suffixes that the static parser treats as enum-like. */
  private static readonly ENUM_NAME_PATTERNS: readonly RegExp[] = [
    /Level$/, /Type$/, /Status$/, /Direction$/,
  ];

  /**
   * Lower-cased substrings that mark a field name as "likely large".
   * They must be lower case because they are compared against a lower-cased field name.
   */
  private static readonly LARGE_CONTENT_INDICATORS: readonly string[] = [
    'description', 'summary', 'statement', 'content', 'text', 'body',
    'abstract', 'citation', 'authorstring', 'fullname', 'title',
    'metadata', 'details', 'comments', 'notes', 'evidence',
  ];

  /**
   * SDL keyword -> kind. `input` has no member in the kind union used by this
   * file, so input object types are reported as 'OBJECT'.
   */
  private static readonly KIND_BY_KEYWORD: Readonly<Record<string, 'OBJECT' | 'ENUM' | 'INTERFACE'>> = {
    type: 'OBJECT',
    interface: 'INTERFACE',
    enum: 'ENUM',
    input: 'OBJECT',
  };

  private schemaInfo?: GraphQLSchemaInfo;
  private extractionRules: FieldExtractionRule[] = [];
  private relationships: EntityRelationshipInfo[] = [];

  /**
   * Parse a GraphQL schema string and extract type information.
   *
   * Only `type` definitions are considered; names rejected by `shouldSkipType`
   * (introspection, Input, Payload, Connection, Edge, built-in scalars) are dropped.
   *
   * @param schemaContent GraphQL SDL text.
   * @returns The parsed object types and the relationships between them.
   */
  static parseSchema(schemaContent: string): GraphQLSchemaInfo {
    const types: Record<string, GraphQLTypeInfo> = {};
    const relationships: SchemaRelationship[] = [];

    // Group 1: optional block description directly before the type.
    // Group 2: type name. Group 3: body up to the FIRST closing brace, so that
    // each type yields its own match instead of one match spanning the file.
    const typePattern =
      /(?:"""((?:(?!""")[\s\S])*)"""\s*)?\btype\s+(\w+)(?:\s+implements\s+[\w\s&,]+)?\s*\{([^}]+)\}/g;

    for (const match of schemaContent.matchAll(typePattern)) {
      const typeName = match[2];
      const typeBody = match[3];
      if (SchemaParser.shouldSkipType(typeName)) {
        continue;
      }

      const description = match[1]?.trim();
      types[typeName] = {
        name: typeName,
        kind: 'OBJECT',
        fields: SchemaParser.parseFields(typeBody, typeName, relationships),
        description: description || undefined,
      };
    }

    return { types, relationships };
  }

  /**
   * Generate chunking rules based on parsed schema.
   *
   * The result is, in order: fixed "never chunk" ID rules, one generated rule per
   * schema field that `generateFieldRule` accepts, then hand-written CIViC rules.
   *
   * @param schemaInfo Parsed schema, e.g. from `parseSchema()`.
   */
  static generateChunkingRulesFromSchema(schemaInfo: GraphQLSchemaInfo): FieldChunkingRule[] {
    const rules: FieldChunkingRule[] = [
      // Base rules that apply to all types
      { fieldName: 'id', typeName: '*', chunkThreshold: Infinity, priority: 'never', reason: 'ID fields should never be chunked' },
      { fieldName: 'entrezId', typeName: '*', chunkThreshold: Infinity, priority: 'never', reason: 'External ID fields should never be chunked' },
      { fieldName: 'pmcId', typeName: '*', chunkThreshold: Infinity, priority: 'never', reason: 'External ID fields should never be chunked' },
      { fieldName: 'citationId', typeName: '*', chunkThreshold: Infinity, priority: 'never', reason: 'External ID fields should never be chunked' },
    ];

    // Analyze schema types for large content fields
    for (const [typeName, typeInfo] of Object.entries(schemaInfo.types)) {
      for (const [fieldName, fieldInfo] of Object.entries(typeInfo.fields)) {
        const rule = SchemaParser.generateFieldRule(typeName, fieldName, fieldInfo);
        if (rule) {
          rules.push(rule);
        }
      }
    }

    // Add CIViC-specific knowledge
    rules.push(
      // Known large text fields from schema analysis
      { fieldName: 'description', typeName: 'EvidenceItem', chunkThreshold: 2048, priority: 'always', reason: 'Evidence descriptions are typically very long' },
      { fieldName: 'description', typeName: 'Gene', chunkThreshold: 1024, priority: 'always', reason: 'Gene descriptions can be extensive' },
      { fieldName: 'abstract', typeName: 'Source', chunkThreshold: 4096, priority: 'always', reason: 'Paper abstracts are typically long' },
      { fieldName: 'statement', typeName: '*', chunkThreshold: 2048, priority: 'always', reason: 'Statement fields contain detailed explanations' },
      { fieldName: 'summary', typeName: '*', chunkThreshold: 1024, priority: 'always', reason: 'Summary fields are often long' },

      // JSON fields that can be very large
      { fieldName: 'myGeneInfoDetails', typeName: 'Gene', chunkThreshold: 8192, priority: 'size-based', reason: 'External API responses can be very large' },
      { fieldName: 'clinicalTrials', typeName: '*', chunkThreshold: 4096, priority: 'size-based', reason: 'Clinical trial data arrays can be extensive' },

      // Connection fields (GraphQL pagination) - these can be huge
      { fieldName: 'comments', typeName: '*', chunkThreshold: 8192, priority: 'size-based', reason: 'Comment connections can contain many large comment objects' },
      { fieldName: 'events', typeName: '*', chunkThreshold: 6144, priority: 'size-based', reason: 'Event connections can be extensive' },
      { fieldName: 'revisions', typeName: '*', chunkThreshold: 6144, priority: 'size-based', reason: 'Revision connections can be extensive' },
      { fieldName: 'flags', typeName: '*', chunkThreshold: 4096, priority: 'size-based', reason: 'Flag connections can contain detailed flag information' },

      // Conservative chunking for names and short text
      { fieldName: 'name', typeName: '*', chunkThreshold: 256, priority: 'size-based', reason: 'Names are usually short but some can be long' },
      { fieldName: 'fullName', typeName: '*', chunkThreshold: 512, priority: 'size-based', reason: 'Full names might be longer than regular names' },
      { fieldName: 'title', typeName: '*', chunkThreshold: 1024, priority: 'size-based', reason: 'Titles can be moderately long' },
      { fieldName: 'citation', typeName: '*', chunkThreshold: 2048, priority: 'size-based', reason: 'Citations can be long formatted strings' }
    );

    return rules;
  }

  /**
   * Identify the most critical types for chunking optimization.
   *
   * Looks only at a fixed list of core CIViC types and reports, for each one
   * present in the schema, which of its fields look large.
   */
  static identifyHighValueTypes(schemaInfo: GraphQLSchemaInfo): Array<{
    typeName: string;
    reason: string;
    largeFields: string[];
    estimatedSize: 'small' | 'medium' | 'large' | 'very_large';
  }> {
    const highValueTypes: Array<{
      typeName: string;
      reason: string;
      largeFields: string[];
      estimatedSize: 'small' | 'medium' | 'large' | 'very_large';
    }> = [];

    // Core entity types that typically have large content
    const coreTypes = ['EvidenceItem', 'Gene', 'Source', 'Assertion', 'MolecularProfile'];

    for (const typeName of coreTypes) {
      const typeInfo = schemaInfo.types[typeName];
      if (!typeInfo) {
        continue;
      }

      const largeFields = Object.entries(typeInfo.fields)
        .filter(([fieldName, fieldInfo]) => SchemaParser.isLikelyLargeField(fieldName, fieldInfo))
        .map(([fieldName]) => fieldName);

      highValueTypes.push({
        typeName,
        reason: `Core CIViC entity with ${largeFields.length} potentially large fields`,
        largeFields,
        estimatedSize: SchemaParser.estimateTypeSize(typeInfo),
      });
    }

    return highValueTypes;
  }

  // Private helper methods

  /** True for type names the static parser ignores (built-ins, inputs, payloads, connections, edges). */
  private static shouldSkipType(typeName: string): boolean {
    const skipPatterns = [
      /^__/, // Introspection types
      /Input$/, // Input types
      /Payload$/, // Mutation payloads
      /Connection$/, // GraphQL connections
      /Edge$/, // GraphQL edges
      /^(String|Int|Float|Boolean|ID)$/, // Scalars
    ];
    return skipPatterns.some(pattern => pattern.test(typeName));
  }

  /** Count non-overlapping matches of a global regex in `text`. */
  private static countMatches(text: string, pattern: RegExp): number {
    return (text.match(pattern) ?? []).length;
  }

  /**
   * Parse the body of a `type` definition into field records and append any
   * relationships found to `relationships` (mutated in place).
   */
  private static parseFields(
    typeBody: string,
    typeName: string,
    relationships: SchemaRelationship[]
  ): Record<string, GraphQLFieldInfo> {
    const fields: Record<string, GraphQLFieldInfo> = {};

    // Drop block descriptions and `#` comments first so their text cannot be
    // mistaken for `name: Type` field definitions.
    const body = typeBody.replace(/"""[\s\S]*?"""/g, '').replace(/#[^\n]*/g, '');

    // name, optional (possibly multi-line) argument list, then a type reference
    // such as `T`, `T!`, `[T]`, `[T!]` or `[T!]!`. Only spaces/tabs are allowed
    // around tokens so a match never runs into the following line.
    const fieldPattern =
      /^[ \t]*([a-zA-Z]\w*)[ \t]*(?:\([^)]*\))?[ \t]*:[ \t]*(\[?[ \t]*\w+[ \t]*!?[ \t]*\]?[ \t]*!?)/gm;

    for (const match of body.matchAll(fieldPattern)) {
      const fieldName = match[1];
      const fieldInfo = SchemaParser.parseFieldType(match[2]);

      fields[fieldName] = { name: fieldName, ...fieldInfo };

      // Track relationships
      if (SchemaParser.isRelationshipField(fieldInfo, typeName)) {
        relationships.push({
          fromType: typeName,
          toType: SchemaParser.extractRelatedType(fieldInfo.type),
          fieldName,
          cardinality: fieldInfo.isList ? 'one-to-many' : 'one-to-one',
        });
      }
    }

    return fields;
  }

  /**
   * Split a type reference into its named type and wrappers.
   *
   * `isNullable` is false when either the field itself or (for lists) its items
   * carry a `!`, matching how the line-based parser reports nullability.
   */
  private static parseFieldType(typeString: string): Omit<GraphQLFieldInfo, 'name'> {
    let type = typeString.trim();
    let isList = false;
    let isNullable = true;

    // Outer non-null marker: `T!` or `[T]!`. Must be removed before looking
    // for brackets, otherwise `[T]!` is not recognised as a list.
    if (type.endsWith('!')) {
      isNullable = false;
      type = type.slice(0, -1).trim();
    }

    // List wrapper
    if (type.startsWith('[') && type.endsWith(']')) {
      isList = true;
      type = type.slice(1, -1).trim();

      // Non-null items inside the list: `[T!]`
      if (type.endsWith('!')) {
        isNullable = false;
        type = type.slice(0, -1).trim();
      }
    }

    return { type, isList, isNullable };
  }

  /** True when the field's type is neither a known scalar nor enum-like by name. */
  private static isRelationshipField(fieldInfo: Omit<GraphQLFieldInfo, 'name'>, typeName: string): boolean {
    if (SchemaParser.SCALAR_TYPES.has(fieldInfo.type)) {
      return false;
    }
    // Enum-like types are recognised purely by their name suffix.
    return !SchemaParser.ENUM_NAME_PATTERNS.some(pattern => pattern.test(fieldInfo.type));
  }

  /** Remove any remaining brackets or exclamation marks from a type reference. */
  private static extractRelatedType(typeString: string): string {
    return typeString.replace(/[[\]!]/g, '');
  }

  /**
   * Build a size-based chunking rule for a field that looks large, or null
   * when the field is not considered large or has no applicable shape.
   */
  private static generateFieldRule(
    typeName: string,
    fieldName: string,
    fieldInfo: GraphQLFieldInfo
  ): FieldChunkingRule | null {
    if (!SchemaParser.isLikelyLargeField(fieldName, fieldInfo)) {
      return null;
    }

    if (fieldInfo.type === 'String') {
      // Text fields that are likely to be large
      const textFieldThresholds: Record<string, number> = {
        'description': 1024,
        'summary': 1024,
        'abstract': 2048,
        'statement': 2048,
        'citation': 1024,
        'authorString': 512,
        'fullName': 256,
        'title': 1024,
      };
      return {
        fieldName,
        typeName,
        chunkThreshold: textFieldThresholds[fieldName] ?? 512,
        priority: 'size-based',
        reason: `String field '${fieldName}' on type '${typeName}' likely contains large text content`,
      };
    }

    if (fieldInfo.type === 'JSON') {
      // JSON fields can be very large
      return {
        fieldName,
        typeName,
        chunkThreshold: 4096,
        priority: 'size-based',
        reason: `JSON field '${fieldName}' on type '${typeName}' can contain large structured data`,
      };
    }

    if (fieldInfo.isList) {
      // List fields can accumulate to large sizes
      return {
        fieldName,
        typeName,
        chunkThreshold: 8192,
        priority: 'size-based',
        reason: `List field '${fieldName}' on type '${typeName}' can contain many items`,
      };
    }

    return null;
  }

  /**
   * Heuristic: a field is "likely large" when its name contains a known
   * indicator (case-insensitive), its type is JSON or a Connection, or it is
   * a list whose name does not contain 'Id'.
   */
  private static isLikelyLargeField(fieldName: string, fieldInfo: GraphQLFieldInfo): boolean {
    const lowerName = fieldName.toLowerCase();
    if (SchemaParser.LARGE_CONTENT_INDICATORS.some(indicator => lowerName.includes(indicator))) {
      return true;
    }

    // JSON fields and connection fields (GraphQL pagination) can be large
    if (fieldInfo.type === 'JSON' || fieldInfo.type.includes('Connection')) {
      return true;
    }

    // Lists can accumulate size, except plain ID lists
    return fieldInfo.isList && !fieldName.includes('Id');
  }

  /** Bucket a type by its number of fields and number of likely-large fields. */
  private static estimateTypeSize(typeInfo: GraphQLTypeInfo): 'small' | 'medium' | 'large' | 'very_large' {
    const fieldEntries = Object.entries(typeInfo.fields);
    const fieldCount = fieldEntries.length;
    const largeFieldCount = fieldEntries.filter(([name, field]) =>
      SchemaParser.isLikelyLargeField(name, field)
    ).length;

    if (largeFieldCount >= 5 || fieldCount >= 50) {
      return 'very_large';
    }
    if (largeFieldCount >= 3 || fieldCount >= 30) {
      return 'large';
    }
    if (largeFieldCount >= 1 || fieldCount >= 15) {
      return 'medium';
    }
    return 'small';
  }

  /**
   * Parse the GraphQL schema file and extract structure information.
   *
   * Placeholder: always rejects, because this code has no file system access.
   *
   * @throws Error always; callers should use `parseSchemaContent()`.
   */
  async parseSchemaFromFile(schemaPath: string): Promise<GraphQLSchemaInfo> {
    // For Cloudflare Workers environment, we'd need to pass content differently
    // This is a placeholder - in practice, schema content would be loaded at build time
    // or passed as a parameter
    throw new Error('File system access not available in Workers environment. Use parseSchemaContent() instead.');
  }

  /**
   * Parse GraphQL schema content and extract type/relationship information.
   *
   * Side effects: replaces the instance's stored schema info, relationships
   * (see `getRelationships()`) and extraction rules (see `getExtractionRules()`).
   *
   * @param schemaContent GraphQL SDL text.
   */
  parseSchemaContent(schemaContent: string): GraphQLSchemaInfo {
    const types: Record<string, GraphQLTypeInfo> = {};
    const relationships: SchemaRelationship[] = [];
    const entityRelationships: EntityRelationshipInfo[] = [];

    for (const block of this.extractTypeBlocks(schemaContent)) {
      const typeInfo = this.parseTypeBlock(block);
      if (!typeInfo) {
        continue;
      }
      types[typeInfo.name] = typeInfo;

      for (const relationship of this.extractRelationshipsFromType(typeInfo)) {
        entityRelationships.push(relationship);
        // The schema-level record keeps its original four-key shape.
        relationships.push({
          fromType: relationship.fromType,
          toType: relationship.toType,
          fieldName: relationship.fieldName,
          cardinality: relationship.cardinality,
        });
      }
    }

    this.schemaInfo = { types, relationships };
    this.relationships = entityRelationships;
    this.generateExtractionRules();

    return this.schemaInfo;
  }

  /**
   * Get extraction rules for intelligent entity processing.
   * Empty until `parseSchemaContent()` has been called.
   */
  getExtractionRules(): FieldExtractionRule[] {
    return this.extractionRules;
  }

  /**
   * Get relationship information collected by the last `parseSchemaContent()` call.
   */
  getRelationships(): EntityRelationshipInfo[] {
    return this.relationships;
  }

  /**
   * Check if a field should have its entities extracted vs stored as JSON.
   *
   * Uses the first matching extraction rule (exact type name or '*'); when no
   * rule matches, falls back to field-name heuristics.
   */
  shouldExtractEntities(typeName: string, fieldName: string): {
    extract: boolean;
    targetType?: string;
    isListField: boolean;
  } {
    const rule = this.extractionRules.find(r =>
      (r.typeName === typeName || r.typeName === '*') && r.fieldName === fieldName
    );

    if (rule) {
      return {
        extract: rule.shouldExtractEntities,
        targetType: rule.targetEntityType,
        isListField: rule.isListField,
      };
    }

    // Default: extract if field name suggests entities
    const entityFieldPatterns = [
      /.*therapies?$/i,
      /.*diseases?$/i,
      /.*genes?$/i,
      /.*variants?$/i,
      /.*evidences?$/i,
      /.*sources?$/i,
      /.*users?$/i,
      /.*organizations?$/i,
    ];
    const suggestsEntities = entityFieldPatterns.some(pattern => pattern.test(fieldName));

    return {
      extract: suggestsEntities,
      targetType: this.inferTargetType(fieldName),
      isListField: fieldName.endsWith('s'), // Simple heuristic
    };
  }

  /**
   * Extract type definition blocks from schema content.
   *
   * Each returned block is the text of one `type` / `interface` / `enum` /
   * `input` definition, preceded by its block description when one directly
   * precedes the declaration.
   */
  private extractTypeBlocks(schemaContent: string): string[] {
    const typeBlocks: string[] = [];

    let currentBlock = '';
    let pendingDescription = '';
    let inTypeDefinition = false;
    let inBlockString = false;
    let braceCount = 0;

    for (const line of schemaContent.split('\n')) {
      const trimmedLine = line.trim();

      // Description lines: keep them with the surrounding text, but never
      // interpret their content as declarations or count their braces.
      const tripleQuoteCount = SchemaParser.countMatches(trimmedLine, /"""/g);
      if (inBlockString || tripleQuoteCount > 0) {
        const opensNewString = !inBlockString;
        if (tripleQuoteCount % 2 === 1) {
          inBlockString = !inBlockString;
        }
        if (inTypeDefinition) {
          currentBlock += line + '\n';
        } else {
          pendingDescription = opensNewString ? line + '\n' : pendingDescription + line + '\n';
        }
        continue;
      }

      // Skip comments and empty lines when not in a type
      if (!inTypeDefinition && (trimmedLine.startsWith('#') || trimmedLine === '')) {
        continue;
      }

      // A declaration can only start outside of an open body.
      const startsDefinition =
        braceCount <= 0 && /^(type|interface|enum|input)\s+\w+/.test(trimmedLine);
      const braceDelta =
        SchemaParser.countMatches(line, /\{/g) - SchemaParser.countMatches(line, /\}/g);

      if (startsDefinition) {
        // Save previous (unterminated) block if exists
        if (currentBlock.trim()) {
          typeBlocks.push(currentBlock.trim());
        }
        currentBlock = pendingDescription + line + '\n';
        pendingDescription = '';
        inTypeDefinition = true;
        // The opening brace usually sits on the declaration line, so count it.
        braceCount = braceDelta;
      } else if (inTypeDefinition) {
        currentBlock += line + '\n';
        braceCount += braceDelta;
      } else {
        // Some other top-level construct (scalar, union, ...): any pending
        // description belonged to it, not to a later type.
        pendingDescription = '';
        continue;
      }

      if (braceCount === 0 && trimmedLine.includes('}')) {
        typeBlocks.push(currentBlock.trim());
        currentBlock = '';
        inTypeDefinition = false;
      }
    }

    // Add final block if exists
    if (currentBlock.trim()) {
      typeBlocks.push(currentBlock.trim());
    }

    return typeBlocks;
  }

  /**
   * Parse individual type block (as produced by `extractTypeBlocks`) into TypeInfo.
   * Returns null when the block does not begin with a recognised declaration.
   */
  private parseTypeBlock(block: string): GraphQLTypeInfo | null {
    // Separate the optional leading description from the definition itself.
    const leadingDescription = block.match(/^\s*"""[\s\S]*?"""\s*/)?.[0] ?? '';
    const [header = '', ...bodyLines] = block.slice(leadingDescription.length).split('\n');

    // Extract type name and kind
    const typeMatch = header.trim().match(/^(type|interface|enum|input)\s+(\w+)/);
    if (!typeMatch) return null;
    const [, keyword, name] = typeMatch;

    const fields: Record<string, GraphQLFieldInfo> = {};
    let inBlockString = false;
    let pendingField = ''; // field text accumulated while an argument list is still open
    let parenDepth = 0;

    for (const rawLine of bodyLines) {
      const line = rawLine.trim();

      // Skip block descriptions entirely, including their inner lines.
      const tripleQuoteCount = SchemaParser.countMatches(line, /"""/g);
      if (inBlockString || tripleQuoteCount > 0) {
        if (tripleQuoteCount % 2 === 1) {
          inBlockString = !inBlockString;
        }
        continue;
      }
      if (!line || line.startsWith('#')) {
        continue;
      }

      // Join lines until the argument list (if any) is closed, so arguments
      // are not mistaken for fields of the type.
      pendingField = pendingField ? `${pendingField} ${line}` : line;
      parenDepth +=
        SchemaParser.countMatches(line, /\(/g) - SchemaParser.countMatches(line, /\)/g);
      if (parenDepth > 0) {
        continue;
      }

      const fieldInfo = this.parseFieldLine(pendingField);
      if (fieldInfo) {
        fields[fieldInfo.name] = fieldInfo;
      }
      pendingField = '';
      parenDepth = 0;
    }

    return {
      name,
      kind: SchemaParser.KIND_BY_KEYWORD[keyword] ?? 'OBJECT',
      fields,
      description: leadingDescription ? this.extractDescription(leadingDescription) : undefined,
    };
  }

  /**
   * Parse individual field line.
   *
   * Accepts `fieldName: Type`, `fieldName: [Type]`, `fieldName(args): Type`
   * with optional `!` markers; anything after the type reference (directives,
   * comments) is ignored. Returns null for lines that are not field definitions.
   */
  private parseFieldLine(line: string): GraphQLFieldInfo | null {
    const fieldMatch = line.match(
      /^(\w+)\s*(?:\([^)]*\))?\s*:\s*(\[?)\s*(\w+)\s*(!?)\s*\]?\s*(!?)/
    );
    if (!fieldMatch) return null;

    const [, name, listStart, type, typeRequired, listRequired] = fieldMatch;

    return {
      name,
      type: type.trim(),
      isList: !!listStart,
      isNullable: !typeRequired && !listRequired,
      description: undefined, // Could be enhanced to extract field descriptions
    };
  }

  /**
   * Extract relationships from a type definition: one record per field whose
   * type is considered an entity by `isEntityType`.
   */
  private extractRelationshipsFromType(typeInfo: GraphQLTypeInfo): EntityRelationshipInfo[] {
    const relationships: EntityRelationshipInfo[] = [];

    for (const [fieldName, fieldInfo] of Object.entries(typeInfo.fields)) {
      if (!this.isEntityType(fieldInfo.type)) {
        continue;
      }
      relationships.push({
        fromType: typeInfo.name,
        toType: fieldInfo.type,
        fieldName,
        cardinality: fieldInfo.isList ? 'one-to-many' : 'one-to-one',
        isEntityList: fieldInfo.isList,
      });
    }

    return relationships;
  }

  /**
   * Check if a type name represents an entity (vs scalar): not a known scalar
   * and its first character equals its own upper-case form.
   */
  private isEntityType(typeName: string): boolean {
    if (!typeName || SchemaParser.SCALAR_TYPES.has(typeName)) {
      return false;
    }
    const firstChar = typeName.charAt(0);
    return firstChar === firstChar.toUpperCase();
  }

  /**
   * Generate field extraction rules based on schema analysis: one rule per
   * parsed field, followed by global '*' rules for id/name/description.
   */
  private generateExtractionRules(): void {
    if (!this.schemaInfo) return;

    this.extractionRules = [];

    for (const [typeName, typeInfo] of Object.entries(this.schemaInfo.types)) {
      for (const [fieldName, fieldInfo] of Object.entries(typeInfo.fields)) {
        if (this.isEntityType(fieldInfo.type)) {
          // Extract entities from list and single entity reference fields
          this.extractionRules.push({
            fieldName,
            typeName,
            shouldExtractEntities: true,
            targetEntityType: fieldInfo.type,
            isListField: fieldInfo.isList,
          });
        } else {
          // Don't extract from scalar fields
          this.extractionRules.push({
            fieldName,
            typeName,
            shouldExtractEntities: false,
            isListField: fieldInfo.isList,
          });
        }
      }
    }

    // Add global rules for common patterns
    this.extractionRules.push(
      { fieldName: 'id', typeName: '*', shouldExtractEntities: false, isListField: false },
      { fieldName: 'name', typeName: '*', shouldExtractEntities: false, isListField: false },
      { fieldName: 'description', typeName: '*', shouldExtractEntities: false, isListField: false }
    );
  }

  /**
   * Infer target entity type from field name: naive singularisation
   * ('ies' -> 'y', otherwise drop a trailing 's') followed by capitalisation.
   */
  private inferTargetType(fieldName: string): string {
    let singular = fieldName;
    if (/ies$/i.test(fieldName)) {
      singular = `${fieldName.slice(0, -3)}y`;
    } else if (fieldName.endsWith('s')) {
      singular = fieldName.slice(0, -1);
    }
    return singular.charAt(0).toUpperCase() + singular.slice(1);
  }

  /**
   * Return the trimmed content of the first `"""..."""` block string in `block`,
   * or undefined when there is none.
   */
  private extractDescription(block: string): string | undefined {
    const descMatch = block.match(/"""([\s\S]*?)"""/);
    return descMatch ? descMatch[1].trim() : undefined;
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/QuentinCody/civic-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

SchemaParser.ts•20.1 KiB