Skip to main content
Glama

documcp

by tosin2013
language-parsers-simple.ts (23.9 kB)
import type { CodeElement, APIEndpoint } from './code-scanner.js';
import { spawn } from 'child_process';

/**
 * Contract implemented by every language-specific parser that can be
 * registered with {@link MultiLanguageCodeScanner}.
 */
export interface LanguageParser {
  /** Lower-case file extensions (without the leading dot) handled by this parser. */
  extensions: string[];
  /** Human-readable parser name, surfaced by `getParserInfo()`. */
  name: string;
  /** Extracts code elements from `content`; `filePath` is copied onto each element. */
  parseFile(content: string, filePath: string): Promise<LanguageParseResult>;
  supportsApiEndpoints?: boolean;
  supportsFrameworkDetection?: boolean;
}

/** Code elements extracted from one file, grouped by element category. */
export interface LanguageParseResult {
  functions: CodeElement[];
  classes: CodeElement[];
  interfaces: CodeElement[];
  types: CodeElement[];
  enums: CodeElement[];
  exports: CodeElement[];
  imports: CodeElement[];
  apiEndpoints: APIEndpoint[];
  constants: CodeElement[];
  variables: CodeElement[];
}

/**
 * Dispatches files to a registered {@link LanguageParser} chosen by file
 * extension. Files whose extension has no registered parser yield an empty
 * result instead of an error.
 */
export class MultiLanguageCodeScanner {
  private parsers = new Map<string, LanguageParser>();

  constructor() {
    this.initializeParsers();
  }

  private initializeParsers() {
    // Register parsers based on your tech stack
    this.registerParser(new PythonParser());
    this.registerParser(new GoParser());
    this.registerParser(new YamlParser());
    this.registerParser(new BashParser());
  }

  /** Maps every extension the parser declares to that parser (last registration wins). */
  private registerParser(parser: LanguageParser) {
    for (const extension of parser.extensions) {
      this.parsers.set(extension, parser);
    }
  }

  /**
   * Parses `content` with the parser registered for the extension of `filePath`.
   *
   * @param content - Full text of the file.
   * @param filePath - Path used to pick the parser and to label extracted elements.
   * @returns The parser's result, or an empty result for unsupported files.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const parser = this.parsers.get(this.getFileExtension(filePath));
    if (!parser) {
      // Return empty result for unsupported files
      return this.getEmptyResult();
    }
    return await parser.parseFile(content, filePath);
  }

  /**
   * Returns the lower-cased extension of the final path segment, or `''` when
   * that segment contains no dot.
   *
   * Only the basename is inspected, so an extension-less file that happens to
   * be named like an extension (e.g. `sh`, `go`) is not mistaken for a source
   * file, and dots in directory names are ignored.
   */
  private getFileExtension(filePath: string): string {
    const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
    const baseName = filePath.slice(lastSeparator + 1);
    const dotIndex = baseName.lastIndexOf('.');
    if (dotIndex === -1) {
      return '';
    }
    return baseName.slice(dotIndex + 1).toLowerCase();
  }

  private getEmptyResult(): LanguageParseResult {
    return {
      functions: [],
      classes: [],
      interfaces: [],
      types: [],
      enums: [],
      exports: [],
      imports: [],
      apiEndpoints: [],
      constants: [],
      variables: [],
    };
  }

  /** Lists every extension that currently has a registered parser. */
  getSupportedExtensions(): string[] {
    return Array.from(this.parsers.keys());
  }

  /** Lists each registered extension together with the name of its parser. */
  getParserInfo(): { extension: string; parser: string }[] {
    return Array.from(this.parsers.entries()).map(([ext, parser]) => ({
      extension: ext,
      parser: parser.name,
    }));
  }
}
/** Creates a parse result whose every collection is empty. */
function createEmptyParseResult(): LanguageParseResult {
  return {
    functions: [],
    classes: [],
    interfaces: [],
    types: [],
    enums: [],
    exports: [],
    imports: [],
    apiEndpoints: [],
    constants: [],
    variables: [],
  };
}

/** Returns the 1-based line number of the character at `index` in `content`. */
function lineNumberAt(content: string, index: number): number {
  return content.substring(0, index).split('\n').length;
}

/**
 * Collects the keys that sit directly under a `<sectionKey>:` mapping in a
 * YAML document, using indentation only (no YAML parsing).
 *
 * The indentation of the first non-blank, non-comment line after the section
 * header defines the child level; only keys with an empty value at exactly
 * that indentation are returned, so deeper keys such as `ports:` or `steps:`
 * are ignored. The section ends at the next line starting with a letter in
 * column 0.
 */
function collectSectionKeys(
  content: string,
  sectionKey: string,
): Array<{ name: string; line: number }> {
  const entries: Array<{ name: string; line: number }> = [];
  const header = `${sectionKey}:`;
  let inSection = false;
  let childIndent = -1;

  content.split('\n').forEach((line, index) => {
    const trimmed = line.trim();
    if (trimmed === header) {
      inSection = true;
      childIndent = -1;
      return;
    }
    if (inSection && /^[a-zA-Z]/.test(line)) {
      inSection = false; // A new top-level key closes the section
    }
    if (!inSection || trimmed === '' || trimmed.startsWith('#')) {
      return;
    }

    const indent = line.search(/\S/);
    if (childIndent === -1) {
      childIndent = indent;
    }
    const keyMatch = line.match(/^\s+([a-zA-Z0-9_-]+):\s*$/);
    if (keyMatch && indent === childIndent) {
      entries.push({ name: keyMatch[1], line: index + 1 });
    }
  });

  return entries;
}

/** A function, class, constant or variable entry emitted by the Python AST script. */
interface PythonAstSymbol {
  name: string;
  line: number;
  exported: boolean;
  has_docstring: boolean;
  docstring: string | null;
  is_async: boolean;
}

/** An import entry emitted by the Python AST script. */
interface PythonAstImport {
  name: string;
  line: number;
}

/** JSON document printed on stdout by the Python AST script. */
interface PythonAstResult {
  functions?: PythonAstSymbol[];
  classes?: PythonAstSymbol[];
  imports?: PythonAstImport[];
  constants?: PythonAstSymbol[];
  variables?: PythonAstSymbol[];
  error?: string;
}

/** Milliseconds to wait for the Python interpreter before giving up. */
const PYTHON_AST_TIMEOUT_MS = 5000;

// Script executed with `python3 -c`; it reads the source from stdin and prints
// a JSON summary on stdout. Lines start in column 0 on purpose: Python rejects
// unexpected leading indentation.
const PYTHON_AST_SCRIPT = `
import ast
import sys
import json
import tempfile
import os

try:
    # Read content from stdin
    content = sys.stdin.read()
    tree = ast.parse(content)

    result = {
        'functions': [],
        'classes': [],
        'imports': [],
        'constants': [],
        'variables': []
    }

    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            result['functions'].append({
                'name': node.name,
                'line': node.lineno,
                'has_docstring': ast.get_docstring(node) is not None,
                'docstring': ast.get_docstring(node),
                'is_async': False,
                'exported': not node.name.startswith('_')
            })
        elif isinstance(node, ast.AsyncFunctionDef):
            result['functions'].append({
                'name': node.name,
                'line': node.lineno,
                'has_docstring': ast.get_docstring(node) is not None,
                'docstring': ast.get_docstring(node),
                'is_async': True,
                'exported': not node.name.startswith('_')
            })
        elif isinstance(node, ast.ClassDef):
            result['classes'].append({
                'name': node.name,
                'line': node.lineno,
                'has_docstring': ast.get_docstring(node) is not None,
                'docstring': ast.get_docstring(node),
                'exported': not node.name.startswith('_')
            })
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    result['imports'].append({
                        'name': alias.name,
                        'line': node.lineno
                    })
            else:  # ImportFrom
                result['imports'].append({
                    'name': node.module or 'relative',
                    'line': node.lineno
                })
        elif isinstance(node, ast.Assign):
            for target in node.targets:
                if isinstance(target, ast.Name):
                    is_constant = target.id.isupper()
                    result['constants' if is_constant else 'variables'].append({
                        'name': target.id,
                        'line': node.lineno,
                        'exported': not target.id.startswith('_')
                    })

    print(json.dumps(result))
except Exception as e:
    print(json.dumps({'error': str(e)}), file=sys.stderr)
`;

// Python Parser Implementation using subprocess + regex fallback
/**
 * Extracts functions, classes, imports, constants and variables from Python
 * source. A `python3` subprocess running the `ast` module is tried first; when
 * it is unavailable or fails, a line-based regex scan is used instead.
 */
export class PythonParser implements LanguageParser {
  extensions = ['py', 'pyi', 'pyx', 'pxd'];
  name = 'Python';
  supportsApiEndpoints = true;
  supportsFrameworkDetection = true;

  /**
   * Parses one Python file.
   *
   * @param content - Python source text.
   * @param filePath - Path copied onto every extracted element.
   * @returns Extracted elements; never rejects because of a parse failure.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result = createEmptyParseResult();

    try {
      // Try subprocess-based AST parsing first
      const astResult = await this.parseWithPythonAST(content, filePath);
      if (astResult) {
        this.mergePythonASTResults(astResult, result, filePath);
      } else {
        // Fall back to regex-based parsing
        this.parseWithRegex(content, result, filePath);
      }

      // Look for Flask/FastAPI/Django endpoints
      this.findPythonApiEndpoints(content, result, filePath);
    } catch (error) {
      console.warn(`Failed to parse Python file ${filePath}:`, error);
      // Start from a clean result so entries merged before the failure are
      // not reported a second time by the regex fallback.
      const fallback = createEmptyParseResult();
      this.parseWithRegex(content, fallback, filePath);
      return fallback;
    }

    return result;
  }

  /**
   * Runs the embedded AST script under `python3`, feeding `content` on stdin.
   *
   * @returns The decoded JSON summary, or `null` when the interpreter is
   *   missing, exits non-zero, prints nothing usable, or exceeds the timeout.
   */
  private async parseWithPythonAST(
    content: string,
    _filePath: string,
  ): Promise<PythonAstResult | null> {
    return new Promise((resolve) => {
      let settled = false;
      let timer: ReturnType<typeof setTimeout> | undefined;

      // Resolve at most once and always release the timeout so it neither
      // keeps the event loop alive nor kills a process that already exited.
      const finish = (value: PythonAstResult | null): void => {
        if (settled) {
          return;
        }
        settled = true;
        if (timer !== undefined) {
          clearTimeout(timer);
        }
        resolve(value);
      };

      // Try to execute Python AST parsing
      const child = spawn('python3', ['-c', PYTHON_AST_SCRIPT], {
        stdio: ['pipe', 'pipe', 'pipe'],
      });

      let output = '';
      let errorOutput = '';

      child.stdout.on('data', (chunk: { toString(): string }) => {
        output += chunk.toString();
      });

      child.stderr.on('data', (chunk: { toString(): string }) => {
        errorOutput += chunk.toString();
      });

      child.on('close', (code) => {
        if (code === 0 && output.trim()) {
          try {
            const parsed = JSON.parse(output.trim()) as PythonAstResult | null;
            if (parsed && typeof parsed === 'object' && !parsed.error) {
              finish(parsed);
              return;
            }
          } catch (e) {
            // JSON parsing failed
            console.warn('Failed to parse Python AST output:', e);
          }
        }

        if (errorOutput) {
          console.warn('Python AST parsing errors:', errorOutput);
        }

        finish(null); // Fall back to regex parsing
      });

      child.on('error', () => {
        finish(null); // Python not available or failed
      });

      // Writing to a child that never started or exited early raises EPIPE on
      // stdin; without a listener that would be an unhandled stream error. The
      // 'close' / 'error' handlers above decide the outcome.
      child.stdin.on('error', () => {});

      // Send content via stdin
      child.stdin.write(content);
      child.stdin.end();

      // Give up if the interpreter does not finish in time
      timer = setTimeout(() => {
        child.kill();
        finish(null);
      }, PYTHON_AST_TIMEOUT_MS);
    });
  }

  /** Converts the AST script's JSON summary into `CodeElement`s appended to `result`. */
  private mergePythonASTResults(
    astResult: PythonAstResult,
    result: LanguageParseResult,
    filePath: string,
  ): void {
    for (const func of astResult.functions ?? []) {
      result.functions.push({
        name: func.name,
        type: 'function',
        filePath,
        line: func.line,
        column: 0,
        exported: func.exported,
        isAsync: func.is_async,
        hasJSDoc: func.has_docstring,
        jsDocDescription: func.docstring || undefined,
      });
    }

    for (const cls of astResult.classes ?? []) {
      result.classes.push({
        name: cls.name,
        type: 'class',
        filePath,
        line: cls.line,
        column: 0,
        exported: cls.exported,
        hasJSDoc: cls.has_docstring,
        jsDocDescription: cls.docstring || undefined,
      });
    }

    for (const imp of astResult.imports ?? []) {
      result.imports.push({
        name: imp.name,
        type: 'import',
        filePath,
        line: imp.line,
        column: 0,
        exported: false,
      });
    }

    for (const constant of astResult.constants ?? []) {
      result.constants.push({
        name: constant.name,
        type: 'variable',
        filePath,
        line: constant.line,
        column: 0,
        exported: constant.exported,
        hasJSDoc: false,
      });
    }

    for (const variable of astResult.variables ?? []) {
      result.variables.push({
        name: variable.name,
        type: 'variable',
        filePath,
        line: variable.line,
        column: 0,
        exported: variable.exported,
        hasJSDoc: false,
      });
    }
  }

  /**
   * Line-based fallback used when AST parsing is unavailable. Recognises
   * `def` / `async def`, `class`, `import` / `from ... import` and
   * UPPER_CASE assignments.
   */
  private parseWithRegex(content: string, result: LanguageParseResult, filePath: string): void {
    const lines = content.split('\n');

    lines.forEach((line, index) => {
      const lineNum = index + 1;

      // Function definitions
      const funcMatch = line.match(/^\s*(async\s+)?def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/);
      if (funcMatch) {
        const funcName = funcMatch[2];
        result.functions.push({
          name: funcName,
          type: 'function',
          filePath,
          line: lineNum,
          column: 0,
          exported: !funcName.startsWith('_'),
          isAsync: !!funcMatch[1],
          hasJSDoc: this.hasDocstringAfterLine(lines, index),
        });
      }

      // Class definitions
      const classMatch = line.match(/^\s*class\s+([a-zA-Z_][a-zA-Z0-9_]*)/);
      if (classMatch) {
        const className = classMatch[1];
        result.classes.push({
          name: className,
          type: 'class',
          filePath,
          line: lineNum,
          column: 0,
          exported: !className.startsWith('_'),
          hasJSDoc: this.hasDocstringAfterLine(lines, index),
        });
      }

      // Import statements
      const importMatch = line.match(/^\s*(?:from\s+([^\s]+)\s+)?import\s+(.+)/);
      if (importMatch) {
        // `from X import ...` records X. A plain `import a as b, c` records
        // every module (a, c) without its alias, matching what the AST path
        // reports.
        const modules = importMatch[1]
          ? [importMatch[1]]
          : (importMatch[2] ?? '')
              .split('#')[0]
              .split(',')
              .map((part) => part.trim().split(/\s+/)[0])
              .filter((moduleName) => moduleName.length > 0);

        for (const moduleName of modules) {
          result.imports.push({
            name: moduleName,
            type: 'import',
            filePath,
            line: lineNum,
            column: 0,
            exported: false,
          });
        }
      }

      // Constants and variables
      const assignMatch = line.match(/^\s*([A-Z_][A-Z0-9_]*)\s*=/);
      if (assignMatch) {
        const constantName = assignMatch[1];
        result.constants.push({
          name: constantName,
          type: 'variable',
          filePath,
          line: lineNum,
          column: 0,
          // Same rule as functions/classes and the AST path: a leading
          // underscore marks the name as private.
          exported: !constantName.startsWith('_'),
          hasJSDoc: false,
        });
      }
    });
  }

  /** True when one of the two lines following `lineIndex` opens a triple-quoted string. */
  private hasDocstringAfterLine(lines: string[], lineIndex: number): boolean {
    // Check if next few lines contain a docstring
    const end = Math.min(lineIndex + 3, lines.length);
    for (let i = lineIndex + 1; i < end; i++) {
      const candidate = lines[i].trim();
      if (candidate.startsWith('"""') || candidate.startsWith("'''")) {
        return true;
      }
    }
    return false;
  }

  /**
   * Scans for Flask / FastAPI route decorators and Django `path()` / `url()`
   * registrations and appends one endpoint per occurrence.
   */
  private findPythonApiEndpoints(content: string, result: LanguageParseResult, filePath: string) {
    // Decorator-style routes: group 1 is the verb (or `route`), group 2 the path.
    // The `@app.` pattern covers both Flask and FastAPI, so it is listed once
    // to avoid reporting the same decorator twice.
    const decoratorPatterns = [
      /@app\.(route|get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
      /@bp\.(route|get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
      /router\.(get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
    ];

    // Django patterns: the only capture group is the path.
    const djangoPatterns = [/path\s*\(\s*['"]([^'"]+)['"]/g, /url\s*\(\s*r?['"]([^'"]+)['"]/g];

    const record = (method: APIEndpoint['method'], path: string, index: number): void => {
      result.apiEndpoints.push({
        method,
        path,
        filePath,
        line: lineNumberAt(content, index),
        hasDocumentation: this.hasEndpointDocumentation(content, index),
      });
    };

    for (const pattern of decoratorPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        const verb = match[1];
        const method = verb === 'route' ? 'ALL' : (verb.toUpperCase() as APIEndpoint['method']);
        record(method, match[2], match.index);
      }
    }

    for (const pattern of djangoPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        // URL registrations carry no HTTP verb, so they are reported as 'ALL'.
        record('ALL', match[1], match.index);
      }
    }
  }

  /**
   * True when any of the last five line fragments preceding `matchIndex`
   * starts with a triple quote or `#`.
   */
  private hasEndpointDocumentation(content: string, matchIndex: number): boolean {
    const precedingLines = content.substring(0, matchIndex).split('\n');

    // Check last few lines for docstrings or comments
    return precedingLines.slice(Math.max(0, precedingLines.length - 5)).some((raw) => {
      const text = raw.trim();
      return text.startsWith('"""') || text.startsWith("'''") || text.startsWith('#');
    });
  }
}

// Go Parser Implementation (regex-based)
/**
 * Line-based Go scanner that records functions, struct / interface types,
 * imports, `const` / `var` declarations and common web-framework routes.
 */
export class GoParser implements LanguageParser {
  extensions = ['go'];
  name = 'Go';
  supportsApiEndpoints = true;

  /**
   * Parses one Go file.
   *
   * @param content - Go source text.
   * @param filePath - Path copied onto every extracted element.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result = createEmptyParseResult();
    const lines = content.split('\n');
    let inImportBlock = false;

    lines.forEach((line, index) => {
      const lineNum = index + 1;

      // Function declarations
      const funcMatch = line.match(/^\s*func\s+(?:\([^)]*\)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/);
      if (funcMatch) {
        const funcName = funcMatch[1];
        result.functions.push({
          name: funcName,
          type: 'function',
          filePath,
          line: lineNum,
          column: 0,
          exported: this.isGoExported(funcName),
          hasJSDoc: this.hasGoDocComment(lines, index),
        });
      }

      // Type declarations (struct, interface, etc.)
      const typeMatch = line.match(/^\s*type\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+(struct|interface)/);
      if (typeMatch) {
        const typeName = typeMatch[1];
        const isStruct = typeMatch[2] === 'struct';
        const element: CodeElement = {
          name: typeName,
          type: isStruct ? 'class' : 'interface',
          filePath,
          line: lineNum,
          column: 0,
          exported: this.isGoExported(typeName),
          hasJSDoc: this.hasGoDocComment(lines, index),
        };
        (isStruct ? result.classes : result.interfaces).push(element);
      }

      // Import declarations. Quoted paths are only treated as imports on an
      // `import` line or inside an `import ( ... )` group, so string literals
      // elsewhere in the file are not misreported. An optional alias
      // (`name`, `_` or `.`) may precede the path.
      let importPath: string | undefined;
      if (inImportBlock) {
        if (/^\s*\)/.test(line)) {
          inImportBlock = false;
        } else {
          importPath = line.match(/^\s*(?:[A-Za-z_.][\w.]*\s+)?"([^"]+)"/)?.[1];
        }
      } else if (/^\s*import\s*\(/.test(line)) {
        inImportBlock = true;
      } else {
        importPath = line.match(/^\s*import\s*(?:[A-Za-z_.][\w.]*\s+)?"([^"]+)"/)?.[1];
      }
      if (importPath !== undefined) {
        result.imports.push({
          name: importPath,
          type: 'import',
          filePath,
          line: lineNum,
          column: 0,
          exported: false,
        });
      }

      // Constants and variables
      const constMatch = line.match(/^\s*(const|var)\s+([a-zA-Z_][a-zA-Z0-9_]*)/);
      if (constMatch) {
        const varName = constMatch[2];
        const element: CodeElement = {
          name: varName,
          type: 'variable',
          filePath,
          line: lineNum,
          column: 0,
          exported: this.isGoExported(varName),
          hasJSDoc: this.hasGoDocComment(lines, index),
        };
        (constMatch[1] === 'const' ? result.constants : result.variables).push(element);
      }
    });

    // Find Go API endpoints
    this.findGoApiEndpoints(content, result, filePath);

    return result;
  }

  /**
   * In Go, exported names start with an upper-case letter. A character counts
   * as upper-case here only if it differs from its lower-case form, so `_`
   * (which has no case) is correctly treated as unexported.
   */
  private isGoExported(name: string): boolean {
    const first = name.charAt(0);
    return first !== '' && first === first.toUpperCase() && first !== first.toLowerCase();
  }

  /** True when the line directly above `lineIndex` is a `//` comment. */
  private hasGoDocComment(lines: string[], lineIndex: number): boolean {
    // Check if previous line has a doc comment
    return lineIndex > 0 && lines[lineIndex - 1].trim().startsWith('//');
  }

  /** Appends one endpoint per route registration found in `content`. */
  private findGoApiEndpoints(content: string, result: LanguageParseResult, filePath: string) {
    // Common Go web framework patterns
    const patterns = [
      // Gin framework
      /\.(GET|POST|PUT|DELETE|PATCH)\s*\(\s*"([^"]+)"/g,
      // Echo framework
      /\.(Get|Post|Put|Delete|Patch)\s*\(\s*"([^"]+)"/g,
      // Gorilla mux and the standard library: `.HandleFunc(` also matches
      // `http.HandleFunc(`, so a separate pattern would report it twice.
      /\.HandleFunc\s*\(\s*"([^"]+)"/g,
    ];

    for (const pattern of patterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(content)) !== null) {
        // Two groups => (verb, path); one group => path only, any method.
        const hasVerb = match[2] !== undefined;
        const method: APIEndpoint['method'] = hasVerb
          ? (match[1].toUpperCase() as APIEndpoint['method'])
          : 'ALL';
        const path = hasVerb ? match[2] : match[1];

        result.apiEndpoints.push({
          method,
          path,
          filePath,
          line: lineNumberAt(content, match.index),
          hasDocumentation: this.hasEndpointDocumentation(content, match.index),
        });
      }
    }
  }

  /**
   * True when any of the last five line fragments preceding `matchIndex`
   * starts with `//` or `/*`.
   */
  private hasEndpointDocumentation(content: string, matchIndex: number): boolean {
    const precedingLines = content.substring(0, matchIndex).split('\n');

    return precedingLines.slice(Math.max(0, precedingLines.length - 5)).some((raw) => {
      const text = raw.trim();
      return text.startsWith('//') || text.startsWith('/*');
    });
  }
}

// YAML Parser for Kubernetes, Terraform, etc.
/**
 * Indentation/regex based YAML scanner. It does not build a YAML document
 * tree; it looks for Kubernetes `kind` declarations, Docker Compose services
 * and GitHub Actions jobs.
 */
export class YamlParser implements LanguageParser {
  extensions = ['yml', 'yaml'];
  name = 'YAML';
  supportsFrameworkDetection = true;

  /**
   * Parses one YAML file.
   *
   * @param content - YAML text.
   * @param filePath - Path copied onto every extracted element; also used to
   *   decide whether the file is a GitHub Actions workflow.
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result = createEmptyParseResult();

    // YAML parsing focuses on identifying Kubernetes resources, Terraform configs, etc.
    this.identifyKubernetesResources(content, result, filePath);
    this.identifyDockerComposeServices(content, result, filePath);
    this.identifyGitHubActions(content, result, filePath);

    return result;
  }

  /**
   * Records every `kind:` line as a type named `<kind> (<apiVersion>)`.
   *
   * The `apiVersion` is tracked per YAML document (reset at each `---`), and
   * a `kind` that appears before its document's `apiVersion` is renamed once
   * the version is seen, so key order does not affect the result.
   */
  private identifyKubernetesResources(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ) {
    let apiVersion = '';
    let awaitingVersion: Array<{ element: CodeElement; kind: string }> = [];

    content.split('\n').forEach((line, index) => {
      if (/^---(?:\s|$)/.test(line)) {
        // New document: do not carry the previous document's version over
        apiVersion = '';
        awaitingVersion = [];
        return;
      }

      const apiMatch = line.match(/^\s*apiVersion:\s*(.+)/);
      if (apiMatch) {
        apiVersion = apiMatch[1].trim();
        for (const { element, kind } of awaitingVersion) {
          element.name = `${kind} (${apiVersion})`;
        }
        awaitingVersion = [];
      }

      const kindMatch = line.match(/^\s*kind:\s*(.+)/);
      if (kindMatch) {
        const kind = kindMatch[1].trim();
        const element: CodeElement = {
          name: `${kind} (${apiVersion})`,
          type: 'type',
          filePath,
          line: index + 1,
          column: 0,
          exported: true,
          hasJSDoc: false,
        };
        result.types.push(element);
        if (!apiVersion) {
          awaitingVersion.push({ element, kind });
        }
      }
    });
  }

  /** Records each key directly under `services:` as a type named `service: <name>`. */
  private identifyDockerComposeServices(
    content: string,
    result: LanguageParseResult,
    filePath: string,
  ) {
    for (const service of collectSectionKeys(content, 'services')) {
      result.types.push({
        name: `service: ${service.name}`,
        type: 'type',
        filePath,
        line: service.line,
        column: 0,
        exported: true,
        hasJSDoc: false,
      });
    }
  }

  /**
   * For files under `.github/workflows/`, records each key directly under
   * `jobs:` as a function named `job: <name>`.
   */
  private identifyGitHubActions(content: string, result: LanguageParseResult, filePath: string) {
    // Accept Windows separators as well as POSIX ones
    if (!filePath.replace(/\\/g, '/').includes('.github/workflows/')) return;

    for (const job of collectSectionKeys(content, 'jobs')) {
      result.functions.push({
        name: `job: ${job.name}`,
        type: 'function',
        filePath,
        line: job.line,
        column: 0,
        exported: true,
        hasJSDoc: false,
      });
    }
  }
}

// Bash Parser for DevOps scripts
/** Line-based shell-script scanner that records function definitions and assignments. */
export class BashParser implements LanguageParser {
  extensions = ['sh', 'bash', 'zsh'];
  name = 'Bash';

  /**
   * Parses one shell script.
   *
   * @param content - Script text.
   * @param filePath - Path copied onto every extracted element.
   * @returns Functions, plus assignments split into `constants` (all
   *   upper-case names) and `variables` (everything else).
   */
  async parseFile(content: string, filePath: string): Promise<LanguageParseResult> {
    const result = createEmptyParseResult();
    const lines = content.split('\n');

    lines.forEach((line, index) => {
      const lineNum = index + 1;

      // Function definitions
      const funcMatch = line.match(/^\s*(?:function\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\)/);
      if (funcMatch) {
        result.functions.push({
          name: funcMatch[1],
          type: 'function',
          filePath,
          line: lineNum,
          column: 0,
          exported: true, // Bash functions are generally available in scope
          hasJSDoc: this.hasBashDocComment(lines, index),
        });
      }

      // Variable assignments, optionally prefixed by `export` / `readonly`.
      // Names of any case are matched so that lower-case assignments reach
      // `variables` instead of being dropped.
      const varMatch = line.match(
        /^\s*(?:(?:export|readonly)\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=/,
      );
      if (varMatch) {
        const varName = varMatch[1];
        const element: CodeElement = {
          name: varName,
          type: 'variable',
          filePath,
          line: lineNum,
          column: 0,
          exported: true,
          hasJSDoc: this.hasBashDocComment(lines, index),
        };

        // By convention, all upper-case names are treated as constants
        if (varName === varName.toUpperCase()) {
          result.constants.push(element);
        } else {
          result.variables.push(element);
        }
      }
    });

    return result;
  }

  /** True when the line directly above `lineIndex` starts with `#`. */
  private hasBashDocComment(lines: string[], lineIndex: number): boolean {
    // Check if previous line has a comment
    return lineIndex > 0 && lines[lineIndex - 1].trim().startsWith('#');
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/documcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.