Skip to main content
Glama
code-parser.ts (17.3 kB)
import logger from './logger';

/**
 * A package/library reference detected in a piece of source code.
 */
export interface DetectedPackage {
  /** Normalized package name (e.g. `lodash`, `@angular/core`, `github.com/user/repo`). */
  name: string;
  /** Optional version; none of the parsers in this module populate it. */
  version?: string;
  /** Language the import was found in (one of the `ProgrammingLanguage` values). */
  language: string;
  /** The source text (or a reconstruction of it) that produced this entry. */
  importStatement: string;
}

/**
 * Languages for which an import parser exists.
 */
export enum ProgrammingLanguage {
  JAVASCRIPT = 'javascript',
  TYPESCRIPT = 'typescript',
  PYTHON = 'python',
  JAVA = 'java',
  RUBY = 'ruby',
  PHP = 'php',
  GO = 'go',
  RUST = 'rust',
  UNKNOWN = 'unknown'
}

/** Node.js built-in modules that must not be reported as third-party packages. */
const NODE_BUILTIN_MODULES = new Set<string>([
  'assert', 'async_hooks', 'buffer', 'child_process', 'cluster', 'console',
  'constants', 'crypto', 'dgram', 'diagnostics_channel', 'dns', 'domain',
  'events', 'fs', 'http', 'http2', 'https', 'inspector', 'module', 'net',
  'os', 'path', 'perf_hooks', 'process', 'punycode', 'querystring',
  'readline', 'repl', 'stream', 'string_decoder', 'timers', 'tls',
  'trace_events', 'tty', 'url', 'util', 'v8', 'vm', 'wasi',
  'worker_threads', 'zlib'
]);

/** Commonly used Python standard-library modules that must not be reported as packages. */
const PYTHON_STDLIB_MODULES = new Set<string>([
  '__future__', 'abc', 'argparse', 'array', 'ast', 'asyncio', 'base64',
  'bisect', 'calendar', 'collections', 'concurrent', 'configparser',
  'contextlib', 'copy', 'csv', 'ctypes', 'dataclasses', 'datetime',
  'decimal', 'email', 'enum', 'fractions', 'functools', 'getpass', 'glob',
  'gzip', 'hashlib', 'heapq', 'html', 'http', 'importlib', 'inspect', 'io',
  'itertools', 'json', 'logging', 'math', 'multiprocessing', 'operator',
  'os', 'pathlib', 'pickle', 'platform', 'pprint', 'queue', 'random', 're',
  'secrets', 'select', 'shlex', 'shutil', 'signal', 'socket', 'sqlite3',
  'ssl', 'statistics', 'string', 'struct', 'subprocess', 'sys', 'tarfile',
  'tempfile', 'textwrap', 'threading', 'time', 'traceback', 'types',
  'typing', 'unittest', 'urllib', 'uuid', 'warnings', 'weakref', 'xml',
  'zipfile', 'zlib'
]);

/** First path segments of a Rust `use` that never name an external crate. */
const RUST_NON_CRATE_ROOTS = new Set<string>([
  'std', 'core', 'alloc', 'crate', 'self', 'super'
]);

/**
 * Determine the programming language from a file path's extension.
 *
 * @param filePath Path or file name; matching is case-insensitive.
 * @returns The mapped language, or `UNKNOWN` for an empty path or an
 *          unrecognised extension.
 */
export function detectLanguageFromExtension(filePath: string): ProgrammingLanguage {
  if (!filePath) return ProgrammingLanguage.UNKNOWN;

  const extension = filePath.toLowerCase().split('.').pop();

  switch (extension) {
    case 'js':
    case 'jsx':
    case 'mjs':
    case 'cjs':
      return ProgrammingLanguage.JAVASCRIPT;
    case 'ts':
    case 'tsx':
    case 'mts':
    case 'cts':
      return ProgrammingLanguage.TYPESCRIPT;
    case 'py':
      return ProgrammingLanguage.PYTHON;
    case 'java':
      return ProgrammingLanguage.JAVA;
    case 'rb':
      return ProgrammingLanguage.RUBY;
    case 'php':
      return ProgrammingLanguage.PHP;
    case 'go':
      return ProgrammingLanguage.GO;
    case 'rs':
      return ProgrammingLanguage.RUST;
    default:
      return ProgrammingLanguage.UNKNOWN;
  }
}

/**
 * Guess the programming language from code content.
 *
 * This is a heuristic fallback for when no (reliable) file extension is
 * available. Checks run from the most specific marker to the least specific
 * one: the generic JavaScript test (`import ` / `require(` / `export `) and
 * the generic PHP test (`use `) would otherwise shadow Go, Java, Python and
 * Rust, whose sources contain those same substrings.
 *
 * @param code Source text to inspect.
 * @returns The best guess, or `UNKNOWN` when nothing matches.
 */
export function detectLanguageFromContent(code: string): ProgrammingLanguage {
  if (!code) return ProgrammingLanguage.UNKNOWN;

  // The PHP open tag is unambiguous.
  if (code.includes('<?php')) {
    return ProgrammingLanguage.PHP;
  }

  // Go: a bare `package name` clause plus an import declaration.
  // (Java's `package a.b;` does not match: it has dots and a semicolon.)
  const hasGoPackageClause = /^\s*package\s+\w+\s*(?:\/\/.*)?$/m.test(code);
  const hasGoImport = /^\s*import\s*(?:\(|(?:[\w.]+\s+)?["'`])/m.test(code);
  if (hasGoPackageClause && hasGoImport) {
    return ProgrammingLanguage.GO;
  }

  // Java
  if (code.includes('public class ') || code.includes('import java.')) {
    return ProgrammingLanguage.JAVA;
  }

  // Python: a line-leading `def name(` cannot occur in JavaScript, so it is
  // safe to test this before the generic JavaScript check.
  const hasPythonDef = /^[ \t]*(?:async[ \t]+)?def[ \t]+\w+[ \t]*\(/m.test(code);
  const hasPythonMainGuard = code.includes('if __name__ ==');
  if (
    (hasPythonDef && hasPythonMainGuard) ||
    (code.includes('import ') && (hasPythonDef || hasPythonMainGuard))
  ) {
    return ProgrammingLanguage.PYTHON;
  }

  // JavaScript/TypeScript
  if (code.includes('import ') || code.includes('require(') || code.includes('export ')) {
    const hasTypeScriptSyntax =
      code.includes(': ') &&
      (code.includes('interface ') || code.includes(': any') || code.includes(': string'));
    return hasTypeScriptSyntax
      ? ProgrammingLanguage.TYPESCRIPT
      : ProgrammingLanguage.JAVASCRIPT;
  }

  // Rust (must precede the PHP check, which also keys on `use `)
  if (code.includes('use ') && code.includes('fn ') && code.includes('struct ')) {
    return ProgrammingLanguage.RUST;
  }

  // Ruby
  if (code.includes('require ') && (code.includes('def ') || code.includes('end'))) {
    return ProgrammingLanguage.RUBY;
  }

  // PHP without an open tag
  if (code.includes('namespace ') || code.includes('use ')) {
    return ProgrammingLanguage.PHP;
  }

  return ProgrammingLanguage.UNKNOWN;
}

/**
 * Run a regular expression over `text` and return every match.
 *
 * @param pattern Pattern to apply; without the `g` flag at most one match is returned.
 * @param text Text to search.
 */
function collectMatches(pattern: RegExp, text: string): RegExpExecArray[] {
  const matches: RegExpExecArray[] = [];

  if (!pattern.global) {
    const single = pattern.exec(text);
    if (single !== null) matches.push(single);
    return matches;
  }

  pattern.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(text)) !== null) {
    matches.push(match);
    // A zero-length match would never advance lastIndex on its own.
    if (match[0].length === 0) pattern.lastIndex++;
  }
  return matches;
}

/**
 * Reduce a JavaScript module specifier to its root package name.
 *
 * `lodash/fp` becomes `lodash`; `@angular/core/testing` becomes `@angular/core`.
 *
 * @returns The package name, or `null` for local file paths.
 */
function extractNodePackageName(specifier: string): string | null {
  if (isRelativePath(specifier)) return null;

  // Scoped packages keep their scope plus the package segment.
  const segmentCount = specifier.startsWith('@') ? 2 : 1;
  const name = specifier.split('/').slice(0, segmentCount).join('/');
  return name === '' ? null : name;
}

/**
 * Parse JavaScript/TypeScript imports.
 *
 * Detects static `import` declarations (default, named, namespace, combined
 * default + named, `import type`, and side-effect imports), CommonJS
 * `require()` calls and dynamic `import()` expressions. Local file specifiers
 * are ignored.
 *
 * @param code Source text.
 * @param language Language tag to put on every result.
 */
function parseJavaScriptImports(code: string, language: ProgrammingLanguage): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    // Remove comments first so commented-out imports are not reported.
    // NOTE: this is purely textual, so a `//` inside a string literal on the
    // same line is stripped as well.
    const codeWithoutComments = code
      .replace(/\/\/.*$/gm, '')
      .replace(/\/\*[\s\S]*?\*\//g, '');

    const patterns: RegExp[] = [
      // import X from 'p' | import { X } from 'p' | import * as X from 'p'
      // import X, { Y } from 'p' | import type { X } from 'p' | import 'p'
      /\bimport\s+(?:type\s+)?(?:[\w$\s,{}*]+?\bfrom\s*)?['"]([^'"]+)['"]/g,
      // require('p'), optionally preceded by `const|let|var <binding> =`.
      // The binding is limited to an identifier or a single destructuring
      // pattern so the match cannot reach back into earlier statements.
      /(?:\b(?:const|let|var)\s+(?:[\w$]+|\{[^{}]*\}|\[[^\[\]]*\])\s*=\s*)?\brequire\s*\(\s*['"]([^'"]+)['"]\s*\)/g,
      // import('p')
      /import\s*\(\s*['"]([^'"]+)['"]\s*\)/g
    ];

    for (const pattern of patterns) {
      for (const match of collectMatches(pattern, codeWithoutComments)) {
        const packageName = extractNodePackageName(match[1]);
        if (packageName === null) continue;

        packages.push({
          name: packageName,
          language: language,
          importStatement: match[0]
        });
      }
    }
  } catch (error) {
    logger.error('Error parsing JavaScript/TypeScript imports:', error);
  }

  return packages;
}

/**
 * Parse Python imports.
 *
 * Handles `import a`, `import a.b as c`, comma-separated `import a, b` and
 * `from a.b import c` (including parenthesised name lists). Relative imports
 * (`from . import x`, `from .mod import y`) are skipped because they refer
 * to the current package rather than to an installable one.
 */
function parsePythonImports(code: string): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    // Remove comments first
    const codeWithoutComments = code.replace(/#.*/g, '');

    // Match: import package [as alias][, package [as alias]]...
    const simpleImportRegex =
      /^[ \t]*import[ \t]+([\w.]+(?:[ \t]+as[ \t]+\w+)?(?:[ \t]*,[ \t]*[\w.]+(?:[ \t]+as[ \t]+\w+)?)*)/gm;

    for (const match of collectMatches(simpleImportRegex, codeWithoutComments)) {
      for (const rawItem of match[1].split(',')) {
        const item = rawItem.trim();
        // Drop the optional "as alias" part, then keep the top-level package.
        const modulePath = item.split(/\s+/)[0];
        const packageName = modulePath.split('.')[0];
        if (!packageName) continue;

        packages.push({
          name: packageName,
          language: ProgrammingLanguage.PYTHON,
          importStatement: `import ${item}`
        });
      }
    }

    // Match: from package[.subpackage] import ...
    // The match is confined to a single line; a pattern that can cross
    // newlines would swallow the import statements that follow.
    const fromImportRegex = /^[ \t]*from[ \t]+([\w.]+)[ \t]+import\b[^\n]*/gm;

    for (const match of collectMatches(fromImportRegex, codeWithoutComments)) {
      const importPath = match[1].trim();
      if (importPath.startsWith('.')) continue; // relative import

      const packageName = importPath.split('.')[0];
      if (!packageName) continue;

      packages.push({
        name: packageName,
        language: ProgrammingLanguage.PYTHON,
        importStatement: match[0].trim()
      });
    }
  } catch (error) {
    logger.error('Error parsing Python imports:', error);
  }

  return packages;
}

/**
 * Parse Java imports.
 *
 * Handles regular, wildcard (`import a.b.*;`) and static imports. The first
 * two segments of the imported name are used as the "library" name.
 */
function parseJavaImports(code: string): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    // Match: import com.example.Class; | import com.example.*; | import static a.b.C.d;
    const importRegex = /^\s*import\s+(?:static\s+)?([\w.]+?)(?:\.\*)?\s*;/gm;

    for (const match of collectMatches(importRegex, code)) {
      const parts = match[1].trim().split('.');
      if (parts.length < 2) continue;

      packages.push({
        name: `${parts[0]}.${parts[1]}`,
        language: ProgrammingLanguage.JAVA,
        importStatement: match[0].trim()
      });
    }
  } catch (error) {
    logger.error('Error parsing Java imports:', error);
  }

  return packages;
}

/**
 * Parse Ruby requires and `gem` declarations.
 *
 * `require_relative` always loads a file relative to the requiring file, so
 * it is never reported as a package; neither are `require`s of local paths.
 */
function parseRubyImports(code: string): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    // Match: require 'package' | require_relative 'file'
    const requireRegex = /^\s*(require_relative|require)\s+['"]([^'"]+)['"]/gm;

    for (const match of collectMatches(requireRegex, code)) {
      const keyword = match[1];
      const packagePath = match[2].trim();
      if (keyword === 'require_relative' || isRelativePath(packagePath)) continue;

      packages.push({
        name: packagePath,
        language: ProgrammingLanguage.RUBY,
        importStatement: match[0].trim()
      });
    }

    // Match: gem 'package'
    const gemRegex = /^\s*gem\s+['"]([^'"]+)['"]/gm;

    for (const match of collectMatches(gemRegex, code)) {
      const packageName = match[1].trim().split(/\s+/)[0]; // Handle version constraints

      packages.push({
        name: packageName,
        language: ProgrammingLanguage.RUBY,
        importStatement: match[0].trim()
      });
    }
  } catch (error) {
    logger.error('Error parsing Ruby imports:', error);
  }

  return packages;
}

/**
 * Parse PHP imports.
 *
 * Handles `use Vendor\Name;`, aliases, a leading backslash, `use function` /
 * `use const`, and group use (`use Vendor\{A, B};`). The top-level namespace
 * is reported. Including `vendor/autoload.php` is reported as `composer`.
 */
function parsePHPImports(code: string): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    const useRegex =
      /^\s*use\s+(?:(?:function|const)\s+)?\\?([\w\\]+)(?:\{[^}]*\})?(?:\s+as\s+\w+)?\s*;/gm;

    for (const match of collectMatches(useRegex, code)) {
      // Get the top-level namespace
      const packageName = match[1].trim().split('\\')[0];
      if (!packageName) continue;

      packages.push({
        name: packageName,
        language: ProgrammingLanguage.PHP,
        importStatement: match[0].trim()
      });
    }

    // Any require/include of vendor/autoload.php indicates Composer
    // dependencies (e.g. `require_once __DIR__ . '/vendor/autoload.php';`).
    const composerAutoload = /\b(?:require|include)(?:_once)?\b[^;\n]*vendor\/autoload\.php/;
    if (composerAutoload.test(code)) {
      packages.push({
        name: 'composer',
        language: ProgrammingLanguage.PHP,
        importStatement: "require 'vendor/autoload.php'"
      });
    }
  } catch (error) {
    logger.error('Error parsing PHP imports:', error);
  }

  return packages;
}

/**
 * Reduce a Go import path to at most its first three segments, e.g.
 * `github.com/user/repo/sub/pkg` becomes `github.com/user/repo`.
 * Paths with fewer segments are returned unchanged.
 */
function goPackageName(importPath: string): string {
  return importPath.split('/').slice(0, 3).join('/');
}

/**
 * Parse Go imports.
 *
 * Handles single-line `import "path"` declarations (optionally aliased) and
 * every `import ( ... )` block in the file.
 */
function parseGoImports(code: string): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    // Match single imports: import "package" | import alias "package"
    const singleImportRegex = /^\s*import\s+(?:[\w.]+\s+)?["']([^"']+)["']/gm;

    for (const match of collectMatches(singleImportRegex, code)) {
      const importPath = match[1].trim();

      packages.push({
        name: goPackageName(importPath),
        language: ProgrammingLanguage.GO,
        importStatement: match[0].trim()
      });
    }

    // Match block imports: import ( "package1" "package2" )
    const blockImportRegex = /\bimport\s*\(([\s\S]*?)\)/g;
    const quotedPathRegex = /["']([^"']+)["']/g;

    for (const block of collectMatches(blockImportRegex, code)) {
      // Drop line comments so quoted text inside them is not taken for a path.
      const blockContent = block[1].replace(/\/\/.*$/gm, '');

      for (const match of collectMatches(quotedPathRegex, blockContent)) {
        const importPath = match[1].trim();

        packages.push({
          name: goPackageName(importPath),
          language: ProgrammingLanguage.GO,
          importStatement: `import "${importPath}"`
        });
      }
    }
  } catch (error) {
    logger.error('Error parsing Go imports:', error);
  }

  return packages;
}

/**
 * Parse Rust imports.
 *
 * Reports the root crate of every `use` declaration (including `pub use`,
 * glob, aliased and brace-grouped forms). Roots that never name an external
 * crate (`std`, `core`, `alloc`, `crate`, `self`, `super`) are skipped.
 * Additionally scans `//` comments mentioning `dependencies` or `Cargo.toml`
 * for a crate name.
 */
function parseRustImports(code: string): DetectedPackage[] {
  const packages: DetectedPackage[] = [];

  try {
    // Match: [pub] use [::]crate...;  (only the first path segment is captured)
    const useRegex = /^\s*(?:pub(?:\s*\([^)]*\))?\s+)?use\s+(?:::)?(\w+)[^;]*;/gm;

    for (const match of collectMatches(useRegex, code)) {
      const packageName = match[1];
      if (RUST_NON_CRATE_ROOTS.has(packageName)) continue;

      packages.push({
        name: packageName,
        language: ProgrammingLanguage.RUST,
        importStatement: match[0].trim()
      });
    }

    // Look for Cargo.toml dependencies in comments
    const cargoRegex = /^\s*\/\/\s*(?:dependencies|Cargo\.toml).*?[\s'"]([\w-]+)[\s'"]/gm;

    for (const match of collectMatches(cargoRegex, code)) {
      packages.push({
        name: match[1].trim(),
        language: ProgrammingLanguage.RUST,
        importStatement: match[0]
      });
    }
  } catch (error) {
    logger.error('Error parsing Rust imports:', error);
  }

  return packages;
}

/**
 * Check whether a module specifier is a local file path rather than a
 * package name: `.`, `..`, anything starting with `./` or `../`, and
 * absolute paths starting with `/`.
 */
function isRelativePath(path: string): boolean {
  return (
    path === '.' ||
    path === '..' ||
    path.startsWith('./') ||
    path.startsWith('../') ||
    path.startsWith('/')
  );
}

/**
 * Parse code to extract import statements and detect packages.
 *
 * The language is taken from `filename`'s extension when that is recognised,
 * otherwise it is guessed from the content.
 *
 * @param code Source text.
 * @param filename Optional file name or path used for language detection.
 * @returns Every detected package, unfiltered and possibly with duplicates;
 *          an empty array for empty input, an unknown language or on error.
 */
export function parseCode(code: string, filename?: string): DetectedPackage[] {
  if (!code) return [];

  try {
    // Determine the programming language
    let language = ProgrammingLanguage.UNKNOWN;

    if (filename) {
      language = detectLanguageFromExtension(filename);
    }

    if (language === ProgrammingLanguage.UNKNOWN) {
      language = detectLanguageFromContent(code);
    }

    // Parse imports based on the detected language
    switch (language) {
      case ProgrammingLanguage.JAVASCRIPT:
      case ProgrammingLanguage.TYPESCRIPT:
        return parseJavaScriptImports(code, language);
      case ProgrammingLanguage.PYTHON:
        return parsePythonImports(code);
      case ProgrammingLanguage.JAVA:
        return parseJavaImports(code);
      case ProgrammingLanguage.RUBY:
        return parseRubyImports(code);
      case ProgrammingLanguage.PHP:
        return parsePHPImports(code);
      case ProgrammingLanguage.GO:
        return parseGoImports(code);
      case ProgrammingLanguage.RUST:
        return parseRustImports(code);
      default:
        return [];
    }
  } catch (error) {
    logger.error('Error parsing code:', error);
    return [];
  }
}

/**
 * Filter out standard library packages and de-duplicate the rest.
 *
 * Packages are considered equal when both name and language match; the
 * first occurrence wins and input order is preserved.
 */
export function normalizePackages(packages: DetectedPackage[]): DetectedPackage[] {
  const normalized = new Map<string, DetectedPackage>();

  for (const pkg of packages) {
    // Skip standard library packages for certain languages
    if (shouldSkipPackage(pkg)) continue;

    // Create a unique key for the package
    const key = `${pkg.name}-${pkg.language}`;

    // Only add if not already in the map
    if (!normalized.has(key)) {
      normalized.set(key, pkg);
    }
  }

  return Array.from(normalized.values());
}

/**
 * Check if a package should be skipped because it belongs to the language's
 * standard library.
 */
function shouldSkipPackage(pkg: DetectedPackage): boolean {
  const { name, language } = pkg;

  switch (language) {
    case ProgrammingLanguage.JAVASCRIPT:
    case ProgrammingLanguage.TYPESCRIPT:
      // `node:`-prefixed specifiers are built-ins by definition.
      return name.startsWith('node:') || NODE_BUILTIN_MODULES.has(name);

    case ProgrammingLanguage.PYTHON:
      return PYTHON_STDLIB_MODULES.has(name);

    case ProgrammingLanguage.JAVA:
      // The whole `java.*` namespace belongs to the JDK.
      return name.startsWith('java.');

    case ProgrammingLanguage.GO:
      // Third-party import paths start with a host name (`github.com/...`);
      // standard library paths such as `fmt` or `net/http` have no dot in
      // their first segment.
      return !name.split('/')[0].includes('.');

    default:
      return false;
  }
}

/**
 * Get the names of the third-party packages used by a piece of code.
 *
 * @param code Source text.
 * @param filename Optional file name or path used for language detection.
 * @returns De-duplicated package names with standard-library entries removed.
 */
export function getPackagesFromCode(code: string, filename?: string): string[] {
  const detectedPackages = parseCode(code, filename);
  const normalizedPackages = normalizePackages(detectedPackages);

  return normalizedPackages.map(pkg => pkg.name);
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/visheshd/docmcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.