Document Automation MCP Server

framework_detector.py•31.3 KiB

""" Enhanced framework detection for various programming languages and frameworks. This module provides comprehensive detection of frameworks, libraries, and technology stacks across different programming languages. """ import re import json import logging from typing import Dict, List, Any, Optional, Set, Tuple from dataclasses import dataclass, field from pathlib import Path from collections import defaultdict logger = logging.getLogger(__name__) @dataclass class FrameworkInfo: """Information about a detected framework.""" name: str category: str # web, testing, orm, ui, etc. confidence: float # 0.0 to 1.0 version: Optional[str] = None evidence: List[str] = field(default_factory=list) files: List[str] = field(default_factory=list) metadata: Dict[str, Any] = field(default_factory=dict) @dataclass class TechnologyStack: """Complete technology stack information.""" language: str frameworks: List[FrameworkInfo] architecture_patterns: List[str] = field(default_factory=list) deployment_targets: List[str] = field(default_factory=list) database_systems: List[str] = field(default_factory=list) testing_frameworks: List[str] = field(default_factory=list) build_tools: List[str] = field(default_factory=list) def to_dict(self) -> Dict[str, Any]: """Convert to dictionary representation.""" return { 'language': self.language, 'frameworks': [ { 'name': f.name, 'category': f.category, 'confidence': f.confidence, 'version': f.version, 'evidence': f.evidence, 'files': f.files, 'metadata': f.metadata } for f in self.frameworks ], 'architecture_patterns': self.architecture_patterns, 'deployment_targets': self.deployment_targets, 'database_systems': self.database_systems, 'testing_frameworks': self.testing_frameworks, 'build_tools': self.build_tools } class FrameworkDetector: """ Enhanced framework detector supporting multiple languages and ecosystems. 
Detects frameworks based on: - Package dependencies - Import statements - Configuration files - File structures and naming patterns - Code patterns and signatures """ def __init__(self): self.detection_rules = self._load_detection_rules() self.pattern_cache: Dict[str, re.Pattern] = {} def _load_detection_rules(self) -> Dict[str, Any]: """Load framework detection rules.""" return { 'python': { 'web_frameworks': { 'django': { 'imports': ['django', 'django.conf', 'django.db'], 'files': ['manage.py', 'settings.py', 'urls.py'], 'patterns': [r'from django', r'import django'], 'config_files': ['requirements.txt', 'Pipfile'], 'confidence_base': 0.9 }, 'flask': { 'imports': ['flask', 'Flask'], 'patterns': [r'from flask import', r'app = Flask\('], 'config_files': ['requirements.txt'], 'confidence_base': 0.85 }, 'fastapi': { 'imports': ['fastapi', 'FastAPI'], 'patterns': [r'from fastapi import', r'app = FastAPI\('], 'config_files': ['requirements.txt'], 'confidence_base': 0.85 }, 'pyramid': { 'imports': ['pyramid'], 'patterns': [r'from pyramid'], 'confidence_base': 0.8 }, 'tornado': { 'imports': ['tornado'], 'patterns': [r'import tornado', r'from tornado'], 'confidence_base': 0.8 } }, 'testing_frameworks': { 'pytest': { 'imports': ['pytest'], 'files': ['pytest.ini', 'conftest.py'], 'patterns': [r'def test_', r'import pytest'], 'confidence_base': 0.9 }, 'unittest': { 'imports': ['unittest'], 'patterns': [r'import unittest', r'class.*TestCase'], 'confidence_base': 0.8 }, 'nose': { 'imports': ['nose'], 'patterns': [r'import nose'], 'confidence_base': 0.7 } }, 'data_science': { 'pandas': { 'imports': ['pandas'], 'patterns': [r'import pandas', r'pd\.'], 'confidence_base': 0.9 }, 'numpy': { 'imports': ['numpy'], 'patterns': [r'import numpy', r'np\.'], 'confidence_base': 0.9 }, 'scikit-learn': { 'imports': ['sklearn'], 'patterns': [r'from sklearn'], 'confidence_base': 0.9 }, 'tensorflow': { 'imports': ['tensorflow'], 'patterns': [r'import tensorflow', r'tf\.'], 'confidence_base': 
0.9 }, 'pytorch': { 'imports': ['torch'], 'patterns': [r'import torch', r'torch\.'], 'confidence_base': 0.9 } }, 'orm': { 'sqlalchemy': { 'imports': ['sqlalchemy'], 'patterns': [r'from sqlalchemy'], 'confidence_base': 0.85 }, 'peewee': { 'imports': ['peewee'], 'patterns': [r'from peewee'], 'confidence_base': 0.8 } } }, 'javascript': { 'frontend_frameworks': { 'react': { 'dependencies': ['react', 'react-dom'], 'imports': ['React'], 'files': ['.jsx', '.tsx'], 'patterns': [r'import React', r'from [\'"]react[\'"]', r'<\w+.*>'], 'confidence_base': 0.9 }, 'vue': { 'dependencies': ['vue'], 'imports': ['Vue'], 'files': ['.vue'], 'patterns': [r'import Vue', r'new Vue\(', r'<template>'], 'confidence_base': 0.9 }, 'angular': { 'dependencies': ['@angular/core'], 'imports': ['@angular'], 'patterns': [r'import.*@angular', r'@Component', r'@Injectable'], 'files': ['angular.json'], 'confidence_base': 0.9 }, 'svelte': { 'dependencies': ['svelte'], 'files': ['.svelte'], 'patterns': [r'<script.*svelte'], 'confidence_base': 0.85 } }, 'backend_frameworks': { 'express': { 'dependencies': ['express'], 'patterns': [r'require\([\'"]express[\'"]\)', r'app\.get\(', r'app\.post\('], 'confidence_base': 0.9 }, 'koa': { 'dependencies': ['koa'], 'patterns': [r'require\([\'"]koa[\'"]\)', r'const Koa'], 'confidence_base': 0.85 }, 'fastify': { 'dependencies': ['fastify'], 'patterns': [r'require\([\'"]fastify[\'"]\)'], 'confidence_base': 0.8 }, 'nestjs': { 'dependencies': ['@nestjs/core'], 'patterns': [r'import.*@nestjs', r'@Controller', r'@Module'], 'confidence_base': 0.9 } }, 'testing_frameworks': { 'jest': { 'dependencies': ['jest'], 'files': ['jest.config.js'], 'patterns': [r'describe\(', r'it\(', r'test\('], 'confidence_base': 0.9 }, 'mocha': { 'dependencies': ['mocha'], 'patterns': [r'describe\(', r'it\('], 'confidence_base': 0.8 }, 'cypress': { 'dependencies': ['cypress'], 'patterns': [r'cy\.', r'cypress'], 'confidence_base': 0.9 } }, 'build_tools': { 'webpack': { 'dependencies': ['webpack'], 
'files': ['webpack.config.js'], 'confidence_base': 0.9 }, 'vite': { 'dependencies': ['vite'], 'files': ['vite.config.js'], 'confidence_base': 0.9 }, 'rollup': { 'dependencies': ['rollup'], 'files': ['rollup.config.js'], 'confidence_base': 0.8 } } }, 'java': { 'frameworks': { 'spring': { 'imports': ['org.springframework'], 'annotations': ['@SpringBootApplication', '@Controller', '@Service'], 'files': ['pom.xml', 'application.properties'], 'confidence_base': 0.9 }, 'hibernate': { 'imports': ['org.hibernate'], 'annotations': ['@Entity', '@Table'], 'confidence_base': 0.85 } } } } def detect_frameworks( self, project_path: str, file_contents: Dict[str, str], language: str ) -> TechnologyStack: """ Detect frameworks and technology stack for a project. Args: project_path: Path to project root file_contents: Dictionary of file_path -> content language: Primary programming language Returns: TechnologyStack with detected frameworks and tools """ detected_frameworks = [] # Get language-specific rules lang_rules = self.detection_rules.get(language.lower(), {}) # Analyze package dependencies dependencies = self._extract_dependencies(project_path, file_contents, language) # Analyze imports and code patterns imports = self._extract_imports(file_contents, language) # Analyze file structure file_structure = self._analyze_file_structure(file_contents) # Detect frameworks for each category for category, frameworks in lang_rules.items(): for framework_name, rules in frameworks.items(): framework_info = self._detect_single_framework( framework_name, category, rules, dependencies, imports, file_structure, file_contents ) if framework_info and framework_info.confidence > 0.3: detected_frameworks.append(framework_info) # Additional analysis architecture_patterns = self._detect_architecture_patterns(file_contents, language) deployment_targets = self._detect_deployment_targets(file_contents, dependencies) database_systems = self._detect_database_systems(dependencies, imports) 
testing_frameworks = [f.name for f in detected_frameworks if f.category == 'testing_frameworks'] build_tools = [f.name for f in detected_frameworks if f.category == 'build_tools'] return TechnologyStack( language=language, frameworks=detected_frameworks, architecture_patterns=architecture_patterns, deployment_targets=deployment_targets, database_systems=database_systems, testing_frameworks=testing_frameworks, build_tools=build_tools ) def _detect_single_framework( self, framework_name: str, category: str, rules: Dict[str, Any], dependencies: Set[str], imports: Set[str], file_structure: Dict[str, Any], file_contents: Dict[str, str] ) -> Optional[FrameworkInfo]: """Detect a single framework based on rules.""" evidence = [] confidence = 0.0 files_found = [] # Check dependencies framework_deps = set(rules.get('dependencies', [])) if framework_deps.intersection(dependencies): evidence.append("Found in dependencies") confidence += 0.4 # Check imports framework_imports = set(rules.get('imports', [])) if framework_imports.intersection(imports): evidence.append("Found in imports") confidence += 0.3 # Check specific files required_files = rules.get('files', []) for req_file in required_files: if any(req_file in file_path for file_path in file_structure['files']): evidence.append(f"Found {req_file}") confidence += 0.2 files_found.append(req_file) # Check code patterns patterns = rules.get('patterns', []) for pattern in patterns: compiled_pattern = self._get_compiled_pattern(pattern) for file_path, content in file_contents.items(): if compiled_pattern.search(content): evidence.append(f"Pattern '{pattern}' found in {Path(file_path).name}") confidence += 0.1 break # Check annotations (for Java, etc.) 
annotations = rules.get('annotations', []) for annotation in annotations: pattern = re.compile(rf'{re.escape(annotation)}', re.MULTILINE) for file_path, content in file_contents.items(): if pattern.search(content): evidence.append(f"Annotation {annotation} found") confidence += 0.2 break # Apply base confidence base_confidence = rules.get('confidence_base', 0.5) confidence = min(1.0, confidence * base_confidence) if confidence > 0: # Try to extract version information version = self._extract_framework_version( framework_name, dependencies, file_contents ) return FrameworkInfo( name=framework_name, category=category, confidence=confidence, version=version, evidence=evidence, files=files_found, metadata={ 'detection_method': 'rules_based', 'matched_rules': len(evidence) } ) return None def _extract_dependencies( self, project_path: str, file_contents: Dict[str, str], language: str ) -> Set[str]: """Extract project dependencies from configuration files.""" dependencies = set() if language.lower() == 'python': # requirements.txt for file_path, content in file_contents.items(): if 'requirements' in Path(file_path).name.lower(): for line in content.split('\n'): line = line.strip() if line and not line.startswith('#'): # Extract package name pkg = re.split(r'[>=<!\[;]', line)[0].strip() if pkg: dependencies.add(pkg) # setup.py, pyproject.toml for file_path, content in file_contents.items(): if Path(file_path).name in ['setup.py', 'pyproject.toml']: # Extract dependencies from setup.py or pyproject.toml deps = self._extract_python_setup_deps(content) dependencies.update(deps) elif language.lower() in ['javascript', 'typescript']: # package.json for file_path, content in file_contents.items(): if Path(file_path).name == 'package.json': try: package_data = json.loads(content) deps = package_data.get('dependencies', {}) dev_deps = package_data.get('devDependencies', {}) dependencies.update(deps.keys()) dependencies.update(dev_deps.keys()) except json.JSONDecodeError: pass 
elif language.lower() == 'java': # pom.xml, build.gradle for file_path, content in file_contents.items(): file_name = Path(file_path).name if file_name == 'pom.xml': deps = self._extract_maven_deps(content) dependencies.update(deps) elif file_name in ['build.gradle', 'build.gradle.kts']: deps = self._extract_gradle_deps(content) dependencies.update(deps) return dependencies def _extract_imports(self, file_contents: Dict[str, str], language: str) -> Set[str]: """Extract import statements from source files.""" imports = set() for file_path, content in file_contents.items(): file_ext = Path(file_path).suffix.lower() if language.lower() == 'python' and file_ext == '.py': # Python imports import_patterns = [ r'^\s*import\s+([^\s#]+)', r'^\s*from\s+([^\s#]+)\s+import' ] for pattern in import_patterns: matches = re.findall(pattern, content, re.MULTILINE) for match in matches: # Get top-level module top_module = match.split('.')[0] imports.add(top_module) elif language.lower() in ['javascript', 'typescript'] and file_ext in ['.js', '.ts', '.jsx', '.tsx']: # JavaScript/TypeScript imports import_patterns = [ r'import.*from\s+[\'"]([^\'"]+)[\'"]', r'require\([\'"]([^\'"]+)[\'"]\)' ] for pattern in import_patterns: matches = re.findall(pattern, content) for match in matches: # Get package name (handle scoped packages) if match.startswith('@'): pkg = '/'.join(match.split('/')[:2]) else: pkg = match.split('/')[0] imports.add(pkg) elif language.lower() == 'java' and file_ext == '.java': # Java imports pattern = r'^\s*import\s+([^;]+);' matches = re.findall(pattern, content, re.MULTILINE) for match in matches: # Get package root parts = match.split('.') if len(parts) >= 2: imports.add(f"{parts[0]}.{parts[1]}") return imports def _analyze_file_structure(self, file_contents: Dict[str, str]) -> Dict[str, Any]: """Analyze project file structure.""" files = list(file_contents.keys()) structure = { 'files': files, 'directories': set(), 'extensions': defaultdict(int), 'config_files': [], 
'special_files': [] } for file_path in files: path = Path(file_path) # Count directories for parent in path.parents: if str(parent) != '.': structure['directories'].add(str(parent)) # Count extensions if path.suffix: structure['extensions'][path.suffix.lower()] += 1 # Identify special files file_name = path.name.lower() config_patterns = [ 'config', 'settings', 'env', 'properties', '.json', '.yml', '.yaml', '.toml', '.ini' ] if any(pattern in file_name for pattern in config_patterns): structure['config_files'].append(file_path) special_files = [ 'dockerfile', 'makefile', 'readme', 'license', 'gitignore', 'requirements.txt', 'package.json' ] if any(special in file_name for special in special_files): structure['special_files'].append(file_path) return structure def _detect_architecture_patterns( self, file_contents: Dict[str, str], language: str ) -> List[str]: """Detect architectural patterns from code structure.""" patterns = [] # Analyze directory structure directories = set() for file_path in file_contents.keys(): path = Path(file_path) for parent in path.parents: if str(parent) != '.': directories.add(str(parent).lower()) # Common patterns if any(d in directories for d in ['models', 'views', 'controllers']): patterns.append('MVC') if any(d in directories for d in ['components', 'containers']): patterns.append('Component-based') if any(d in directories for d in ['services', 'repositories']): patterns.append('Service Layer') if any(d in directories for d in ['domain', 'application', 'infrastructure']): patterns.append('Clean Architecture') if 'microservices' in str(directories) or len([d for d in directories if 'service' in d]) > 2: patterns.append('Microservices') return patterns def _detect_deployment_targets( self, file_contents: Dict[str, str], dependencies: Set[str] ) -> List[str]: """Detect deployment targets and platforms.""" targets = [] # Check for deployment files for file_path in file_contents.keys(): file_name = Path(file_path).name.lower() if 
file_name == 'dockerfile': targets.append('Docker') elif file_name in ['docker-compose.yml', 'docker-compose.yaml']: targets.append('Docker Compose') elif file_name in ['serverless.yml', 'serverless.yaml']: targets.append('Serverless') elif file_name == 'vercel.json': targets.append('Vercel') elif file_name in ['netlify.toml', '_redirects']: targets.append('Netlify') elif '.github' in file_path and 'workflows' in file_path: targets.append('GitHub Actions') elif file_name in ['azure-pipelines.yml', 'azure-pipelines.yaml']: targets.append('Azure DevOps') elif file_name == 'cloudbuild.yaml': targets.append('Google Cloud Build') # Check dependencies for platform-specific packages cloud_deps = { 'boto3': 'AWS', 'google-cloud': 'Google Cloud', 'azure': 'Azure', 'kubernetes': 'Kubernetes', 'heroku': 'Heroku' } for dep, platform in cloud_deps.items(): if any(dep in d for d in dependencies): targets.append(platform) return targets def _detect_database_systems( self, dependencies: Set[str], imports: Set[str] ) -> List[str]: """Detect database systems in use.""" databases = [] db_patterns = { 'postgresql': ['psycopg2', 'pg', 'postgres'], 'mysql': ['mysql', 'pymysql', 'mysqlclient'], 'sqlite': ['sqlite3', 'sqlite'], 'mongodb': ['pymongo', 'mongoose', 'mongodb'], 'redis': ['redis', 'ioredis'], 'elasticsearch': ['elasticsearch'], 'cassandra': ['cassandra'], 'neo4j': ['neo4j'] } all_deps = dependencies.union(imports) for db_name, patterns in db_patterns.items(): if any(pattern in dep.lower() for dep in all_deps for pattern in patterns): databases.append(db_name) return databases def _get_compiled_pattern(self, pattern: str) -> re.Pattern: """Get compiled regex pattern with caching.""" if pattern not in self.pattern_cache: self.pattern_cache[pattern] = re.compile(pattern, re.MULTILINE | re.IGNORECASE) return self.pattern_cache[pattern] def _extract_framework_version( self, framework_name: str, dependencies: Set[str], file_contents: Dict[str, str] ) -> Optional[str]: """Try to 
extract framework version.""" # This is a simplified version extraction # In practice, you'd need to parse package files more carefully for file_path, content in file_contents.items(): file_name = Path(file_path).name if file_name == 'package.json': try: data = json.loads(content) deps = {**data.get('dependencies', {}), **data.get('devDependencies', {})} if framework_name in deps: return deps[framework_name].strip('^~>=<') except: pass return None def _extract_python_setup_deps(self, content: str) -> Set[str]: """Extract dependencies from setup.py or pyproject.toml.""" deps = set() # This is a simplified extraction # Real implementation would need proper parsing # Look for install_requires in setup.py install_requires_match = re.search(r'install_requires\s*=\s*\[(.*?)\]', content, re.DOTALL) if install_requires_match: deps_str = install_requires_match.group(1) for line in deps_str.split(','): line = line.strip().strip('"\'') if line: pkg = re.split(r'[>=<!\[]', line)[0].strip() if pkg: deps.add(pkg) return deps def _extract_maven_deps(self, content: str) -> Set[str]: """Extract dependencies from pom.xml.""" deps = set() # Find dependency blocks pattern = r'<dependency>.*?<groupId>(.*?)</groupId>.*?<artifactId>(.*?)</artifactId>.*?</dependency>' matches = re.findall(pattern, content, re.DOTALL) for group_id, artifact_id in matches: deps.add(f"{group_id.strip()}:{artifact_id.strip()}") return deps def _extract_gradle_deps(self, content: str) -> Set[str]: """Extract dependencies from build.gradle.""" deps = set() # Look for implementation, compile, etc. 
patterns = [ r"(?:implementation|compile|api|testImplementation)\s+['\"]([^'\"]+)['\"]", r"(?:implementation|compile|api|testImplementation)\s+group:\s*['\"]([^'\"]+)['\"],\s*name:\s*['\"]([^'\"]+)['\"]" ] for pattern in patterns: matches = re.findall(pattern, content) for match in matches: if isinstance(match, tuple): if len(match) == 2: deps.add(f"{match[0]}:{match[1]}") else: deps.add(match) return deps def get_framework_recommendations( self, detected_stack: TechnologyStack ) -> List[Dict[str, Any]]: """Get recommendations based on detected stack.""" recommendations = [] # Missing testing framework if not detected_stack.testing_frameworks: if detected_stack.language.lower() == 'python': recommendations.append({ 'type': 'missing_tool', 'category': 'testing', 'suggestion': 'Consider adding pytest for testing', 'priority': 'medium' }) elif detected_stack.language.lower() in ['javascript', 'typescript']: recommendations.append({ 'type': 'missing_tool', 'category': 'testing', 'suggestion': 'Consider adding Jest or Mocha for testing', 'priority': 'medium' }) # Missing build tools for frontend projects frontend_frameworks = ['react', 'vue', 'angular'] has_frontend = any( f.name.lower() in frontend_frameworks for f in detected_stack.frameworks ) if has_frontend and not detected_stack.build_tools: recommendations.append({ 'type': 'missing_tool', 'category': 'build', 'suggestion': 'Consider adding a build tool like Webpack or Vite', 'priority': 'high' }) # Security recommendations web_frameworks = ['django', 'flask', 'express', 'fastapi'] has_web = any( f.name.lower() in web_frameworks for f in detected_stack.frameworks ) if has_web: recommendations.append({ 'type': 'security', 'category': 'web_security', 'suggestion': 'Ensure proper security headers and input validation', 'priority': 'high' }) return recommendations

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vedantparmar12/Document-Automation'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

framework_detector.py•31.3 KiB