Crawl4AI+SearXNG MCP Server

knowledge_graph.py•21.5 KiB

"""
Knowledge graph tools for MCP server.

This module contains Neo4j knowledge graph MCP tools including:
- query_knowledge_graph: Query and explore the knowledge graph
- parse_github_repository: Parse GitHub repos into Neo4j
- parse_repository_branch: Parse specific branches
- get_repository_info: Get repo metadata
- update_parsed_repository: Update already parsed repos
- parse_local_repository: Parse local Git repositories
"""

import json
import logging
from pathlib import Path
from typing import TYPE_CHECKING

from fastmcp import Context

if TYPE_CHECKING:
    from fastmcp import FastMCP

from src.core import MCPToolError, track_request
from src.core.context import get_app_context
from src.core.exceptions import (
    DatabaseError,
    KnowledgeGraphError,
    ValidationError,
)
from src.knowledge_graph import (
    query_knowledge_graph,
)
from src.knowledge_graph.repository import (
    parse_github_repository as parse_github_repository_impl,
)
from src.utils.validation import validate_github_url

logger = logging.getLogger(__name__)


def _error_json(error: str, **fields: object) -> str:
    """
    Build the JSON failure payload returned by the tools in this module.

    Args:
        error: Human-readable error message stored under the "error" key.
        **fields: Extra entries (e.g. ``local_path``) placed between
            "success" and "error", in the order given.

    Returns:
        JSON string (indent=2) with "success" set to false.
    """
    return json.dumps({"success": False, **fields, "error": error}, indent=2)


def _allowed_repository_roots() -> "list[Path]":
    """Return the resolved directories local repositories may be parsed from."""
    candidates = (
        Path("~").expanduser(),  # User home directory
        Path("/tmp/"),  # Temporary directory
        Path("/var/tmp/"),  # Var temporary
        Path("/workspace/"),  # Common workspace directory
    )
    return [candidate.resolve() for candidate in candidates]


def _is_within_allowed_roots(path: Path, roots: "list[Path]") -> bool:
    """
    Report whether ``path`` is one of ``roots`` or located beneath one.

    The comparison is done on path components rather than on string
    prefixes, so a sibling such as ``/tmpevil`` is not mistaken for a
    child of ``/tmp``.

    Args:
        path: Already-resolved candidate path.
        roots: Already-resolved allowed root directories.

    Returns:
        True when ``path`` equals a root or has a root among its parents.
    """
    return any(path == root or root in path.parents for root in roots)


async def parse_github_repository_wrapper(ctx: Context, repo_url: str) -> str:
    """
    Extract repo_extractor from the app context and call the implementation.

    Args:
        ctx: MCP request context (unused here; the extractor comes from the
            application context returned by ``get_app_context``).
        repo_url: GitHub repository URL to parse.

    Returns:
        The implementation's result string, or a JSON failure payload when
        no repository extractor is available on the application context.
    """
    # Get the app context that was stored during lifespan
    app_ctx = get_app_context()
    repo_extractor = getattr(app_ctx, "repo_extractor", None) if app_ctx else None

    if not repo_extractor:
        # Return a proper error message instead of raising
        return _error_json(
            "Repository extractor not available. Neo4j may not be configured "
            "or the USE_KNOWLEDGE_GRAPH environment variable may be set "
            "to false."
        )

    return await parse_github_repository_impl(repo_extractor, repo_url)


async def query_knowledge_graph_wrapper(ctx: Context, command: str) -> str:
    """
    Call the knowledge graph query implementation.

    Args:
        ctx: MCP request context (not forwarded to the implementation).
        command: Command string understood by ``query_knowledge_graph``.

    Returns:
        Whatever the module-level ``query_knowledge_graph`` returns.
    """
    # The query_knowledge_graph function doesn't need a context parameter
    # It creates its own Neo4j connection from environment variables
    return await query_knowledge_graph(command)


def register_knowledge_graph_tools(mcp: "FastMCP") -> None:
    """
    Register knowledge graph MCP tools.

    Args:
        mcp: FastMCP instance to register tools with
    """

    @mcp.tool()
    @track_request("query_knowledge_graph")
    async def query_knowledge_graph(
        ctx: Context,
        command: str,
    ) -> str:
        """
        Query and explore the Neo4j knowledge graph containing repository data.

        This tool provides comprehensive access to the knowledge graph for
        exploring repositories, classes, methods, functions, and their
        relationships. Perfect for understanding what data is available for
        hallucination detection and debugging validation results.

        **⚠️ IMPORTANT: Always start with the `repos` command first!**
        Before using any other commands, run `repos` to see what repositories
        are available in your knowledge graph. This will help you understand
        what data you can explore.

        ## Available Commands:

        **Repository Commands:**
        - `repos` - **START HERE!** List all repositories in the knowledge graph
        - `explore <repo_name>` - Get detailed overview of a specific repository

        **Class Commands:**
        - `classes` - List all classes across all repositories (limited to 20)
        - `classes <repo_name>` - List classes in a specific repository
        - `class <class_name>` - Get detailed information about a specific class
          including methods and attributes

        **Method Commands:**
        - `method <method_name>` - Search for methods by name across all classes
        - `method <method_name> <class_name>` - Search for a method within a
          specific class

        **Custom Query:**
        - `query <cypher_query>` - Execute a custom Cypher query (results
          limited to 20 records)

        ## Knowledge Graph Schema:

        **Node Types:**
        - Repository: `(r:Repository {name: string})`
        - File: `(f:File {path: string, module_name: string})`
        - Class: `(c:Class {name: string, full_name: string})`
        - Method: `(m:Method {name: string, params_list: [string],
          params_detailed: [string], return_type: string, args: [string]})`
        - Function: `(func:Function {name: string, params_list: [string],
          params_detailed: [string], return_type: string, args: [string]})`
        - Attribute: `(a:Attribute {name: string, type: string})`

        **Relationships:**
        - `(r:Repository)-[:CONTAINS]->(f:File)`
        - `(f:File)-[:DEFINES]->(c:Class)`
        - `(c:Class)-[:HAS_METHOD]->(m:Method)`
        - `(c:Class)-[:HAS_ATTRIBUTE]->(a:Attribute)`
        - `(f:File)-[:DEFINES]->(func:Function)`

        ## Example Workflow:
        ```
        1. repos                                    # See what repos exist
        2. explore pydantic-ai                      # Explore repository
        3. classes pydantic-ai                      # List classes in repo
        4. class Agent                              # Explore Agent class
        5. method run_stream                        # Search run_stream method
        6. method __init__ Agent                    # Find constructor
        7. query "MATCH (c:Class)-[:HAS_METHOD]->(m:Method) WHERE m.name = 'run' RETURN c.name, m.name LIMIT 5"
        ```

        Args:
            command: Command string to execute (see available commands above)

        Returns:
            JSON string with query results, statistics, and metadata
        """
        # Every failure is logged with its traceback and surfaced to the MCP
        # client as an MCPToolError chained to the original exception.
        try:
            return await query_knowledge_graph_wrapper(ctx, command)
        except KnowledgeGraphError as e:
            logger.exception("Knowledge graph error")
            msg = f"Knowledge graph query failed: {e!s}"
            raise MCPToolError(msg) from e
        except DatabaseError as e:
            logger.exception("Database error in knowledge graph")
            msg = f"Knowledge graph query failed: {e!s}"
            raise MCPToolError(msg) from e
        except Exception as e:
            logger.exception("Unexpected error in query_knowledge_graph tool")
            msg = f"Knowledge graph query failed: {e!s}"
            raise MCPToolError(msg) from e

    @mcp.tool()
    @track_request("parse_github_repository")
    async def parse_github_repository(
        ctx: Context,
        repo_url: str,
    ) -> str:
        """
        Parse a GitHub repository into the Neo4j knowledge graph.

        This tool clones a GitHub repository, analyzes its Python files, and
        stores the code structure (classes, methods, functions, imports) in
        Neo4j for use in hallucination detection.

        The tool:
        - Clones the repository to a temporary location
        - Analyzes Python files to extract code structure
        - Stores classes, methods, functions, and imports in Neo4j
        - Provides detailed statistics about the parsing results
        - Automatically handles module name detection for imports

        Args:
            repo_url: GitHub repository URL (e.g., 'https://github.com/user/repo.git')

        Returns:
            JSON string with parsing results, statistics, and repository information
        """
        try:
            # Validate GitHub URL
            validation_result = validate_github_url(repo_url)
            if not validation_result["valid"]:
                raise MCPToolError(
                    validation_result.get("error", "Invalid GitHub URL")
                )

            return await parse_github_repository_wrapper(ctx, repo_url)
        except MCPToolError:
            # Already in the shape the client expects; do not re-wrap.
            raise
        except ValidationError as e:
            logger.exception("Validation error")
            msg = f"Repository parsing failed: {e!s}"
            raise MCPToolError(msg) from e
        except KnowledgeGraphError as e:
            logger.exception("Knowledge graph error")
            msg = f"Repository parsing failed: {e!s}"
            raise MCPToolError(msg) from e
        except Exception as e:
            logger.exception("Unexpected error in parse_github_repository tool")
            msg = f"Repository parsing failed: {e!s}"
            raise MCPToolError(msg) from e

    @mcp.tool()
    @track_request("parse_repository_branch")
    async def parse_repository_branch(
        ctx: Context,
        repo_url: str,
        branch: str,
    ) -> str:
        """
        Parse a specific branch of a GitHub repository into the Neo4j knowledge graph.

        This enhanced tool allows parsing specific branches of a repository,
        useful for:
        - Analyzing feature branches before merging
        - Comparing different versions of code
        - Tracking code evolution across branches

        The tool extracts:
        - Code structure (classes, methods, functions, imports)
        - Git metadata (branches, tags, recent commits)
        - Repository statistics (contributors, file count, size)

        Args:
            repo_url: GitHub repository URL (e.g., 'https://github.com/user/repo.git')
            branch: Branch name (e.g., 'main', 'develop', 'feature/branch')

        Returns:
            JSON string with parsing results, statistics, and branch information
        """
        try:
            # Validate GitHub URL
            validation_result = validate_github_url(repo_url)
            if not validation_result["valid"]:
                raise MCPToolError(
                    validation_result.get("error", "Invalid GitHub URL")
                )

            # Parse repository with branch support (imported at call time,
            # as in the rest of this module's optional tools)
            from src.knowledge_graph.repository import (
                parse_github_repository_with_branch,
            )

            return await parse_github_repository_with_branch(repo_url, branch)
        except MCPToolError:
            # Already in the shape the client expects; do not re-wrap.
            raise
        except ValidationError as e:
            logger.exception("Validation error")
            msg = f"Repository branch parsing failed: {e!s}"
            raise MCPToolError(msg) from e
        except KnowledgeGraphError as e:
            logger.exception("Knowledge graph error")
            msg = f"Repository branch parsing failed: {e!s}"
            raise MCPToolError(msg) from e
        except Exception as e:
            logger.exception("Unexpected error in parse_repository_branch tool")
            msg = f"Repository branch parsing failed: {e!s}"
            raise MCPToolError(msg) from e

    @mcp.tool()
    @track_request("get_repository_info")
    async def get_repository_info(
        ctx: Context,
        repo_name: str,
    ) -> str:
        """
        Get detailed information about a parsed repository from the knowledge graph.

        This tool retrieves comprehensive metadata about a repository including:
        - Repository statistics (file count, contributors, size)
        - Branch information with recent commits
        - Tag information
        - Code structure summary (classes, methods, functions)
        - Git history insights

        Use this after parsing a repository to understand its structure and
        history.

        Args:
            repo_name: Repository name in the knowledge graph (no .git ext)

        Returns:
            JSON string with comprehensive repository information
        """
        try:
            from src.knowledge_graph.repository import (
                get_repository_metadata_from_neo4j,
            )

            return await get_repository_metadata_from_neo4j(repo_name)
        except KnowledgeGraphError as e:
            logger.exception("Knowledge graph error")
            msg = f"Failed to get repository info: {e!s}"
            raise MCPToolError(msg) from e
        except DatabaseError as e:
            logger.exception("Database error")
            msg = f"Failed to get repository info: {e!s}"
            raise MCPToolError(msg) from e
        except Exception as e:
            logger.exception("Unexpected error in get_repository_info tool")
            msg = f"Failed to get repository info: {e!s}"
            raise MCPToolError(msg) from e

    @mcp.tool()
    @track_request("update_parsed_repository")
    async def update_parsed_repository(
        ctx: Context,
        repo_url: str,
    ) -> str:
        """
        Update an already parsed repository with latest changes.

        This tool performs an incremental update of a repository:
        - Pulls latest changes from the remote repository
        - Identifies modified files since last parse
        - Updates only the changed components in Neo4j
        - Preserves existing relationships and metadata

        More efficient than re-parsing the entire repository for large
        codebases.

        Args:
            repo_url: GitHub repository URL to update

        Returns:
            JSON string with update results and changed files
        """
        try:
            # Validate GitHub URL
            validation_result = validate_github_url(repo_url)
            if not validation_result["valid"]:
                raise MCPToolError(
                    validation_result.get("error", "Invalid GitHub URL")
                )

            from src.knowledge_graph.repository import update_repository_in_neo4j

            return await update_repository_in_neo4j(repo_url)
        except MCPToolError:
            # Already in the shape the client expects; do not re-wrap.
            raise
        except ValidationError as e:
            logger.exception("Validation error")
            msg = f"Repository update failed: {e!s}"
            raise MCPToolError(msg) from e
        except KnowledgeGraphError as e:
            logger.exception("Knowledge graph error")
            msg = f"Repository update failed: {e!s}"
            raise MCPToolError(msg) from e
        except Exception as e:
            logger.exception("Unexpected error in update_parsed_repository tool")
            msg = f"Repository update failed: {e!s}"
            raise MCPToolError(msg) from e

    @mcp.tool()
    @track_request("parse_local_repository")
    async def parse_local_repository(
        ctx: Context,
        local_path: str,
    ) -> str:
        """
        Parse a local Git repository into the Neo4j knowledge graph.

        This tool parses a local Git repository directly without cloning,
        useful for:
        - Analyzing repositories already present on the system
        - Parsing private repositories not accessible via URL
        - Working with repositories that have been modified locally
        - Faster parsing of repositories you already have locally

        The tool analyzes multiple programming languages including:
        - Python (.py files)
        - JavaScript/TypeScript (.js, .ts, .jsx, .tsx files)
        - Go (.go files)
        - And more based on the multi-language analyzer factory

        Args:
            local_path: Absolute path to the local Git repository directory

        Returns:
            JSON string with parsing results, statistics, and repository information
        """
        # Unlike the URL-based tools, this tool reports every failure as a
        # JSON payload with "success": false rather than raising.
        try:
            # Get the app context
            app_ctx = get_app_context()
            if not app_ctx:
                return _error_json("Application context not available")

            # Check if repository extractor is available
            repo_extractor = getattr(app_ctx, "repo_extractor", None)
            if not repo_extractor:
                return _error_json(
                    "Repository extractor not available. Neo4j may not be "
                    "configured or USE_KNOWLEDGE_GRAPH may be false."
                )

            # Security: Validate and sanitize local path. Resolving first
            # collapses "..", "~" and symlinks before the containment check.
            repo_path = Path(local_path).expanduser().resolve()
            local_path = str(repo_path)

            # Check if path is within allowed directories. This compares path
            # components, not string prefixes, so "/tmpevil" is not accepted
            # as being inside "/tmp".
            if not _is_within_allowed_roots(repo_path, _allowed_repository_roots()):
                return _error_json(
                    "Path is not within allowed directories. Repository "
                    "must be in home directory or temporary directories."
                )

            # Validate local path exists
            if not repo_path.exists():
                return _error_json(f"Local path does not exist: {local_path}")

            if not repo_path.is_dir():
                return _error_json(f"Path is not a directory: {local_path}")

            # Check if it's a Git repository
            if not (repo_path / ".git").exists():
                return _error_json(
                    f"Not a Git repository (no .git directory found): {local_path}"
                )

            # Extract repository name from path
            repo_name = repo_path.name

            logger.info("Parsing local repository: %s at %s", repo_name, local_path)

            # Use a custom method to analyze local repository
            await repo_extractor.analyze_local_repository(local_path, repo_name)

            # Query Neo4j to get statistics about what was stored
            stats_query = """
            MATCH (r:Repository {name: $repo_name})
            OPTIONAL MATCH (r)-[:CONTAINS]->(f:File)
            OPTIONAL MATCH (f)-[:DEFINES]->(c:Class)
            OPTIONAL MATCH (c)-[:HAS_METHOD]->(m:Method)
            OPTIONAL MATCH (f)-[:DEFINES]->(func:Function)
            WITH r,
                 COLLECT(DISTINCT f) as files,
                 COLLECT(DISTINCT c) as classes,
                 COLLECT(DISTINCT m) as methods,
                 COLLECT(DISTINCT func) as functions
            RETURN
                SIZE([f IN files WHERE f IS NOT NULL]) as file_count,
                SIZE([c IN classes WHERE c IS NOT NULL]) as class_count,
                SIZE([m IN methods WHERE m IS NOT NULL]) as method_count,
                SIZE([func IN functions WHERE func IS NOT NULL]) as function_count
            """

            async with repo_extractor.driver.session() as session:
                stats_result = await session.run(stats_query, repo_name=repo_name)
                stats = await stats_result.single()

            success_msg = (
                f"Successfully parsed local repository '{repo_name}' "
                "into the knowledge graph"
            )
            next_step_1 = (
                "Use 'query_knowledge_graph' tool with 'explore <repo_name>' "
                "to see detailed statistics"
            )
            next_step_2 = (
                "Use 'check_ai_script_hallucinations' tool to validate "
                "AI-generated code against this repository"
            )

            # `stats` is falsy when the query returned no record; report
            # zeros in that case.
            return json.dumps(
                {
                    "success": True,
                    "local_path": local_path,
                    "repository_name": repo_name,
                    "statistics": {
                        "files_processed": stats["file_count"] if stats else 0,
                        "classes_created": stats["class_count"] if stats else 0,
                        "methods_created": stats["method_count"] if stats else 0,
                        "functions_created": stats["function_count"] if stats else 0,
                    },
                    "message": success_msg,
                    "next_steps": [next_step_1, next_step_2],
                },
                indent=2,
            )

        except ValidationError as e:
            logger.exception("Validation error parsing local repository")
            return _error_json(
                f"Validation failed: {e!s}",
                local_path=local_path,
            )
        except KnowledgeGraphError as e:
            logger.exception("Knowledge graph error parsing local repository")
            return _error_json(
                f"Knowledge graph parsing failed: {e!s}",
                local_path=local_path,
            )
        except Exception as e:
            logger.exception("Unexpected error parsing local repository %s", local_path)
            return _error_json(
                f"Local repository parsing failed: {e!s}",
                local_path=local_path,
            )

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AI-enthusiasts/crawl4ai-rag-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

knowledge_graph.py•21.5 KiB