"""MCP Code Analysis Server entry point."""
import asyncio
import logging
import sys
from pathlib import Path
from typing import Any
import click
from src.config import settings
from src.logger import setup_logging
from src.mcp_server.server import create_server
# Default server configuration.
# `serve` compares the CLI-supplied host/port against these values to decide
# whether to write an override into `settings.mcp`.
DEFAULT_HOST: str = "127.0.0.1"
DEFAULT_PORT: int = 8080
# Root command group: the subcommands in this file attach to it through
# @cli.command(). The docstring doubles as the group's --help text, so keep
# it short and user-facing.
@click.group()
def cli() -> None:
    """MCP Code Analysis Server CLI."""
@cli.command()
@click.option(
    "--host",
    # Use the module constant so the "was it overridden?" check below can
    # never drift out of sync with the option default.
    default=DEFAULT_HOST,
    help="Host to bind to",
)
@click.option(
    "--port",
    type=int,
    default=DEFAULT_PORT,
    help="Port to bind to",
)
@click.option(
    "--reload",
    is_flag=True,
    help="Enable auto-reload for development",
)
@click.option(
    "--log-level",
    type=click.Choice(["debug", "info", "warning", "error"]),
    default="info",
    help="Logging level",
)
def serve(host: str, port: int, reload: bool, log_level: str) -> None:
    """Start the MCP server."""
    # Parameters (all supplied by click from the options above):
    #   host      - interface to bind; defaults to DEFAULT_HOST.
    #   port      - TCP port to bind; defaults to DEFAULT_PORT.
    #   reload    - accepted for CLI compatibility; not acted upon here.
    #   log_level - one of debug/info/warning/error, applied to the root logger.

    # Configure logging first so everything below can log.
    setup_logging()

    # Apply the requested level on top of whatever setup_logging() configured.
    if log_level:
        logging.getLogger().setLevel(log_level.upper())

    # The flag is not forwarded to mcp.run(); say so instead of silently
    # ignoring what the user asked for.
    if reload:
        logging.getLogger(__name__).warning(
            "--reload was requested but auto-reload is not implemented; ignoring",
        )

    # Imported lazily; the original note says this avoids circular imports.
    from src.mcp_server.server import mcp

    # Record explicit CLI overrides in the shared settings object. Values
    # equal to the defaults are treated as "not provided" and leave the
    # configured settings untouched.
    if host != DEFAULT_HOST:
        settings.mcp.host = host
    if port != DEFAULT_PORT:
        settings.mcp.port = port

    # Run the MCP server over HTTP on the CLI-resolved address.
    mcp.run(transport="http", host=host, port=port)
@cli.command()
@click.argument("repository_url")
@click.option(
    "--branch",
    help="Branch to scan",
)
@click.option(
    "--embeddings/--no-embeddings",
    default=True,
    help="Generate embeddings after scanning",
)
def scan(repository_url: str, branch: str | None, embeddings: bool) -> None:
    """Scan a repository."""
    # Parameters (supplied by click):
    #   repository_url - positional argument forwarded to scan_repository().
    #   branch         - None when --branch is omitted (the option has no default).
    #   embeddings     - forwarded as generate_embeddings; True unless
    #                    --no-embeddings is passed.

    async def _scan() -> None:
        """Initialize a server, run one scan, print a summary, shut down."""
        setup_logging()
        logging.getLogger().setLevel("INFO")

        server = create_server()
        await server.initialize()
        try:
            result_raw = await server.scan_repository(
                repository_url,
                branch=branch,
                generate_embeddings=embeddings,
            )

            # Anything that is not a dict is treated as "no details available".
            result: dict[str, Any] = (
                result_raw if isinstance(result_raw, dict) else {}
            )

            # Print results (best-effort based on available keys).
            click.echo("Repository scanned successfully!")

            # Prefer the top-level id; otherwise look inside "scan_result".
            # The nested value is only consulted when it really is a dict, so
            # an unexpected payload shape cannot raise AttributeError here.
            repo_id = result.get("repository_id")
            if not repo_id:
                nested = result.get("scan_result")
                repo_id = (
                    nested.get("repository_id")
                    if isinstance(nested, dict)
                    else None
                )
            if repo_id is not None:
                click.echo(f"Repository ID: {repo_id}")

            files_scanned = result.get("files_scanned")
            if files_scanned is not None:
                click.echo(f"Files scanned: {files_scanned}")

            # Always printed; falls back to 0 when the key is absent.
            files_parsed = result.get("files_parsed", 0)
            click.echo(f"Files parsed: {files_parsed}")

            embeddings_info = result.get("embeddings")
            if (
                isinstance(embeddings_info, dict)
                and "total_embeddings" in embeddings_info
            ):
                click.echo(
                    f"Embeddings created: {embeddings_info['total_embeddings']}",
                )
        finally:
            # Shut the server down even when the scan or the printing fails.
            await server.shutdown()

    asyncio.run(_scan())
@cli.command()
@click.argument("query")
@click.option(
    "--repository-id",
    type=int,
    help="Repository ID to search in",
)
@click.option(
    "--limit",
    type=int,
    default=10,
    help="Maximum number of results",
)
def search(query: str, repository_id: int | None, limit: int) -> None:
    """Search for code."""
    # Parameters (supplied by click):
    #   query         - positional search text, passed through to server.search().
    #   repository_id - None when --repository-id is omitted (no default).
    #   limit         - maximum number of results requested; defaults to 10.

    async def _search() -> None:
        """Initialize a server, run one search, print the hits, shut down."""
        setup_logging()
        logging.getLogger().setLevel("INFO")

        server = create_server()
        await server.initialize()
        try:
            results_raw = await server.search(query, repository_id, limit)

            # Keep only dict entries; any other payload shape yields no rows.
            results: list[dict[str, Any]] = []
            if isinstance(results_raw, list):
                results = [r for r in results_raw if isinstance(r, dict)]

            click.echo(f"Found {len(results)} results for '{query}':\n")
            for i, result in enumerate(results, 1):
                entity = result.get("entity", {})
                if not isinstance(entity, dict):
                    entity = {}

                # Missing fields are rendered as "?" rather than failing.
                t = entity.get("type", "?")
                name = entity.get("name", "?")
                file_path = entity.get("file_path", "?")
                start = entity.get("start_line", "?")
                end = entity.get("end_line", "?")
                similarity = result.get("similarity")

                click.echo(f"{i}. {t}: {name}")
                click.echo(f" File: {file_path}")
                click.echo(f" Lines: {start}-{end}")
                # Only numeric scores are printed, formatted to 3 decimals.
                if isinstance(similarity, int | float):
                    click.echo(f" Similarity: {float(similarity):.3f}")
                click.echo()
        finally:
            # Shut the server down even when the search or printing fails.
            await server.shutdown()

    asyncio.run(_search())
@cli.command()
def init_db() -> None:
    """Initialize the database."""
    # Exits with status 1 when initialization fails or verification reports
    # an incomplete setup; returns normally on success.

    async def _init() -> bool:
        """Create and verify the database; return True only on full success."""
        from src.database.init_db import init_database, verify_database_setup

        logging.basicConfig(level=logging.INFO)

        try:
            engine = await init_database()
            try:
                verified = bool(await verify_database_setup(engine))
            finally:
                # Always release the engine, including when verification
                # fails or raises; previously the failure path exited before
                # dispose() was reached.
                await engine.dispose()
        except (ImportError, ConnectionError, OSError, ValueError) as e:
            click.echo(f"❌ Database initialization failed: {e}")
            return False

        if verified:
            click.echo("✅ Database initialized successfully")
        else:
            click.echo("❌ Database initialization incomplete")
        return verified

    # Exit from synchronous code, after the event loop has shut down cleanly,
    # rather than raising SystemExit inside the coroutine.
    if not asyncio.run(_init()):
        sys.exit(1)
@cli.command()
def create_config() -> None:
    """Create a sample configuration file."""
    # Writes ./config.yaml relative to the current working directory. If the
    # file already exists the user is asked to confirm before it is replaced.

    # Template written verbatim to disk. Nested keys are indented so that the
    # file parses as the intended YAML structure (mappings under `database`,
    # `mcp`, ... and lists under `repositories` / `exclude_patterns`).
    sample_config = """# MCP Code Analysis Server Configuration
# OpenAI API key (required for embeddings)
openai_api_key: "your-api-key-here"
# Repositories to track
repositories:
  - url: https://github.com/example/repo1
    branch: main
  - url: https://github.com/example/repo2
    # branch: develop # Optional, uses default branch if not specified
    # access_token: github_pat_... # For private repos
# Database configuration
database:
  host: localhost
  port: 5432
  database: code_analysis
  user: codeanalyzer
  password: your-password
# MCP server configuration
mcp:
  host: 0.0.0.0
  port: 8080
# Scanner configuration
scanner:
  storage_path: ./repositories
  exclude_patterns:
    - __pycache__
    - "*.pyc"
    - .git
    - node_modules
    - venv
    - .env
# Embeddings configuration
embeddings:
  model: text-embedding-3-small
  batch_size: 100
  max_tokens: 8000
# LLM configuration
llm:
  model: gpt-4o-mini
  temperature: 0.2
  max_tokens: 4096
# Logging configuration
logging:
  level: INFO
  file_enabled: true
  file_path: logs/mcp-server.log
"""
    config_path = Path("config.yaml")

    # Never clobber an existing config without explicit confirmation.
    if config_path.exists():
        click.echo(f"Configuration file already exists: {config_path}")
        if not click.confirm("Overwrite?"):
            return

    # Explicit encoding so the output does not depend on the platform locale.
    config_path.write_text(sample_config, encoding="utf-8")
    click.echo(f"Created configuration file: {config_path}")

    click.echo("\nNext steps:")
    click.echo("1. Edit config.yaml and add your OpenAI API key")
    click.echo("2. Update database credentials")
    click.echo("3. Add repositories to track")
    click.echo("4. Run 'python -m src.mcp_server init-db' to initialize the database")
    click.echo("5. Run 'python -m src.mcp_server serve' to start the server")
# Invoke the click command group when this file is executed as a script.
if __name__ == "__main__":
    cli()