analyze_file
Analyze a code file for bugs, security vulnerabilities, and other issues. The language is auto-detected from the file extension, and large files are split into chunks that are analyzed in parallel.
Instructions
Analyzes a whole code file from disk. Automatically detects language by file extension. Large files are split into chunks and analyzed in parallel.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Absolute path to the file. | |
| language | No | Language override (auto-detected from extension if not set). | |
| context | No | Description of what the file does. | |
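For example, a call might send arguments like these (the path and context are illustrative; language is omitted so it is auto-detected from the .py extension):

```json
{
  "file_path": "/home/user/project/auth.py",
  "context": "JWT authentication middleware"
}
```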
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| result | Yes | JSON string with issues, warnings, suggestions, score, and stats. | |
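The result is a single JSON string. Its top-level keys (issues, warnings, suggestions, score, stats) come from the handler's return shape; the per-item fields below are illustrative, not a guaranteed schema:

```json
{
  "issues": [
    {"line": 42, "severity": "high", "message": "SQL query built by string concatenation"}
  ],
  "warnings": [
    {"line": 7, "message": "Bare except swallows errors"}
  ],
  "suggestions": ["Use parameterized queries"],
  "score": 7,
  "stats": {"file": "auth.py", "language": "python", "lines": 120}
}
```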
Implementation Reference
- tools/file_tool.py:64-149 (handler): Main handler for the analyze_file tool. Reads a file from disk (up to 500 KB), optionally caches results, splits large files into chunks analyzed in parallel (semaphore=3) via the Groq API, merges the results (a single chunk uses _build_single_result; multiple chunks use _merge_chunk_results plus optional FILE_SUMMARY consolidation), and returns JSON with issues, warnings, suggestions, score, and stats.
````python
async def analyze_file(file_path: str, language: str = "", context: str = "") -> str:
    """
    Analyzes a whole code file from disk. Automatically detects language by
    file extension. Large files are split into chunks and analyzed in parallel.

    Args:
        file_path: Absolute path to the file.
        language: Language override (auto-detected from extension if not set).
        context: Description of what the file does (optional).

    Returns:
        JSON with issues, warnings, suggestions, score, stats.
    """
    path = Path(file_path).expanduser().resolve()
    if not path.exists():
        return error_response(f"File not found: {file_path}")
    if not path.is_file():
        return error_response(f"Not a file: {file_path}")

    size_kb = path.stat().st_size / 1024
    if size_kb > MAX_FILE_SIZE_KB:
        return error_response(
            f"File too large ({size_kb:.0f} KB). Maximum is {MAX_FILE_SIZE_KB} KB.",
            "Use analyze_code with a smaller fragment instead.",
        )

    try:
        code = path.read_text(encoding="utf-8", errors="replace")
    except OSError as e:
        return error_response("Failed to read file", str(e))
    if not code.strip():
        return error_response("File is empty.")

    key = cache.make_key("analyze_file", str(path), str(path.stat().st_mtime), language, context)
    if hit := cache.get(key):
        return hit

    lang = language.strip() or EXTENSION_MAP.get(path.suffix.lower(), "text")
    filename = path.name
    total_lines = code.count("\n") + 1
    chunks = _split_into_chunks(code)
    semaphore = asyncio.Semaphore(3)

    async def _analyze_chunk(num: int, start: int, text: str) -> dict:
        system = FILE_CHUNK.format(chunk_num=num, total=len(chunks))
        ctx = f"\nContext: {context}" if context else ""
        user = (
            f"File: {filename} | Language: {lang}{ctx}\n"
            f"Lines {start}–{start + text.count(chr(10))}\n\n"
            f"```{lang}\n{text}\n```"
        )
        async with semaphore:
            raw = await call(system, user)
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return {"issues": [], "warnings": [], "suggestions": []}

    try:
        results = await asyncio.gather(*[
            _analyze_chunk(i + 1, start, text)
            for i, (start, text) in enumerate(chunks)
        ])
    except httpx.HTTPStatusError as e:
        return error_response(f"Groq API error {e.response.status_code}", e.response.text[:300])
    except ValueError as e:
        return error_response(str(e))

    if len(chunks) == 1:
        final = _build_single_result(filename, lang, total_lines, results[0])
    else:
        merged = _merge_chunk_results(filename, lang, total_lines, list(results))
        try:
            system = FILE_SUMMARY.format(filename=filename, language=lang, lines=total_lines)
            user = f"Analysis results from {len(chunks)} parts:\n\n{json.dumps(merged, ensure_ascii=False)}"
            raw = await call(system, user)
            final = json.loads(raw)
        except Exception:
            final = merged

    out = json.dumps(final, ensure_ascii=False, indent=2)
    cache.set(key, out)
    return out
````
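For reference, a minimal sketch of a direct invocation outside the MCP server, assuming the package is on the import path and Groq credentials are configured for the underlying call helper:

```python
import asyncio

from tools import analyze_file  # re-exported by tools/__init__.py

async def main() -> None:
    # Path and context are illustrative; language is omitted so it is
    # auto-detected from the file extension via EXTENSION_MAP.
    report = await analyze_file(
        file_path="/home/user/project/auth.py",
        context="JWT authentication middleware",
    )
    print(report)  # JSON string: issues, warnings, suggestions, score, stats

asyncio.run(main())
```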
- tools/file_tool.py:64-77 (schema): Input schema for analyze_file: accepts file_path (str, required), language (str, auto-detected from extension via EXTENSION_MAP), and context (str, optional description). Returns a JSON string with issues, warnings, suggestions, score, and stats.

```python
async def analyze_file(file_path: str, language: str = "", context: str = "") -> str:
    """
    Analyzes a whole code file from disk. Automatically detects language by
    file extension. Large files are split into chunks and analyzed in parallel.

    Args:
        file_path: Absolute path to the file.
        language: Language override (auto-detected from extension if not set).
        context: Description of what the file does (optional).

    Returns:
        JSON with issues, warnings, suggestions, score, stats.
    """
```

- server.py:34 (registration): Registration of analyze_file as an MCP tool via the mcp.tool()(analyze_file) decorator pattern.
```python
mcp.tool()(analyze_file)
```

- tools/__init__.py:5 (registration): Re-export of analyze_file from tools.file_tool so it can be imported by server.py.
```python
from .file_tool import analyze_file
```

- tools/file_tool.py:15-26 (helper): Helper that splits code into whole-line chunks (based on the CHUNK_SIZE config), used by the handler for large files.
```python
def _split_into_chunks(code: str) -> list[tuple[int, str]]:
    """Split code into chunks by whole lines → [(start_line, text), ...]."""
    chunks, current, current_len, start = [], [], 0, 1
    for i, line in enumerate(code.splitlines(), 1):
        current.append(line)
        current_len += len(line) + 1
        if current_len >= CHUNK_SIZE:
            chunks.append((start, "\n".join(current)))
            start, current, current_len = i + 1, [], 0
    if current:
        chunks.append((start, "\n".join(current)))
    return chunks
```
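A worked example of the splitting rule, assuming CHUNK_SIZE resolves as a module-level name in tools.file_tool (the override below is for illustration only; the real value comes from config):

```python
import tools.file_tool as ft

ft.CHUNK_SIZE = 15  # illustrative override
code = "line one\nline two\nline three"

# Each line contributes len(line) + 1 to the running length; a chunk closes
# as soon as the running length reaches CHUNK_SIZE, and the next chunk
# starts on the following line.
assert ft._split_into_chunks(code) == [
    (1, "line one\nline two"),  # 9 + 9 = 18 >= 15, so the chunk closes here
    (3, "line three"),          # remainder flushed after the loop
]
```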