#!/usr/bin/env python3
"""
Run SGR Searcher with detailed logging for comparison with manual search.
Saves all LLM reasoning steps and tool calls to log files.
"""
import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
# Add src to path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from searchers import SGRSearcherGemini
# Configuration
REPO_BASE = Path(__file__).parent / "data" / "issues"
DATASET_PATH = Path(__file__).parent / "data" / "dataset.jsonl"
LOGS_DIR = Path(__file__).parent / "logs" / "sgr_runs"
class DetailedLogger:
"""Captures all SGR steps for analysis."""
def __init__(self, question_id: str):
self.question_id = question_id
self.steps: List[Dict[str, Any]] = []
self.start_time = time.time()
def log_step(self, step_name: str, data: Dict[str, Any]):
"""Log a step with timestamp."""
self.steps.append({
"timestamp": time.time() - self.start_time,
"step": step_name,
"data": data,
})
def save(self, output_dir: Path):
"""Save logs to file."""
output_dir.mkdir(parents=True, exist_ok=True)
log_file = output_dir / f"{self.question_id}_detailed.json"
with open(log_file, 'w') as f:
json.dump({
"question_id": self.question_id,
"total_time_seconds": time.time() - self.start_time,
"steps": self.steps,
}, f, indent=2, default=str)
# Also save human-readable version
md_file = output_dir / f"{self.question_id}_steps.md"
with open(md_file, 'w') as f:
f.write(self._to_markdown())
return log_file, md_file
def _to_markdown(self) -> str:
"""Convert logs to readable markdown."""
lines = [
f"# SGR Search Log: {self.question_id}",
f"\nTotal time: {time.time() - self.start_time:.1f}s",
f"\n---\n",
]
for i, step in enumerate(self.steps, 1):
lines.append(f"\n## Step {i}: {step['step']} (+{step['timestamp']:.1f}s)")
lines.append("")
data = step['data']
if isinstance(data, dict):
for key, value in data.items():
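                    # Truncate long strings and serialized structures so the markdown stays readable.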
if isinstance(value, str) and len(value) > 500:
value = value[:500] + "..."
elif isinstance(value, (list, dict)):
value = json.dumps(value, indent=2, default=str)
if len(value) > 1000:
value = value[:1000] + "\n..."
lines.append(f"**{key}:**")
lines.append(f"```")
lines.append(str(value))
lines.append(f"```")
lines.append("")
else:
lines.append(f"```\n{data}\n```")
return "\n".join(lines)
class InstrumentedSGRSearcher(SGRSearcherGemini):
"""SGR Searcher with detailed logging of all steps."""
def __init__(self, logger: DetailedLogger, **kwargs):
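        # Attach the detailed logger before base-class init so that any messages
        # emitted during setup are captured by our _log override.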
self.detailed_logger = logger
super().__init__(**kwargs)
def _log(self, message: str) -> None:
"""Override to capture all logs."""
super()._log(message)
# Also capture to detailed logger
self.detailed_logger.log_step("log", {"message": message})
def _call_structured(self, messages, response_format):
"""Override to capture LLM calls."""
# Log the prompt
self.detailed_logger.log_step("llm_call_start", {
"response_format": response_format.__name__,
"messages": [
                {
                    "role": m.get("role"),
                    # content may be None (e.g. tool-call messages), so coalesce before slicing
                    "content": (m.get("content") or "")[:2000]
                    + ("..." if len(m.get("content") or "") > 2000 else ""),
                }
for m in messages
],
})
# Make the call
result = super()._call_structured(messages, response_format)
# Log the response
self.detailed_logger.log_step("llm_response", {
"response_format": response_format.__name__,
"result": result.model_dump() if hasattr(result, 'model_dump') else str(result),
})
return result
    def _call_llm_text(self, messages):
        """Override to capture plain-text LLM calls (e.g., for path correction)."""
# Log the prompt
self.detailed_logger.log_step("llm_text_call_start", {
"messages": [
                {
                    "role": m.get("role"),
                    # content may be None (e.g. tool-call messages), so coalesce before slicing
                    "content": (m.get("content") or "")[:1000]
                    + ("..." if len(m.get("content") or "") > 1000 else ""),
                }
for m in messages
],
})
# Make the call
result = super()._call_llm_text(messages)
# Log the response
self.detailed_logger.log_step("llm_text_response", {
"result": result[:500] + ("..." if len(result) > 500 else ""),
})
return result
def _analyze_repo_context(self, repo_path, tools):
"""Override to log repo analysis."""
self.detailed_logger.log_step("analyze_repo_start", {
"repo_path": repo_path,
})
result = super()._analyze_repo_context(repo_path, tools)
self.detailed_logger.log_step("analyze_repo_result", {
"primary_language": result.primary_language,
"framework": result.framework,
"project_type": result.project_type,
"key_directories": result.key_directories,
})
return result
def _analyze_question(self, query, repo_hint):
"""Override to log question analysis."""
self.detailed_logger.log_step("analyze_question_start", {
"query": query,
"repo_hint": repo_hint,
})
result = super()._analyze_question(query, repo_hint)
self.detailed_logger.log_step("analyze_question_result", {
"question_type": result.question_type,
"concepts": result.concepts,
"user_problem": result.user_problem,
"what_to_find": result.what_to_find,
})
return result
def load_all_questions() -> List[Dict]:
"""Load all questions from dataset."""
questions = []
with open(DATASET_PATH) as f:
for line in f:
if line.strip():
questions.append(json.loads(line))
return questions
def run_single_question(question: Dict, output_dir: Path) -> Dict:
"""Run SGR on a single question with detailed logging."""
question_id = question['id']
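    # Each dataset entry keeps its repository checkout under data/issues/<repo_path>/repo.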
repo_path = REPO_BASE / question['repo_path'] / "repo"
print(f"\n{'='*80}")
print(f"Question: {question_id}")
print(f"Repo: {question['repo_path']}")
print(f"Query: {question['query'][:100]}...")
print(f"{'='*80}")
if not repo_path.exists():
print(f"ERROR: Repo not found at {repo_path}")
return {"id": question_id, "error": "repo not found"}
# Create logger
logger = DetailedLogger(question_id)
logger.log_step("start", {
"question": question['query'],
"repo_path": str(repo_path),
"expected_files": [item['file_path'] for item in question.get('expected_items', [])],
})
# Create instrumented searcher
searcher = InstrumentedSGRSearcher(
logger=logger,
model="gemini-2.5-flash-lite", # Light model as requested
max_iterations=8,
verbose=True,
)
# Run search
try:
result = searcher.search(
query=question['query'],
repo_path=str(repo_path),
)
# Log final result
logger.log_step("final_result", {
"found_files": [item.file_path for item in result.items],
"patterns_used": result.patterns_used,
"total_time_ms": result.total_time_ms,
"error": result.error,
})
except Exception as e:
import traceback
logger.log_step("error", {
"error": str(e),
"traceback": traceback.format_exc(),
})
result = None
# Save logs
log_file, md_file = logger.save(output_dir)
print(f"\nLogs saved to:\n {log_file}\n {md_file}")
# Compare with expected
expected_files = [item['file_path'] for item in question.get('expected_items', [])]
found_files = [item.file_path for item in result.items] if result else []
matched = []
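    # Count an expected file as matched if either path string contains the other,
    # which tolerates partial or differently rooted paths.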
for expected in expected_files:
for found in found_files:
if expected in found or found in expected:
matched.append(expected)
break
recall = len(matched) / len(expected_files) if expected_files else 0
print(f"\n--- RESULTS ---")
print(f"Expected: {expected_files}")
print(f"Found: {found_files[:5]}")
print(f"Matched: {matched}")
print(f"Recall: {recall:.0%}")
return {
"id": question_id,
"expected": expected_files,
"found": found_files,
"matched": matched,
"recall": recall,
"time_ms": result.total_time_ms if result else 0,
"error": result.error if result else "exception",
}
def main():
"""Run SGR on all questions and save detailed logs."""
# Create output directory with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = LOGS_DIR / timestamp
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {output_dir}")
# Load questions
questions = load_all_questions()
print(f"Loaded {len(questions)} questions")
# Run on all questions
# questions = questions[:2] # Uncomment to test with first 2 questions only
results = []
for question in questions:
try:
result = run_single_question(question, output_dir)
results.append(result)
except Exception as e:
print(f"ERROR processing {question['id']}: {e}")
import traceback
traceback.print_exc()
results.append({"id": question['id'], "error": str(e)})
# Save summary
summary_file = output_dir / "summary.json"
with open(summary_file, 'w') as f:
json.dump(results, f, indent=2)
# Print summary
print(f"\n{'='*80}")
print("SUMMARY")
print(f"{'='*80}")
total_recall = 0
total_time = 0
successful = 0
for r in results:
recall = r.get('recall', 0)
time_ms = r.get('time_ms', 0)
        # Treat a run as successful if it did not error out (or still produced matches);
        # note that completed runs always carry an 'error' key, possibly None.
        if not r.get('error') or r.get('recall'):
total_recall += recall
total_time += time_ms
successful += 1
print(f"{r['id']}: recall={recall:.0%}, time={time_ms:.0f}ms")
else:
print(f"{r['id']}: ERROR - {r.get('error')}")
if successful > 0:
print(f"\nSuccessful: {successful}/{len(results)}")
print(f"Average recall: {total_recall/successful:.0%}")
print(f"Average time: {total_time/successful:.0f}ms")
print(f"\nLogs saved to: {output_dir}")
# Create comparison report
create_comparison_report(results, output_dir)
def create_comparison_report(results: List[Dict], output_dir: Path):
    """Build a markdown report comparing SGR results against my manual-search baseline."""
# My results from manual search
my_results = {
"django_request_response_lifecycle": {"recall": 0.25, "files_found": ["django/core/handlers/base.py"]},
"django_queryset_lazy_evaluation": {"recall": 1.0, "files_found": ["django/db/models/query.py"]},
"gin_route_tree_matching": {"recall": 0.75, "files_found": ["tree.go"]},
"gin_context_pool_reuse": {"recall": 1.0, "files_found": ["gin.go", "context.go"]},
"codeql_dataflow_configuration": {"recall": 0.30, "files_found": ["shared/dataflow/.../DataFlowImpl.qll", "Xss.qll"]},
"codeql_security_query_structure": {"recall": 0.70, "files_found": ["DomBasedXssQuery.qll", "Xss.qll"]},
"infinigen_procedural_material_nodes": {"recall": 0.70, "files_found": ["node_wrangler.py", "surface.py"]},
"infinigen_asset_placement_pipeline": {"recall": 0.75, "files_found": ["placement.py", "density.py", "instance_scatter.py"]},
"qgis_map_rendering_pipeline": {"recall": 0.80, "files_found": ["qgsmaprendererparalleljob.h", "qgsmaprendererjob.h", "qgsmapcanvas.h"]},
"qgis_expression_evaluation": {"recall": 0.70, "files_found": ["qgsexpression.h", "qgspallabeling.h"]},
"claude_flow_agent_coordination": {"recall": 0.40, "files_found": ["task/coordination.ts"]},
"claude_flow_memory_system": {"recall": 0.30, "files_found": ["hive-mind/core/Memory.ts", "DatabaseManager.ts"]},
}
report_lines = [
"# SGR vs Manual Search Comparison",
f"\nGenerated: {datetime.now().isoformat()}",
"\n## Results Comparison\n",
"| Question | SGR Recall | My Recall | Winner | SGR Files | My Files |",
"|----------|------------|-----------|--------|-----------|----------|",
]
sgr_wins = 0
my_wins = 0
ties = 0
for r in results:
q_id = r['id']
sgr_recall = r.get('recall', 0)
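        # Questions without a manual baseline default to zero recall and no files.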
my_data = my_results.get(q_id, {"recall": 0, "files_found": []})
my_recall = my_data['recall']
if sgr_recall > my_recall:
winner = "**SGR**"
sgr_wins += 1
elif my_recall > sgr_recall:
winner = "**Manual**"
my_wins += 1
else:
winner = "Tie"
ties += 1
sgr_files = ", ".join(r.get('found', [])[:3])[:50]
my_files = ", ".join(my_data['files_found'][:3])[:50]
report_lines.append(
f"| {q_id[:30]} | {sgr_recall:.0%} | {my_recall:.0%} | {winner} | {sgr_files}... | {my_files}... |"
)
report_lines.extend([
"\n## Summary",
f"\n- SGR wins: {sgr_wins}",
f"- Manual wins: {my_wins}",
f"- Ties: {ties}",
"\n## Analysis",
"\n### Where SGR does better:",
"- (will be filled based on results)",
"\n### Where Manual search does better:",
"- (will be filled based on results)",
"\n### Common failure patterns:",
"- (will be analyzed from logs)",
])
report_file = output_dir / "comparison_report.md"
with open(report_file, 'w') as f:
f.write("\n".join(report_lines))
print(f"\nComparison report saved to: {report_file}")
if __name__ == "__main__":
main()