Skip to main content
Glama

ToGMAL MCP Server

quick_test_real_data.py • 3.44 kB
#!/usr/bin/env python3
"""
Quick test with real data - sample 1000 questions for faster testing.

Loads a random sample of MMLU benchmark results from a JSON file, indexes the
sampled questions into a ``BenchmarkVectorDB`` and prints what the database
reports for a handful of hand-written prompts.
"""

import json
import random
from pathlib import Path
from typing import List, Union

from benchmark_vector_db import BenchmarkVectorDB, BenchmarkQuestion

# Defaults used when the script is run directly.
MMLU_RESULTS_PATH = Path("./data/benchmark_results/mmlu_real_results.json")
VECTOR_DB_PATH = Path("./data/benchmark_vector_db")
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
DEFAULT_SAMPLE_SIZE = 1000


def _to_benchmark_question(q: dict) -> BenchmarkQuestion:
    """Build a ``BenchmarkQuestion`` from one record of the results JSON."""
    return BenchmarkQuestion(
        question_id=q['question_id'],
        source_benchmark=q['source_benchmark'],
        domain=q['domain'],
        question_text=q['question_text'],
        correct_answer="",  # Not needed for vector DB
        choices=q.get('choices'),
        success_rate=q['success_rate'],
        # Difficulty is stored as the complement of the success rate.
        difficulty_score=1.0 - q['success_rate'],
        difficulty_label=q['difficulty_label'],
        num_models_tested=q['num_models_tested'],
    )


def load_sample_mmlu_data(
    n_samples: int = DEFAULT_SAMPLE_SIZE,
    data_path: Union[str, Path] = MMLU_RESULTS_PATH,
) -> List[BenchmarkQuestion]:
    """Load a random sample of real MMLU questions.

    Args:
        n_samples: Maximum number of questions to sample. If the file holds
            fewer questions, all of them are returned (in random order).
        data_path: JSON file with a top-level ``"questions"`` mapping of
            question id -> question record.

    Returns:
        One ``BenchmarkQuestion`` per sampled record.

    Raises:
        OSError: If ``data_path`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``"questions"`` or a required record field is missing.
        ValueError: If ``n_samples`` is negative (raised by ``random.sample``).
    """
    print(f"Loading sample of {n_samples} real MMLU questions...")

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(data_path, encoding="utf-8") as f:
        data = json.load(f)

    records = data['questions']

    # random.sample() refuses to draw more items than exist, so cap the request.
    sampled_qids = random.sample(list(records), min(n_samples, len(records)))
    questions = [_to_benchmark_question(records[qid]) for qid in sampled_qids]

    print(f"✓ Loaded {len(questions)} sampled questions")
    return questions


def quick_test(n_samples: int = DEFAULT_SAMPLE_SIZE) -> None:
    """Quick test with sampled real data.

    Indexes ``n_samples`` sampled questions into the vector database, prints
    the database statistics, then queries a fixed list of prompts and prints
    the risk level, weighted success rate, top match and recommendation
    returned for each.

    Args:
        n_samples: Number of questions to sample and index.
    """
    db = BenchmarkVectorDB(
        db_path=VECTOR_DB_PATH,
        embedding_model=EMBEDDING_MODEL,
    )

    # Load sample data
    questions = load_sample_mmlu_data(n_samples)

    # Index questions (much faster with 1000 vs 14000)
    print("\nIndexing into vector database...")
    db.index_questions(questions)

    # Get stats
    stats = db.get_statistics()
    print("\n📊 Database Statistics:")
    print(f" Total Questions: {stats['total_questions']}")
    print(f" Difficulty Distribution: {stats.get('difficulty_levels', {})}")

    # Test with diverse prompts
    test_prompts = [
        # Should be HARD (physics/abstract math)
        "Calculate the quantum correction to the partition function for a 3D harmonic oscillator",
        "Prove that there are infinitely many prime numbers",
        # Should be MODERATE (reasoning)
        "Diagnose a patient with acute chest pain and shortness of breath",
        "Explain the legal doctrine of precedent in common law systems",
        # Should be EASY (basic knowledge)
        "What is 2 + 2?",
        "What is the capital of France?",
    ]

    print(f"\n🧪 Testing {len(test_prompts)} diverse prompts:")
    print("=" * 80)

    for prompt in test_prompts:
        result = db.query_similar_questions(prompt, k=5)

        print(f"\n📝 '{prompt}'")
        print(f" 🎯 Risk: {result['risk_level']}")
        print(f" 📊 Success Rate: {result['weighted_success_rate']:.1%}")

        # The query may return no neighbours; indexing [0] blindly would
        # abort the whole run with IndexError.
        similar = result['similar_questions']
        if similar:
            top_match = similar[0]
            print(f" 📌 Top Match: {top_match['question_text'][:80]}...")
            if top_match['success_rate'] < 0.5:
                print(f" 🔍 Found similar hard question (success: {top_match['success_rate']:.0%})")
        else:
            print(" 📌 Top Match: (no similar questions found)")

        print(f" 💡 {result['recommendation']}")

    print("\n" + "=" * 80)
    print("✅ Quick real data test complete!")
    print("=" * 80)


if __name__ == "__main__":
    quick_test()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/HeTalksInMaths/togmal-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.