#!/usr/bin/env python3
"""
Farnsworth Benchmark Suite
Benchmarks system performance:
- Memory operations (store, recall, search)
- Agent task execution
- Embedding generation
- LLM inference speed
- End-to-end response time
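
Usage (assuming this file is saved as, e.g., benchmark.py — adjust to the actual filename):
    python benchmark.py [--quick] [--data-dir ./data] [--output results.json]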
"""
import argparse
import asyncio
import json
import time
import statistics
from datetime import datetime
from typing import Optional
import tempfile
class BenchmarkResult:
"""Container for benchmark results."""
def __init__(self, name: str):
self.name = name
self.times: list[float] = []
self.successes: int = 0
self.failures: int = 0
self.metadata: dict = {}
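        # Optional extras recorded by individual benchmarks (e.g., embedding dimensions, error notes).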
def add_sample(self, duration: float, success: bool = True):
"""Add a sample to the benchmark."""
self.times.append(duration)
if success:
self.successes += 1
else:
self.failures += 1
@property
def mean(self) -> float:
return statistics.mean(self.times) if self.times else 0
@property
def median(self) -> float:
return statistics.median(self.times) if self.times else 0
@property
def std_dev(self) -> float:
return statistics.stdev(self.times) if len(self.times) > 1 else 0
@property
def min_time(self) -> float:
return min(self.times) if self.times else 0
@property
def max_time(self) -> float:
return max(self.times) if self.times else 0
@property
def success_rate(self) -> float:
total = self.successes + self.failures
return self.successes / total if total > 0 else 0
def to_dict(self) -> dict:
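        # Durations are collected in seconds; convert to milliseconds for reporting.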
return {
"name": self.name,
"samples": len(self.times),
"mean_ms": self.mean * 1000,
"median_ms": self.median * 1000,
"std_dev_ms": self.std_dev * 1000,
"min_ms": self.min_time * 1000,
"max_ms": self.max_time * 1000,
"success_rate": self.success_rate,
"metadata": self.metadata,
}
def print_header(text: str):
"""Print section header."""
print(f"\n{'='*60}")
print(f" {text}")
print(f"{'='*60}")
def print_result(result: BenchmarkResult):
"""Print benchmark result."""
print(f"\n {result.name}")
print(f" {'─'*40}")
print(f" Samples: {len(result.times)}")
print(f" Mean: {result.mean*1000:.2f} ms")
print(f" Median: {result.median*1000:.2f} ms")
print(f" Std Dev: {result.std_dev*1000:.2f} ms")
print(f" Min: {result.min_time*1000:.2f} ms")
print(f" Max: {result.max_time*1000:.2f} ms")
print(f" Success: {result.success_rate*100:.1f}%")
async def benchmark_memory_store(data_dir: str, iterations: int = 100) -> BenchmarkResult:
"""Benchmark memory storage operations."""
from farnsworth.memory.memory_system import MemorySystem
result = BenchmarkResult("Memory Store")
with tempfile.TemporaryDirectory() as tmpdir:
memory = MemorySystem(data_dir=tmpdir)
await memory.initialize()
for i in range(iterations):
content = f"Test memory content {i}. This is a sample piece of text for benchmarking."
start = time.perf_counter()
try:
await memory.remember(content, importance=0.5)
duration = time.perf_counter() - start
result.add_sample(duration, success=True)
            except Exception:
duration = time.perf_counter() - start
result.add_sample(duration, success=False)
return result
async def benchmark_memory_recall(data_dir: str, iterations: int = 50) -> BenchmarkResult:
"""Benchmark memory recall operations."""
from farnsworth.memory.memory_system import MemorySystem
result = BenchmarkResult("Memory Recall")
with tempfile.TemporaryDirectory() as tmpdir:
memory = MemorySystem(data_dir=tmpdir)
await memory.initialize()
# Seed some memories
topics = ["Python programming", "Machine learning", "Data science",
"Web development", "System design", "Algorithms"]
for i in range(100):
topic = topics[i % len(topics)]
await memory.remember(f"{topic} content {i}")
# Benchmark recall
queries = ["programming", "machine learning", "algorithms", "web", "data"]
for i in range(iterations):
query = queries[i % len(queries)]
start = time.perf_counter()
try:
results = await memory.recall(query, top_k=5)
duration = time.perf_counter() - start
result.add_sample(duration, success=len(results) > 0)
            except Exception:
duration = time.perf_counter() - start
result.add_sample(duration, success=False)
return result
async def benchmark_embedding_generation(iterations: int = 100) -> BenchmarkResult:
"""Benchmark embedding generation."""
result = BenchmarkResult("Embedding Generation")
try:
from farnsworth.rag.embeddings import EmbeddingManager
embedder = EmbeddingManager()
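        # Inputs of increasing length: short, medium, and a repeated longer passage.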
texts = [
"This is a short text.",
"This is a medium length text that contains more words and information.",
"This is a longer piece of text that simulates a more realistic document. " * 3,
]
for i in range(iterations):
text = texts[i % len(texts)]
start = time.perf_counter()
try:
                embedding = embedder.embed(text)
                duration = time.perf_counter() - start
                result.add_sample(duration, success=len(embedding) > 0)
                # Record the embedding dimensionality from the latest successful call.
                result.metadata["dimensions"] = len(embedding)
            except Exception:
                duration = time.perf_counter() - start
                result.add_sample(duration, success=False)
except ImportError:
result.metadata["error"] = "sentence-transformers not installed"
return result
async def benchmark_llm_inference(iterations: int = 10) -> BenchmarkResult:
"""Benchmark LLM inference (if available)."""
result = BenchmarkResult("LLM Inference")
try:
from farnsworth.core.llm_backend import OllamaBackend
backend = OllamaBackend(model="deepseek-r1:1.5b")
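        # Requires a running Ollama server with this model pulled; otherwise the benchmark records failures or is skipped.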
prompts = [
"What is 2 + 2?",
"Write a haiku about coding.",
"Explain recursion in one sentence.",
]
for i in range(iterations):
prompt = prompts[i % len(prompts)]
start = time.perf_counter()
try:
response = await backend.generate(prompt, max_tokens=50)
duration = time.perf_counter() - start
result.add_sample(duration, success=True)
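                # Rough throughput estimate: assumes the full 50-token budget was generated; overwritten each iteration.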
result.metadata["tokens_per_second"] = 50 / duration if duration > 0 else 0
            except Exception:
duration = time.perf_counter() - start
result.add_sample(duration, success=False)
except Exception as e:
result.metadata["error"] = str(e)
return result
async def benchmark_knowledge_graph(iterations: int = 100) -> BenchmarkResult:
"""Benchmark knowledge graph operations."""
from farnsworth.memory.knowledge_graph import KnowledgeGraph
result = BenchmarkResult("Knowledge Graph")
graph = KnowledgeGraph()
    # Build a chain of entities, linking each new entity to the previously created one.
    prev_id = None
    for i in range(iterations):
        start = time.perf_counter()
        try:
            entity_id = graph.add_entity(f"Entity_{i}", "TestType")
            if prev_id is not None:
                graph.add_relationship(entity_id, prev_id, "related_to")
            prev_id = entity_id
            duration = time.perf_counter() - start
            result.add_sample(duration, success=True)
        except Exception:
duration = time.perf_counter() - start
result.add_sample(duration, success=False)
result.metadata["total_entities"] = len(graph.entities)
result.metadata["total_relationships"] = len(graph.relationships)
return result
async def benchmark_fitness_tracking(iterations: int = 1000) -> BenchmarkResult:
"""Benchmark fitness tracking operations."""
from farnsworth.evolution.fitness_tracker import FitnessTracker
result = BenchmarkResult("Fitness Tracking")
tracker = FitnessTracker()
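    # Each iteration records two oscillating metrics to exercise the weighted-fitness calculation.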
for i in range(iterations):
start = time.perf_counter()
try:
tracker.record("task_success", 0.8 + (i % 10) * 0.01)
tracker.record("efficiency", 0.7 + (i % 10) * 0.02)
duration = time.perf_counter() - start
result.add_sample(duration, success=True)
        except Exception:
duration = time.perf_counter() - start
result.add_sample(duration, success=False)
result.metadata["final_fitness"] = tracker.get_weighted_fitness()
return result
async def run_all_benchmarks(data_dir: str, output_file: Optional[str] = None, quick: bool = False):
"""Run all benchmarks."""
print_header("Farnsworth Benchmark Suite")
print(f"\nStarted at: {datetime.now().isoformat()}")
    results = []
    # --quick trims each benchmark to roughly a tenth of its normal iteration count.
    def scaled(base: int) -> int:
        return max(1, base // 10) if quick else base
    # Memory benchmarks
    print("\n🧠 Memory System Benchmarks")
    result = await benchmark_memory_store(data_dir, iterations=scaled(100))
results.append(result)
print_result(result)
    result = await benchmark_memory_recall(data_dir, iterations=scaled(50))
results.append(result)
print_result(result)
# Embedding benchmarks
print("\n📊 Embedding Benchmarks")
    result = await benchmark_embedding_generation(iterations=scaled(100))
    results.append(result)
    if result.times:
        print_result(result)
    else:
        print("  (Skipped - sentence-transformers not installed)")
# Knowledge graph benchmarks
print("\n🕸️ Knowledge Graph Benchmarks")
    result = await benchmark_knowledge_graph(iterations=scaled(100))
results.append(result)
print_result(result)
# Fitness tracking benchmarks
print("\n📈 Evolution Benchmarks")
    result = await benchmark_fitness_tracking(iterations=scaled(1000))
results.append(result)
print_result(result)
# LLM benchmarks (optional)
print("\n🤖 LLM Benchmarks")
    result = await benchmark_llm_inference(iterations=scaled(10))
results.append(result)
if result.times:
print_result(result)
else:
print(" (Skipped - LLM not available)")
# Summary
print_header("Summary")
    total_samples = sum(len(r.times) for r in results)
    completed = [r for r in results if r.times]
    avg_success = statistics.mean([r.success_rate for r in completed]) if completed else 0.0
print(f"\n Total Benchmarks: {len(results)}")
print(f" Total Samples: {total_samples}")
print(f" Avg Success Rate: {avg_success*100:.1f}%")
# Save results
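    # Output schema mirrors BenchmarkResult.to_dict(): name, samples, mean_ms, median_ms, std_dev_ms, min_ms, max_ms, success_rate, metadata.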
if output_file:
output = {
"timestamp": datetime.now().isoformat(),
"benchmarks": [r.to_dict() for r in results],
}
with open(output_file, "w") as f:
json.dump(output, f, indent=2)
print(f"\n Results saved to: {output_file}")
print(f"\nCompleted at: {datetime.now().isoformat()}")
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(
description="Farnsworth Benchmark Suite"
)
parser.add_argument(
"--output", "-o",
type=str,
help="Output JSON file for results"
)
parser.add_argument(
"--data-dir",
type=str,
default="./data",
help="Data directory for benchmarks"
)
parser.add_argument(
"--quick",
action="store_true",
help="Run quick benchmarks (fewer iterations)"
)
args = parser.parse_args()
# Run benchmarks
    asyncio.run(run_all_benchmarks(
        data_dir=args.data_dir,
        output_file=args.output,
        quick=args.quick,
    ))
if __name__ == "__main__":
main()