"""
Semantic Cache - Fuzzy query matching for LLM responses (V10.23 Enhanced)
Core features:
1. Fuzzy matching using vector embeddings (ChromaDB)
2. Configurable similarity threshold (Default: 0.95)
3. Persistent storage in the project's cache directory
4. TTL and usage-based eviction
5. 50%+ API cost reduction for repeated/similar tasks
This allows the agent to reuse previous thoughts and actions when facing similar scenarios.
"""
import hashlib
import logging
import threading
import time
from pathlib import Path
from ..config import settings
logger = logging.getLogger(__name__)
try:
import chromadb
from chromadb.config import Settings as ChromaSettings
CHROMA_AVAILABLE = True
except ImportError:
chromadb = None
ChromaSettings = None
CHROMA_AVAILABLE = False
logger.debug("chromadb not installed. Semantic Cache will be disabled.")
class SemanticCache:
"""
Persistent fuzzy cache for LLM queries using vector similarity.
"""
COLLECTION_NAME = "boring_semantic_cache"
def __init__(
self,
persist_dir: Path | None = None,
threshold: float = 0.95,
collection_name: str | None = None,
):
"""
Initialize the semantic cache.
Args:
persist_dir: Directory for vector DB. Defaults to PROJECT_ROOT/.boring/cache/semantic
threshold: Similarity threshold (0.0 - 1.0). Default 0.95
collection_name: Name for the ChromaDB collection
"""
self.threshold = threshold
self.persist_dir = persist_dir or (settings.CACHE_DIR / "semantic")
self.collection_name = collection_name or self.COLLECTION_NAME
self.collection = None
self.client = None
self._lock = threading.RLock()
if CHROMA_AVAILABLE:
try:
self.persist_dir.mkdir(parents=True, exist_ok=True)
self.client = chromadb.PersistentClient(
path=str(self.persist_dir),
settings=ChromaSettings(anonymized_telemetry=False, allow_reset=True),
)
self.collection = self.client.get_or_create_collection(
name=self.collection_name, metadata={"hnsw:space": "cosine"}
)
logger.info(f"Semantic Cache initialized at {self.persist_dir}")
except Exception as e:
logger.warning(f"Failed to initialize Semantic Cache: {e}")
self.collection = None
@property
def is_available(self) -> bool:
"""Check if semantic cache is available and functional."""
return CHROMA_AVAILABLE and self.collection is not None
def get(self, prompt: str) -> str | None:
"""
Check cache for similar prompt.
Args:
prompt: The LLM prompt to search for.
Returns:
Cached response string if found and similarity >= threshold, else None.
"""
if not self.is_available:
return None
try:
with self._lock:
# Query ChromaDB for the most similar entry
results = self.collection.query(
query_texts=[prompt],
n_results=1,
include=["documents", "distances", "metadatas"],
)
if not results or not results["documents"] or not results["documents"][0]:
return None
                # ChromaDB's cosine distance is 1 - cosine_similarity, so it ranges
                # from 0.0 (identical) to 2.0 (opposite) and similarity = 1.0 - distance.
                # A similarity threshold of 0.95 therefore means distance <= 0.05.
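                # Worked example (illustrative numbers): distance 0.04 -> similarity 0.96,
                # which clears the default 0.95 threshold and counts as a cache hit.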
distance = results["distances"][0][0]
similarity = 1.0 - distance
logger.debug(
f"Query: {prompt[:50]}... | Best match distance: {distance:.4f} | Similarity: {similarity:.4f}"
)
if similarity >= self.threshold:
                    # The cached response is stored in the entry's metadata (see set())
cached_response = results["metadatas"][0][0].get("response")
if cached_response:
logger.debug(f"Semantic cache hit (sim={similarity:.4f})")
return cached_response
logger.debug(f"Semantic cache miss (best sim={similarity:.4f})")
return None
except Exception as e:
logger.error(f"Error reading from semantic cache: {e}")
return None
def set(self, prompt: str, response: str, metadata: dict | None = None):
"""
Store prompt-response pair in cache.
"""
if not self.is_available or not prompt or not response:
return
try:
with self._lock:
# Generate unique ID for the prompt
prompt_id = hashlib.sha256(prompt.encode()).hexdigest()
base_metadata = {
"timestamp": time.time(),
"created_at": time.strftime("%Y-%m-%d %H:%M:%S"),
"access_count": 1,
"response": response, # Store response in metadata
}
if metadata:
base_metadata.update(metadata)
self.collection.upsert(
ids=[prompt_id],
documents=[prompt], # Embed the prompt
metadatas=[base_metadata],
)
logger.debug(f"Cached semantic response for prompt hash: {prompt_id}")
except Exception as e:
logger.error(f"Error writing to semantic cache: {e}")
def clear(self):
"""Wipe the semantic cache."""
if not self.is_available:
return
try:
with self._lock:
self.client.delete_collection(self.collection_name)
self.collection = self.client.get_or_create_collection(
name=self.collection_name, metadata={"hnsw:space": "cosine"}
)
logger.info("Semantic cache cleared.")
except Exception as e:
logger.error(f"Error clearing semantic cache: {e}")
# Per-project singleton instances, keyed by project root
_instance_cache: dict[str, SemanticCache] = {}
def get_semantic_cache(project_root: Path | None = None) -> SemanticCache:
    """Get or create the singleton SemanticCache instance for a project root."""
    root_str = str(project_root) if project_root else "default"
    if root_str not in _instance_cache:
        # Mirror the documented default layout (PROJECT_ROOT/.boring/cache/semantic)
        # when an explicit project root is given; otherwise fall back to settings.CACHE_DIR.
        persist_dir = project_root / ".boring" / "cache" / "semantic" if project_root else None
        _instance_cache[root_str] = SemanticCache(persist_dir=persist_dir)
    return _instance_cache[root_str]
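# Example of per-project keying (illustrative paths only):
#
#   cache_a = get_semantic_cache(Path("/path/to/project-a"))
#   cache_b = get_semantic_cache(Path("/path/to/project-b"))
#   assert cache_a is not cache_b  # separate caches, persisted under each project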