# context.py
"""
Context Capture Module for Enhanced Chat Logging
This module provides functionality for extracting rich contextual information from
code changes, tool usage patterns, and chat interactions. It supports the enhanced
context capture features of the auto_log_chat rule and other components.
Key features:
- Code snippet extraction from before/after edits
- Diff generation and summarization
- Tool sequence tracking and pattern recognition
- Confidence score calculation
- Bidirectional link management
"""
import os
import re
import logging
import difflib
from typing import List, Dict, Any
from enum import Enum
# Set up logging
logger = logging.getLogger(__name__)
class ModificationType(Enum):
"""Enumeration of standardized modification types for chat interactions."""
REFACTOR = "refactor"
BUGFIX = "bugfix"
FEATURE = "feature"
DOCUMENTATION = "documentation"
OPTIMIZATION = "optimization"
TEST = "test"
CONFIG = "config"
STYLE = "style"
UNKNOWN = "unknown"
class ToolPatterns:
"""Common tool usage patterns and their significance."""
MULTIPLE_READS = "multiple_reads" # Multiple read_file operations before an edit
SEARCH_THEN_EDIT = "search_then_edit" # Search operations followed by edits
ITERATIVE_REFINEMENT = "iterative_refinement" # Edit followed by reapply
EXPLORATION = "exploration" # Multiple search or read operations
CODE_EXECUTION = "code_execution" # Running terminal commands to test code
@classmethod
def identify_patterns(cls, tool_sequence: str) -> List[str]:
"""
Identify common patterns in a tool sequence.
Args:
tool_sequence: String representation of tool usage sequence
(e.g., "read_file→edit_file→run_terminal_cmd")
Returns:
List of pattern identifiers found in the sequence
"""
patterns = []
tools = tool_sequence.split("→")
# Count occurrences of each tool
read_count = tools.count("read_file")
edit_count = tools.count("edit_file")
search_count = sum(1 for t in tools if t in ["grep_search", "codebase_search"])
terminal_count = tools.count("run_terminal_cmd")
reapply_count = tools.count("reapply")
# Identify patterns
if read_count > 2:
patterns.append(cls.MULTIPLE_READS)
if search_count > 0 and edit_count > 0:
patterns.append(cls.SEARCH_THEN_EDIT)
if edit_count > 0 and reapply_count > 0:
patterns.append(cls.ITERATIVE_REFINEMENT)
if read_count + search_count > 3 and edit_count == 0:
patterns.append(cls.EXPLORATION)
if terminal_count > 0:
patterns.append(cls.CODE_EXECUTION)
return patterns
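# Illustrative example (shown as a comment so nothing runs at import time): a sequence
# with three reads, one edit, and a terminal command matches the "multiple reads" and
# "code execution" patterns defined above.
#
#   ToolPatterns.identify_patterns(
#       "read_file→read_file→read_file→edit_file→run_terminal_cmd"
#   )
#   # -> ["multiple_reads", "code_execution"]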
def extract_code_snippets(before_content: str, after_content: str, max_context_lines: int = 50) -> str:
"""
Extract relevant code snippets showing changes between before and after content.
Args:
before_content: Original content before changes
after_content: Modified content after changes
max_context_lines: Maximum number of lines to include in snippets
Returns:
Formatted string with before/after code snippets
"""
if not before_content and not after_content:
return ""
# Handle case where one content is empty (new file or deleted file)
if not before_content:
        after_lines = after_content.splitlines()
        if len(after_lines) > max_context_lines:
            after_lines = after_lines[:max_context_lines]
            after_lines.append("... (truncated)")
        return "NEW FILE:\n```\n" + "\n".join(after_lines) + "\n```"
if not after_content:
        before_lines = before_content.splitlines()
        if len(before_lines) > max_context_lines:
            before_lines = before_lines[:max_context_lines]
            before_lines.append("... (truncated)")
        return "DELETED FILE:\n```\n" + "\n".join(before_lines) + "\n```"
# Use difflib to find differences
diff = list(
        difflib.unified_diff(before_content.splitlines(), after_content.splitlines(), n=3, lineterm="")  # n=3 keeps 3 context lines around each change
)
# Extract relevant sections (headers + changes with context)
relevant_diff = []
in_hunk = False
for line in diff:
if line.startswith("+++") or line.startswith("---"):
continue
if line.startswith("@@"):
in_hunk = True
relevant_diff.append(line)
continue
if in_hunk:
relevant_diff.append(line)
# Truncate if too long
if len(relevant_diff) > max_context_lines:
half_max = max_context_lines // 2
# Keep beginning and end
truncated_diff = relevant_diff[:half_max] + ["... (truncated) ..."] + relevant_diff[-half_max:]
relevant_diff = truncated_diff
return "CHANGED FILE:\n```diff\n" + os.linesep.join(relevant_diff) + "\n```"
def generate_diff_summary(before_content: str, after_content: str, file_path: str) -> str:
"""
    Generate a concise, human-readable summary of the changes between two versions of a file's content.
Args:
before_content: Original content before changes
after_content: Modified content after changes
file_path: Path to the file being modified
Returns:
Human-readable summary of the key changes
"""
# Handle file creation/deletion
if not before_content:
return f"Created new file {file_path}"
if not after_content:
return f"Deleted file {file_path}"
# Count added/removed lines
diff = list(difflib.unified_diff(before_content.splitlines(), after_content.splitlines(), n=0))
    added = len([line for line in diff if line.startswith("+") and not line.startswith("+++")])
    removed = len([line for line in diff if line.startswith("-") and not line.startswith("---")])
# Basic summary
summary = f"Modified {file_path}: {added} lines added, {removed} lines removed"
# Try to determine nature of change if possible
before_lines = before_content.splitlines()
after_lines = after_content.splitlines()
# Look for function/class changes
function_pattern = re.compile(r"^\s*(def|class)\s+(\w+)")
before_funcs = set()
after_funcs = set()
for line in before_lines:
match = function_pattern.match(line)
if match:
before_funcs.add(f"{match.group(1)} {match.group(2)}")
for line in after_lines:
match = function_pattern.match(line)
if match:
after_funcs.add(f"{match.group(1)} {match.group(2)}")
new_funcs = after_funcs - before_funcs
removed_funcs = before_funcs - after_funcs
if new_funcs:
summary += f". Added: {', '.join(new_funcs)}"
if removed_funcs:
summary += f". Removed: {', '.join(removed_funcs)}"
return summary
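# Illustrative example: adding one function to a file yields a summary roughly like the
# one below, where "util.py" is just a placeholder path.
#
#   generate_diff_summary(
#       "def a():\n    pass\n",
#       "def a():\n    pass\n\ndef b():\n    return 1\n",
#       "util.py",
#   )
#   # -> "Modified util.py: 3 lines added, 0 lines removed. Added: def b"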
def track_tool_sequence(tools_used: List[str]) -> str:
"""
Convert a list of used tools into a standardized sequence string.
Args:
tools_used: List of tool names in order of use
Returns:
Standardized tool sequence string (e.g., "read_file→edit_file→run_terminal_cmd")
"""
if not tools_used:
return ""
# Filter out duplicate consecutive tools (e.g., read_file→read_file→edit_file becomes read_file→edit_file)
filtered_tools = []
for tool in tools_used:
if not filtered_tools or filtered_tools[-1] != tool:
filtered_tools.append(tool)
return "→".join(filtered_tools)
def calculate_confidence_score(tool_sequence: str, file_changes: List[Dict[str, Any]], response_length: int) -> float:
"""
Calculate a confidence score (0.0-1.0) for the value of an interaction.
Args:
tool_sequence: The sequence of tools used
file_changes: List of files modified with change information
response_length: Length of AI response in characters
Returns:
Confidence score between 0.0 and 1.0
"""
base_score = 0.5 # Start at middle
# Adjust based on tool usage
tools = tool_sequence.split("→")
# Complex interactions tend to be more valuable
if len(tools) > 3:
base_score += 0.1
# File edits are usually high value
if "edit_file" in tools:
base_score += 0.15
# Multiple file reads suggests research/understanding
if tools.count("read_file") > 2:
base_score += 0.05
# Terminal command execution suggests testing/verification
if "run_terminal_cmd" in tools:
base_score += 0.05
# Reapplies suggest iteration/refinement
if "reapply" in tools:
base_score += 0.05
# File changes are valuable
if file_changes:
base_score += 0.1
# Multiple file changes suggest larger impact
if len(file_changes) > 1:
base_score += 0.05
# Very short responses may indicate less value
if response_length < 100:
base_score -= 0.15
# Ensure score is within bounds
return min(1.0, max(0.0, base_score))
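# Illustrative example (floating-point sums may differ in the last decimal place):
#
#   calculate_confidence_score(
#       "read_file→edit_file→run_terminal_cmd",
#       [{"file_path": "context.py"}],
#       response_length=500,
#   )
#   # -> roughly 0.8 (0.5 base + 0.15 edit + 0.05 terminal + 0.1 file change)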
def determine_modification_type(
file_changes: List[Dict[str, Any]], prompt_summary: str, response_summary: str
) -> ModificationType:
"""
Determine the type of modification based on changes and summaries.
Args:
file_changes: List of files modified with change information
prompt_summary: Summary of the user's prompt
response_summary: Summary of the AI's response
Returns:
ModificationType enum value
"""
# Look for clues in the summaries
combined_text = (prompt_summary + " " + response_summary).lower()
# Check for documentation changes first (highest priority for these keywords)
if any(term in combined_text for term in ["document", "comment", "explain", "readme", "documentation"]):
return ModificationType.DOCUMENTATION
# Check for test-related changes (higher priority)
if any(term in combined_text for term in ["test", "unittest", "pytest", "testing", "test case"]):
return ModificationType.TEST
# Check for style changes
if any(term in combined_text for term in ["style", "format", "indent", "lint", "formatting"]):
return ModificationType.STYLE
# Continue with other categories
if any(term in combined_text for term in ["bug", "fix", "issue", "problem", "error"]):
return ModificationType.BUGFIX
if any(term in combined_text for term in ["refactor", "clean", "restructure", "improve"]):
return ModificationType.REFACTOR
if any(term in combined_text for term in ["add", "feature", "implement", "new"]):
return ModificationType.FEATURE
if any(term in combined_text for term in ["optimize", "performance", "faster", "efficient"]):
return ModificationType.OPTIMIZATION
if any(term in combined_text for term in ["config", "setting", "parameter", "environment"]):
return ModificationType.CONFIG
# Default if no clear indicators
return ModificationType.UNKNOWN
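# Illustrative example: the keyword checks run in the priority order above, so a prompt
# that mentions both tests and a fix is still classified as a TEST change.
#
#   determine_modification_type([], "add tests for the fix", "wrote pytest cases")
#   # -> ModificationType.TEST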
def manage_bidirectional_links(chat_id: str, file_changes: List[Dict[str, Any]], chroma_client) -> Dict[str, List[str]]:
"""
Manage bidirectional links between chat history and code chunks.
Args:
chat_id: ID of the current chat history entry
file_changes: List of files modified
chroma_client: ChromaDB client instance for interacting with collections
Returns:
Dictionary mapping file paths to their chunk IDs in codebase_v1
"""
result = {}
try:
# Get the codebase collection
codebase_collection = chroma_client.get_collection(name="codebase_v1")
if not codebase_collection:
logger.warning(f"Codebase collection not found, cannot create bidirectional links for chat {chat_id}")
return result
# Process each file change to find related code chunks
for file_change in file_changes:
file_path = file_change.get("file_path", "")
if not file_path:
continue
# Query codebase_v1 to find chunks containing this file
query_response = codebase_collection.query(
query_texts=[f"file:{file_path}"],
n_results=10, # Get multiple chunks if file is split
where={"file_path": {"$eq": file_path}},
)
# Process results if we found any
if query_response and "ids" in query_response and len(query_response["ids"]) > 0:
chunk_ids = query_response["ids"][0] # First query result's ids
# Only add to result if we have actual chunk IDs
if chunk_ids:
# Store in result map for return
result[file_path] = chunk_ids
# For each chunk, update the related_chat_ids field
for chunk_id in chunk_ids:
try:
# Get current metadata for this chunk
chunk_data = codebase_collection.get(ids=[chunk_id])
if chunk_data and "metadatas" in chunk_data and chunk_data["metadatas"]:
metadata = chunk_data["metadatas"][0]
# Update related_chat_ids field
related_chat_ids = metadata.get("related_chat_ids", "")
chat_ids = set(related_chat_ids.split(",")) if related_chat_ids else set()
chat_ids.add(chat_id)
# Update metadata with new related_chat_ids
metadata["related_chat_ids"] = ",".join(filter(None, chat_ids))
# Update the chunk's metadata
codebase_collection.update(ids=[chunk_id], metadatas=[metadata])
logger.debug(f"Updated bidirectional link for chunk {chunk_id} with chat {chat_id}")
except Exception as e:
logger.error(f"Failed to update bidirectional link for chunk {chunk_id}: {e}")
except Exception as e:
logger.error(f"Error managing bidirectional links: {e}")
return result
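

if __name__ == "__main__":
    # Minimal smoke-test sketch for the pure helpers in this module. It deliberately
    # skips manage_bidirectional_links, which needs a live ChromaDB client; the file
    # path, prompt, and response strings below are placeholders, not real project data.
    logging.basicConfig(level=logging.DEBUG)
    sequence = track_tool_sequence(["read_file", "read_file", "grep_search", "edit_file"])
    print("sequence:", sequence)
    print("patterns:", ToolPatterns.identify_patterns(sequence))
    print("summary:", generate_diff_summary("x = 1\n", "x = 2\n", "example.py"))
    print("score:", calculate_confidence_score(sequence, [{"file_path": "example.py"}], 250))
    print("type:", determine_modification_type([], "fix the bug in example.py", "patched the error"))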