Skip to main content
Glama

search_similar

Find semantically similar documents in the Chroma vector database by querying text, filtering by metadata, and specifying result count for precise search outcomes.

Instructions

Search for semantically similar documents in the Chroma vector database

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| content_filter | No | | |
| metadata_filter | No | | |
| num_results | No | | 5 |
| query | Yes | | |

Implementation Reference

  • Core handler function that executes the semantic similarity search using ChromaDB's query method with support for metadata and content filters, returning formatted results with distances.
    async def handle_search_similar(arguments: dict) -> list[types.TextContent]:
        """Run a semantic similarity search against the Chroma collection.

        NOTE(review): the previous docstring mentioned "retry logic", but no
        retry happens inside this body; if retries exist they are applied by
        something outside this function (e.g. a decorator) -- confirm.

        Args:
            arguments: Tool arguments. Recognised keys:
                ``query`` (required) -- text to search for;
                ``num_results`` -- number of hits to request (defaults to 5);
                ``metadata_filter`` -- mapping of metadata key to either a
                plain value (matched with ``$eq``) or a dict of operator
                conditions;
                ``content_filter`` -- substring the document text must contain.

        Returns:
            A single-element list holding a ``TextContent`` with either the
            formatted hits (id, distance, content, metadata) or a
            "No documents found" message that echoes the filters used.

        Raises:
            DocumentOperationError: If ``query`` is missing/empty, or if
                building or executing the search fails for any reason.
        """
        query = arguments.get("query")
        num_results = arguments.get("num_results", 5)
        metadata_filter = arguments.get("metadata_filter")
        content_filter = arguments.get("content_filter")

        if not query:
            raise DocumentOperationError("Missing query")

        def _as_text(value):
            # Numbers are sent to Chroma as strings; anything else is passed
            # through untouched.
            # NOTE(review): presumably metadata is stored as strings at
            # insert time -- verify against the document-creation handler.
            return str(value) if isinstance(value, (int, float)) else value

        try:
            query_params = {
                "query_texts": [query],
                "n_results": num_results,
                "include": ['documents', 'metadatas', 'distances'],
            }

            if metadata_filter:
                where_conditions = []
                for key, value in metadata_filter.items():
                    if isinstance(value, dict):
                        # Operator conditions such as {"$in": [...]}: normalise
                        # each operand (element-wise for list/tuple operands).
                        condition = {
                            op: (
                                [_as_text(item) for item in operand]
                                if isinstance(operand, (list, tuple))
                                else _as_text(operand)
                            )
                            for op, operand in value.items()
                        }
                    else:
                        # Plain values (numeric or not) become a string
                        # equality match.
                        condition = {"$eq": str(value)}
                    where_conditions.append({key: condition})

                # A single condition is passed bare; several are combined
                # under "$and".
                if len(where_conditions) == 1:
                    query_params["where"] = where_conditions[0]
                else:
                    query_params["where"] = {"$and": where_conditions}

            if content_filter:
                query_params["where_document"] = {"$contains": content_filter}

            logger.info("Executing search with params: %s", query_params)
            results = collection.query(**query_params)

            if not results or not results.get('ids') or len(results['ids'][0]) == 0:
                msg = ["No documents found matching query: " + query]
                if metadata_filter:
                    msg.append(f"Metadata filter: {metadata_filter}")
                if content_filter:
                    msg.append(f"Content filter: {content_filter}")
                return [types.TextContent(type="text", text="\n".join(msg))]

            # Only the first result list is read: exactly one query text
            # was submitted above.
            response = ["Similar documents:"]
            hits = zip(
                results['ids'][0],
                results['documents'][0],
                results['metadatas'][0],
                results['distances'][0],
            )
            for rank, (doc_id, content, metadata, distance) in enumerate(hits, start=1):
                response.append(f"\n{rank}. Document '{doc_id}' (distance: {distance:.4f})")
                response.append(f" Content: {content}")
                if metadata:
                    response.append(f" Metadata: {metadata}")

            return [types.TextContent(type="text", text="\n".join(response))]

        except Exception as e:
            logger.error("Search error: %s", e, exc_info=True)
            # Chain the cause so the original traceback is preserved for
            # callers and logs.
            raise DocumentOperationError(str(e)) from e
  • Dispatch logic in the main call_tool handler that routes 'search_similar' calls to the specific handler function.
    elif name == "search_similar": return await handle_search_similar(arguments)
  • Tool registration in the list_tools handler, providing name, description, and input schema for the MCP protocol.
    types.Tool( name="search_similar", description="Search for semantically similar documents in the Chroma vector database", inputSchema={ "type": "object", "properties": { "query": {"type": "string"}, "num_results": {"type": "integer", "minimum": 1, "default": 5}, "metadata_filter": {"type": "object", "additionalProperties": True}, "content_filter": {"type": "string"} }, "required": ["query"] } )
  • Input schema definition for the search_similar tool used in server command options, matching the exposed schema.
    "search_similar": { "type": "object", "properties": { "query": {"type": "string"}, "num_results": {"type": "integer", "minimum": 1, "default": 5}, "metadata_filter": {"type": "object", "additionalProperties": True}, "content_filter": {"type": "string"} }, "required": ["query"] }

Other Tools

Related Tools

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/privetin/chroma'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.