Pinecone Economic Books

server.py•15.9 kB

#!/opt/homebrew/bin/python3.10
"""
Pinecone Economic Books MCP Server

This MCP server provides read-only access to a Pinecone vector database
containing economic books and papers. It exposes multiple tools for searching
and retrieving economic literature.
"""

import os
from typing import Optional, List, Dict, Any

from fastmcp import FastMCP
from pinecone import Pinecone, SearchQuery
from dotenv import load_dotenv
import pandas as pd

# Load environment variables
load_dotenv()

# Initialize FastMCP server
mcp = FastMCP(name="pinecone-econ-mcp")

# Initialize Pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index('economics')

# Default namespace for the economic books data
DEFAULT_NAMESPACE = "book_data"

# Upper bound applied to every tool's ``top_k`` argument.
MAX_TOP_K = 100

# Record fields requested from Pinecone for every search hit.
RESULT_FIELDS = [
    "chunk_text",
    "author_name",
    "book_name",
    "chapter_titles",
    "pages",
    "subjects",
]


def format_result(matches: List[Any]) -> List[Dict[str, Any]]:
    """Format Pinecone query results into a readable structure.

    Args:
        matches: Objects exposing ``score``, ``id`` and ``metadata`` attributes.

    Returns:
        One dict per match with the keys "score", "id" and "metadata".
    """
    return [
        {"score": match.score, "id": match.id, "metadata": match.metadata}
        for match in matches
    ]


def _clamp_top_k(top_k: int) -> int:
    """Clamp ``top_k`` into the inclusive range 1..MAX_TOP_K."""
    return max(1, min(top_k, MAX_TOP_K))


def _combine_filters(
    conditions: List[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Merge filter conditions with AND logic; return None when there are none."""
    if not conditions:
        return None
    if len(conditions) == 1:
        return conditions[0]
    return {"$and": conditions}


def _run_search(
    query: str,
    top_k: int,
    filters: Optional[Dict[str, Any]] = None,
) -> str:
    """Run a text search in the default namespace and return the hits as a string.

    The metadata filter is placed inside ``SearchQuery``: ``index.search``
    itself only accepts ``namespace``, ``query``, ``rerank`` and ``fields``,
    so passing ``filter`` or ``include_metadata`` to it raises ``TypeError``.
    """
    query_kwargs: Dict[str, Any] = {
        "inputs": {"text": query},
        "top_k": _clamp_top_k(top_k),
    }
    if filters:
        query_kwargs["filter"] = filters

    response = index.search(
        namespace=DEFAULT_NAMESPACE,
        query=SearchQuery(**query_kwargs),
        fields=RESULT_FIELDS,
    )
    return str(response.result['hits'])


@mcp.tool()
def semantic_search(
    query: str,
    top_k: int = 10,
) -> str:
    """
    Perform semantic search across economic books using natural language (DEFAULT/RECOMMENDED).

    This is the primary search method - it finds content semantically similar to your query
    by automatically converting your text to embeddings using Pinecone's integrated inference.
    This is the most powerful search method as it understands meaning and context, not just keywords.

    Use this for natural language queries like "theories about market equilibrium" or
    "impact of automation on labor markets". For exact matches by metadata (author, book title, etc.),
    use the specialized tools instead.

    Args:
        query: Natural language search query (e.g., "theories about income distribution")
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits, or an error message
    """
    try:
        # Use Pinecone's integrated inference - pass text directly
        return _run_search(query, top_k)
    except Exception as e:
        return f"Error performing semantic search: {e}"


@mcp.tool()
def semantic_search_with_filters(
    query: str,
    author_name: Optional[str] = None,
    book_name: Optional[str] = None,
    subjects: Optional[List[str]] = None,
    top_k: int = 10,
) -> str:
    """
    Perform semantic search with metadata filters for precise results.

    Combines the power of semantic/natural language search with exact metadata filtering.
    This is ideal when you want to search for concepts within a specific author's work
    or a particular book.

    Example: Search for "labor productivity" only in books by "Wassily Leontief"

    Args:
        query: Natural language search query
        author_name: Filter by author name
        book_name: Filter by book name
        subjects: Filter by subjects (any of these subjects)
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits matching both the semantic
        query and the filters, or an error message
    """
    try:
        # Build metadata filters; all provided filters must match (AND logic)
        filter_conditions: List[Dict[str, Any]] = []
        if author_name:
            filter_conditions.append({"author_name": {"$eq": author_name}})
        if book_name:
            filter_conditions.append({"book_name": {"$eq": book_name}})
        if subjects:
            filter_conditions.append({"subjects": {"$in": subjects}})

        return _run_search(query, top_k, _combine_filters(filter_conditions))
    except Exception as e:
        return f"Error performing filtered semantic search: {e}"


@mcp.tool()
def search_by_author(
    query: str,
    author_name: str,
    top_k: int = 10,
) -> str:
    """
    Search for content by a specific author using semantic search.

    Combines natural language query with author filtering to find relevant content
    within an author's works.

    Args:
        query: Natural language search query
        author_name: The author's name to filter by (e.g., "Wassily Leontief")
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits filtered by author, or an
        error message
    """
    try:
        return _run_search(query, top_k, {"author_name": {"$eq": author_name}})
    except Exception as e:
        return f"Error searching by author: {e}"


@mcp.tool()
def search_by_subject(
    query: str,
    subject: str,
    top_k: int = 10,
) -> str:
    """
    Search for content related to a specific economic subject/topic using semantic search.

    Combines natural language query with subject/topic filtering to find relevant content
    tagged with specific economic topics.

    Args:
        query: Natural language search query
        subject: The subject tag to filter by (e.g., "income", "capital", "equilibrium")
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits filtered by subject, or an
        error message
    """
    try:
        # Search where subjects array contains the specified subject
        return _run_search(query, top_k, {"subjects": {"$in": [subject]}})
    except Exception as e:
        return f"Error searching by subject: {e}"


@mcp.tool()
def search_by_book(
    query: str,
    book_name: str,
    top_k: int = 10,
) -> str:
    """
    Search for content within a specific book using semantic search.

    Combines natural language query with book filtering to find relevant content
    within a specific book.

    Args:
        query: Natural language search query
        book_name: The book name to filter by
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits filtered by book name, or an
        error message
    """
    try:
        return _run_search(query, top_k, {"book_name": {"$eq": book_name}})
    except Exception as e:
        return f"Error searching by book: {e}"


@mcp.tool()
def advanced_search(
    query: str,
    author_name: Optional[str] = None,
    book_name: Optional[str] = None,
    subjects: Optional[List[str]] = None,
    pages: Optional[List[str]] = None,
    top_k: int = 10,
) -> str:
    """
    Perform advanced semantic search with multiple filter criteria.

    Combine natural language query with multiple metadata filters to narrow down results.
    All provided filters must match (AND logic).

    Args:
        query: Natural language search query
        author_name: Filter by author name
        book_name: Filter by book name
        subjects: Filter by subjects (any of these subjects)
        pages: Filter by specific page numbers
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits matching all specified
        criteria, or an error message
    """
    try:
        # Build filter dynamically
        filter_conditions: List[Dict[str, Any]] = []
        if author_name:
            filter_conditions.append({"author_name": {"$eq": author_name}})
        if book_name:
            filter_conditions.append({"book_name": {"$eq": book_name}})
        if subjects:
            # Match any of the provided subjects
            filter_conditions.append({"subjects": {"$in": subjects}})
        if pages:
            # Match any of the provided pages
            filter_conditions.append({"pages": {"$in": pages}})

        return _run_search(query, top_k, _combine_filters(filter_conditions))
    except Exception as e:
        return f"Error performing advanced search: {e}"


@mcp.tool()
def get_by_id(
    document_id: str,
) -> str:
    """
    Retrieve a specific document by its ID.

    Args:
        document_id: The document ID (e.g., "Wassily Leontief_Leontief_Essays in economics - theories and theorizing_1966_27")

    Returns:
        String representation of the document's id and metadata, a not-found
        message, or an error message
    """
    try:
        result = index.fetch(
            ids=[document_id],
            namespace=DEFAULT_NAMESPACE
        )

        if document_id not in result.vectors:
            return f"Document with ID '{document_id}' not found"

        vector_data = result.vectors[document_id]
        return str({
            "id": vector_data.id,
            "metadata": vector_data.metadata
        })
    except Exception as e:
        return f"Error fetching document: {e}"


@mcp.tool()
def search_by_page_range(
    query: str,
    start_page: str,
    end_page: str,
    author_name: Optional[str] = None,
    book_name: Optional[str] = None,
    top_k: int = 10,
) -> str:
    """
    Search for content within a specific page range using semantic search.

    Combines natural language query with page range filtering to find relevant content
    within specific pages of books.

    Args:
        query: Natural language search query
        start_page: Starting page number (as string)
        end_page: Ending page number (as string)
        author_name: Optional filter by author
        book_name: Optional filter by book
        top_k: Number of results to return (default: 10, clamped to 1-100)

    Returns:
        String representation of the search hits within the specified page
        range, or an error message
    """
    try:
        # Generate list of pages in range
        try:
            start = int(start_page)
            end = int(end_page)
        except ValueError:
            return "Error: start_page and end_page must be valid numbers"

        # A reversed range would yield an empty "$in" list, which cannot
        # match anything; report it instead of sending it to the service.
        if start > end:
            return "Error: start_page must be less than or equal to end_page"

        page_list = [str(p) for p in range(start, end + 1)]

        # Build filter
        filter_conditions: List[Dict[str, Any]] = [{"pages": {"$in": page_list}}]
        if author_name:
            filter_conditions.append({"author_name": {"$eq": author_name}})
        if book_name:
            filter_conditions.append({"book_name": {"$eq": book_name}})

        return _run_search(query, top_k, _combine_filters(filter_conditions))
    except Exception as e:
        return f"Error searching by page range: {e}"


@mcp.tool()
def get_index_stats() -> str:
    """
    Get statistics about the Pinecone index.

    Returns:
        String representation of the index statistics, or an error message
    """
    try:
        stats = index.describe_index_stats()
        return str(stats)
    except Exception as e:
        return f"Error getting index stats: {e}"


@mcp.tool()
def get_economic_catagories() -> str:
    """get a list of all the economic catagories"""
    # The glossary is read from "economic_glossary.csv" relative to the
    # current working directory; failures are reported as a string, in line
    # with the other tools, instead of propagating.
    try:
        titles = pd.read_csv("economic_glossary.csv")["title"].tolist()
    except Exception as e:
        return f"Error getting economic catagories: {e}"
    return 'The economic catagories are:\n' + str(titles)


@mcp.tool()
def get_authors() -> str:
    """get a list of all the authors in the database"""
    return 'The authors are:\n' + str([
        'Adam Smith',
        'Herbert Simon',
        'Wassily Leontief',
        'Karl Marx',
        'Michael Kalecki',
        'Norbert Weiner'
    ])


# Resources for providing static information
@mcp.resource("schema://economic-books")
def get_data_schema() -> str:
    """Get the schema/structure of economic books data."""
    return """
    Economic Books Data Schema:

    Each document contains:
    - id: Unique identifier (format: "Author_Book_PageNumber")
    - score: Similarity score (for search results)
    - metadata:
        - author_name: Author's full name
        - book_name: Full book title with year
        - chapter_titles: Array of chapter titles (may be empty)
        - chunk_text: The actual text content with page markers
        - pages: Array of page numbers covered in this chunk
        - subjects: Array of economic topics/keywords

    Example:
    {
        "score": 0.2712,
        "id": "Wassily Leontief_Leontief_Essays in economics_1966_27",
        "metadata": {
            "author_name": "Wassily Leontief",
            "book_name": "Leontief_Essays in economics - theories and theorizing_1966",
            "chapter_titles": [],
            "chunk_text": "# Page 70\\n...",
            "pages": ["70", "71"],
            "subjects": ["income", "national income", "output", "price"]
        }
    }
    """


if __name__ == "__main__":
    # Run the MCP server
    mcp.run()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SrulyRosenblat/econ_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.