# test.py (2.41 kB)
import os
import re
from typing import Optional, List, Dict, Any
from fastmcp import FastMCP
from pinecone import Pinecone, SearchQuery
from dotenv import load_dotenv
import pandas as pd
import json
# Load environment variables.
# This must run before the os.getenv() call below so that values supplied by
# python-dotenv (typically from a local .env file) are visible to it.
load_dotenv()
# Initialize the FastMCP server object under the name "pinecone-econ-mcp".
mcp = FastMCP(name="pinecone-econ-mcp")
# Initialize the Pinecone client.
# os.getenv returns None when PINECONE_API_KEY is unset; no check is made here,
# so a missing key is passed straight through to the Pinecone constructor.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
# Handle to the 'economics' index; semantic_search issues its queries on it.
index = pc.Index('economics')
# Namespace name referenced by semantic_search for its index queries.
DEFAULT_NAMESPACE = "book_data"
def format_result(matches: List[Any]) -> List[Dict[str, Any]]:
    """Hand the Pinecone hits back to the caller as they were received.

    This is currently a pass-through hook: no copy is made and no field is
    added, removed or renamed, so the returned object is the very same list
    that was passed in.

    Args:
        matches: Hits taken from a Pinecone search response.

    Returns:
        The ``matches`` object itself, unmodified.
    """
    formatted = matches
    return formatted
def semantic_search(
    query: str,
    top_k: int = 10,
    namespace: str = 'book_data'
) -> str:
    """
    Perform semantic search across economic books using natural language (DEFAULT/RECOMMENDED).

    The query text is passed directly to ``index.search`` as
    ``inputs={"text": query}``; the text-to-embedding step is left to the
    index (Pinecone integrated inference), so no vector is built here.
    Use this for natural language queries such as "theories about market
    equilibrium" or "impact of automation on labor markets".

    Args:
        query: Natural language search query (e.g., "theories about income
            distribution"). Must contain at least one non-whitespace
            character.
        top_k: Number of results to request. Clamped to the range 1..100
            (default: 10).
        namespace: Pinecone namespace to search. An empty value falls back
            to ``DEFAULT_NAMESPACE``.

    Returns:
        On success, ``str()`` of the list of hits (each hit carries the
        requested fields: chunk_text, author_name, book_name,
        chapter_titles, pages, subjects). On failure, a message starting
        with "Error performing semantic search: ". Exceptions raised while
        searching are caught and reported through that message rather than
        propagated.
    """
    # Fail fast on a blank query instead of spending a remote call on it.
    if not isinstance(query, str) or not query.strip():
        return "Error performing semantic search: query must be a non-empty string"

    try:
        # Keep the requested result count inside the documented 1..100 window.
        top_k = max(1, min(top_k, 100))

        # Honour the caller's namespace; only fall back when none was given.
        target_namespace = namespace or DEFAULT_NAMESPACE

        # Use Pinecone's integrated inference - pass text directly
        response = index.search(
            namespace=target_namespace,
            query=SearchQuery(
                inputs={"text": query},
                top_k=top_k,
            ),
            fields=[
                "chunk_text",
                "author_name",
                "book_name",
                "chapter_titles",
                "pages",
                "subjects",
            ],
        )
        hits = response.result['hits']
        return str(format_result(hits))
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error performing semantic search: {str(e)}"
# Smoke run: issue one sample query and print whatever string comes back
# (the hits, or the error message produced by semantic_search).
print(semantic_search("input output analysis and economic modeling", top_k=10))