Skip to main content
Glama
serp_pdf_search.py (3.69 kB)
"""
SerpAPI PDF Document Search MCP tool.

This tool queries Google Search via SerpAPI to find PDF documents related to a
company and returns a normalized list of PDF links with metadata.

Environment variables:
    SERPAPI_API_KEY: API key for SerpAPI (required).
"""

import os
from typing import Dict, Any, List
from urllib.parse import urlsplit

from ..core.base import BaseMCPTool

try:
    from serpapi import GoogleSearch
except Exception:  # pragma: no cover
    GoogleSearch = None


# Number of results requested when the caller passes a falsy ``num_results``.
_DEFAULT_NUM_RESULTS = 10


def _map_pdf_result(item: Dict[str, Any]) -> Dict[str, Any]:
    """Map SerpAPI result to normalized PDF document format.

    Args:
        item: One entry of SerpAPI's ``organic_results`` list.

    Returns:
        A dict with the keys "title", "link", "snippet", "displayed_link"
        (each ``None`` when absent from ``item``) and a constant
        "file_format" of "PDF".
    """
    return {
        "title": item.get("title"),
        "link": item.get("link"),
        "snippet": item.get("snippet"),
        "displayed_link": item.get("displayed_link"),
        "file_format": "PDF",
    }


def _is_pdf_link(link: Any) -> bool:
    """Return True when ``link`` looks like a URL pointing at a ``.pdf`` file.

    Non-string or empty values (e.g. a result whose "link" is ``None``) are
    treated as "not a PDF" instead of raising. The URL path is inspected as
    well as the raw string so that links carrying a query string or fragment
    (``report.pdf?download=1``) are still recognized.
    """
    if not isinstance(link, str) or not link:
        return False
    lowered = link.lower()
    if lowered.endswith(".pdf"):
        return True
    try:
        path = urlsplit(lowered).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. unbalanced IPv6 brackets).
        return False
    return path.endswith(".pdf")


class SerpPdfSearchMCPTool(BaseMCPTool):
    """MCP tool to search for PDF documents via SerpAPI.

    Usage example:
        >>> from pitchlense_mcp import SerpPdfSearchMCPTool
        >>> tool = SerpPdfSearchMCPTool()
        >>> result = tool.search_pdf_documents("OpenAI filetype:pdf", num_results=10)
        >>> print(result["results"][0]["link"])  # first PDF URL
    """

    def __init__(self):
        super().__init__(
            "SerpAPI PDF Search",
            "Search for PDF documents related to a company using Google Search via SerpAPI",
        )

    def _get_client(self):
        """Validate the SerpAPI prerequisites and return the API key.

        Despite its name, this returns the API key string rather than a
        client object; the caller builds the ``GoogleSearch`` request itself.

        Raises:
            RuntimeError: If ``SERPAPI_API_KEY`` is unset/empty, or if the
                ``serpapi`` package could not be imported.
        """
        api_key = os.getenv("SERPAPI_API_KEY")
        if not api_key:
            raise RuntimeError("SERPAPI_API_KEY not set")
        if GoogleSearch is None:
            raise RuntimeError("serpapi package not installed. Run: pip install google-search-results")
        return api_key

    def search_pdf_documents(self, query: str, num_results: int = 10) -> Dict[str, Any]:
        """Search for PDF documents using Google Search via SerpAPI.

        Args:
            query: Search query string (should include "filetype:pdf").
            num_results: Maximum number of results to return (default: 10).
                Falsy values (``None``, ``0``) fall back to the default.

        Returns:
            A dictionary with keys:
            - "query": The original query string.
            - "results": List of PDF documents, each containing:
              {"title", "link", "snippet", "displayed_link", "file_format"}.

        Error handling:
            Returns a standardized error dict via create_error_response on
            failures (e.g., missing API key, network error, or invalid input).
        """
        if not isinstance(query, str) or not query.strip():
            return self.create_error_response("Invalid query: must be a non-empty string")

        # Preserve the historical "falsy means default" behavior, but reject
        # values that cannot be interpreted as a positive integer instead of
        # forwarding them to the API.
        try:
            limit = int(num_results) if num_results else _DEFAULT_NUM_RESULTS
        except (TypeError, ValueError):
            return self.create_error_response("Invalid num_results: must be a positive integer")
        if limit < 1:
            return self.create_error_response("Invalid num_results: must be a positive integer")

        # Broad catch is deliberate: this is the tool boundary, and every
        # failure is reported to the caller as an error response.
        try:
            api_key = self._get_client()
            params = {
                "engine": "google",
                "q": query,
                "api_key": api_key,
                "hl": "en",
                "gl": "us",
                "num": limit,
            }
            search = GoogleSearch(params)
            results = search.get_dict() or {}
            organic_results: List[Dict[str, Any]] = results.get("organic_results") or []

            # When the query itself restricts to PDFs, every organic result is
            # accepted; otherwise only links that look like PDF files are kept.
            query_restricts_to_pdf = "filetype:pdf" in query.lower()
            pdf_results = [
                _map_pdf_result(item)
                for item in organic_results
                if isinstance(item, dict)
                and (query_restricts_to_pdf or _is_pdf_link(item.get("link")))
            ]

            # Enforce the documented maximum even if the API returns more.
            return {"query": query, "results": pdf_results[:limit]}
        except Exception as e:
            return self.create_error_response(f"SerpAPI PDF search error: {str(e)}")

    def register_tools(self):
        """Register ``search_pdf_documents`` through the base class's ``register_tool``."""
        self.register_tool(self.search_pdf_documents)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/connectaman/Pitchlense-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.