Skip to main content
Glama

search_papers

Search arXiv papers using queries, filters for date, categories, and citations, and save results to files.

Instructions

Search for papers on arXiv with advanced filtering

Input Schema

Table / JSON Schema

Name          | Required | Description                         | Default
query         | Yes      |                                     |
max_results   | No       |                                     |
date_from     | No       |                                     |
date_to       | No       |                                     |
categories    | No       |                                     |
save_to_file  | No       | Optional file path to save results  |
min_citations | No       | Minimum citation count filter       |

Implementation Reference

  • The main handler function that executes the 'search_papers' tool. It searches arXiv using the arxiv library, enhances queries, applies filters (date, categories, citations), processes results into structured data, handles errors, and optionally saves output to a file.
    async def handle_search(arguments: Dict[str, Any]) -> List[types.TextContent]: """Handle paper search requests. Automatically adds field specifiers to plain queries for better relevance. This fixes issue #33 where queries sorted by date returned irrelevant results. """ try: client = arxiv.Client() max_results = min(int(arguments.get("max_results", 10)), settings.MAX_RESULTS) # Build search query with category filtering query = arguments["query"] # Add field specifier if not already present # This ensures the query actually searches the content if not any(field in query for field in ["all:", "ti:", "abs:", "au:", "cat:"]): # Convert plain query to use all: field for better results # Handle quoted phrases if '"' in query: # Keep quoted phrases intact query = f"all:{query}" else: # For unquoted multi-word queries, use AND operator terms = query.split() if len(terms) > 1: query = " AND ".join(f"all:{term}" for term in terms) else: query = f"all:{query}" if categories := arguments.get("categories"): category_filter = " OR ".join(f"cat:{cat}" for cat in categories) query = f"({query}) AND ({category_filter})" # Parse dates for query construction date_from = None date_to = None try: if "date_from" in arguments: date_from = parser.parse(arguments["date_from"]).replace(tzinfo=timezone.utc) if "date_to" in arguments: date_to = parser.parse(arguments["date_to"]).replace(tzinfo=timezone.utc) except (ValueError, TypeError) as e: return [ types.TextContent( type="text", text=f"Error: Invalid date format - {str(e)}" ) ] # Add date range to query if specified # Note: arXiv API date filtering is limited, so we rely mainly on post-processing # We can try to use lastUpdatedDate format but it's not always reliable if date_from or date_to: # For now, we'll rely on post-processing filtering # The arXiv API doesn't have reliable date range queries in search pass search = arxiv.Search( query=query, max_results=max_results, sort_by=arxiv.SortCriterion.SubmittedDate, ) # Process results 
results = [] for paper in client.results(search): # Additional date filtering for edge cases (API date query might not be precise) if date_from or date_to: if not _is_within_date_range(paper.published, date_from, date_to): continue results.append(_process_paper(paper)) if len(results) >= max_results: break response_data = {"total_results": len(results), "papers": results} # Save to file if requested if save_file := arguments.get("save_to_file"): save_success = save_results_to_file(response_data, save_file) if save_success: response_data["saved_to"] = save_file else: response_data["save_error"] = f"Failed to save to {save_file}" return [ types.TextContent(type="text", text=json.dumps(response_data, indent=2)) ] except Exception as e: return [types.TextContent(type="text", text=f"Error: {str(e)}")]
  • Defines the input schema and metadata for the 'search_papers' tool, including parameters like query, max_results, date ranges, categories, and options for saving results.
    search_tool = types.Tool( name="search_papers", description="Search for papers on arXiv with advanced filtering", inputSchema={ "type": "object", "properties": { "query": {"type": "string"}, "max_results": {"type": "integer"}, "date_from": {"type": "string"}, "date_to": {"type": "string"}, "categories": {"type": "array", "items": {"type": "string"}}, "save_to_file": {"type": "string", "description": "Optional file path to save results"}, "min_citations": {"type": "integer", "description": "Minimum citation count filter"}, }, "required": ["query"], }, )
  • Registers the 'search_papers' tool by including its schema (search_tool) in the list_tools() response.
    @server.list_tools() async def list_tools() -> List[types.Tool]: """List available arXiv research tools.""" return [search_tool, download_tool, list_tool, read_tool]
  • The MCP server call_tool handler that dispatches 'search_papers' calls to the handle_search implementation based on the tool name.
    @server.call_tool() async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]: """Handle tool calls for arXiv research functionality.""" logger.debug(f"Calling tool {name} with arguments {arguments}") try: if name == "search_papers": return await handle_search(arguments) elif name == "download_paper": return await handle_download(arguments) elif name == "list_papers": return await handle_list_papers(arguments) elif name == "read_paper": return await handle_read_paper(arguments) else: return [types.TextContent(type="text", text=f"Error: Unknown tool {name}")] except Exception as e: logger.error(f"Tool error: {str(e)}") return [types.TextContent(type="text", text=f"Error: {str(e)}")]
  • Helper function to process individual arXiv search results into a standardized dictionary format.
    def _process_paper(paper: arxiv.Result) -> Dict[str, Any]: """Process paper information with resource URI.""" return { "id": paper.get_short_id(), "title": paper.title, "authors": [author.name for author in paper.authors], "abstract": paper.summary, "categories": paper.categories, "published": paper.published.isoformat(), "url": paper.pdf_url, "resource_uri": f"arxiv://{paper.get_short_id()}", "citation_count": getattr(paper, 'citation_count', 0), # Note: arxiv doesn't provide this directly }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/wr-web/APR'

If you have feedback or need assistance with the MCP directory API, please join our Discord server