Skip to main content
Glama

MCP-DBLP

server.py (25.9 kB)
""" MCP-DBLP Server Module IMPORTANT: This file must define a 'main()' function that is imported by __init__.py! Removing or renaming this function will break package imports and cause an error: ImportError: cannot import name 'main' from 'mcp_dblp.server' """ import argparse import asyncio import datetime import logging import os import re import sys from pathlib import Path import mcp.server.stdio import mcp.types as types # Import MCP SDK from mcp.server import NotificationOptions, Server from mcp.server.models import InitializationOptions # Import DBLP client functions from mcp_dblp.dblp_client import ( calculate_statistics, fetch_and_process_bibtex, fuzzy_title_search, get_author_publications, get_venue_info, search, ) # Set up logging log_dir = os.path.expanduser("~/.mcp-dblp") os.makedirs(log_dir, exist_ok=True) log_file = os.path.join(log_dir, "mcp_dblp_server.log") logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[logging.FileHandler(log_file), logging.StreamHandler(sys.stderr)], ) logger = logging.getLogger("mcp_dblp") try: from importlib.metadata import version version_str = version("mcp-dblp") logger.info(f"Loaded version: {version_str}") except Exception: version_str = "x.x.x" # Anonymous fallback version logger.warning(f"Using default version: {version_str}") def parse_html_links(html_string): """Parse HTML links of the form <a href=biburl>key</a> and extract URLs and keys.""" pattern = r"<a\s+href=([^>]+)>([^<]+)</a>" matches = re.findall(pattern, html_string) result = [] for url, key in matches: url = url.strip("\"'") key = key.strip() result.append((url, key)) return result def export_bibtex_entries(entries, export_dir): """Export BibTeX entries to a file with timestamp filename.""" os.makedirs(export_dir, exist_ok=True) timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"{timestamp}.bib" filepath = os.path.join(export_dir, filename) with open(filepath, "w", 
encoding="utf-8") as f: for entry in entries: f.write(entry + "\n\n") return filepath async def serve(export_dir=None) -> None: """Main server function to handle MCP requests""" if export_dir is None: export_dir = os.path.expanduser("~/.mcp-dblp/exports") server = Server("mcp-dblp") # Provide a list of available prompts including our instructions prompt. @server.list_prompts() async def handle_list_prompts() -> list[types.Prompt]: return [ types.Prompt( name="dblp-instructions", description="Instructions for using DBLP tools efficiently with batch/parallel calls for citation processing", arguments=[], ) ] # Get prompt endpoint that loads our instructions from a file. @server.get_prompt() async def handle_get_prompt(name: str, arguments: dict | None = None) -> types.GetPromptResult: try: # Assume instructions_prompt.md is located at the project root instructions_path = Path(__file__).resolve().parents[2] / "instructions_prompt.md" with open(instructions_path, encoding="utf-8") as f: instructions_prompt = f.read() except Exception as e: instructions_prompt = f"Error loading instructions prompt: {e}" return types.GetPromptResult( description="Instructions for using DBLP tools efficiently with batch/parallel calls for citation processing", messages=[ types.PromptMessage( role="user", content=types.TextContent(type="text", text=instructions_prompt) ) ], ) # Expose instructions as a resource so it appears in ListMcpResourcesTool @server.list_resources() async def handle_list_resources() -> list[types.Resource]: return [ types.Resource( uri="dblp://instructions", name="DBLP Citation Processing Instructions", description="Complete instructions for using DBLP tools efficiently with batch/parallel calls", mimeType="text/markdown", ) ] @server.read_resource() async def handle_read_resource(uri: str) -> str: uri_str = str(uri) if not isinstance(uri, str) else uri if uri_str == "dblp://instructions": try: instructions_path = Path(__file__).resolve().parents[2] / 
"instructions_prompt.md" with open(instructions_path, encoding="utf-8") as f: return f.read() except Exception as e: return f"Error loading instructions: {e}" else: raise ValueError(f"Unknown resource URI: {uri_str}") @server.list_tools() async def list_tools() -> list[types.Tool]: """List all available DBLP tools with detailed descriptions.""" return [ types.Tool( name="search", description=( "Search DBLP for publications using a boolean query string.\n" "Arguments:\n" " - query (string, required): A query string that may include boolean operators 'and' and 'or' (case-insensitive).\n" " For example, 'Swin and Transformer'. Parentheses are not supported.\n" " - max_results (number, optional): Maximum number of publications to return. Default is 10.\n" " - year_from (number, optional): Lower bound for publication year.\n" " - year_to (number, optional): Upper bound for publication year.\n" " - venue_filter (string, optional): Case-insensitive substring filter for publication venues (e.g., 'iclr').\n" " - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n" "Returns a list of publication objects including title, authors, venue, year, type, doi, ee, and url." ), inputSchema={ "type": "object", "properties": { "query": {"type": "string"}, "max_results": {"type": "number"}, "year_from": {"type": "number"}, "year_to": {"type": "number"}, "venue_filter": {"type": "string"}, "include_bibtex": {"type": "boolean"}, }, "required": ["query"], }, ), types.Tool( name="fuzzy_title_search", description=( "Search DBLP for publications with fuzzy title matching.\n" "Arguments:\n" " - title (string, required): Full or partial title of the publication (case-insensitive).\n" " - similarity_threshold (number, required): A float between 0 and 1 where 1.0 means an exact match.\n" " - max_results (number, optional): Maximum number of publications to return. 
Default is 10.\n" " - year_from (number, optional): Lower bound for publication year.\n" " - year_to (number, optional): Upper bound for publication year.\n" " - venue_filter (string, optional): Case-insensitive substring filter for publication venues.\n" " - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n" "Returns a list of publication objects sorted by title similarity score." ), inputSchema={ "type": "object", "properties": { "title": {"type": "string"}, "similarity_threshold": {"type": "number"}, "max_results": {"type": "number"}, "year_from": {"type": "number"}, "year_to": {"type": "number"}, "venue_filter": {"type": "string"}, "include_bibtex": {"type": "boolean"}, }, "required": ["title", "similarity_threshold"], }, ), types.Tool( name="get_author_publications", description=( "Retrieve publication details for a specific author with fuzzy matching.\n" "Arguments:\n" " - author_name (string, required): Full or partial author name (case-insensitive).\n" " - similarity_threshold (number, required): A float between 0 and 1 where 1.0 means an exact match.\n" " - max_results (number, optional): Maximum number of publications to return. Default is 20.\n" " - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n" "Returns a dictionary with keys: name, publication_count, publications, and stats (which includes top venues, years, and types)." 
), inputSchema={ "type": "object", "properties": { "author_name": {"type": "string"}, "similarity_threshold": {"type": "number"}, "max_results": {"type": "number"}, "include_bibtex": {"type": "boolean"}, }, "required": ["author_name", "similarity_threshold"], }, ), types.Tool( name="get_venue_info", description=( "Retrieve information about a publication venue from DBLP.\n" "Arguments:\n" " - venue_name (string, required): Venue name or abbreviation (e.g., 'ICLR', 'NeurIPS', or full name).\n" "Returns a dictionary with fields:\n" " - venue: Full venue title\n" " - acronym: Venue acronym/abbreviation (if available)\n" " - type: Venue type (e.g., 'Conference or Workshop', 'Journal', 'Repository')\n" " - url: Canonical DBLP URL for the venue\n" "Note: Publisher, ISSN, and other metadata are not available through this endpoint." ), inputSchema={ "type": "object", "properties": {"venue_name": {"type": "string"}}, "required": ["venue_name"], }, ), types.Tool( name="calculate_statistics", description=( "Calculate statistics from a list of publication results.\n" "Arguments:\n" " - results (array, required): An array of publication objects, each with at least 'title', 'authors', 'venue', and 'year'.\n" "Returns a dictionary with:\n" " - total_publications: Total count.\n" " - time_range: Dictionary with 'min' and 'max' publication years.\n" " - top_authors: List of tuples (author, count) sorted by count.\n" " - top_venues: List of tuples (venue, count) sorted by count (empty venue is treated as '(empty)')." 
), inputSchema={ "type": "object", "properties": {"results": {"type": "array"}}, "required": ["results"], }, ), types.Tool( name="export_bibtex", description=( "Export BibTeX entries from a collection of HTML hyperlinks.\n" "Arguments:\n" " - links (string, required): HTML string containing one or more <a href=biburl>key</a> links.\n" " The href attribute should contain a URL to a BibTeX file, and the link text is used as the citation key.\n" " Example input with three links:\n" ' "<a href=https://dblp.org/rec/journals/example1.bib>Smith2023</a>\n' " <a href=https://dblp.org/rec/conf/example2.bib>Jones2022</a>\n" ' <a href=https://dblp.org/rec/journals/example3.bib>Brown2021</a>"\n' "Process:\n" " - For each link, the tool fetches the BibTeX content from the URL\n" " - The citation key in each BibTeX entry is replaced with the key from the link text\n" " - All entries are combined and saved to a .bib file with a timestamp filename\n" "Returns:\n" " - A message with the full path to the saved .bib file" ), inputSchema={ "type": "object", "properties": {"links": {"type": "string"}}, "required": ["links"], }, ), ] @server.call_tool() async def handle_call_tool(name: str, arguments: dict) -> list[types.TextContent]: """Handle tool calls from clients""" try: logger.info(f"Tool call: {name} with arguments {arguments}") match name: case "search": if "query" not in arguments: return [ types.TextContent( type="text", text="Error: Missing required parameter 'query'" ) ] include_bibtex = arguments.get("include_bibtex", False) result = search( query=arguments.get("query"), max_results=arguments.get("max_results", 10), year_from=arguments.get("year_from"), year_to=arguments.get("year_to"), venue_filter=arguments.get("venue_filter"), include_bibtex=include_bibtex, ) if include_bibtex: return [ types.TextContent( type="text", text=f"Found {len(result)} publications matching your query:\n\n{format_results_with_bibtex(result)}", ) ] else: return [ types.TextContent( type="text", 
text=f"Found {len(result)} publications matching your query:\n\n{format_results(result)}", ) ] case "fuzzy_title_search": if "title" not in arguments or "similarity_threshold" not in arguments: return [ types.TextContent( type="text", text="Error: Missing required parameter 'title' or 'similarity_threshold'", ) ] include_bibtex = arguments.get("include_bibtex", False) result = fuzzy_title_search( title=arguments.get("title"), similarity_threshold=arguments.get("similarity_threshold"), max_results=arguments.get("max_results", 10), year_from=arguments.get("year_from"), year_to=arguments.get("year_to"), venue_filter=arguments.get("venue_filter"), include_bibtex=include_bibtex, ) if include_bibtex: return [ types.TextContent( type="text", text=f"Found {len(result)} publications with similar titles:\n\n{format_results_with_similarity_and_bibtex(result)}", ) ] else: return [ types.TextContent( type="text", text=f"Found {len(result)} publications with similar titles:\n\n{format_results_with_similarity(result)}", ) ] case "get_author_publications": if "author_name" not in arguments or "similarity_threshold" not in arguments: return [ types.TextContent( type="text", text="Error: Missing required parameter 'author_name' or 'similarity_threshold'", ) ] include_bibtex = arguments.get("include_bibtex", False) result = get_author_publications( author_name=arguments.get("author_name"), similarity_threshold=arguments.get("similarity_threshold"), max_results=arguments.get("max_results", 20), include_bibtex=include_bibtex, ) pub_count = result.get("publication_count", 0) publications = result.get("publications", []) if include_bibtex: return [ types.TextContent( type="text", text=f"Found {pub_count} publications for author {arguments['author_name']}:\n\n{format_results_with_bibtex(publications)}", ) ] else: return [ types.TextContent( type="text", text=f"Found {pub_count} publications for author {arguments['author_name']}:\n\n{format_results(publications)}", ) ] case 
"get_venue_info": if "venue_name" not in arguments: return [ types.TextContent( type="text", text="Error: Missing required parameter 'venue_name'" ) ] result = get_venue_info(venue_name=arguments.get("venue_name")) return [ types.TextContent( type="text", text=f"Venue information for {arguments['venue_name']}:\n\n{format_dict(result)}", ) ] case "calculate_statistics": if "results" not in arguments: return [ types.TextContent( type="text", text="Error: Missing required parameter 'results'" ) ] result = calculate_statistics(results=arguments.get("results")) return [ types.TextContent( type="text", text=f"Statistics calculated:\n\n{format_dict(result)}" ) ] case "export_bibtex": if "links" not in arguments: return [ types.TextContent( type="text", text="Error: Missing required parameter 'links'" ) ] html_links = arguments.get("links") links = parse_html_links(html_links) if not links: return [ types.TextContent( type="text", text="Error: No valid links found in the input" ) ] # Fetch and process BibTeX entries bibtex_entries = [] for url, key in links: bibtex = fetch_and_process_bibtex(url, key) bibtex_entries.append(bibtex) # Export to file filepath = export_bibtex_entries(bibtex_entries, export_dir) return [ types.TextContent( type="text", text=f"Exported {len(bibtex_entries)} BibTeX entries to {filepath}", ) ] case _: return [types.TextContent(type="text", text=f"Unknown tool: {name}")] except Exception as e: logger.error(f"Tool execution failed: {str(e)}", exc_info=True) return [types.TextContent(type="text", text=f"Error executing {name}: {str(e)}")] async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): await server.run( read_stream, write_stream, InitializationOptions( server_name="mcp-dblp", server_version=version_str, capabilities=server.get_capabilities( notification_options=NotificationOptions(), experimental_capabilities={}, ), ), ) def format_results(results): if not results: return "No results found." 
formatted = [] for i, result in enumerate(results): title = result.get("title", "Untitled") authors = ", ".join(result.get("authors", [])) venue = result.get("venue", "Unknown venue") year = result.get("year", "") formatted.append(f"{i + 1}. {title}") formatted.append(f" Authors: {authors}") formatted.append(f" Venue: {venue} ({year})") formatted.append("") return "\n".join(formatted) def format_results_with_similarity(results): if not results: return "No results found." formatted = [] for i, result in enumerate(results): title = result.get("title", "Untitled") authors = ", ".join(result.get("authors", [])) venue = result.get("venue", "Unknown venue") year = result.get("year", "") similarity = result.get("similarity", 0.0) formatted.append(f"{i + 1}. {title} [Similarity: {similarity:.2f}]") formatted.append(f" Authors: {authors}") formatted.append(f" Venue: {venue} ({year})") formatted.append("") return "\n".join(formatted) def format_results_with_bibtex(results): if not results: return "No results found." formatted = [] for i, result in enumerate(results): title = result.get("title", "Untitled") authors = ", ".join(result.get("authors", [])) venue = result.get("venue", "Unknown venue") year = result.get("year", "") formatted.append(f"{i + 1}. {title}") formatted.append(f" Authors: {authors}") formatted.append(f" Venue: {venue} ({year})") if "bibtex" in result and result["bibtex"]: formatted.append("\n BibTeX:") bibtex_lines = result["bibtex"].strip().split("\n") formatted.append(" " + "\n ".join(bibtex_lines)) formatted.append("") return "\n".join(formatted) def format_results_with_similarity_and_bibtex(results): if not results: return "No results found." formatted = [] for i, result in enumerate(results): title = result.get("title", "Untitled") authors = ", ".join(result.get("authors", [])) venue = result.get("venue", "Unknown venue") year = result.get("year", "") similarity = result.get("similarity", 0.0) formatted.append(f"{i + 1}. 
{title} [Similarity: {similarity:.2f}]") formatted.append(f" Authors: {authors}") formatted.append(f" Venue: {venue} ({year})") if "bibtex" in result and result["bibtex"]: formatted.append("\n BibTeX:") bibtex_lines = result["bibtex"].strip().split("\n") formatted.append(" " + "\n ".join(bibtex_lines)) formatted.append("") return "\n".join(formatted) def format_dict(data): formatted = [] for key, value in data.items(): formatted.append(f"{key}: {value}") return "\n".join(formatted) def main() -> int: parser = argparse.ArgumentParser(description="MCP-DBLP Server") parser.add_argument( "--exportdir", type=str, default=os.path.expanduser("~/.mcp-dblp/exports"), help="Directory to export BibTeX files to", ) args = parser.parse_args() logger.info(f"Starting MCP-DBLP server with version: {version_str}") try: asyncio.run(serve(export_dir=args.exportdir)) return 0 except KeyboardInterrupt: logger.info("Server stopped by user") return 0 except Exception as e: logger.error(f"Server error: {str(e)}", exc_info=True) return 1 if __name__ == "__main__": sys.exit(main())

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/szeider/mcp-dblp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.