MCP-DBLP

MIT License

Overview InspectNew Schema Related Servers Reviews Score

mcp-dblp
src
mcp_dblp

"""
MCP-DBLP Server Module

IMPORTANT: This file must define a 'main()' function that is imported by __init__.py!
Removing or renaming this function will break package imports and cause an error:
  ImportError: cannot import name 'main' from 'mcp_dblp.server'
"""



import sys
import asyncio
import logging
from typing import List, Dict, Any, Optional
import os
from pathlib import Path
import re
import datetime
import requests
import argparse

# Import MCP SDK
from mcp.server import Server, NotificationOptions
from mcp.server.models import InitializationOptions
import mcp.server.stdio
import mcp.types as types

# Import DBLP client functions
from mcp_dblp.dblp_client import (
    search, 
    add_ccf_class, 
    get_author_publications, 
    get_venue_info, 
    calculate_statistics,
    fuzzy_title_search,
    fetch_and_process_bibtex  
)

# Set up logging
log_dir = os.path.expanduser("~/.mcp-dblp")
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "mcp_dblp_server.log")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stderr)
    ]
)
logger = logging.getLogger("mcp_dblp")


try:
    from importlib.metadata import version
    version_str = version("mcp-dblp")
    logger.info(f"Loaded version: {version_str}")
except Exception:
    version_str = "x.x.x"  # Anonymous fallback version
    logger.warning(f"Using default version: {version_str}")



def parse_html_links(html_string):
    """Parse HTML links of the form <a href=biburl>key</a> and extract URLs and keys."""
    pattern = r'<a\s+href=([^>]+)>([^<]+)</a>'
    matches = re.findall(pattern, html_string)
    result = []
    for url, key in matches:
        url = url.strip('"\'')
        key = key.strip()
        result.append((url, key))
    return result



def export_bibtex_entries(entries, export_dir):
    """Export BibTeX entries to a file with timestamp filename."""
    os.makedirs(export_dir, exist_ok=True)
    
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"{timestamp}.bib"
    filepath = os.path.join(export_dir, filename)
    
    with open(filepath, 'w', encoding='utf-8') as f:
        for entry in entries:
            f.write(entry + "\n\n")
    
    return filepath

async def serve(export_dir=None) -> None:
    """Main server function to handle MCP requests"""
    if export_dir is None:
        export_dir = os.path.expanduser("~/.mcp-dblp/exports")
    
    server = Server("mcp-dblp")
    server.capabilities = {}

    # Provide a list of available prompts including our instructions prompt.
    @server.list_prompts()
    async def handle_list_prompts() -> List[types.Prompt]:
        return [
            types.Prompt(
                name="MCP-DBLP Instructions",
                description="Basic instructions for using the DBLP tools; get this prompt before any interaction with MCP-DBLP.",
                arguments=[]
            )
        ]

    # Get prompt endpoint that loads our instructions from a file.
    @server.get_prompt()
    async def handle_get_prompt(name: str, arguments: Optional[dict] = None) -> types.GetPromptResult:
        try:
            # Assume instructions_prompt.md is located at the project root
            instructions_path = Path(__file__).resolve().parents[2] / "instructions_prompt.md"
            with open(instructions_path, "r", encoding="utf-8") as f:
                instructions_prompt = f.read()
        except Exception as e:
            instructions_prompt = f"Error loading instructions prompt: {e}"
        return types.GetPromptResult(
            description="MCP-DBLP Instructions",
            messages=[
                types.PromptMessage(
                    role="user",
                    content=types.TextContent(
                        type="text",
                        text=instructions_prompt
                    )
                )
            ]
        )

    @server.list_tools()
    async def list_tools() -> List[types.Tool]:
        """List all available DBLP tools with detailed descriptions."""
        return [
            types.Tool(
                name="search",
                description=(
                    "Search DBLP for publications using a boolean query string.\n"
                    "Arguments:\n"
                    "  - query (string, required): A query string that may include boolean operators 'and' and 'or' (case-insensitive).\n"
                    "    For example, 'Swin and Transformer'. Parentheses are not supported.\n"
                    "  - max_results (number, optional): Maximum number of publications to return. Default is 10.\n"
                    "  - year_from (number, optional): Lower bound for publication year.\n"
                    "  - year_to (number, optional): Upper bound for publication year.\n"
                    "  - venue_filter (string, optional): Case-insensitive substring filter for publication venues (e.g., 'iclr').\n"
                    "  - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n"
                    "Returns a list of publication objects including title, authors, venue, year, type, doi, ee, and url."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {"type": "string"},
                        "max_results": {"type": "number"},
                        "year_from": {"type": "number"},
                        "year_to": {"type": "number"},
                        "venue_filter": {"type": "string"},
                        "include_bibtex": {"type": "boolean"}
                    },
                    "required": ["query"]
                }
            ),
            types.Tool(
                name="fuzzy_title_search",
                description=(
                    "Search DBLP for publications with fuzzy title matching.\n"
                    "Arguments:\n"
                    "  - title (string, required): Full or partial title of the publication (case-insensitive).\n"
                    "  - similarity_threshold (number, required): A float between 0 and 1 where 1.0 means an exact match.\n"
                    "  - max_results (number, optional): Maximum number of publications to return. Default is 10.\n"
                    "  - year_from (number, optional): Lower bound for publication year.\n"
                    "  - year_to (number, optional): Upper bound for publication year.\n"
                    "  - venue_filter (string, optional): Case-insensitive substring filter for publication venues.\n"
                    "  - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n"
                    "Returns a list of publication objects sorted by title similarity score."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        "similarity_threshold": {"type": "number"},
                        "max_results": {"type": "number"},
                        "year_from": {"type": "number"},
                        "year_to": {"type": "number"},
                        "venue_filter": {"type": "string"},
                        "include_bibtex": {"type": "boolean"}
                    },
                    "required": ["title", "similarity_threshold"]
                }
            ),
            types.Tool(
                name="get_author_publications",
                description=(
                    "Retrieve publication details for a specific author with fuzzy matching.\n"
                    "Arguments:\n"
                    "  - author_name (string, required): Full or partial author name (case-insensitive).\n"
                    "  - similarity_threshold (number, required): A float between 0 and 1 where 1.0 means an exact match.\n"
                    "  - max_results (number, optional): Maximum number of publications to return. Default is 20.\n"
                    "  - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n"
                    "Returns a dictionary with keys: name, publication_count, publications, and stats (which includes top venues, years, and types)."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "author_name": {"type": "string"},
                        "similarity_threshold": {"type": "number"},
                        "max_results": {"type": "number"},
                        "include_bibtex": {"type": "boolean"}
                    },
                    "required": ["author_name", "similarity_threshold"]
                }
            ),
            types.Tool(
                name="get_venue_info",
                description=(
                    "Retrieve detailed information about a publication venue.\n"
                    "Arguments:\n"
                    "  - venue_name (string, required): Venue name or abbreviation (e.g., 'ICLR' or full name).\n"
                    "Returns a dictionary with fields: abbreviation, name, publisher, type, and category.\n"
                    "Note: Some fields may be empty if DBLP does not provide the information."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "venue_name": {"type": "string"}
                    },
                    "required": ["venue_name"]
                }
            ),
            types.Tool(
                name="calculate_statistics",
                description=(
                    "Calculate statistics from a list of publication results.\n"
                    "Arguments:\n"
                    "  - results (array, required): An array of publication objects, each with at least 'title', 'authors', 'venue', and 'year'.\n"
                    "Returns a dictionary with:\n"
                    "  - total_publications: Total count.\n"
                    "  - time_range: Dictionary with 'min' and 'max' publication years.\n"
                    "  - top_authors: List of tuples (author, count) sorted by count.\n"
                    "  - top_venues: List of tuples (venue, count) sorted by count (empty venue is treated as '(empty)')."
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "results": {"type": "array"}
                    },
                    "required": ["results"]
                }
            ),
            types.Tool(
                name="export_bibtex",
                description=(
                    "Export BibTeX entries from a collection of HTML hyperlinks.\n"
                    "Arguments:\n"
                    "  - links (string, required): HTML string containing one or more <a href=biburl>key</a> links.\n"
                    "    The href attribute should contain a URL to a BibTeX file, and the link text is used as the citation key.\n"
                    "    Example input with three links:\n"
                    "    \"<a href=https://dblp.org/rec/journals/example1.bib>Smith2023</a>\n"
                    "     <a href=https://dblp.org/rec/conf/example2.bib>Jones2022</a>\n"
                    "     <a href=https://dblp.org/rec/journals/example3.bib>Brown2021</a>\"\n"
                    "Process:\n"
                    "  - For each link, the tool fetches the BibTeX content from the URL\n"
                    "  - The citation key in each BibTeX entry is replaced with the key from the link text\n"
                    "  - All entries are combined and saved to a .bib file with a timestamp filename\n"
                    "Returns:\n"
                    "  - A message with the full path to the saved .bib file"
                ),
                inputSchema={
                    "type": "object",
                    "properties": {
                        "links": {"type": "string"}
                    },
                    "required": ["links"]
                }
            )
        ]

    @server.call_tool()
    async def handle_call_tool(name: str, arguments: dict) -> List[types.TextContent]:
        """Handle tool calls from clients"""
        try:
            logger.info(f"Tool call: {name} with arguments {arguments}")
            match name:
                case "search":
                    if "query" not in arguments:
                        return [types.TextContent(
                            type="text",
                            text="Error: Missing required parameter 'query'"
                        )]
                    include_bibtex = arguments.get("include_bibtex", False)
                    result = search(
                        query=arguments.get("query"),
                        max_results=arguments.get("max_results", 10),
                        year_from=arguments.get("year_from"),
                        year_to=arguments.get("year_to"),
                        venue_filter=arguments.get("venue_filter"),
                        include_bibtex=include_bibtex
                    )
                    if include_bibtex:
                        return [types.TextContent(
                            type="text",
                            text=f"Found {len(result)} publications matching your query:\n\n{format_results_with_bibtex(result)}"
                        )]
                    else:
                        return [types.TextContent(
                            type="text",
                            text=f"Found {len(result)} publications matching your query:\n\n{format_results(result)}"
                        )]
                case "fuzzy_title_search":
                    if "title" not in arguments or "similarity_threshold" not in arguments:
                        return [types.TextContent(
                            type="text",
                            text="Error: Missing required parameter 'title' or 'similarity_threshold'"
                        )]
                    include_bibtex = arguments.get("include_bibtex", False)
                    result = fuzzy_title_search(
                        title=arguments.get("title"),
                        similarity_threshold=arguments.get("similarity_threshold"),
                        max_results=arguments.get("max_results", 10),
                        year_from=arguments.get("year_from"),
                        year_to=arguments.get("year_to"),
                        venue_filter=arguments.get("venue_filter"),
                        include_bibtex=include_bibtex
                    )
                    if include_bibtex:
                        return [types.TextContent(
                            type="text",
                            text=f"Found {len(result)} publications with similar titles:\n\n{format_results_with_similarity_and_bibtex(result)}"
                        )]
                    else:
                        return [types.TextContent(
                            type="text",
                            text=f"Found {len(result)} publications with similar titles:\n\n{format_results_with_similarity(result)}"
                        )]
                case "get_author_publications":
                    if "author_name" not in arguments or "similarity_threshold" not in arguments:
                        return [types.TextContent(
                            type="text",
                            text="Error: Missing required parameter 'author_name' or 'similarity_threshold'"
                        )]
                    include_bibtex = arguments.get("include_bibtex", False)
                    result = get_author_publications(
                        author_name=arguments.get("author_name"),
                        similarity_threshold=arguments.get("similarity_threshold"),
                        max_results=arguments.get("max_results", 20),
                        include_bibtex=include_bibtex
                    )
                    pub_count = result.get("publication_count", 0)
                    publications = result.get("publications", [])
                    
                    if include_bibtex:
                        return [types.TextContent(
                            type="text",
                            text=f"Found {pub_count} publications for author {arguments['author_name']}:\n\n{format_results_with_bibtex(publications)}"
                        )]
                    else:
                        return [types.TextContent(
                            type="text",
                            text=f"Found {pub_count} publications for author {arguments['author_name']}:\n\n{format_results(publications)}"
                        )]
                case "get_venue_info":
                    if "venue_name" not in arguments:
                        return [types.TextContent(
                            type="text",
                            text="Error: Missing required parameter 'venue_name'"
                        )]
                    result = get_venue_info(
                        venue_name=arguments.get("venue_name")
                    )
                    return [types.TextContent(
                        type="text",
                        text=f"Venue information for {arguments['venue_name']}:\n\n{format_dict(result)}"
                    )]
                case "calculate_statistics":
                    if "results" not in arguments:
                        return [types.TextContent(
                            type="text",
                            text="Error: Missing required parameter 'results'"
                        )]
                    result = calculate_statistics(
                        results=arguments.get("results")
                    )
                    return [types.TextContent(
                        type="text",
                        text=f"Statistics calculated:\n\n{format_dict(result)}"
                    )]
                case "export_bibtex":
                    if "links" not in arguments:
                        return [types.TextContent(
                            type="text",
                            text="Error: Missing required parameter 'links'"
                        )]
                    
                    html_links = arguments.get("links")
                    links = parse_html_links(html_links)
                    
                    if not links:
                        return [types.TextContent(
                            type="text",
                            text="Error: No valid links found in the input"
                        )]
                    
                    # Fetch and process BibTeX entries
                    bibtex_entries = []
                    for url, key in links:
                        bibtex = fetch_and_process_bibtex(url, key)
                        bibtex_entries.append(bibtex)
                    
                    # Export to file
                    filepath = export_bibtex_entries(bibtex_entries, export_dir)
                    
                    return [types.TextContent(
                        type="text",
                        text=f"Exported {len(bibtex_entries)} BibTeX entries to {filepath}"
                    )]
                case _:
                    return [types.TextContent(
                        type="text",
                        text=f"Unknown tool: {name}"
                    )]
        except Exception as e:
            logger.error(f"Tool execution failed: {str(e)}", exc_info=True)
            return [types.TextContent(
                type="text",
                text=f"Error executing {name}: {str(e)}"
            )]

    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="mcp-dblp",
                server_version=version_str,
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )

def format_results(results):
    if not results:
        return "No results found."
    formatted = []
    for i, result in enumerate(results):
        title = result.get("title", "Untitled")
        authors = ", ".join(result.get("authors", []))
        venue = result.get("venue", "Unknown venue")
        year = result.get("year", "")
        formatted.append(f"{i+1}. {title}")
        formatted.append(f"   Authors: {authors}")
        formatted.append(f"   Venue: {venue} ({year})")
        formatted.append("")
    return "\n".join(formatted)

def format_results_with_similarity(results):
    if not results:
        return "No results found."
    formatted = []
    for i, result in enumerate(results):
        title = result.get("title", "Untitled")
        authors = ", ".join(result.get("authors", []))
        venue = result.get("venue", "Unknown venue")
        year = result.get("year", "")
        similarity = result.get("similarity", 0.0)
        formatted.append(f"{i+1}. {title} [Similarity: {similarity:.2f}]")
        formatted.append(f"   Authors: {authors}")
        formatted.append(f"   Venue: {venue} ({year})")
        formatted.append("")
    return "\n".join(formatted)

def format_results_with_bibtex(results):
    if not results:
        return "No results found."
    formatted = []
    for i, result in enumerate(results):
        title = result.get("title", "Untitled")
        authors = ", ".join(result.get("authors", []))
        venue = result.get("venue", "Unknown venue")
        year = result.get("year", "")
        formatted.append(f"{i+1}. {title}")
        formatted.append(f"   Authors: {authors}")
        formatted.append(f"   Venue: {venue} ({year})")
        if "bibtex" in result and result["bibtex"]:
            formatted.append("\n   BibTeX:")
            bibtex_lines = result["bibtex"].strip().split('\n')
            formatted.append("      " + "\n      ".join(bibtex_lines))
        formatted.append("")
    return "\n".join(formatted)

def format_results_with_similarity_and_bibtex(results):
    if not results:
        return "No results found."
    formatted = []
    for i, result in enumerate(results):
        title = result.get("title", "Untitled")
        authors = ", ".join(result.get("authors", []))
        venue = result.get("venue", "Unknown venue")
        year = result.get("year", "")
        similarity = result.get("similarity", 0.0)
        formatted.append(f"{i+1}. {title} [Similarity: {similarity:.2f}]")
        formatted.append(f"   Authors: {authors}")
        formatted.append(f"   Venue: {venue} ({year})")
        if "bibtex" in result and result["bibtex"]:
            formatted.append("\n   BibTeX:")
            bibtex_lines = result["bibtex"].strip().split('\n')
            formatted.append("      " + "\n      ".join(bibtex_lines))
        formatted.append("")
    return "\n".join(formatted)

def format_dict(data):
    formatted = []
    for key, value in data.items():
        formatted.append(f"{key}: {value}")
    return "\n".join(formatted)

def main() -> int:
    parser = argparse.ArgumentParser(description="MCP-DBLP Server")
    parser.add_argument("--exportdir", type=str, default=os.path.expanduser("~/.mcp-dblp/exports"),
                        help="Directory to export BibTeX files to")
    args = parser.parse_args()
    
    logger.info(f"Starting MCP-DBLP server with version: {version_str}")
    try:
        asyncio.run(serve(export_dir=args.exportdir))
        return 0
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
        return 0
    except Exception as e:
        logger.error(f"Server error: {str(e)}", exc_info=True)
        return 1

if __name__ == "__main__":
    sys.exit(main())

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/szeider/mcp-dblp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server