# MCP-DBLP
# by szeider
# Verified
"""
MCP-DBLP Server Module
IMPORTANT: This file must define a 'main()' function that is imported by __init__.py!
Removing or renaming this function will break package imports and cause an error:
ImportError: cannot import name 'main' from 'mcp_dblp.server'
"""
import sys
import asyncio
import logging
from typing import List, Dict, Any, Optional
import os
from pathlib import Path
import re
import datetime
import requests
import argparse
# Import MCP SDK
from mcp.server import Server, NotificationOptions
from mcp.server.models import InitializationOptions
import mcp.server.stdio
import mcp.types as types
# Import DBLP client functions
from mcp_dblp.dblp_client import (
search,
add_ccf_class,
get_author_publications,
get_venue_info,
calculate_statistics,
fuzzy_title_search,
fetch_and_process_bibtex
)
# Logging: every record is written both to a log file under ~/.mcp-dblp and
# to stderr.
log_dir = os.path.expanduser("~/.mcp-dblp")
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "mcp_dblp_server.log")
logging.basicConfig(
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stderr),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("mcp_dblp")

# Resolve the installed package version; any failure (for example the
# distribution metadata not being available) falls back to a placeholder.
try:
    from importlib.metadata import version
    version_str = version("mcp-dblp")
except Exception:
    version_str = "x.x.x"  # Anonymous fallback version
    logger.warning(f"Using default version: {version_str}")
else:
    logger.info(f"Loaded version: {version_str}")
def parse_html_links(html_string):
    """Extract ``(url, key)`` pairs from ``<a href=biburl>key</a>`` links.

    The href value may be unquoted, single-quoted or double-quoted, and may be
    followed by further attributes (e.g. ``target="_blank"``), which are
    ignored. Tag and attribute names are matched case-insensitively.

    Args:
        html_string: Text containing zero or more anchor elements. ``None``
            or an empty string yields an empty list.

    Returns:
        A list of ``(url, key)`` tuples in document order, with surrounding
        whitespace and quotes removed. Anchors whose URL or key is empty
        after cleaning are skipped.
    """
    if not html_string:
        return []

    # Capture everything between "href=" and the closing ">" of the start tag;
    # the URL is then isolated from any trailing attributes below.
    anchor_pattern = r'<a\s+href\s*=\s*([^>]+)>([^<]+)</a>'
    links = []
    for raw_href, raw_key in re.findall(anchor_pattern, html_string, flags=re.IGNORECASE):
        href = raw_href.strip()
        if href[:1] in ('"', "'"):
            # Quoted value: the URL ends at the matching quote, not at ">".
            closing = href.find(href[0], 1)
            url = href[1:closing] if closing != -1 else href[1:]
        else:
            # Unquoted value: the URL ends at the first whitespace.
            tokens = href.split(None, 1)
            url = tokens[0].strip('"\'') if tokens else ""
        url = url.strip()
        key = raw_key.strip()
        if url and key:
            links.append((url, key))
    return links
def export_bibtex_entries(entries, export_dir):
    """Write BibTeX entries to a new timestamp-named ``.bib`` file.

    The file is named ``YYYYmmdd_HHMMSS.bib``. If a file with that name
    already exists (for example two exports within the same second), a numeric
    suffix (``_1``, ``_2``, ...) is appended instead of overwriting it.

    Args:
        entries: Iterable of BibTeX entry strings; each is written followed by
            a blank line.
        export_dir: Target directory; created if it does not exist.

    Returns:
        The path of the file that was written.
    """
    os.makedirs(export_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    # Build the content up front so a bad entry cannot leave a partial file.
    content = "".join(entry + "\n\n" for entry in entries)

    attempt = 0
    while True:
        filename = f"{timestamp}.bib" if attempt == 0 else f"{timestamp}_{attempt}.bib"
        filepath = os.path.join(export_dir, filename)
        try:
            # Mode 'x' fails if the file exists, so an earlier export is never
            # silently overwritten.
            with open(filepath, 'x', encoding='utf-8') as f:
                f.write(content)
        except FileExistsError:
            attempt += 1
            continue
        return filepath
async def serve(export_dir=None) -> None:
    """Register the prompt and tool handlers and serve MCP requests over stdio.

    Args:
        export_dir: Directory that the ``export_bibtex`` tool writes ``.bib``
            files to. Defaults to ``~/.mcp-dblp/exports`` when ``None``.
    """
    if export_dir is None:
        export_dir = os.path.expanduser("~/.mcp-dblp/exports")

    server = Server("mcp-dblp")
    server.capabilities = {}

    def _reply(message):
        """Wrap a string as the single-item text response returned by tools."""
        return [types.TextContent(type="text", text=message)]

    def _tool(tool_name, description, properties, required):
        """Build a Tool whose input schema is a JSON object with the given fields."""
        return types.Tool(
            name=tool_name,
            description=description,
            inputSchema={
                "type": "object",
                "properties": properties,
                "required": required,
            },
        )

    # Advertise the single instructions prompt.
    @server.list_prompts()
    async def handle_list_prompts() -> List[types.Prompt]:
        instructions = types.Prompt(
            name="MCP-DBLP Instructions",
            description="Basic instructions for using the DBLP tools; get this prompt before any interaction with MCP-DBLP.",
            arguments=[],
        )
        return [instructions]

    # Serve the prompt text from instructions_prompt.md; a load failure is
    # reported inside the prompt text rather than raised.
    @server.get_prompt()
    async def handle_get_prompt(name: str, arguments: Optional[dict] = None) -> types.GetPromptResult:
        try:
            # Assume instructions_prompt.md is located at the project root
            project_root = Path(__file__).resolve().parents[2]
            instructions_prompt = (project_root / "instructions_prompt.md").read_text(encoding="utf-8")
        except Exception as e:
            instructions_prompt = f"Error loading instructions prompt: {e}"
        message = types.PromptMessage(
            role="user",
            content=types.TextContent(type="text", text=instructions_prompt),
        )
        return types.GetPromptResult(
            description="MCP-DBLP Instructions",
            messages=[message],
        )

    @server.list_tools()
    async def list_tools() -> List[types.Tool]:
        """Describe every DBLP tool this server exposes."""
        search_tool = _tool(
            "search",
            (
                "Search DBLP for publications using a boolean query string.\n"
                "Arguments:\n"
                " - query (string, required): A query string that may include boolean operators 'and' and 'or' (case-insensitive).\n"
                " For example, 'Swin and Transformer'. Parentheses are not supported.\n"
                " - max_results (number, optional): Maximum number of publications to return. Default is 10.\n"
                " - year_from (number, optional): Lower bound for publication year.\n"
                " - year_to (number, optional): Upper bound for publication year.\n"
                " - venue_filter (string, optional): Case-insensitive substring filter for publication venues (e.g., 'iclr').\n"
                " - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n"
                "Returns a list of publication objects including title, authors, venue, year, type, doi, ee, and url."
            ),
            {
                "query": {"type": "string"},
                "max_results": {"type": "number"},
                "year_from": {"type": "number"},
                "year_to": {"type": "number"},
                "venue_filter": {"type": "string"},
                "include_bibtex": {"type": "boolean"},
            },
            ["query"],
        )
        fuzzy_title_tool = _tool(
            "fuzzy_title_search",
            (
                "Search DBLP for publications with fuzzy title matching.\n"
                "Arguments:\n"
                " - title (string, required): Full or partial title of the publication (case-insensitive).\n"
                " - similarity_threshold (number, required): A float between 0 and 1 where 1.0 means an exact match.\n"
                " - max_results (number, optional): Maximum number of publications to return. Default is 10.\n"
                " - year_from (number, optional): Lower bound for publication year.\n"
                " - year_to (number, optional): Upper bound for publication year.\n"
                " - venue_filter (string, optional): Case-insensitive substring filter for publication venues.\n"
                " - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n"
                "Returns a list of publication objects sorted by title similarity score."
            ),
            {
                "title": {"type": "string"},
                "similarity_threshold": {"type": "number"},
                "max_results": {"type": "number"},
                "year_from": {"type": "number"},
                "year_to": {"type": "number"},
                "venue_filter": {"type": "string"},
                "include_bibtex": {"type": "boolean"},
            },
            ["title", "similarity_threshold"],
        )
        author_tool = _tool(
            "get_author_publications",
            (
                "Retrieve publication details for a specific author with fuzzy matching.\n"
                "Arguments:\n"
                " - author_name (string, required): Full or partial author name (case-insensitive).\n"
                " - similarity_threshold (number, required): A float between 0 and 1 where 1.0 means an exact match.\n"
                " - max_results (number, optional): Maximum number of publications to return. Default is 20.\n"
                " - include_bibtex (boolean, optional): Whether to include BibTeX entries in the results. Default is false.\n"
                "Returns a dictionary with keys: name, publication_count, publications, and stats (which includes top venues, years, and types)."
            ),
            {
                "author_name": {"type": "string"},
                "similarity_threshold": {"type": "number"},
                "max_results": {"type": "number"},
                "include_bibtex": {"type": "boolean"},
            },
            ["author_name", "similarity_threshold"],
        )
        venue_tool = _tool(
            "get_venue_info",
            (
                "Retrieve detailed information about a publication venue.\n"
                "Arguments:\n"
                " - venue_name (string, required): Venue name or abbreviation (e.g., 'ICLR' or full name).\n"
                "Returns a dictionary with fields: abbreviation, name, publisher, type, and category.\n"
                "Note: Some fields may be empty if DBLP does not provide the information."
            ),
            {
                "venue_name": {"type": "string"},
            },
            ["venue_name"],
        )
        statistics_tool = _tool(
            "calculate_statistics",
            (
                "Calculate statistics from a list of publication results.\n"
                "Arguments:\n"
                " - results (array, required): An array of publication objects, each with at least 'title', 'authors', 'venue', and 'year'.\n"
                "Returns a dictionary with:\n"
                " - total_publications: Total count.\n"
                " - time_range: Dictionary with 'min' and 'max' publication years.\n"
                " - top_authors: List of tuples (author, count) sorted by count.\n"
                " - top_venues: List of tuples (venue, count) sorted by count (empty venue is treated as '(empty)')."
            ),
            {
                "results": {"type": "array"},
            },
            ["results"],
        )
        export_tool = _tool(
            "export_bibtex",
            (
                "Export BibTeX entries from a collection of HTML hyperlinks.\n"
                "Arguments:\n"
                " - links (string, required): HTML string containing one or more <a href=biburl>key</a> links.\n"
                " The href attribute should contain a URL to a BibTeX file, and the link text is used as the citation key.\n"
                " Example input with three links:\n"
                " \"<a href=https://dblp.org/rec/journals/example1.bib>Smith2023</a>\n"
                " <a href=https://dblp.org/rec/conf/example2.bib>Jones2022</a>\n"
                " <a href=https://dblp.org/rec/journals/example3.bib>Brown2021</a>\"\n"
                "Process:\n"
                " - For each link, the tool fetches the BibTeX content from the URL\n"
                " - The citation key in each BibTeX entry is replaced with the key from the link text\n"
                " - All entries are combined and saved to a .bib file with a timestamp filename\n"
                "Returns:\n"
                " - A message with the full path to the saved .bib file"
            ),
            {
                "links": {"type": "string"},
            },
            ["links"],
        )
        return [
            search_tool,
            fuzzy_title_tool,
            author_tool,
            venue_tool,
            statistics_tool,
            export_tool,
        ]

    @server.call_tool()
    async def handle_call_tool(name: str, arguments: dict) -> List[types.TextContent]:
        """Dispatch a tool call by name and return its result as text.

        Missing required parameters, unknown tool names and any exception
        raised while running a tool are reported as text responses.
        """
        try:
            logger.info(f"Tool call: {name} with arguments {arguments}")

            if name == "search":
                if "query" not in arguments:
                    return _reply("Error: Missing required parameter 'query'")
                include_bibtex = arguments.get("include_bibtex", False)
                result = search(
                    query=arguments.get("query"),
                    max_results=arguments.get("max_results", 10),
                    year_from=arguments.get("year_from"),
                    year_to=arguments.get("year_to"),
                    venue_filter=arguments.get("venue_filter"),
                    include_bibtex=include_bibtex,
                )
                render = format_results_with_bibtex if include_bibtex else format_results
                return _reply(f"Found {len(result)} publications matching your query:\n\n{render(result)}")

            if name == "fuzzy_title_search":
                if not ("title" in arguments and "similarity_threshold" in arguments):
                    return _reply("Error: Missing required parameter 'title' or 'similarity_threshold'")
                include_bibtex = arguments.get("include_bibtex", False)
                result = fuzzy_title_search(
                    title=arguments.get("title"),
                    similarity_threshold=arguments.get("similarity_threshold"),
                    max_results=arguments.get("max_results", 10),
                    year_from=arguments.get("year_from"),
                    year_to=arguments.get("year_to"),
                    venue_filter=arguments.get("venue_filter"),
                    include_bibtex=include_bibtex,
                )
                render = (
                    format_results_with_similarity_and_bibtex
                    if include_bibtex
                    else format_results_with_similarity
                )
                return _reply(f"Found {len(result)} publications with similar titles:\n\n{render(result)}")

            if name == "get_author_publications":
                if not ("author_name" in arguments and "similarity_threshold" in arguments):
                    return _reply("Error: Missing required parameter 'author_name' or 'similarity_threshold'")
                include_bibtex = arguments.get("include_bibtex", False)
                result = get_author_publications(
                    author_name=arguments.get("author_name"),
                    similarity_threshold=arguments.get("similarity_threshold"),
                    max_results=arguments.get("max_results", 20),
                    include_bibtex=include_bibtex,
                )
                pub_count = result.get("publication_count", 0)
                publications = result.get("publications", [])
                render = format_results_with_bibtex if include_bibtex else format_results
                return _reply(
                    f"Found {pub_count} publications for author {arguments['author_name']}:\n\n{render(publications)}"
                )

            if name == "get_venue_info":
                if "venue_name" not in arguments:
                    return _reply("Error: Missing required parameter 'venue_name'")
                result = get_venue_info(venue_name=arguments.get("venue_name"))
                return _reply(f"Venue information for {arguments['venue_name']}:\n\n{format_dict(result)}")

            if name == "calculate_statistics":
                if "results" not in arguments:
                    return _reply("Error: Missing required parameter 'results'")
                result = calculate_statistics(results=arguments.get("results"))
                return _reply(f"Statistics calculated:\n\n{format_dict(result)}")

            if name == "export_bibtex":
                if "links" not in arguments:
                    return _reply("Error: Missing required parameter 'links'")
                links = parse_html_links(arguments.get("links"))
                if not links:
                    return _reply("Error: No valid links found in the input")
                # Fetch each entry in link order, then write them all to one file.
                bibtex_entries = [fetch_and_process_bibtex(url, key) for url, key in links]
                filepath = export_bibtex_entries(bibtex_entries, export_dir)
                return _reply(f"Exported {len(bibtex_entries)} BibTeX entries to {filepath}")

            return _reply(f"Unknown tool: {name}")
        except Exception as e:
            logger.error(f"Tool execution failed: {str(e)}", exc_info=True)
            return _reply(f"Error executing {name}: {str(e)}")

    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        init_options = InitializationOptions(
            server_name="mcp-dblp",
            server_version=version_str,
            capabilities=server.get_capabilities(
                notification_options=NotificationOptions(),
                experimental_capabilities={},
            ),
        )
        await server.run(read_stream, write_stream, init_options)
def _format_publications(results, *, with_similarity=False, with_bibtex=False):
    """Render publication dicts as a numbered plain-text listing.

    Shared implementation behind the public ``format_results*`` functions.

    Args:
        results: Sequence of publication dicts. The keys read are ``title``,
            ``authors``, ``venue``, ``year`` and, depending on the flags,
            ``similarity`` and ``bibtex``; missing keys get placeholders.
        with_similarity: Append ``[Similarity: x.xx]`` to each title line.
        with_bibtex: Append the entry's BibTeX text (when present and
            non-empty) below the venue line, one space indented.

    Returns:
        The formatted text, or ``"No results found."`` for an empty input.
    """
    if not results:
        return "No results found."

    lines = []
    for number, result in enumerate(results, start=1):
        title = result.get("title", "Untitled")
        venue = result.get("venue", "Unknown venue")
        year = result.get("year", "")
        # Tolerate a missing/None author list and a single author given as a
        # bare string (joining a str would otherwise split it into characters).
        authors = result.get("authors") or []
        if isinstance(authors, str):
            authors = [authors]

        heading = f"{number}. {title}"
        if with_similarity:
            similarity = result.get("similarity", 0.0)
            heading += f" [Similarity: {similarity:.2f}]"
        lines.append(heading)
        lines.append(f" Authors: {', '.join(authors)}")
        lines.append(f" Venue: {venue} ({year})")

        if with_bibtex and result.get("bibtex"):
            # Leading newline gives a blank line between venue and BibTeX.
            lines.append("\n BibTeX:")
            bibtex_lines = result["bibtex"].strip().split('\n')
            lines.append(" " + "\n ".join(bibtex_lines))
        lines.append("")
    return "\n".join(lines)


def format_results(results):
    """Format publications as a numbered list of title, authors and venue."""
    return _format_publications(results)


def format_results_with_similarity(results):
    """Format publications like ``format_results`` plus a similarity score."""
    return _format_publications(results, with_similarity=True)


def format_results_with_bibtex(results):
    """Format publications like ``format_results`` plus any BibTeX entry."""
    return _format_publications(results, with_bibtex=True)


def format_results_with_similarity_and_bibtex(results):
    """Format publications with both similarity score and BibTeX entry."""
    return _format_publications(results, with_similarity=True, with_bibtex=True)
def format_dict(data):
    """Render a mapping as ``key: value`` lines, one per item, in item order."""
    return "\n".join(f"{field}: {content}" for field, content in data.items())
def main() -> int:
    """Parse command-line options and run the server.

    Returns:
        ``0`` when the server exits normally or is interrupted with Ctrl-C,
        ``1`` when it stops because of any other exception (which is logged).
    """
    cli = argparse.ArgumentParser(description="MCP-DBLP Server")
    cli.add_argument(
        "--exportdir",
        type=str,
        default=os.path.expanduser("~/.mcp-dblp/exports"),
        help="Directory to export BibTeX files to",
    )
    options = cli.parse_args()

    logger.info(f"Starting MCP-DBLP server with version: {version_str}")
    exit_code = 0
    try:
        asyncio.run(serve(export_dir=options.exportdir))
    except KeyboardInterrupt:
        # A user interrupt is a normal way to stop the server.
        logger.info("Server stopped by user")
    except Exception as e:
        logger.error(f"Server error: {str(e)}", exc_info=True)
        exit_code = 1
    return exit_code
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())