# BnF API Server
# by Kryzo
# Verified
#!/usr/bin/env python
"""
Gallica BnF API MCP Server
--------------------------
This server provides tools to search and retrieve information from the Gallica digital library
of the Bibliothèque nationale de France (BnF) using their SRU API.
It includes endpoints to search for documents by various criteria and retrieve detailed metadata.
"""
import argparse
import os
import sys
import logging
from typing import List, Dict, Any, Optional, Union
from mcp.server.fastmcp import FastMCP
from bnf_api import GallicaAPI, SearchAPI
from bnf_api.config import DEFAULT_MAX_RECORDS, DEFAULT_START_RECORD
from bnf_api.sequential_reporting import SequentialReportingServer, BNF_SEQUENTIAL_REPORTING_TOOL
# Set up basic logging: configure logging at INFO level when this module is
# loaded, and create the module-level logger used by main().
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse command-line arguments.

    Args:
        argv: Argument strings to parse. When None (the default), argparse
            reads the arguments from sys.argv, which is the behaviour callers
            that pass nothing have always had.

    Returns:
        Namespace containing parsed arguments. No options are defined on the
        parser, so the namespace carries no attributes.
    """
    parser = argparse.ArgumentParser(description="Gallica BnF API MCP Server")
    # Passing argv through lets callers (and tests) parse an explicit list
    # without touching the process-wide sys.argv.
    return parser.parse_args(argv)
# Initialize MCP server; the @mcp.tool() decorators below register their
# functions on this instance, and main() starts it with mcp.run().
mcp = FastMCP("gallica-bnf-api")
# Global variables to hold the API clients.
# They are None at import time and are assigned in main().
gallica_api: Optional[GallicaAPI] = None
search_api: Optional[SearchAPI] = None
sequential_reporting_server: Optional[SequentialReportingServer] = None
# ------------------ MCP TOOL ENDPOINTS ------------------ #
@mcp.tool()
def search_by_title(
    title: str,
    exact_match: bool = False,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Search for documents in the Gallica digital library by title.

    Args:
        title: The title to search for
        exact_match: If True, search for the exact title; otherwise, search for title containing the words
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # Arguments are forwarded positionally, without validation, to SearchAPI.
    return search_api.search_by_title(title, exact_match, max_results, start_record)
@mcp.tool()
def search_by_author(
    author: str,
    exact_match: bool = False,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Search for documents in the Gallica digital library by author.

    Args:
        author: The author name to search for
        exact_match: If True, search for the exact author name; otherwise, search for author containing the words
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # Arguments are forwarded positionally, without validation, to SearchAPI.
    return search_api.search_by_author(author, exact_match, max_results, start_record)
@mcp.tool()
def search_by_subject(
    subject: str,
    exact_match: bool = False,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Search for documents in the Gallica digital library by subject.

    Args:
        subject: The subject to search for
        exact_match: If True, search for the exact subject; otherwise, search for subject containing the words
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # Arguments are forwarded positionally, without validation, to SearchAPI.
    return search_api.search_by_subject(subject, exact_match, max_results, start_record)
@mcp.tool()
def search_by_date(
    date: str,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Search for documents in the Gallica digital library by date.

    Args:
        date: The date to search for (format: YYYY or YYYY-MM or YYYY-MM-DD)
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # The date string is forwarded as given; its format is not checked here.
    return search_api.search_by_date(date, max_results, start_record)
@mcp.tool()
def search_by_document_type(
    doc_type: str,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Search for documents in the Gallica digital library by document type.

    Args:
        doc_type: The document type to search for (e.g., monographie, periodique, image, manuscrit, carte, musique, etc.)
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # Arguments are forwarded positionally, without validation, to SearchAPI.
    return search_api.search_by_document_type(doc_type, max_results, start_record)
@mcp.tool()
def advanced_search(
    query: str,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Perform an advanced search using custom CQL query syntax.

    This tool allows for complex queries using the CQL (Contextual Query Language) syntax.

    Examples:
        - Search for books by Victor Hugo: dc.creator all "Victor Hugo" and dc.type all "monographie"
        - Search for maps about Paris: dc.subject all "Paris" and dc.type all "carte"
        - Search for documents in English: dc.language all "eng"

    Args:
        query: Custom CQL query string
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # The CQL string is handed to SearchAPI unchanged.
    return search_api.advanced_search(query, max_results, start_record)
@mcp.tool()
def natural_language_search(
    query: str,
    max_results: int = DEFAULT_MAX_RECORDS,
    start_record: int = DEFAULT_START_RECORD
) -> Dict[str, Any]:
    """
    Search the Gallica digital library using natural language.

    This is a simplified search that uses the 'gallica all' operator to search across all fields.
    It's the most user-friendly way to search but may not be as precise as the other search methods.

    Args:
        query: Natural language search query
        max_results: Maximum number of results to return (1-50)
        start_record: Starting record for pagination

    Returns:
        Dictionary containing search results and metadata
    """
    global gallica_api, search_api
    # The clients are normally created by main(). Build them on demand so the
    # tool also works when this module is imported without main() having run,
    # instead of failing with AttributeError on a None client.
    if search_api is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        search_api = SearchAPI(gallica_api)
    # Arguments are forwarded positionally, without validation, to SearchAPI.
    return search_api.natural_language_search(query, max_results, start_record)
@mcp.tool()
def sequential_reporting(
    topic: Optional[str] = None,
    page_count: Optional[int] = None,
    source_count: Optional[int] = None,
    search_sources: Optional[bool] = None,
    section_number: Optional[int] = None,
    total_sections: Optional[int] = None,
    title: Optional[str] = None,
    content: Optional[str] = None,
    is_bibliography: Optional[bool] = None,
    sources_used: Optional[List[int]] = None,
    next_section_needed: Optional[bool] = None,
    include_graphics: Optional[bool] = None
) -> Dict[str, Any]:
    """
    Generate a research report in a sequential, step-by-step manner using Gallica BnF sources.

    This tool follows a sequential approach to report generation:
    1. Initialize with a topic
    2. Search for sources
    3. Create bibliography
    4. Create content sections in order

    Parameters:
    - topic: Research topic (only needed for initialization)
    - page_count: Number of pages for the report (default: 4)
    - source_count: Number of sources to find (default: 10)
    - search_sources: Set to True to search for sources after initialization
    - section_number: Current section number (1-based)
    - total_sections: Total number of sections in the report
    - title: Title of the current section
    - content: Content for the current section
    - is_bibliography: Whether this section is the bibliography
    - sources_used: List of source IDs used in this section
    - next_section_needed: Whether another section is needed
    - include_graphics: Whether to include images and maps in the report

    Returns:
    - Report section data
    """
    global gallica_api, search_api, sequential_reporting_server
    # Reuse the reporting server (and API clients) that main() creates rather
    # than building a second, separate set. Fall back to creating them on
    # demand when main() has not run, e.g. the module was only imported.
    if sequential_reporting_server is None:
        if gallica_api is None:
            gallica_api = GallicaAPI()
        if search_api is None:
            search_api = SearchAPI(gallica_api)
        sequential_reporting_server = SequentialReportingServer(gallica_api, search_api)

    # Only the keys relevant to the current step are sent to the server.
    input_data: Dict[str, Any] = {}

    # Initialization step: the sizing/graphics options only apply with a topic.
    if topic:
        input_data['topic'] = topic
        if page_count:
            input_data['page_count'] = page_count
        if source_count:
            input_data['source_count'] = source_count
        # Checked against None so an explicit False is still forwarded.
        if include_graphics is not None:
            input_data['include_graphics'] = include_graphics

    # Source search step.
    if search_sources:
        input_data['search_sources'] = search_sources

    # Section step: all section fields are forwarded together, including any
    # that were left as None.
    if section_number:
        input_data['section_number'] = section_number
        input_data['total_sections'] = total_sections
        input_data['title'] = title
        input_data['content'] = content
        input_data['is_bibliography'] = is_bibliography
        input_data['sources_used'] = sources_used
        input_data['next_section_needed'] = next_section_needed

    # Process the section
    return sequential_reporting_server.process_section(input_data)
# ------------------ MAIN EXECUTION ------------------ #
def main():
    """
    Run the Gallica BnF API MCP Server as a script.

    Parses the command line, creates the Gallica, search and sequential
    reporting clients, stores them in the module-level globals, and then
    starts the MCP server.
    """
    global gallica_api, search_api, sequential_reporting_server

    # The parsed namespace is not used; parsing still handles the command line
    # (no options are defined on the parser).
    parse_arguments()

    # Build the shared clients in dependency order: the search client wraps
    # the Gallica client, and the reporting server uses both.
    gallica_api = GallicaAPI()
    search_api = SearchAPI(gallica_api)
    sequential_reporting_server = SequentialReportingServer(gallica_api, search_api)

    logger.info("Starting Gallica BnF API MCP Server")
    mcp.run()
# Start the server only when this file is executed as a script; importing the
# module registers the tools on `mcp` without calling main().
if __name__ == "__main__":
    main()