en es fr ja ko zh

BnF API Server

"""
Gallica BnF API Client
---------------------
Client for the Gallica BnF SRU API.
Provides methods to search for documents and retrieve metadata.
"""

import logging
import requests
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Dict, Any, List, Optional

# Module-level logger, named after the importing module
logger = logging.getLogger(__name__)

# SRU endpoint of the Gallica service and the pagination defaults
BNF_SRU_URL = "https://gallica.bnf.fr/SRU"
DEFAULT_START_RECORD = 1
DEFAULT_MAX_RECORDS = 10


class GallicaAPI:
    """
    Client for the Gallica BnF SRU API.

    Sends ``searchRetrieve`` requests to the SRU endpoint and converts the
    Dublin Core records found in the XML response into plain dictionaries.
    Failures are reported through an ``"error"`` key in the returned
    dictionary rather than by raising.
    """

    # Seconds to wait for the server before giving up on a request.
    _DEFAULT_TIMEOUT = 30.0

    # Namespace prefixes used in the XPath expressions below.
    _NAMESPACES = {
        'srw': 'http://www.loc.gov/zing/srw/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    }

    # Dublin Core elements copied from each record, in output order.
    _DC_FIELDS = (
        'title', 'creator', 'contributor', 'publisher', 'date',
        'description', 'type', 'format', 'identifier', 'source',
        'language', 'relation', 'coverage', 'rights', 'subject'
    )

    # Substring identifying an identifier value as a Gallica ARK URL.
    _GALLICA_ARK_MARKER = 'gallica.bnf.fr/ark:'

    def __init__(self, timeout: Optional[float] = _DEFAULT_TIMEOUT):
        """
        Initialize the Gallica API client.

        Args:
            timeout: Seconds to wait for the server on each request;
                ``None`` disables the timeout.
        """
        self.base_url = BNF_SRU_URL
        self.timeout = timeout
        logger.info("Gallica API client initialized")

    def search(self,
               query: str,
               start_record: int = DEFAULT_START_RECORD,
               max_records: int = DEFAULT_MAX_RECORDS) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library.

        Args:
            query: Search query in CQL format
            start_record: Starting record number for pagination
            max_records: Maximum number of records to return

        Returns:
            On success, a dictionary with two keys:
              * ``"metadata"``: the query, ``total_records`` (the text of the
                response's ``numberOfRecords`` element, as a string),
                ``records_returned`` and ``date_retrieved``.
              * ``"records"``: one dictionary per record that carries Dublin
                Core data. Repeated fields are lists of strings, single
                fields are strings, and ``gallica_url`` is added when an
                identifier contains a Gallica ARK URL.
            On failure, a dictionary with ``"error"`` and ``"query"`` keys
            (plus ``"parameters"`` for HTTP/network failures).
        """
        params = {
            'version': '1.2',
            'operation': 'searchRetrieve',
            'query': query,
            'startRecord': start_record,
            'maximumRecords': max_records
        }

        try:
            # A timeout keeps a stalled server from blocking the caller forever.
            response = requests.get(
                self.base_url, params=params, timeout=self.timeout
            )
            response.raise_for_status()

            # Parse the raw bytes so the XML parser honours the document's
            # own encoding declaration instead of the charset guessed from
            # the HTTP headers.
            root = ET.fromstring(response.content)

            return self._parse_response(root, query)

        except requests.exceptions.RequestException as e:
            logger.error("Error during Gallica API request: %s", e)
            return {
                "error": str(e),
                "query": query,
                "parameters": params
            }
        except ET.ParseError as e:
            logger.error("Error parsing XML response: %s", e)
            return {
                "error": f"XML parsing error: {str(e)}",
                "query": query
            }
        except Exception as e:
            # Boundary handler: callers expect an error dictionary, never an
            # exception. Keep the traceback in the log for diagnosis.
            logger.exception("Unexpected error: %s", e)
            return {
                "error": str(e),
                "query": query
            }

    def _parse_response(self, root: ET.Element, query: str) -> Dict[str, Any]:
        """Build the result dictionary from a parsed SRU response tree."""
        namespaces = self._NAMESPACES

        count_element = root.find('.//srw:numberOfRecords', namespaces)
        if count_element is None:
            # Without this element the response is not a usable result set;
            # report that explicitly instead of failing on ``None.text``.
            message = "Malformed SRU response: missing numberOfRecords element"
            logger.error(message)
            return {
                "error": message,
                "query": query
            }

        records = root.findall('.//srw:record', namespaces)

        results = {
            "metadata": {
                "query": query,
                "total_records": count_element.text,
                # Counts every record element, including any that carry no
                # Dublin Core payload and are therefore skipped below.
                "records_returned": len(records),
                "date_retrieved": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            },
            "records": []
        }

        for record in records:
            record_data = record.find('.//srw:recordData/oai_dc:dc', namespaces)
            if record_data is not None:
                results['records'].append(self._extract_record(record_data))

        return results

    @classmethod
    def _extract_record(cls, record_data: ET.Element) -> Dict[str, Any]:
        """Convert one ``oai_dc:dc`` element into a dictionary of its fields."""
        record_dict: Dict[str, Any] = {}

        for field in cls._DC_FIELDS:
            elements = record_data.findall(f'./dc:{field}', cls._NAMESPACES)
            values: List[str] = [
                elem.text.strip()
                for elem in elements
                if elem.text and elem.text.strip()
            ]
            if not values:
                # Absent or blank-only fields are omitted (never stored as []).
                continue
            # Repeated elements stay a list; a lone element becomes a string.
            record_dict[field] = values if len(elements) > 1 else values[0]

        # Expose the first identifier that is a Gallica ARK URL, if any.
        identifiers = record_dict.get('identifier')
        if identifiers is not None:
            if isinstance(identifiers, str):
                identifiers = [identifiers]
            for identifier in identifiers:
                if cls._GALLICA_ARK_MARKER in identifier:
                    record_dict['gallica_url'] = identifier
                    break

        return record_dict

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Kryzo/mcp-bibliotheque_nationale_de_France'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.