# BnF API Server
# by Kryzo
# Verified
"""
BnF Sequential Reporting Tool
----------------------------
This module provides a tool for generating structured reports based on research
from the Gallica BnF digital library. It uses a sequential approach to gather sources,
analyze them, and generate a comprehensive report with proper citations.
"""
import json
import logging
import sys
import textwrap
import unicodedata
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional, Any, Union

from .api import GallicaAPI
from .search import SearchAPI
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Defaults used by SequentialReportingServer when a request omits a value
# or supplies one that cannot be converted to an integer.
DEFAULT_PAGE_COUNT = 4  # number of report pages
DEFAULT_SOURCE_COUNT = 10  # number of sources to retrieve
@dataclass
class ReportSection:
    """
    Represents a section of the sequential report.

    Attributes:
        section_number: Position of this section in the report sequence.
        total_sections: Total number of sections in the report.
        content: Text of the section.
        title: Title of the section.
        is_bibliography: Whether this section is the bibliography.
        sources_used: IDs of the sources used in this section, or ``None``
            (the default) when none were given.
        next_section_needed: Whether another section is needed after this one.
    """

    section_number: int
    total_sections: int
    content: str
    title: str
    is_bibliography: bool = False
    # The default is None, so the annotation has to be Optional. None is kept
    # (rather than a list default) so existing callers see the same value.
    sources_used: Optional[List[int]] = None
    next_section_needed: bool = True
class SequentialReportingServer:
    """
    Server for generating sequential reports based on BnF research.

    A report is built over several calls to :meth:`process_section`: an
    initialization request carrying a ``topic``, a ``search_sources`` request,
    and then one request per report section. The instance keeps the topic,
    plan, sources, graphics and submitted sections between those calls.
    """

    # Width, in terminal columns, of the box drawn by format_section.
    _BOX_WIDTH = 80

    # Upper bound on the number of sections in a generated plan.
    _MAX_PLAN_SECTIONS = 20

    # camelCase spellings (the style used by BNF_SEQUENTIAL_REPORTING_TOOL's
    # input schema and by the response payloads) mapped to the snake_case keys
    # this class works with.
    _KEY_ALIASES = {
        'pageCount': 'page_count',
        'sourceCount': 'source_count',
        'includeGraphics': 'include_graphics',
        'searchSources': 'search_sources',
        'sectionNumber': 'section_number',
        'totalSections': 'total_sections',
        'isBibliography': 'is_bibliography',
        'sourcesUsed': 'sources_used',
        'nextSectionNeeded': 'next_section_needed',
    }

    def __init__(self, gallica_api: GallicaAPI, search_api: SearchAPI):
        """
        Initialize the Sequential Reporting Server.

        Args:
            gallica_api: An initialized GallicaAPI instance
            search_api: An initialized SearchAPI instance
        """
        self.gallica_api = gallica_api
        self.search_api = search_api
        self.topic = None
        self.page_count = DEFAULT_PAGE_COUNT
        self.source_count = DEFAULT_SOURCE_COUNT
        self.sources = []
        self.report_sections = []
        self.plan = None
        self._current_step = 0
        self.include_graphics = False
        self.graphics = []

    # ------------------------------------------------------------------
    # Input validation
    # ------------------------------------------------------------------

    @classmethod
    def _normalize_keys(cls, input_data: Any) -> Dict[str, Any]:
        """Return a copy of *input_data* with camelCase aliases renamed to snake_case."""
        normalized = dict(input_data)
        for alias, key in cls._KEY_ALIASES.items():
            if alias in normalized:
                value = normalized.pop(alias)
                # An explicitly supplied snake_case key wins over its alias.
                normalized.setdefault(key, value)
        return normalized

    @staticmethod
    def _positive_int(value: Any, default: int) -> int:
        """Convert *value* to an int >= 1, falling back to *default* otherwise."""
        try:
            number = int(value)
        except (ValueError, TypeError, OverflowError):
            return default
        return number if number >= 1 else default

    @staticmethod
    def _section_index(value: Any, label: str) -> int:
        """
        Convert a section number/count to a positive int.

        Args:
            value: An int, an integral float, or a string of decimal digits
            label: Field name used in the error message

        Raises:
            ValueError: If *value* is not a number or is smaller than 1
        """
        if isinstance(value, str) and value.strip().isdecimal():
            number = int(value)
        elif isinstance(value, bool):
            # bool is a subclass of int, but True/False are not section numbers.
            raise ValueError(f"Invalid {label}: must be a number")
        elif isinstance(value, int):
            number = value
        elif isinstance(value, float) and value.is_integer():
            number = int(value)
        else:
            raise ValueError(f"Invalid {label}: must be a number")
        if number < 1:
            raise ValueError(f"Invalid {label}: must be at least 1")
        return number

    def validate_section_data(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate the input data for a section.

        Three request shapes are recognized, checked in this order:

        * initialization: contains ``topic`` (optionally ``page_count``,
          ``source_count``, ``include_graphics``);
        * source search: contains a truthy ``search_sources``;
        * section data: contains ``section_number`` and ``total_sections``.

        Keys may be given in snake_case or in the camelCase spelling listed
        in ``_KEY_ALIASES``.

        Args:
            input_data: The input data for the section

        Returns:
            Validated input data as a dictionary with snake_case keys

        Raises:
            ValueError: If section data is missing a required field or
                contains an invalid value
        """
        data = self._normalize_keys(input_data)

        # Handle initialization with topic
        if 'topic' in data:
            validated: Dict[str, Any] = {'topic': str(data['topic'])}
            if 'page_count' in data:
                validated['page_count'] = self._positive_int(
                    data['page_count'], DEFAULT_PAGE_COUNT)
            if 'source_count' in data:
                validated['source_count'] = self._positive_int(
                    data['source_count'], DEFAULT_SOURCE_COUNT)
            if 'include_graphics' in data:
                validated['include_graphics'] = bool(data['include_graphics'])
            return validated

        # Handle search_sources flag
        if data.get('search_sources'):
            return {'search_sources': True}

        # Check if required fields are present for section data
        for field in ('section_number', 'total_sections'):
            if field not in data:
                raise ValueError(f"Missing required field: {field}")

        section_number = self._section_index(data['section_number'], 'sectionNumber')
        total_sections = self._section_index(data['total_sections'], 'totalSections')

        # The title is used with str methods later on, so make sure it is one.
        title = data.get('title')
        title = f"Section {section_number}" if title is None else str(title)

        # Content defaults to an empty string
        content = data.get('content', '')
        if content is None:
            content = ''
        if not isinstance(content, str):
            raise ValueError("Invalid content: must be a string")

        # sources_used defaults to an empty list
        sources_used = data.get('sources_used', [])
        if sources_used is None:
            sources_used = []

        return {
            'section_number': section_number,
            'total_sections': total_sections,
            'title': title,
            'content': content,
            'is_bibliography': data.get('is_bibliography', False),
            'sources_used': sources_used,
            'next_section_needed': data.get('next_section_needed', True),
        }

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------

    def search_sources(self, topic: str, source_count: int = DEFAULT_SOURCE_COUNT) -> List[Dict[str, Any]]:
        """
        Search for sources on the given topic.

        A natural language search is tried first; if it yields fewer than
        ``source_count`` records, a subject search is used to fill the gap.

        Args:
            topic: The topic to search for
            source_count: The number of sources to retrieve

        Returns:
            List of sources as dictionaries (empty if the search fails)
        """
        try:
            results = self.search_api.natural_language_search(topic, max_results=source_count)
            records = list(results.get('records') or [])

            # If not enough results, try subject search for the remainder
            missing = source_count - len(records)
            if missing > 0:
                subject_results = self.search_api.search_by_subject(topic, max_results=missing)
                records.extend(subject_results.get('records') or [])

            return [
                {
                    'id': source_id,
                    'title': record.get('title', 'Unknown Title'),
                    'creator': record.get('creator', 'Unknown Author'),
                    'date': record.get('date', 'Unknown Date'),
                    'type': record.get('type', 'Unknown Type'),
                    'language': record.get('language', 'Unknown Language'),
                    # The other methods of this class read the link from
                    # 'gallica_url', so fall back to it when 'url' is absent.
                    'url': record.get('url') or record.get('gallica_url', ''),
                    'citation': self._format_citation(record),
                    'thumbnail': record.get('thumbnail', ''),
                }
                for source_id, record in enumerate(records[:source_count], 1)
            ]
        except Exception as e:
            # Best effort: a failed search yields no sources. Reported through
            # the module logger rather than printed to stdout.
            logger.error("Error searching for sources: %s", e)
            return []

    def _search_graphic_records(self, doc_type: str, main_keyword: str,
                                topic: str, count: int) -> List[Dict[str, Any]]:
        """Search one document type by the main keyword, then by the full topic if empty."""
        query = f'gallica all "{main_keyword}" and dc.type all "{doc_type}"'
        results = self.search_api.advanced_search(query, max_results=count)
        if not results.get('records', []):
            query = f'gallica all "{topic}" and dc.type all "{doc_type}"'
            results = self.search_api.advanced_search(query, max_results=count)
        return list(results.get('records') or [])

    @staticmethod
    def _build_graphic(record: Dict[str, Any], graphic_id: int,
                       kind: str, topic: str) -> Dict[str, Any]:
        """Turn a search record into a graphic entry of the given kind ('image' or 'map')."""
        label = kind.capitalize()
        title = record.get('title', f'Untitled {label}')

        # Use gallica_url without any /thumbnail suffix as the document URL.
        url = (record.get('gallica_url') or '').replace('/thumbnail', '')
        thumbnail = ''
        if 'ark:' in url:
            ark_id = url.split('ark:', 1)[1]
            if ark_id:
                thumbnail = f"https://gallica.bnf.fr/ark:{ark_id}/thumbnail"

        return {
            'id': graphic_id,
            'title': title,
            'description': f"{label} related to {topic}: {title}",
            'type': kind,
            'url': url,
            'thumbnail': thumbnail,
        }

    def search_graphics(self, topic: str, count: int = 5) -> List[Dict[str, Any]]:
        """
        Search for graphics (images, maps) related to the topic.

        Images are listed before maps and the combined list is cut to
        ``count`` entries. When nothing is found, two generic placeholder
        entries pointing at the Gallica home page are returned instead.

        Args:
            topic: The topic to search for
            count: The number of graphics to retrieve

        Returns:
            List of graphics as dictionaries (empty if the search fails)
        """
        try:
            # The first word of the topic is used as a broader search term
            keywords = topic.split()
            main_keyword = keywords[0] if keywords else topic

            image_records = self._search_graphic_records('image', main_keyword, topic, count)
            map_records = self._search_graphic_records('carte', main_keyword, topic, count)

            # If still no results, try a more general search for any visual material
            if not image_records and not map_records:
                general_query = f'gallica all "{main_keyword}" and (dc.type all "image" or dc.type all "carte" or dc.type all "estampe")'
                general_results = self.search_api.advanced_search(general_query, max_results=count)
                image_records = list(general_results.get('records') or [])

            # Images first, then maps; ids run consecutively across both.
            graphics = []
            for kind, records in (('image', image_records), ('map', map_records)):
                for record in records:
                    graphics.append(self._build_graphic(record, len(graphics) + 1, kind, topic))

            # If we still have no graphics, create placeholders with generic URLs
            if not graphics:
                graphics = [
                    {
                        'id': 1,
                        'title': f"Illustration related to {topic}",
                        'description': f"Illustration related to {topic}",
                        'type': 'image',
                        'url': 'https://gallica.bnf.fr/',
                        'thumbnail': 'https://gallica.bnf.fr/themes/gallica2015/images/logo-gallica.png'
                    },
                    {
                        'id': 2,
                        'title': f"Map related to {topic}",
                        'description': f"Map related to {topic}",
                        'type': 'map',
                        'url': 'https://gallica.bnf.fr/',
                        'thumbnail': 'https://gallica.bnf.fr/themes/gallica2015/images/logo-gallica.png'
                    }
                ]
            return graphics[:count]
        except Exception as e:
            # Best effort: a failed search yields no graphics. Reported through
            # the module logger rather than printed to stdout.
            logger.error("Error searching for graphics: %s", e)
            return []

    # ------------------------------------------------------------------
    # Request processing
    # ------------------------------------------------------------------

    @staticmethod
    def _text_response(payload: Dict[str, Any]) -> Dict[str, Any]:
        """Wrap *payload* as a JSON string in the response envelope."""
        return {'content': [{'text': json.dumps(payload)}]}

    def _initialize_report(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Reset all report state for a new topic and build its plan."""
        self.topic = data['topic']
        self.page_count = data.get('page_count', DEFAULT_PAGE_COUNT)
        self.source_count = data.get('source_count', DEFAULT_SOURCE_COUNT)
        self.include_graphics = data.get('include_graphics', False)
        self.sources = []
        self.graphics = []
        self.report_sections = []
        self._current_step = 0

        # Create a plan for the report
        self.plan = self.create_plan(self.topic, self.page_count)
        return self._text_response({
            'topic': self.topic,
            'pageCount': self.page_count,
            'sourceCount': self.source_count,
            'includeGraphics': self.include_graphics,
            'plan': self.plan,
            'nextStep': 'Search for sources using natural_language_search or search_by_subject'
        })

    def _collect_sources(self) -> Dict[str, Any]:
        """Search sources (and graphics, if enabled) for the current topic."""
        if not self.topic:
            return {
                'content': [{'text': 'Error: No topic specified. Please initialize with a topic first.'}],
                # Flagged the same way as the exception path of process_section.
                'isError': True
            }

        self.sources = self.search_sources(self.topic, self.source_count)
        # If graphics are requested, search for them
        if self.include_graphics:
            self.graphics = self.search_graphics(self.topic, count=5)
        self._current_step = 1
        return self._text_response({
            'sources': self.sources,
            'graphics': self.graphics if self.include_graphics else [],
            'nextStep': 'Create bibliography section'
        })

    def _record_section(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Store a validated section, display it on stderr and report progress."""
        number = section['section_number']

        # Adjust total sections if needed
        if number > section['total_sections']:
            section['total_sections'] = number

        # Format before storing so that a formatting failure does not leave a
        # section recorded while the caller is told the request failed.
        formatted_section = self.format_section(section)
        self.report_sections.append(section)
        print(formatted_section, file=sys.stderr)

        # Update current step in plan
        if self.plan:
            self.plan["current_section"] = number
            planned = self.plan["sections"]
            if number < len(planned):
                # planned is 0-based, so index `number` is the section after this one.
                next_step = f"Create section {number + 1}: {planned[number]['title']}"
            else:
                next_step = "Report complete"
        else:
            next_step = "Continue writing the report"
        if not section['next_section_needed']:
            next_step = "Report complete"

        # total_sections is validated to be >= 1, so this cannot divide by
        # zero. Capped because a section submitted twice is counted twice.
        progress = min(100.0, len(self.report_sections) / section['total_sections'] * 100)

        return self._text_response({
            'sectionNumber': number,
            'totalSections': section['total_sections'],
            'nextSectionNeeded': section['next_section_needed'],
            'progress': f"{progress:.1f}%",
            'reportSectionsCount': len(self.report_sections),
            'nextStep': next_step,
            'sources': self.sources if section['is_bibliography'] else None
        })

    def process_section(self, input_data: Any) -> Dict[str, Any]:
        """
        Process a report section following a sequential approach.

        Depending on the validated input this initializes a new report,
        searches for sources, or records one report section.

        Args:
            input_data: The input data for the section

        Returns:
            Response data as a dictionary. On failure the JSON payload holds
            ``error`` and ``status`` and the response has ``isError`` set.
        """
        try:
            # Validated once; the result selects which step to run.
            data = self.validate_section_data(input_data)
            if 'topic' in data:
                return self._initialize_report(data)
            if data.get('search_sources', False):
                return self._collect_sources()
            return self._record_section(data)
        except Exception as error:
            logger.error("Error processing report section: %s", error)
            return {
                'content': [{
                    'text': json.dumps({
                        'error': str(error),
                        'status': 'failed'
                    })
                }],
                'isError': True
            }

    # ------------------------------------------------------------------
    # Display
    # ------------------------------------------------------------------

    @staticmethod
    def _display_width(text: str) -> int:
        """Return the number of terminal columns *text* is expected to occupy."""
        columns = 0
        for char in text:
            if unicodedata.combining(char):
                continue  # combining marks are drawn on the previous character
            # Wide/fullwidth characters (e.g. the emoji icons) take two columns.
            columns += 2 if unicodedata.east_asian_width(char) in ('W', 'F') else 1
        return columns

    @classmethod
    def _box_row(cls, text: str, width: int) -> str:
        """Return *text* as one padded row of a box *width* columns wide."""
        padding = max(0, width - 4 - cls._display_width(text))
        return "│ " + text + " " * padding + " │\n"

    def format_section(self, section: Dict[str, Any]) -> str:
        """
        Format a report section for display.

        The section is drawn as a box with a colored header, the wrapped
        content and, for non-bibliography sections when graphics are enabled,
        up to two graphics whose title shares a word with the section title.

        Args:
            section: The report section to format

        Returns:
            Formatted section as a string
        """
        section_number = section.get('section_number', 0)
        total_sections = section.get('total_sections', 0)
        title = str(section.get('title', 'Untitled'))
        is_bibliography = section.get('is_bibliography', False)
        width = self._BOX_WIDTH

        # Yellow for bibliography, blue for content
        if is_bibliography:
            color, emoji = "\033[93m", "📚"
        else:
            color, emoji = "\033[94m", "📄"
        label = f" Section{section_number}/{total_sections}: {title} "
        header = f" {color}{emoji}\033[0m{label}"
        # Padding is computed from the text as displayed: the ANSI color codes
        # occupy no columns, so they are left out of the measured string.
        header_padding = max(0, width - 2 - self._display_width(f" {emoji}{label}"))

        rule = "─" * (width - 2)
        parts = [
            "┌" + rule + "┐\n",
            "│" + header + " " * header_padding + "│\n",
            "├" + rule + "┤\n",
        ]

        # Add content, wrapped to fit in the box
        content = section.get('content', '')
        if content:
            parts.extend(self._box_row(line, width)
                         for line in textwrap.wrap(content, width=width - 4))

        # Add graphics if available and this is not a bibliography
        if not is_bibliography and self.include_graphics and self.graphics:
            # Simple relevance check: a word of the section title appears in
            # the graphic's title.
            terms = title.lower().split()
            section_graphics = [
                graphic for graphic in self.graphics
                if any(term in str(graphic.get('title', '')).lower() for term in terms)
            ]
            # Add up to 2 graphics for this section
            if section_graphics:
                parts.append(self._box_row("", width))
                parts.append(self._box_row("Graphics:", width))
                for graphic in section_graphics[:2]:
                    desc = f"- {graphic.get('description', '')}"
                    parts.extend(self._box_row(line, width)
                                 for line in textwrap.wrap(desc, width=width - 4))
                    parts.append(self._box_row(f" URL: {graphic.get('url', '')}", width))

        parts.append("└" + rule + "┘")
        return "".join(parts)

    # ------------------------------------------------------------------
    # Planning and citations
    # ------------------------------------------------------------------

    def create_plan(self, topic: str, page_count: int = DEFAULT_PAGE_COUNT) -> Dict[str, Any]:
        """
        Create a sequential plan for the report based on the topic.

        Args:
            topic: The research topic
            page_count: Number of pages to generate (values below 1 are
                treated as 1)

        Returns:
            A plan dictionary with sections and steps
        """
        page_count = max(1, page_count)
        # Number of sections based on page count
        # (1 page ≈ 2 sections + bibliography), capped.
        total_sections = min(page_count * 2 + 1, self._MAX_PLAN_SECTIONS)

        # Standard opening sections
        titles = ["Bibliography", "Introduction"]
        # Add content sections based on page count
        if page_count >= 2:
            titles.append("Historical Context")
        if page_count >= 3:
            titles += ["Main Analysis", "Key Findings"]
        if page_count >= 4:
            titles += ["Detailed Examination", "Critical Perspectives"]

        # Fill longer reports, keeping one slot for the conclusion so that the
        # plan holds exactly total_sections entries and respects the cap.
        filler_count = total_sections - len(titles) - 1
        titles += [f"Additional Analysis {i + 1}" for i in range(filler_count)]

        # Always end with conclusion
        titles.append("Conclusion")

        sections = [
            {"title": title, "is_bibliography": title == "Bibliography"}
            for title in titles
        ]
        return {
            "topic": topic,
            "total_sections": len(sections),
            "sections": sections,
            "current_section": 0,
            "steps": [
                "Initialize with topic",
                "Search for sources",
                "Create bibliography",
                "Write introduction",
                "Develop content sections",
                "Write conclusion"
            ],
            "current_step": 0,
            "next_step": "Search for sources"
        }

    @staticmethod
    def _as_text(value: Any, default: str = '', separator: str = ', ') -> str:
        """Render a record field (string, list of values or None) as text."""
        if isinstance(value, (list, tuple)):
            value = separator.join(str(item) for item in value if item is not None)
        elif value is not None:
            value = str(value)
        return value or default

    def _format_citation(self, record: Dict[str, Any]) -> str:
        """
        Format a record as a citation.

        Periodicals and articles are cited without a publisher; every other
        document type includes it.

        Args:
            record: The record to format

        Returns:
            Formatted citation as a string
        """
        creator = self._as_text(record.get('creator'), 'Unknown Author')
        title = self._as_text(record.get('title'), 'Unknown Title')
        publisher = self._as_text(record.get('publisher'), 'Unknown Publisher')
        date = self._as_text(record.get('date'), 'n.d.')
        url = self._as_text(
            record.get('gallica_url') or record.get('identifier'),
            'No URL available')

        # The type may be a list or missing; flatten it before matching.
        doc_type = self._as_text(record.get('type'), separator=' ').lower()
        periodical_markers = ('periodique', 'périodique', 'article')
        if any(marker in doc_type for marker in periodical_markers):
            return f"{creator}. ({date}). {title}. Retrieved from {url}"
        return f"{creator}. ({date}). {title}. {publisher}. Retrieved from {url}"
# Tool definition
#
# The definition is assembled from three parts: the long usage guide, the
# per-parameter JSON Schema properties, and the list of required parameters.

# Usage guide exposed as the tool's "description".
_BNF_REPORTING_DESCRIPTION = """A tool for generating comprehensive research reports using the Gallica BnF digital library.
This tool helps create well-structured, properly cited reports on any topic by breaking the process into sequential steps.
When to use this tool:
- Creating research reports on historical, literary, or cultural topics
- Generating academic papers with proper citations
- Compiling information from multiple Gallica sources into a cohesive document
- Producing educational materials based on primary and secondary sources
Key features:
- Automatically searches for relevant sources in the Gallica digital library
- Creates properly formatted citations in a bibliography
- Generates reports with a specified number of pages (default: 4)
- Supports sequential writing of report sections
- Includes in-text citations in the format [1], [2], etc.
- Maintains context across multiple sections
How it works:
1. First, provide a topic and optional configuration parameters
2. The tool searches for relevant sources in the Gallica digital library
3. Start by creating the bibliography as the first section
4. Then write each section of the report sequentially
5. Include in-text citations to reference sources from the bibliography
6. Continue until the report is complete
Parameters explained:
- topic: The research topic for the report (only needed for initialization)
- pageCount: Number of pages to generate (default: 4)
- sourceCount: Number of sources to find (default: 10)
- sectionNumber: Current section number in sequence
- totalSections: Total number of sections in the report
- title: Title of the current section
- content: The content of the current section
- isBibliography: Whether this section is the bibliography
- sourcesUsed: List of source IDs used in this section
- nextSectionNeeded: Whether another section is needed
- includeGraphics: Whether to include graphics in the report (default: False)
You should:
1. Start by providing a topic to initialize the research
2. Create the bibliography first as section 1
3. Write each section sequentially, including in-text citations [1], [2], etc.
4. Ensure each section builds on previous ones to create a cohesive report
5. Include a conclusion in the final section
6. Set nextSectionNeeded to false when the report is complete"""

# JSON Schema for each accepted parameter, in declaration order.
_BNF_REPORTING_PROPERTIES = {
    "topic": {
        "type": "string",
        "description": "Research topic for the report (only needed for initialization)",
    },
    "pageCount": {
        "type": "integer",
        "description": "Number of pages to generate",
        "minimum": 1,
        "default": 4,
    },
    "sourceCount": {
        "type": "integer",
        "description": "Number of sources to find",
        "minimum": 1,
        "default": 10,
    },
    "sectionNumber": {
        "type": "integer",
        "description": "Current section number",
        "minimum": 1,
    },
    "totalSections": {
        "type": "integer",
        "description": "Total sections in the report",
        "minimum": 1,
    },
    "title": {
        "type": "string",
        "description": "Title of the current section",
    },
    "content": {
        "type": "string",
        "description": "Content of the current section",
    },
    "isBibliography": {
        "type": "boolean",
        "description": "Whether this section is the bibliography",
    },
    "sourcesUsed": {
        "type": "array",
        "items": {"type": "integer"},
        "description": "List of source IDs used in this section",
    },
    "nextSectionNeeded": {
        "type": "boolean",
        "description": "Whether another section is needed",
    },
    "includeGraphics": {
        "type": "boolean",
        "description": "Whether to include graphics in the report",
        "default": False,
    },
}

# Parameters the schema marks as mandatory.
_BNF_REPORTING_REQUIRED = [
    "sectionNumber",
    "totalSections",
    "title",
    "content",
    "nextSectionNeeded",
]

BNF_SEQUENTIAL_REPORTING_TOOL = {
    "name": "bnf_sequential_reporting",
    "description": _BNF_REPORTING_DESCRIPTION,
    "inputSchema": {
        "type": "object",
        "properties": _BNF_REPORTING_PROPERTIES,
        "required": _BNF_REPORTING_REQUIRED,
    },
}