Oxenstierna

search_client.py•6.93 KiB

""" Search API client for Riksarkivet. Provides a simplified interface to the Riksarkivet Search API (/api/records endpoint) with direct Pydantic response parsing. """ import logging from opentelemetry.trace import StatusCode from ra_mcp_common.telemetry import get_tracer from ra_mcp_common.utils.http_client import HTTPClient from ra_mcp_search.config import DEFAULT_LIMIT, REQUEST_TIMEOUT, SEARCH_API_BASE_URL from ra_mcp_search.models import RecordsResponse _tracer = get_tracer("ra_mcp.search.client") class SearchClient: """ Client for Riksarkivet Search API. Handles searching transcribed documents and returns structured responses that directly map to the API's JSON structure. """ def __init__(self, http_client: HTTPClient): self.http_client = http_client self.logger = logging.getLogger("ra_mcp.search.client") async def search( self, text: str | None = None, transcribed_text: str | None = None, only_digitised_materials: bool = True, limit: int = DEFAULT_LIMIT, offset: int = 0, max_snippets_per_record: int | None = None, sort: str = "relevance", year_min: int | None = None, year_max: int | None = None, name: str | None = None, place: str | None = None, ) -> RecordsResponse: """ Search for records using various search parameters. Parameter names match the Search API specification for clarity. You can search either transcribed materials or general text (metadata, names, places). Args: text: General free-text search across all fields transcribed_text: Search specifically in AI-transcribed text (requires only_digitised_materials=True) only_digitised_materials: Limit results to digitised materials (default: True, API default: False) limit: Maximum number of records to return offset: Pagination offset (API parameter name) max_snippets_per_record: Client-side snippet limiting per record (not sent to API) sort: Sort order — one of: relevance, timeAsc, timeDesc, alphaAsc, alphaDesc year_min: Filter results to this start year or later year_max: Filter results to this end year or earlier name: Search by person name (API field, can combine with text) place: Search by place name (API field, can combine with text) Returns: RecordsResponse with all API fields populated Raises: ValueError: If transcribed_text is used without only_digitised_materials=True """ # Validate parameters if transcribed_text and not only_digitised_materials: raise ValueError( "transcribed_text search requires only_digitised_materials=True. Use text parameter instead of transcribed_text to search all materials." ) if not text and not transcribed_text and not name and not place: raise ValueError("Must provide at least one search parameter: 'text', 'transcribed_text', 'name', or 'place'") search_term = transcribed_text or text search_type = "transcribed" if transcribed_text else "general" self.logger.info("Starting %s search: keyword='%s', limit=%d, offset=%d", search_type, search_term, limit, offset) with _tracer.start_as_current_span( "SearchClient.search", attributes={ "search.type": search_type, "search.keyword": search_term or "", "search.offset": offset, "search.limit": limit, }, ) as span: try: # Build search parameters params = { "limit": limit, "offset": offset, "sort": sort, } if only_digitised_materials: params["only_digitised_materials"] = "true" if transcribed_text: params["transcribed_text"] = transcribed_text elif text: params["text"] = text if name: params["name"] = name if place: params["place"] = place if year_min is not None: params["year_min"] = year_min if year_max is not None: params["year_max"] = year_max response_data = await self.http_client.get_json(SEARCH_API_BASE_URL, params=params, timeout=REQUEST_TIMEOUT) # Parse entire API response with Pydantic response = RecordsResponse(**response_data) # Apply client-side snippet limiting if requested if max_snippets_per_record: self._limit_snippets(response, max_snippets_per_record) self.logger.info( "✓ Search completed: %d snippets from %d records (%d total)", response.count_snippets(), len(response.items), response.total_hits ) span.set_attribute("search.total_hits", response.total_hits) span.set_attribute("search.result_count", len(response.items)) return response except Exception as error: span.set_status(StatusCode.ERROR, str(error)) span.record_exception(error) self.logger.error("✗ Search failed: %s: %s", type(error).__name__, error) raise async def search_transcribed_text( self, transcribed_text: str, limit: int = DEFAULT_LIMIT, offset: int = 0, max_snippets_per_record: int | None = None, ) -> RecordsResponse: """ Search for keyword in transcribed materials (convenience method). This is a convenience wrapper around the main search() method. Args: transcribed_text: Search term or Solr query (API parameter name) limit: Maximum number of records to return offset: Pagination offset (API parameter name) max_snippets_per_record: Client-side snippet limiting per record (not sent to API) Returns: RecordsResponse with all API fields populated """ return await self.search( transcribed_text=transcribed_text, only_digitised_materials=True, limit=limit, offset=offset, max_snippets_per_record=max_snippets_per_record, ) def _limit_snippets(self, response: RecordsResponse, max_snippets: int) -> None: """ Limit snippets per record (client-side truncation). Modifies the response in-place to keep only the first N snippets per record. """ for record in response.items: if record.transcribed_text and record.transcribed_text.snippets: record.transcribed_text.snippets = record.transcribed_text.snippets[:max_snippets]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AI-Riksarkivet/oxenstierna'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

search_client.py•6.93 KiB