advanced_search
Search the Gallica digital library using custom CQL queries to find documents by creator, type, subject, language, or other metadata fields.
Instructions
Perform an advanced search using custom CQL query syntax. Examples: dc.creator all "Victor Hugo" and dc.type all "monographie", dc.subject all "Paris" and dc.type all "carte", dc.language all "eng".
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Custom CQL query string | |
| max_results | No | Maximum number of results to return (1-50) | |
| start_record | No | Starting record for pagination | |
Implementation Reference
- src/tools/gallicaSearch.ts:232-264 (handler) Tool definition with input schema and handler function for advanced_search. Defines the tool name, description, input parameters (query, max_results, start_record), and a handler that calls searchApi.advancedSearch().
export function createAdvancedSearchTool(searchApi: SearchAPI) { return { name: 'advanced_search', description: 'Perform an advanced search using custom CQL query syntax. Examples: dc.creator all "Victor Hugo" and dc.type all "monographie", dc.subject all "Paris" and dc.type all "carte", dc.language all "eng".', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Custom CQL query string', }, max_results: { type: 'number', description: 'Maximum number of results to return (1-50)', default: config.defaultMaxRecords, }, start_record: { type: 'number', description: 'Starting record for pagination', default: config.defaultStartRecord, }, }, required: ['query'], }, handler: async (args: unknown) => { const parsed = searchParamsSchema.extend({ query: z.string() }).parse(args); return await searchApi.advancedSearch( parsed.query, parsed.max_results ?? config.defaultMaxRecords, parsed.start_record ?? config.defaultStartRecord ); }, }; - src/gallica/search.ts:241-247 (handler)The SearchAPI.advancedSearch method implementation - a thin wrapper that delegates to the private search() method with custom CQL query support.
/**
 * Runs a search with a caller-supplied CQL query string.
 *
 * This is a pass-through to `search()`. Note that `search()` expects the
 * starting record *before* the record limit, so the last two arguments are
 * forwarded in the opposite order from this method's own parameter list.
 *
 * @param query - CQL query, forwarded unchanged.
 * @param maxResults - Record limit; defaults to `config.defaultMaxRecords`.
 * @param startRecord - Starting record; defaults to `config.defaultStartRecord`.
 * @returns The promise produced by `search()`.
 */
advancedSearch(
  query: string,
  maxResults: number = config.defaultMaxRecords,
  startRecord: number = config.defaultStartRecord
): Promise<SearchResult> {
  const pendingResult = this.search(query, startRecord, maxResults);
  return pendingResult;
}
// - src/gallica/search.ts:27-63 (handler)Core search method that executes the actual HTTP request to Gallica SRU API, handles error cases, and returns parsed search results.
/**
 * Sends one `searchRetrieve` request to the SRU endpoint at `this.sruUrl`
 * and returns the parsed result.
 *
 * Failures are not rethrown: any error raised while fetching or parsing is
 * logged and converted into an empty result whose `error` field carries the
 * message and whose `parameters` field echoes the request parameters.
 *
 * @param query - Query string sent as the `query` parameter.
 * @param startRecord - Sent as `startRecord`; defaults to `config.defaultStartRecord`.
 * @param maxRecords - Requested record count; values above 50 are reduced to 50.
 * @returns The parsed search result, or an empty result describing the failure.
 */
private async search(
  query: string,
  startRecord: number = config.defaultStartRecord,
  maxRecords: number = config.defaultMaxRecords
): Promise<SearchResult> {
  logger.info(`[SEARCH] Executing search query: "${query}" (startRecord: ${startRecord}, maxRecords: ${maxRecords})`);

  // Upper bound of 50 per request (the original comment notes this mirrors the Python version).
  const cappedMaxRecords = Math.min(maxRecords, 50);
  const params = {
    version: '1.2',
    operation: 'searchRetrieve',
    query,
    startRecord: String(startRecord),
    maximumRecords: String(cappedMaxRecords),
  };

  try {
    logger.debug(`[SEARCH] Calling Gallica SRU API with params:`, params);
    const xmlBody = await this.httpClient.getXml(this.sruUrl, params);
    logger.debug(`[SEARCH] Received XML response, length: ${xmlBody.length} bytes`);

    const parsed = this.parseSruResponse(xmlBody, query);
    const returnedCount = parsed.records.length;
    const totalCount = parsed.metadata.total_records;
    logger.info(`[SEARCH] Search completed: ${returnedCount} records returned out of ${totalCount} total`);
    return parsed;
  } catch (error) {
    // Normalise the thrown value once; it may be anything, not just an Error.
    let failureMessage: string;
    let failureStack: string | undefined;
    if (error instanceof Error) {
      failureMessage = error.message;
      failureStack = error.stack;
    } else {
      failureMessage = String(error);
      failureStack = 'No stack trace';
    }

    logger.error(`[SEARCH] Error during Gallica API request: ${failureMessage}`);
    logger.error(`[SEARCH] Error stack:`, failureStack);

    const retrievedAt = new Date().toISOString().replace('T', ' ').substring(0, 19);
    return {
      metadata: {
        query,
        total_records: '0',
        records_returned: 0,
        date_retrieved: retrievedAt,
      },
      records: [],
      error: failureMessage,
      parameters: params,
    };
  }
}
// - src/gallica/search.ts:68-172 (handler)XML parsing logic that parses SRU XML responses and extracts Dublin Core fields (title, creator, subject, etc.) and Gallica URLs from records.
/**
 * Parses an SRU `searchRetrieveResponse` XML document into a `SearchResult`.
 *
 * For every record, the fifteen Dublin Core fields listed in `dcFields` are
 * read (with or without the `dc:` prefix). A field with one non-empty value is
 * stored as a string, a field with several as a string array, and a field with
 * none is omitted. The first identifier containing `gallica.bnf.fr/ark:` is
 * additionally exposed as `gallica_url`.
 *
 * Parsing failures never propagate: they are logged and reported through the
 * `error` field of an otherwise empty result.
 *
 * @param xmlBody - Raw XML text of the SRU response.
 * @param query - The query that produced the response; echoed in the metadata.
 * @returns The parsed records plus metadata, or an empty result with `error` set.
 */
private parseSruResponse(xmlBody: string, query: string): SearchResult {
  // "YYYY-MM-DD HH:MM:SS" derived from the ISO timestamp.
  const retrievedAt = (): string => new Date().toISOString().replace('T', ' ').substring(0, 19);

  // Converts one parsed element into trimmed text, or '' when it carries no text.
  // Elements without a '#text' entry (and null values) yield '' so that
  // "[object Object]" / "null" never leak into the results.
  const toText = (node: unknown): string => {
    if (typeof node === 'string') return node.trim();
    if (node === null || node === undefined) return '';
    if (typeof node === 'object') {
      if (!('#text' in node)) return '';
      const text = node['#text'];
      if (text === null || text === undefined || typeof text === 'object') return '';
      return String(text).trim();
    }
    // Non-string scalars — presumably numbers/booleans produced by the XML
    // parser's value coercion; TODO confirm against the parser options.
    return String(node).trim();
  };

  try {
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      parseAttributeValue: true,
    });
    const result = parser.parse(xmlBody);

    // Navigate through SRU response structure (namespaced or bare element names).
    const sruResponse = result['srw:searchRetrieveResponse'] || result.searchRetrieveResponse;
    if (!sruResponse) {
      throw new Error('Invalid SRU response structure');
    }

    const numberOfRecords =
      sruResponse['srw:numberOfRecords']?.['#text'] ||
      sruResponse.numberOfRecords?.['#text'] ||
      sruResponse['srw:numberOfRecords'] ||
      sruResponse.numberOfRecords ||
      '0';

    const records = sruResponse['srw:records']?.['srw:record'] || sruResponse.records?.record || [];
    // A single record is not wrapped in an array, so normalise to one.
    const recordsArray = Array.isArray(records) ? records : records ? [records] : [];

    // Dublin Core fields to extract; hoisted so the list is built once.
    const dcFields = [
      'title',
      'creator',
      'contributor',
      'publisher',
      'date',
      'description',
      'type',
      'format',
      'identifier',
      'source',
      'language',
      'relation',
      'coverage',
      'rights',
      'subject',
    ];

    const parsedRecords: Array<Record<string, string | string[] | undefined>> = [];

    for (const record of recordsArray) {
      const recordData =
        record['srw:recordData']?.['oai_dc:dc'] ||
        record.recordData?.['oai_dc:dc'] ||
        record['srw:recordData'] ||
        record.recordData;

      if (!recordData) continue;

      const recordDict: Record<string, string | string[] | undefined> = {};

      for (const field of dcFields) {
        // Prefer the prefixed element; fall back to the bare name only when the
        // prefixed one is absent or empty. Explicit checks (rather than `||`)
        // keep legitimate falsy scalars such as 0 or false.
        const prefixed = recordData[`dc:${field}`];
        const elements =
          prefixed === undefined || prefixed === null || prefixed === '' ? recordData[field] : prefixed;
        if (elements === undefined || elements === null) continue;

        const values: unknown[] = Array.isArray(elements) ? elements : [elements];
        const textValues = values.map(toText).filter((text) => text.length > 0);

        const [firstValue] = textValues;
        if (firstValue === undefined) continue;
        recordDict[field] = textValues.length === 1 ? firstValue : textValues;
      }

      // Extract Gallica URL from identifiers (first match wins).
      const identifiers = recordDict.identifier;
      if (identifiers) {
        const idArray = Array.isArray(identifiers) ? identifiers : [identifiers];
        const gallicaUrl = idArray.find((identifier) => identifier.includes('gallica.bnf.fr/ark:'));
        if (gallicaUrl !== undefined) {
          recordDict.gallica_url = gallicaUrl;
        }
      }

      parsedRecords.push(recordDict);
    }

    return {
      metadata: {
        query,
        total_records: String(numberOfRecords),
        records_returned: parsedRecords.length,
        date_retrieved: retrievedAt(),
      },
      records: parsedRecords,
    };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`Error parsing XML response: ${message}`);
    return {
      metadata: {
        query,
        total_records: '0',
        records_returned: 0,
        date_retrieved: retrievedAt(),
      },
      records: [],
      error: `XML parsing error: ${message}`,
    };
  }
}
// - src/mcpServer.ts:81-106 (registration)Tool registration - creates the advancedSearch tool instance and adds it to the tools array for MCP server registration.
const advancedSearch = createAdvancedSearchTool(searchApi); const naturalLanguageSearch = createNaturalLanguageSearchTool(searchApi); // Register extended item tools (4 new tools) const getItemDetails = createGetItemDetailsTool(itemsClient); const getItemPages = createGetItemPagesTool(itemsClient); const getPageImage = createGetPageImageTool(iiifClient); const getPageText = createGetPageTextTool(textClient); // Register sequential reporting tool const sequentialReporting = createSequentialReportingTool(reportingServer); // Register all tools with error handling const tools = [ searchByTitle, searchByAuthor, searchBySubject, searchByDate, searchByDocumentType, advancedSearch, naturalLanguageSearch, getItemDetails, getItemPages, getPageImage, getPageText, sequentialReporting,