get_tables
Extract table structures from a specified section of the PDF specification, returning tables with headers, rows, and optional captions.
Instructions
Extract table structures from a specified section of the PDF specification (ISO 32000-2). Returns tables with headers, rows, and optional captions.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| spec | No | Specification ID (e.g., "iso32000-2", "ts32002", "pdfua2"). Use list_specs to see available specs. Default: "iso32000-2" (PDF 2.0). | |
| section | Yes | Section identifier (e.g., "7.3.4", "12.8", "Annex A") | |
| table_index | No | Optional 0-based index to retrieve a specific table. If omitted, returns all tables in the section. | |
Implementation Reference
- src/tools/handlers.ts:150-156 (handler) The tool handler function for 'get_tables'. Receives GetTablesArgs, validates inputs (spec, section, table_index), then delegates to the getTables service function.
/**
 * Tool handler for `get_tables`.
 *
 * Passes each argument through its validator, makes sure the spec registry is
 * initialized, and then delegates to the `getTables` service function.
 *
 * @param args - Raw tool arguments (`spec`, `section`, `table_index`).
 * @returns Whatever `getTables` resolves to for the requested section.
 */
async function handleGetTables(args: GetTablesArgs) {
  const { spec, section, table_index: requestedIndex } = args;

  // All three validators run before the registry is touched.
  const resolvedSpec = validateSpecId(spec);
  validateSectionId(section);
  const index = validateTableIndex(requestedIndex);

  await ensureRegistryInitialized();

  return getTables(section, index, resolvedSpec);
}
// - src/services/pdf-service.ts:359-395 (handler) The main implementation in PDFSpecService.getTables(). First tries StructTree-based extraction (collectStructTreeTables), falls back to text-based detection (detectTablesFromText). Supports optional table_index to return a single table.
/**
 * Extracts the tables found in one section of a specification.
 *
 * Tables are taken from StructTree `table` elements when any exist
 * (`collectStructTreeTables`); otherwise text-based detection
 * (`detectTablesFromText`) is used as a fallback.
 *
 * @param sectionId - Section identifier handed to `getSectionContent`.
 * @param tableIndex - Optional 0-based index. When given, only that table is
 *   returned and `totalTables` is reported as 1.
 * @param specId - Optional specification ID, resolved through the registry.
 * @returns The section number and title plus the selected table(s).
 * @throws ContentError when `tableIndex` is not a non-negative integer smaller
 *   than the number of tables in the section.
 */
public async getTables(
  sectionId: string,
  tableIndex?: number,
  specId?: string
): Promise<TablesResult> {
  const id = this.registry.resolveSpecId(specId);
  const result = await this.getSectionContent(sectionId, id);

  // Collect tables from StructTree (type: 'table')
  let tables: TableInfo[] = collectStructTreeTables(result.content);

  // Fallback: text-based table detection if StructTree has no tables
  if (tables.length === 0) {
    tables = detectTablesFromText(result.content);
  }

  if (tableIndex !== undefined) {
    // A negative, fractional, or NaN index passes a plain `>= length` check
    // and would produce `tables: [undefined]`; treat it as out of range too.
    const selected =
      Number.isInteger(tableIndex) && tableIndex >= 0 ? tables[tableIndex] : undefined;
    if (selected === undefined) {
      throw new ContentError(
        `table_index ${tableIndex} out of range. Section "${sectionId}" has ${tables.length} table(s).`
      );
    }
    return {
      section: result.sectionNumber,
      sectionTitle: result.title,
      totalTables: 1,
      tables: [selected],
    };
  }

  return {
    section: result.sectionNumber,
    sectionTitle: result.title,
    totalTables: tables.length,
    tables,
  };
}
// - src/types/index.ts:269-273 (schema) Input type definition for get_tables arguments: spec (optional), section (required), table_index (optional).
/** Arguments accepted by the `get_tables` tool. */
export interface GetTablesArgs {
  /** Specification ID, e.g. "iso32000-2"; optional. */
  spec?: string;

  /** Section identifier, e.g. "7.3.4", "12.8" or "Annex A"; required. */
  section: string;

  /** 0-based index of a single table to return; optional. */
  table_index?: number;
}
// - src/types/index.ts:262-267 (schema) Return type definition for get_tables: section number, section title, totalTables count, and array of TableInfo objects.
/** Result returned by `get_tables`. */
export interface TablesResult {
  /** Section number the tables were extracted from. */
  section: string;

  /** Title of that section. */
  sectionTitle: string;

  /** Table count reported alongside `tables`. */
  totalTables: number;

  /** The extracted tables. */
  tables: TableInfo[];
}
// - src/tools/definitions.ts:146-168 (registration) MCP tool definition (name, description, inputSchema) for 'get_tables'. Registers it with the SDK tool list.
// Tool definition entry for `get_tables`: only `section` is required.
{
  name: 'get_tables',
  description:
    'Extract table structures from a specified section of the PDF specification (ISO 32000-2). Returns tables with headers, rows, and optional captions.',
  inputSchema: {
    type: 'object',
    properties: {
      spec: SPEC_PARAM,
      section: {
        type: 'string',
        description: 'Section identifier (e.g., "7.3.4", "12.8", "Annex A")',
      },
      table_index: {
        type: 'number',
        description:
          'Optional 0-based index to retrieve a specific table. If omitted, returns all tables in the section.',
      },
    },
    required: ['section'],
  },
},
// - src/tools/handlers.ts:194-203 (registration) Tool handler registry mapping 'get_tables' string to handleGetTables function.
/**
 * Registry mapping each tool name to its handler function.
 * `as const` keeps the keys and handler types literal.
 */
export const toolHandlers = {
  list_specs: handleListSpecs,
  get_structure: handleGetStructure,
  get_section: handleGetSection,
  search_spec: handleSearchSpec,
  get_requirements: handleGetRequirements,
  get_definitions: handleGetDefinitions,
  get_tables: handleGetTables,
  compare_versions: handleCompareVersions,
} as const;
// - src/services/pdf-service.ts:405-441 (helper) Helper: collectStructTreeTables - Extracts tables from StructTree-based content elements (type: 'table'), merges continuation tables with same headers, and attaches preceding captions.
/**
 * Collects tables from StructTree-based content elements.
 *
 * Every element whose `type` is `'table'` becomes a `TableInfo`. A paragraph
 * directly before a table whose text starts with "Table <number>" is attached
 * as that table's caption. A caption-less table with non-empty headers that
 * `arraysEqual` reports as equal to the previously collected table's headers
 * is treated as a continuation and its rows are appended to that table.
 *
 * Header and row arrays are shallow-copied into the result, so merging a
 * continuation never mutates `content`; otherwise repeated calls on the same
 * content would keep appending duplicate rows to the first table element.
 *
 * @param content - Section content elements in document order.
 * @returns Collected tables, indexed in order of appearance.
 */
function collectStructTreeTables(content: ContentElement[]): TableInfo[] {
  // Hoisted so the pattern is not rebuilt for every table element.
  const captionPattern = /^Table\s+\d+/;
  const tables: TableInfo[] = [];

  for (let i = 0; i < content.length; i++) {
    const element = content[i];
    if (element.type !== 'table') continue;

    // Check for caption in preceding paragraph
    let caption: string | null = null;
    if (i > 0) {
      const prev = content[i - 1];
      if (prev.type === 'paragraph' && captionPattern.test(prev.text)) {
        caption = prev.text;
      }
    }

    // Merge with previous table if this is a continuation (same headers, no caption)
    const previous = tables.length > 0 ? tables[tables.length - 1] : undefined;
    if (
      !caption &&
      previous !== undefined &&
      element.headers.length > 0 &&
      arraysEqual(previous.headers, element.headers)
    ) {
      // `previous.rows` is this function's own copy, so appending is safe.
      for (const row of element.rows) {
        previous.rows.push(row);
      }
      continue;
    }

    tables.push({
      index: tables.length,
      caption,
      headers: [...element.headers],
      rows: [...element.rows],
    });
  }

  return tables;
}
// - src/services/pdf-service.ts:457-516 (helper) Helper: detectTablesFromText - Fallback text-based table detection from paragraph patterns (Table N — Title caption format with tab/space-delimited rows).
/**
 * Fallback table detection for content that only has paragraphs.
 *
 * A paragraph matching "Table N — Title" (em dash, en dash, or hyphen) opens a
 * table. The paragraphs after it are read as table lines until one of these
 * stops the scan: a non-paragraph element, another caption, a line longer than
 * 300 characters without a tab, or a line that splits into fewer than two
 * cells. Lines are split on tabs when a tab is present, otherwise on runs of
 * two or more whitespace characters; empty cells are dropped. The first line
 * collected supplies the headers and the remaining lines the rows. A caption
 * with no usable lines produces no table.
 *
 * @param content - Section content elements in document order.
 * @returns Detected tables, indexed in order of appearance.
 */
function detectTablesFromText(content: ContentElement[]): TableInfo[] {
  const TABLE_CAPTION_RE = /^(Table\s+\d+)\s*[—–-]\s*(.+)/;

  // Splits one paragraph into trimmed, non-empty cells.
  const toCells = (text: string): string[] => {
    const pieces = text.includes('\t') ? text.split('\t') : text.split(/\s{2,}/);
    return pieces.map((piece) => piece.trim()).filter(Boolean);
  };

  const detected: TableInfo[] = [];

  content.forEach((candidate, start) => {
    if (candidate.type !== 'paragraph') return;
    if (candidate.text.match(TABLE_CAPTION_RE) === null) return;

    // Header line first, then data lines, in document order.
    const grid: string[][] = [];
    for (let k = start + 1; k < content.length; k++) {
      const line = content[k];
      if (line.type !== 'paragraph') break;
      if (TABLE_CAPTION_RE.test(line.text)) break;
      if (line.text.length > 300 && !line.text.includes('\t')) break;

      const cells = toCells(line.text);
      if (cells.length < 2) break;
      grid.push(cells);
    }

    if (grid.length === 0) return;

    const [headers, ...rows] = grid;
    detected.push({
      index: detected.length,
      caption: candidate.text,
      headers,
      rows,
    });
  });

  return detected;
}
// - src/services/pdf-service.ts:582-588 (helper) Backward-compatible exported function getTables that delegates to defaultPdfService.getTables().
/**
 * Module-level `getTables` that forwards to the default service instance.
 *
 * @param sectionId - Section identifier.
 * @param tableIndex - Optional index of a single table to return.
 * @param specId - Optional specification ID.
 * @returns The result of `defaultPdfService.getTables` for the same arguments.
 */
export async function getTables(
  sectionId: string,
  tableIndex?: number,
  specId?: string
): Promise<TablesResult> {
  const pending = defaultPdfService.getTables(sectionId, tableIndex, specId);
  return pending;
}