Skip to main content
Glama
table.py (2.58 kB)
import logging
from typing import Iterable, List, Optional

try:
    import pdfplumber
except ImportError:
    # Table extraction is an optional enhancement, so a missing pdfplumber
    # must not prevent this module from being imported.
    pdfplumber = None

logger = logging.getLogger(__name__)


class TableExtractor:
    """Extract tables from a PDF file and render each one as Markdown."""

    def __init__(self):
        pass

    def extract_tables(
        self, source_path: str, page_range: Optional[Iterable[int]] = None
    ) -> List[str]:
        """
        Extract tables from PDF and convert to Markdown format.

        Extraction is best-effort: failures are logged as warnings and never
        raised, and whatever tables were collected so far are still returned.

        Args:
            source_path: Path to the local PDF file.
            page_range: Zero-based page indices to process (typically a
                ``range``). Indices outside the document are skipped.
                ``None`` processes every page.

        Returns:
            List of Markdown formatted tables, each preceded by a
            ``**Table (Page N)**`` label where N is the one-based page number.
        """
        tables_md: List[str] = []

        if pdfplumber is None:
            logger.warning("Table extraction skipped: pdfplumber is not installed")
            return tables_md

        try:
            with pdfplumber.open(source_path) as pdf:
                page_count = len(pdf.pages)
                if page_range is None:
                    page_range = range(page_count)

                for page_num in page_range:
                    if not 0 <= page_num < page_count:
                        continue

                    # Isolate failures per page so that one unparsable page
                    # does not discard the tables of the pages after it.
                    try:
                        extracted_tables = pdf.pages[page_num].extract_tables()
                    except Exception as exc:
                        logger.warning(
                            "Table extraction failed on page %d: %s",
                            page_num + 1,
                            exc,
                        )
                        continue

                    for table_data in extracted_tables:
                        md_table = self._convert_to_markdown(table_data)
                        if md_table:
                            tables_md.append(
                                f"**Table (Page {page_num + 1})**\n{md_table}"
                            )
        except Exception as exc:
            # Deliberately broad: table extraction is an optional enhancement,
            # so an unreadable or missing file degrades to "no tables".
            # Reported through logging rather than print() so the diagnostic
            # does not end up on stdout, which the host process may reserve
            # for its own output.
            logger.warning("Table extraction failed: %s", exc)

        return tables_md

    @staticmethod
    def _clean_cell(cell) -> str:
        """Return a single-line, Markdown-table-safe string for one cell."""
        if cell is None:
            return ""
        text = str(cell).replace("\n", " ").strip()
        # An unescaped pipe would be read as a column delimiter.
        return text.replace("|", "\\|")

    def _convert_to_markdown(self, data: List[List[Optional[str]]]) -> str:
        """
        Convert a list of lists (table data) to a Markdown table string.

        The first row is used as the header. Every following row is padded
        with empty cells or truncated so that it has exactly as many columns
        as the header.

        Args:
            data: Table rows; a cell may be ``None`` for an empty cell.

        Returns:
            The Markdown table, or an empty string when ``data`` is empty or
            its header row has no columns.
        """
        if not data:
            return ""

        cleaned_data = [[self._clean_cell(cell) for cell in row] for row in data]

        header = cleaned_data[0]
        width = len(header)
        if width == 0:
            # A table without columns cannot be rendered meaningfully.
            return ""

        lines = [
            "| " + " | ".join(header) + " |",
            "| " + " | ".join(["---"] * width) + " |",
        ]
        for row in cleaned_data[1:]:
            # Pad short rows and truncate long ones to the header width.
            normalized = (row + [""] * width)[:width]
            lines.append("| " + " | ".join(normalized) + " |")

        return "\n".join(lines)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/rexfelix/readPDF_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.