pdf4vllm

table_converter.py•2.79 KiB

""" Table conversion utility Convert pdfplumber tables to Markdown format Includes merged cell handling """ from typing import List def fill_merged_cells(table: List[List]) -> List[List]: """ Handle merged cells: fill empty cells (None) with value from above Args: table: pdfplumber extract_tables() result (list of lists) Returns: Table with merged cells filled """ if not table or len(table) == 0: return table # Process each column filled_table = [row[:] for row in table] # Copy for col_idx in range(len(filled_table[0])): last_value = None for row in filled_table: if col_idx >= len(row): continue cell = row[col_idx] # Copy value from above if cell is empty if cell is None or (isinstance(cell, str) and cell.strip() == ''): row[col_idx] = last_value or '' else: # Store value if present last_value = cell return filled_table def convert_table_to_markdown(table: List[List]) -> str: """ Convert pdfplumber table to Markdown table Args: table: pdfplumber extract_tables() result (list of lists) Returns: Markdown format string """ if not table or len(table) == 0: return "" # Handle merged cells filled_table = fill_merged_cells(table) # Convert None to empty string cleaned_table = [] for row in filled_table: cleaned_row = [str(cell).strip() if cell is not None else "" for cell in row] cleaned_table.append(cleaned_row) # Column count max_cols = max(len(row) for row in cleaned_table) # Normalize all rows to same column count normalized_table = [] for row in cleaned_table: while len(row) < max_cols: row.append("") normalized_table.append(row[:max_cols]) if len(normalized_table) == 0: return "" # Generate Markdown lines = [] # First row as header header = normalized_table[0] lines.append("| " + " | ".join(header) + " |") # Separator line lines.append("| " + " | ".join(["---"] * max_cols) + " |") # Data rows for row in normalized_table[1:]: lines.append("| " + " | ".join(row) + " |") return "\n".join(lines) def convert_tables_to_markdown(tables: List[List[List[str]]]) -> List[str]: """ Convert multiple tables to Markdown Args: tables: pdfplumber extract_tables() result (list of tables) Returns: List of Markdown strings """ markdown_tables = [] for i, table in enumerate(tables): md = convert_table_to_markdown(table) if md: # Add table number markdown_tables.append(f"**Table {i+1}**\n\n{md}") return markdown_tables

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PyJudge/pdf4vllm-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

table_converter.py•2.79 KiB