"""Tests for LLM-based table transcription evaluation workflow.
Covers:
- TestMarkdownParsing: pipe table parsing, footnotes, escaped pipes, empty cells,
code fences, malformed input
- TestPromptGeneration: manifest has rawtext_path, rawtext.txt created
- TestEvaluation: evaluate module importable, synthetic response scored correctly
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
from zotero_chunk_rag.feature_extraction.ground_truth import (
create_ground_truth_db,
insert_ground_truth,
)
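# Layout assumption (inferred from the imports below): this module lives one
# directory below tests/, so parents[1] resolves to the tests/ package root.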
TESTS_DIR = Path(__file__).resolve().parents[1]
LLM_STRUCTURE_DIR = TESTS_DIR / "llm_structure"
FIXTURES_DIR = TESTS_DIR / "fixtures" / "papers"
# ============================================================================
# Markdown parsing
# ============================================================================
class TestMarkdownParsing:
"""Tests for parse_markdown.py."""
def test_basic_pipe_table(self):
"""A standard 3-column pipe table is parsed correctly."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| A | B | C |\n"
"| --- | --- | --- |\n"
"| 1 | 2 | 3 |\n"
"| 4 | 5 | 6 |\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert headers == ["A", "B", "C"]
assert len(rows) == 2
assert rows[0] == ["1", "2", "3"]
assert rows[1] == ["4", "5", "6"]
assert footnotes == ""
def test_footnotes_after_table(self):
"""Text after the last pipe line is captured as footnotes."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| X | Y |\n"
"| --- | --- |\n"
"| a | b |\n"
"\n"
"* p < 0.05\n"
"† Adjusted for age\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert headers == ["X", "Y"]
assert len(rows) == 1
assert "p < 0.05" in footnotes
assert "Adjusted for age" in footnotes
def test_escaped_pipes(self):
"""Escaped pipes (\\|) inside cells are preserved."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| Col1 | Col2 |\n"
"| --- | --- |\n"
"| a \\| b | c |\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert headers == ["Col1", "Col2"]
assert rows[0][0] == "a | b"
assert rows[0][1] == "c"
def test_empty_cells(self):
"""Empty cells are parsed as empty strings."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| A | B | C |\n"
"| --- | --- | --- |\n"
"| 1 | | 3 |\n"
"| | 5 | |\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert rows[0] == ["1", "", "3"]
assert rows[1] == ["", "5", ""]
def test_code_fences_stripped(self):
"""Code fences around the table are removed before parsing."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"```markdown\n"
"| A | B |\n"
"| --- | --- |\n"
"| 1 | 2 |\n"
"```\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert headers == ["A", "B"]
assert len(rows) == 1
assert rows[0] == ["1", "2"]
def test_short_rows_padded(self):
"""Rows with fewer cells than headers are padded."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| A | B | C |\n"
"| --- | --- | --- |\n"
"| 1 | 2 |\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert len(rows[0]) == 3
assert rows[0] == ["1", "2", ""]
def test_no_table_returns_empty(self):
"""Input with no pipe characters returns empty headers/rows."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = "This is just plain text with no table."
headers, rows, footnotes = parse_markdown_table(text)
assert headers == []
assert rows == []
assert footnotes == text.strip()
def test_load_response(self, tmp_path: Path):
"""load_response reads a file and parses the table."""
from tests.llm_structure.parse_markdown import load_response
md = (
"| Name | Score |\n"
"| --- | --- |\n"
"| Alice | 92 |\n"
"| Bob | 85 |\n"
)
response_path = tmp_path / "response_test.md"
response_path.write_text(md, encoding="utf-8")
headers, rows, footnotes = load_response(response_path)
assert headers == ["Name", "Score"]
assert len(rows) == 2
def test_separator_without_colons(self):
"""Separator line with just dashes (no colons) is recognized."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| A | B |\n"
"|---|---|\n"
"| 1 | 2 |\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert headers == ["A", "B"]
assert len(rows) == 1
def test_separator_with_colons(self):
"""Separator line with alignment colons is recognized."""
from tests.llm_structure.parse_markdown import parse_markdown_table
text = (
"| A | B | C |\n"
"| :--- | :---: | ---: |\n"
"| 1 | 2 | 3 |\n"
)
headers, rows, footnotes = parse_markdown_table(text)
assert headers == ["A", "B", "C"]
assert len(rows) == 1
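    # The two separator tests above exercise the same recognition logic; a
    # possible consolidation with pytest.mark.parametrize (a sketch, not a
    # convention this suite currently uses):
    @pytest.mark.parametrize(
        "separator",
        ["|---|---|", "| --- | --- |", "| :--- | ---: |"],
    )
    def test_separator_variants_sketch(self, separator: str):
        """Sketch: dash-only and colon-aligned separators are recognized."""
        from tests.llm_structure.parse_markdown import parse_markdown_table
        text = f"| A | B |\n{separator}\n| 1 | 2 |\n"
        headers, rows, _footnotes = parse_markdown_table(text)
        assert headers == ["A", "B"]
        assert rows == [["1", "2"]]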
# ============================================================================
# Prompt generation
# ============================================================================
class TestPromptGeneration:
"""Tests for generate_prompts.py."""
def test_script_importable(self):
"""generate_prompts module imports without error."""
from tests.llm_structure import generate_prompts
assert hasattr(generate_prompts, "generate_all")
assert hasattr(generate_prompts, "generate_for_table")
assert hasattr(generate_prompts, "render_table_png")
assert hasattr(generate_prompts, "extract_rawtext")
def test_manifest_has_rawtext_path(self, tmp_path: Path):
"""Generation produces manifest entries with rawtext_path field."""
fixture_pdf = FIXTURES_DIR / "noname1.pdf"
if not fixture_pdf.exists():
pytest.skip(f"Fixture PDF not found: {fixture_pdf}")
db_path = tmp_path / "test_gt.db"
create_ground_truth_db(db_path)
insert_ground_truth(
db_path=db_path,
table_id="TEST_table_1",
paper_key="TEST",
page_num=1,
caption="Table 1 Test table",
headers=["A", "B"],
rows=[["1", "2"], ["3", "4"]],
)
pdf_paths = {"TEST": fixture_pdf}
from tests.llm_structure.generate_prompts import generate_all
with (
patch(
"tests.llm_structure.generate_prompts.TABLES_DIR",
tmp_path / "tables",
),
patch(
"tests.llm_structure.generate_prompts.MANIFEST_PATH",
tmp_path / "manifest.json",
),
patch(
"tests.llm_structure.generate_prompts.LLM_STRUCTURE_DIR",
tmp_path,
),
):
entries = generate_all(pdf_paths, db_path=db_path)
assert len(entries) >= 1
entry = entries[0]
assert "rawtext_path" in entry
assert entry["rawtext_path"].endswith("rawtext.txt")
# Verify rawtext.txt was actually created
rawtext_path = tmp_path / "tables" / "TEST_table_1" / "rawtext.txt"
assert rawtext_path.exists()
rawtext_content = rawtext_path.read_text(encoding="utf-8")
assert len(rawtext_content) > 0 # Should have some text
# Verify PNG still created
png_path = tmp_path / "tables" / "TEST_table_1" / "table.png"
assert png_path.exists()
assert png_path.stat().st_size > 100
def test_no_prompt_md_generated(self, tmp_path: Path):
"""Generation no longer creates prompt.md files."""
fixture_pdf = FIXTURES_DIR / "noname1.pdf"
if not fixture_pdf.exists():
pytest.skip(f"Fixture PDF not found: {fixture_pdf}")
db_path = tmp_path / "test_gt.db"
create_ground_truth_db(db_path)
insert_ground_truth(
db_path=db_path,
table_id="TEST_table_1",
paper_key="TEST",
page_num=1,
caption="Table 1 Test table",
headers=["A", "B"],
rows=[["1", "2"], ["3", "4"]],
)
pdf_paths = {"TEST": fixture_pdf}
from tests.llm_structure.generate_prompts import generate_all
with (
patch(
"tests.llm_structure.generate_prompts.TABLES_DIR",
tmp_path / "tables",
),
patch(
"tests.llm_structure.generate_prompts.MANIFEST_PATH",
tmp_path / "manifest.json",
),
patch(
"tests.llm_structure.generate_prompts.LLM_STRUCTURE_DIR",
tmp_path,
),
):
generate_all(pdf_paths, db_path=db_path)
prompt_path = tmp_path / "tables" / "TEST_table_1" / "prompt.md"
assert not prompt_path.exists()
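    # Both generation tests repeat the same DB seeding; a possible shared
    # helper (a sketch -- `_seeded_db` is hypothetical, not part of the
    # existing suite):
    @staticmethod
    def _seeded_db(tmp_path: Path) -> Path:
        db_path = tmp_path / "test_gt.db"
        create_ground_truth_db(db_path)
        insert_ground_truth(
            db_path=db_path,
            table_id="TEST_table_1",
            paper_key="TEST",
            page_num=1,
            caption="Table 1 Test table",
            headers=["A", "B"],
            rows=[["1", "2"], ["3", "4"]],
        )
        return db_path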
# ============================================================================
# Evaluation
# ============================================================================
class TestEvaluation:
"""Tests for evaluate.py."""
def test_module_importable(self):
"""evaluate module imports without error."""
from tests.llm_structure import evaluate
assert hasattr(evaluate, "evaluate_response")
assert hasattr(evaluate, "evaluate_all")
assert hasattr(evaluate, "find_responses")
def test_synthetic_response_scored(self, tmp_path: Path):
"""A synthetic response that exactly matches GT scores 100%."""
db_path = tmp_path / "test_gt.db"
create_ground_truth_db(db_path)
insert_ground_truth(
db_path=db_path,
table_id="TEST_table_1",
paper_key="TEST",
page_num=1,
caption="Table 1 Test table",
headers=["A", "B", "C"],
rows=[["1", "2", "3"], ["4", "5", "6"]],
)
# Create a response that exactly matches
response_text = (
"| A | B | C |\n"
"| --- | --- | --- |\n"
"| 1 | 2 | 3 |\n"
"| 4 | 5 | 6 |\n"
)
response_path = tmp_path / "response_test.md"
response_path.write_text(response_text, encoding="utf-8")
from tests.llm_structure.evaluate import evaluate_response
result = evaluate_response(
table_id="TEST_table_1",
model="test",
response_path=response_path,
db_path=db_path,
)
assert result is not None
assert result.cell_accuracy_pct == 100.0
assert result.fuzzy_accuracy_pct == 100.0
assert result.num_cell_diffs == 0
assert result.num_header_diffs == 0
def test_mismatched_response_scored(self, tmp_path: Path):
"""A response with wrong values scores below 100%."""
db_path = tmp_path / "test_gt.db"
create_ground_truth_db(db_path)
insert_ground_truth(
db_path=db_path,
table_id="TEST_table_1",
paper_key="TEST",
page_num=1,
caption="Table 1 Test table",
headers=["A", "B"],
rows=[["1", "2"], ["3", "4"]],
)
# Create a response with one wrong value
response_text = (
"| A | B |\n"
"| --- | --- |\n"
"| 1 | WRONG |\n"
"| 3 | 4 |\n"
)
response_path = tmp_path / "response_test.md"
response_path.write_text(response_text, encoding="utf-8")
from tests.llm_structure.evaluate import evaluate_response
result = evaluate_response(
table_id="TEST_table_1",
model="test",
response_path=response_path,
db_path=db_path,
)
assert result is not None
assert result.cell_accuracy_pct < 100.0
assert result.num_cell_diffs >= 1
def test_find_responses(self, tmp_path: Path):
"""find_responses discovers response_*.md files."""
table_dir = tmp_path / "tables" / "TEST_table_1"
table_dir.mkdir(parents=True)
(table_dir / "response_sonnet.md").write_text("| A |\n| --- |\n| 1 |\n")
(table_dir / "response_haiku.md").write_text("| A |\n| --- |\n| 1 |\n")
(table_dir / "table.png").write_bytes(b"fake png")
from tests.llm_structure.evaluate import find_responses
results = find_responses(tmp_path / "tables")
assert len(results) == 2
models = {r[1] for r in results}
assert "sonnet" in models
assert "haiku" in models
# ============================================================================
# Comparison
# ============================================================================
class TestComparison:
"""Tests for compare.py."""
def test_module_importable(self):
"""compare module imports without error."""
from tests.llm_structure import compare
assert hasattr(compare, "generate_report")
def test_report_generation(self, tmp_path: Path):
"""generate_report produces a markdown report."""
from tests.llm_structure.compare import generate_report
from tests.llm_structure.evaluate import EvaluationResult
llm_results = [
EvaluationResult(
table_id="TEST_table_1",
model="sonnet",
fuzzy_accuracy_pct=85.0,
fuzzy_precision_pct=90.0,
fuzzy_recall_pct=80.0,
cell_accuracy_pct=80.0,
structural_coverage_pct=100.0,
gt_shape=(3, 2),
ext_shape=(3, 2),
num_cell_diffs=1,
num_header_diffs=0,
num_extra_cols=0,
num_missing_cols=0,
num_extra_rows=0,
num_missing_rows=0,
footnote_match=None,
),
]
pipeline_accs = {"TEST_table_1": 70.0}
output_path = tmp_path / "report.md"
        report = generate_report(
            llm_results, pipeline_accs, output_path=output_path
        )
assert output_path.exists()
assert "LLM vs Pipeline" in report
assert "TEST_table_1" in report
assert "sonnet" in report
# ============================================================================
# Template
# ============================================================================
class TestPromptTemplate:
"""Tests for the prompt template file."""
def test_template_exists(self):
"""Template file exists and is non-empty."""
template_path = LLM_STRUCTURE_DIR / "prompt_template.md"
assert template_path.exists(), f"Template not found at {template_path}"
content = template_path.read_text(encoding="utf-8")
assert len(content) > 100, "Template is too short"
def test_template_describes_markdown_output(self):
"""Template instructs markdown pipe table output, not JSON coordinates."""
template_path = LLM_STRUCTURE_DIR / "prompt_template.md"
content = template_path.read_text(encoding="utf-8")
# Should describe markdown output
assert "pipe" in content.lower() or "GFM" in content
assert "Phase 1" in content
assert "Phase 2" in content
# Should NOT describe the old coordinate-based approach
assert '"position"' not in content