Zotero Chunk RAG

test_scoring.py•10.2 KiB

"""Tests for the quality scoring framework in zotero_chunk_rag.feature_extraction.scoring.""" from __future__ import annotations from pathlib import Path from unittest.mock import MagicMock from zotero_chunk_rag.feature_extraction.models import ( CellGrid, TableContext, ) from zotero_chunk_rag.feature_extraction.scoring import ( decimal_displacement_count, fill_rate, garbled_text_score, numeric_coherence, rank_and_select, ) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_ctx() -> TableContext: """Create a minimal TableContext with a mock page.""" page = MagicMock() def get_text_side_effect(fmt: str, **kwargs): # noqa: ANN001 if fmt == "words": return [] if fmt == "dict": return {"blocks": []} return "" page.get_text = MagicMock(side_effect=get_text_side_effect) page.get_drawings = MagicMock(return_value=[]) rect = MagicMock() rect.height = 842.0 rect.width = 595.0 page.rect = rect return TableContext( page=page, page_num=0, bbox=(0.0, 0.0, 595.0, 842.0), pdf_path=Path("/tmp/test.pdf"), ) def _make_grid( headers: tuple[str, ...], rows: tuple[tuple[str, ...], ...], method: str = "test", ) -> CellGrid: """Create a CellGrid with the given content.""" return CellGrid( headers=headers, rows=rows, col_boundaries=(0.0, 100.0, 200.0), row_boundaries=(0.0, 20.0, 40.0), method=method, ) # --------------------------------------------------------------------------- # TestFillRate # --------------------------------------------------------------------------- class TestFillRate: def test_full_grid(self) -> None: """All cells non-empty gives fill_rate = 1.0.""" grid = _make_grid( headers=("A", "B", "C"), rows=(("1", "2", "3"), ("4", "5", "6")), ) assert fill_rate(grid) == 1.0 def test_half_empty(self) -> None: """Half cells empty gives fill_rate ~= 0.5.""" grid = _make_grid( headers=("A", "B"), rows=(("1", ""), ("", "4")), ) # 4 non-empty (A, B, 1, 4) out 
of 6 total # Headers: A, B (2 non-empty) # Rows: 1, "", "", 4 (2 non-empty) # Total: 4 non-empty / 6 total result = fill_rate(grid) assert abs(result - 4.0 / 6.0) < 1e-9 def test_whitespace_is_empty(self) -> None: """Cells with only spaces/tabs count as empty.""" grid = _make_grid( headers=("A", " ", "\t"), rows=(("value", " ", "\t\t"),), ) # Non-empty: A, value = 2 out of 6 result = fill_rate(grid) assert abs(result - 2.0 / 6.0) < 1e-9 # --------------------------------------------------------------------------- # TestDecimalDisplacement # --------------------------------------------------------------------------- class TestDecimalDisplacement: def test_no_displacement(self) -> None: """Cells like '0.047', '1.23' have no displacement.""" grid = _make_grid( headers=("Col",), rows=(("0.047",), ("1.23",), ("42",)), ) assert decimal_displacement_count(grid) == 0 def test_with_displacement(self) -> None: """Cells like '.047', '.23' are displaced.""" grid = _make_grid( headers=("Col",), rows=((".047",), (".23",), ("0.5",)), ) assert decimal_displacement_count(grid) == 2 # --------------------------------------------------------------------------- # TestGarbledText # --------------------------------------------------------------------------- class TestGarbledText: def test_normal_text(self) -> None: """Typical academic text has garbled score near 0.0.""" grid = _make_grid( headers=("Variable", "Coefficient", "P-value"), rows=( ("Age", "0.045", "0.023"), ("Gender", "-0.12", "0.001"), ), ) assert garbled_text_score(grid) == 0.0 def test_garbled_cells(self) -> None: """Cells with 30+ char 'words' are garbled.""" long_word = "a" * 35 grid = _make_grid( headers=("Normal",), rows=((long_word,), ("fine text",)), ) score = garbled_text_score(grid) assert score > 0 def test_greek_excluded(self) -> None: """Cells with Greek characters are not flagged as garbled even with long tokens.""" # A long token that contains Greek letters greek_text = "\u03b1" * 30 # 30 alpha characters 
grid = _make_grid( headers=("Formula",), rows=((greek_text,), ("normal",)), ) score = garbled_text_score(grid) assert score == 0.0 # --------------------------------------------------------------------------- # TestNumericCoherence # --------------------------------------------------------------------------- class TestNumericCoherence: def test_coherent_columns(self) -> None: """Column of all numbers and column of all text gives coherence = 1.0.""" grid = _make_grid( headers=("Name", "Value"), rows=( ("Alice", "1.5"), ("Bob", "2.3"), ("Carol", "3.1"), ), ) result = numeric_coherence(grid) assert result == 1.0 def test_mixed_column(self) -> None: """Column mixing numbers and text gives coherence < 1.0.""" grid = _make_grid( headers=("Mixed",), rows=( ("1.5",), ("text",), ("2.3",), ("more text",), ("3.1",), ), ) # Column is >50% numeric (3/5 = 60%) but not all numeric -> not coherent result = numeric_coherence(grid) assert result < 1.0 # --------------------------------------------------------------------------- # TestRankAndSelect # --------------------------------------------------------------------------- class TestRankAndSelect: def test_better_grid_wins(self) -> None: """Grid A (fill=0.9, no displacement) beats Grid B (fill=0.5, displacement=3).""" ctx = _make_ctx() grid_a = _make_grid( headers=("A", "B", "C"), rows=(("1", "2", "3"), ("4", "5", "6"), ("7", "8", "9")), method="strategy_a", ) grid_b = _make_grid( headers=("A", "", ""), rows=((".047", "", ""), (".23", "x", ""), (".5", "", "")), method="strategy_b", ) winner, scores = rank_and_select([grid_a, grid_b], ctx) assert winner is grid_a assert scores["consensus:strategy_a"] < scores["consensus:strategy_b"] def test_single_grid_returned(self) -> None: """One grid is returned as winner with score 0.""" ctx = _make_ctx() grid = _make_grid( headers=("X",), rows=(("1",),), method="only_method", ) winner, scores = rank_and_select([grid], ctx) assert winner is grid assert len(scores) == 1 assert 
scores["consensus:only_method"] == 0.0 def test_empty_list_returns_none(self) -> None: """Empty list returns (None, {}).""" ctx = _make_ctx() winner, scores = rank_and_select([], ctx) assert winner is None assert scores == {} def test_scores_dict_populated(self) -> None: """scores_dict has one entry per grid, keyed by method, values are rank sums.""" ctx = _make_ctx() grid_a = _make_grid( headers=("A", "B"), rows=(("1", "2"), ("3", "4")), method="m1", ) grid_b = _make_grid( headers=("A", "B"), rows=(("1", "2"), ("3", "4")), method="m2", ) _winner, scores = rank_and_select([grid_a, grid_b], ctx) assert "consensus:m1" in scores assert "consensus:m2" in scores assert len(scores) == 2 # Values should be floats (rank sums) for v in scores.values(): assert isinstance(v, (int, float)) def test_ground_truth_mode(self) -> None: """Ground truth function makes grid A win even if other metrics are slightly worse.""" ctx = _make_ctx() # Grid A: slightly worse fill but high ground truth grid_a = _make_grid( headers=("A", ""), rows=(("1", ""), ("3", "")), method="gt_winner", ) # Grid B: better fill, no displacement, but low ground truth grid_b = _make_grid( headers=("A", "B"), rows=(("1", "2"), ("3", "4")), method="gt_loser", ) def ground_truth_fn(headers, rows): # noqa: ANN001 # grid_a has method "gt_winner" -- but we check by content # The function receives headers and rows, not the grid object. # We'll use the fill level to distinguish. 
all_cells = list(headers) for r in rows: all_cells.extend(r) non_empty = sum(1 for c in all_cells if c.strip()) if non_empty < len(all_cells): return 0.95 # grid_a (has empty cells) return 0.50 # grid_b (all full) winner, scores = rank_and_select([grid_a, grid_b], ctx, ground_truth_fn=ground_truth_fn) assert winner is grid_a def test_tie_breaking(self) -> None: """Two identical grids: either can be selected, no crash.""" ctx = _make_ctx() grid_a = _make_grid( headers=("A", "B"), rows=(("1", "2"), ("3", "4")), method="tied_a", ) grid_b = _make_grid( headers=("A", "B"), rows=(("1", "2"), ("3", "4")), method="tied_b", ) winner, scores = rank_and_select([grid_a, grid_b], ctx) assert winner is not None assert winner.method in ("tied_a", "tied_b") # Scores should be equal for tied grids assert scores["consensus:tied_a"] == scores["consensus:tied_b"]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ccam80/zotero-chunk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_scoring.py•10.2 KiB