Zotero Chunk RAG

test_combination.py•28 KiB

"""Tests for the boundary combination engine in zotero_chunk_rag.feature_extraction.combination.""" from __future__ import annotations from pathlib import Path from unittest.mock import MagicMock import pytest from zotero_chunk_rag.feature_extraction.combination import ( _compute_spatial_precision, _expand_point_estimate, _merge_overlapping, _scale_point, combine_hypotheses, ) from zotero_chunk_rag.feature_extraction.models import ( AxisTrace, BoundaryHypothesis, BoundaryPoint, ClusterRecord, CombinationTrace, TableContext, ) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_ctx( ruled_line_thickness: float | None = None, word_gap: float = 5.0, ) -> TableContext: """Create a TableContext with controlled properties via mock.""" page = MagicMock() def get_text_side_effect(fmt: str, **kwargs): # noqa: ANN001 if fmt == "words": return [] if fmt == "dict": return {"blocks": []} return "" page.get_text = MagicMock(side_effect=get_text_side_effect) page.get_drawings = MagicMock(return_value=[]) rect = MagicMock() rect.height = 842.0 rect.width = 595.0 page.rect = rect ctx = TableContext( page=page, page_num=0, bbox=(0.0, 0.0, 595.0, 842.0), pdf_path=Path("/tmp/test.pdf"), ) # Override cached_property values directly on the instance # (cached_property stores on instance __dict__, so we can set directly) ctx.__dict__["median_ruled_line_thickness"] = ruled_line_thickness ctx.__dict__["median_word_gap"] = word_gap ctx.__dict__["median_word_height"] = 10.0 return ctx def _bp(min_pos: float, max_pos: float, confidence: float = 0.8, provenance: str = "method_a") -> BoundaryPoint: """Shorthand BoundaryPoint constructor.""" return BoundaryPoint(min_pos=min_pos, max_pos=max_pos, confidence=confidence, provenance=provenance) # --------------------------------------------------------------------------- # TestExpandPointEstimates # 
# ---------------------------------------------------------------------------


class TestExpandPointEstimates:
    """Checks for ``_expand_point_estimate``."""

    def test_narrow_expanded(self) -> None:
        """Point (145.0, 145.5) with spatial_precision=3.0 is expanded."""
        narrow = _bp(145.0, 145.5)
        # spatial_precision=3.0 > span 0.5, so expansion triggers
        expanded = _expand_point_estimate(narrow, spatial_precision=3.0)

        # Midpoint = 145.25, expanded by spatial_precision 3.0 each side
        assert expanded.min_pos == 145.25 - 3.0  # 142.25
        assert expanded.max_pos == 145.25 + 3.0  # 148.25
        assert expanded.confidence == narrow.confidence
        assert expanded.provenance == narrow.provenance

    def test_wide_unchanged(self) -> None:
        """Point (140.0, 150.0) with spatial_precision=3.0 is unchanged (range >= precision)."""
        wide = _bp(140.0, 150.0)
        outcome = _expand_point_estimate(wide, spatial_precision=3.0)

        assert outcome.min_pos == 140.0
        assert outcome.max_pos == 150.0
        assert outcome is wide  # same object returned


# ---------------------------------------------------------------------------
# TestOverlapMerge
# ---------------------------------------------------------------------------


class TestOverlapMerge:
    """Checks for ``_merge_overlapping``."""

    def test_overlapping_merged(self) -> None:
        """Points (143, 146) and (145, 148) overlap and merge into one cluster."""
        clusters = _merge_overlapping([_bp(143, 146), _bp(145, 148)])
        # One cluster holding both points.
        assert [len(members) for members in clusters] == [2]

    def test_nonoverlapping_separate(self) -> None:
        """Points (140, 143) and (146, 149) don't overlap -- two separate clusters."""
        clusters = _merge_overlapping([_bp(140, 143), _bp(146, 149)])
        # Two clusters with one point each.
        assert [len(members) for members in clusters] == [1, 1]

    def test_adjacent_not_merged(self) -> None:
        """Points (140, 145) and (145.1, 150) have a 0.1pt gap -- two clusters."""
        clusters = _merge_overlapping([_bp(140, 145), _bp(145.1, 150)])
        assert len(clusters) == 2


# ---------------------------------------------------------------------------
# TestCombineHypotheses
# ---------------------------------------------------------------------------


def _at(pos: float, conf: float, method: str) -> BoundaryPoint:
    """Build a zero-width BoundaryPoint (min_pos == max_pos) attributed to ``method``."""
    return _bp(pos, pos, conf, method)


def _hyp(
    method: str,
    *cols: BoundaryPoint,
    rows: tuple[BoundaryPoint, ...] = (),
) -> BoundaryHypothesis:
    """Build a BoundaryHypothesis for ``method`` with the given boundaries and empty metadata."""
    return BoundaryHypothesis(
        col_boundaries=cols,
        row_boundaries=rows,
        method=method,
        metadata={},
    )


class TestCombineHypotheses:
    """End-to-end checks of ``combine_hypotheses`` without tracing."""

    def test_two_methods_agree(self) -> None:
        """Two hypotheses with overlapping column boundaries merge into one consensus boundary."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("method_a", _at(145.0, 0.9, "method_a")),
            _hyp("method_b", _at(146.0, 0.8, "method_b")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        assert len(consensus.col_boundaries) == 1
        # Mean confidence = (0.9 + 0.8) / 2 = 0.85
        assert consensus.col_boundaries[0].confidence == pytest.approx((0.9 + 0.8) / 2)
        assert consensus.method == "consensus"

    def test_three_methods_one_disagrees(self) -> None:
        """3 hypotheses: 2 agree at ~145, 1 at ~200. Outlier rejected by method count."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("method_a", _at(145.0, 0.9, "method_a")),
            _hyp("method_b", _at(146.0, 0.8, "method_b")),
            _hyp("method_c", _at(200.0, 0.7, "method_c")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Two clusters: ~145 (method_count=2), ~200 (method_count=1)
        # Median method count = 1.5. Only count >= 1.5 passes.
        assert len(consensus.col_boundaries) == 1

    def test_empty_hypotheses_list(self) -> None:
        """Empty input returns hypothesis with empty boundaries."""
        consensus = combine_hypotheses([], _make_ctx())

        assert consensus.col_boundaries == ()
        assert consensus.row_boundaries == ()
        assert consensus.method == "consensus"

    def test_single_hypothesis_passthrough(self) -> None:
        """One hypothesis: consensus equals input (single cluster accepted unconditionally)."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        only = _hyp(
            "method_a",
            _at(145.0, 0.9, "method_a"),
            rows=(_at(50.0, 0.85, "method_a"),),
        )

        consensus = combine_hypotheses([only], ctx)

        assert len(consensus.col_boundaries) == 1
        assert len(consensus.row_boundaries) == 1
        # Single cluster is accepted unconditionally
        assert consensus.col_boundaries[0].confidence == 0.9
        assert consensus.row_boundaries[0].confidence == 0.85

    def test_row_and_col_independent(self) -> None:
        """Column combination doesn't affect row combination and vice versa."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(100.0, 0.9, "m1"), rows=(_at(200.0, 0.9, "m1"),)),
            _hyp("m2", _at(300.0, 0.9, "m2"), rows=(_at(400.0, 0.9, "m2"),)),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Each position has method_count=1, median=1, all pass.
        col_positions = sorted(b.min_pos for b in consensus.col_boundaries)
        row_positions = sorted(b.min_pos for b in consensus.row_boundaries)
        assert all(90 < p < 310 for p in col_positions)
        assert all(190 < p < 410 for p in row_positions)
        assert len(consensus.col_boundaries) == 2
        assert len(consensus.row_boundaries) == 2


# ---------------------------------------------------------------------------
# TestAcceptanceThreshold
# ---------------------------------------------------------------------------


class TestAcceptanceThreshold:
    """Checks of which clusters survive the acceptance step."""

    def test_low_confidence_rejected(self) -> None:
        """Cluster with low method count is rejected by median method count threshold."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(145.0, 0.9, "m1")),
            _hyp("m2", _at(146.0, 0.9, "m2")),
            _hyp("m3", _at(145.0, 0.9, "m3"), _at(500.0, 0.01, "m3")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Cluster at ~145: method_count=3 (m1, m2, m3), cluster at ~500: method_count=1 (m3)
        # Median method count = 2. Count 1 < 2 -> rejected.
        positions = [b.min_pos for b in consensus.col_boundaries]
        assert len(consensus.col_boundaries) == 1
        assert abs(positions[0] - 145.0) < 5.0

    def test_single_high_confidence_accepted(self) -> None:
        """One cluster from one method with confidence 0.95 is accepted (only cluster, unconditional)."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        solo = _hyp("solo_method", _at(200.0, 0.95, "solo_method"))

        consensus = combine_hypotheses([solo], ctx)

        assert len(consensus.col_boundaries) == 1
        assert consensus.col_boundaries[0].confidence == 0.95

    def test_equal_method_agreement_all_pass(self) -> None:
        """All clusters have method_count=2 (both methods contribute), median=2, all pass."""
        ctx = _make_ctx(ruled_line_thickness=2.0)
        first = _hyp(
            "m1",
            _at(100.0, 0.5, "m1"),
            _at(200.0, 1.0, "m1"),
            _at(300.0, 5.0, "m1"),
            _at(400.0, 5.0, "m1"),
            _at(500.0, 5.0, "m1"),
        )
        second = _hyp(
            "m2",
            _at(100.5, 0.5, "m2"),
            _at(200.5, 1.0, "m2"),
            _at(300.5, 5.0, "m2"),
            _at(400.5, 5.0, "m2"),
            _at(500.5, 5.0, "m2"),
        )

        consensus = combine_hypotheses([first, second], ctx)

        # All 5 clusters have method_count=2, median=2, all pass.
        assert len(consensus.col_boundaries) == 5

    def test_single_cluster_always_accepted(self) -> None:
        """When multiple hypotheses merge into a single cluster, it is accepted unconditionally."""
        ctx = _make_ctx(ruled_line_thickness=2.0)
        hyps = [
            _hyp("m1", _at(100.0, 0.1, "m1")),
            _hyp("m2", _at(100.5, 0.1, "m2")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        assert len(consensus.col_boundaries) == 1


# ---------------------------------------------------------------------------
# TestMethodCountRejection
# ---------------------------------------------------------------------------


class TestMethodCountRejection:
    """Checks of rejection driven by how many methods back a cluster."""

    def test_lone_divider_rejected(self) -> None:
        """3 methods: m1+m2 agree at ~150, m1+m3 agree at ~300, m2 alone at ~500. ~500 rejected."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(150.0, 0.9, "m1"), _at(300.0, 0.9, "m1")),
            _hyp("m2", _at(150.5, 0.9, "m2"), _at(500.0, 0.9, "m2")),
            _hyp("m3", _at(300.5, 0.9, "m3")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Clusters: ~150 (m1,m2 -> count=2), ~300 (m1,m3 -> count=2), ~500 (m2 -> count=1)
        # Median method count = 2. Count 1 < 2 -> ~500 rejected.
        assert len(consensus.col_boundaries) == 2
        positions = sorted(b.min_pos for b in consensus.col_boundaries)
        assert abs(positions[0] - 150.0) < 5.0
        assert abs(positions[1] - 300.0) < 5.0

    def test_single_method_all_pass(self) -> None:
        """1 method with 3 col boundaries: passthrough (single hypothesis), all accepted."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        only = _hyp(
            "m1",
            _at(100.0, 0.9, "m1"),
            _at(200.0, 0.8, "m1"),
            _at(300.0, 0.7, "m1"),
        )

        consensus = combine_hypotheses([only], ctx)

        assert len(consensus.col_boundaries) == 3


# ---------------------------------------------------------------------------
# TestConfidenceMultipliers
# ---------------------------------------------------------------------------


class TestConfidenceMultipliers:
    """Checks of the ``confidence_multipliers`` argument of ``combine_hypotheses``."""

    def test_multipliers_scale_confidence(self) -> None:
        """Method multipliers scale confidence before combination. Mean of scaled confidences."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(145.0, 0.9, "m1")),
            _hyp("m2", _at(146.0, 0.9, "m2")),
        ]

        consensus = combine_hypotheses(
            hyps,
            ctx,
            confidence_multipliers={"m1": 2.0, "m2": 0.5},
        )

        assert len(consensus.col_boundaries) == 1
        # Scaled confidences: 0.9*2.0=1.8, 0.9*0.5=0.45. Mean = (1.8 + 0.45) / 2 = 1.125
        assert consensus.col_boundaries[0].confidence == pytest.approx(1.125)

    def test_no_multipliers_default(self) -> None:
        """Without confidence_multipliers parameter, behavior is unscaled."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(145.0, 0.9, "m1")),
            _hyp("m2", _at(146.0, 0.8, "m2")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Without multipliers: mean confidence = (0.9 + 0.8) / 2 = 0.85
        assert consensus.col_boundaries[0].confidence == pytest.approx(0.85)


# ---------------------------------------------------------------------------
# TestRuledLineOverride
# ---------------------------------------------------------------------------


class TestRuledLineOverride:
    """Checks of the special handling for the ``ruled_lines`` method."""

    def test_ruled_line_always_accepted(self) -> None:
        """Ruled line boundary accepted even with method_count below threshold."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(150.0, 0.9, "m1")),
            _hyp("m2", _at(150.5, 0.9, "m2")),
            _hyp("ruled_lines", _at(300.0, 0.9, "ruled_lines")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Clusters: ~150 (m1,m2 -> count=2), ~300 (ruled_lines -> count=1)
        # Median method count = 1.5. Count 1 < 1.5 for ~300, but ruled_lines override -> accepted.
        assert len(consensus.col_boundaries) == 2
        positions = sorted(b.min_pos for b in consensus.col_boundaries)
        assert abs(positions[0] - 150.0) < 5.0
        assert abs(positions[1] - 300.0) < 5.0

    def test_non_ruled_line_still_rejected(self) -> None:
        """Non-ruled-line boundary with low method count is still rejected."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(150.0, 0.9, "m1")),
            _hyp("m2", _at(150.5, 0.9, "m2")),
            _hyp("hotspot", _at(300.0, 0.9, "hotspot")),
        ]

        consensus = combine_hypotheses(hyps, ctx)

        # Clusters: ~150 (m1,m2 -> count=2), ~300 (hotspot -> count=1)
        # Median method count = 1.5. Count 1 < 1.5 for ~300, no override -> rejected.
        assert len(consensus.col_boundaries) == 1
        assert abs(consensus.col_boundaries[0].min_pos - 150.0) < 5.0


# ---------------------------------------------------------------------------
# TestCombinationTrace
# ---------------------------------------------------------------------------


class TestCombinationTrace:
    """Checks of the ``trace`` flag of ``combine_hypotheses`` and the trace contents."""

    def test_trace_false_returns_hypothesis_only(self) -> None:
        """combine_hypotheses with trace=False returns BoundaryHypothesis, not a tuple."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        only = _hyp("m1", _at(100.0, 0.9, "m1"))

        outcome = combine_hypotheses([only], ctx, trace=False)

        assert isinstance(outcome, BoundaryHypothesis)
        assert not isinstance(outcome, tuple)

    def test_trace_true_returns_tuple(self) -> None:
        """combine_hypotheses with trace=True returns (BoundaryHypothesis, CombinationTrace)."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        only = _hyp("m1", _at(100.0, 0.9, "m1"))

        outcome = combine_hypotheses([only], ctx, trace=True)

        assert isinstance(outcome, tuple)
        assert len(outcome) == 2
        assert isinstance(outcome[0], BoundaryHypothesis)
        assert isinstance(outcome[1], CombinationTrace)

    def test_empty_hypotheses_trace(self) -> None:
        """Empty hypothesis list with trace=True produces empty axis traces."""
        _, trace = combine_hypotheses([], _make_ctx(), trace=True)

        assert trace.col_trace.input_points == []
        assert trace.row_trace.input_points == []
        assert trace.col_trace.accepted_positions == []
        assert trace.row_trace.accepted_positions == []
        assert trace.source_methods == []

    def test_single_hypothesis_passthrough_trace(self) -> None:
        """Single hypothesis with trace=True: source_methods has 1 entry, positions preserved."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        only = _hyp(
            "m1",
            _at(100.0, 0.9, "m1"),
            _at(200.0, 0.8, "m1"),
            rows=(_at(50.0, 0.7, "m1"),),
        )

        _, trace = combine_hypotheses([only], ctx, trace=True)

        assert trace.source_methods == ["m1"]
        assert len(trace.col_trace.accepted_positions) == 2
        assert len(trace.row_trace.accepted_positions) == 1

    def test_multi_hypothesis_trace_structure(self) -> None:
        """3 hypotheses with trace=True: source_methods has 3, clusters have method_names and acceptance_reason."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(100.0, 0.9, "m1")),
            _hyp("m2", _at(101.0, 0.8, "m2")),
            _hyp("m3", _at(300.0, 0.7, "m3")),
        ]

        _, trace = combine_hypotheses(hyps, ctx, trace=True)

        assert len(trace.source_methods) == 3
        assert len(trace.col_trace.clusters) == 2
        for cluster in trace.col_trace.clusters:
            assert isinstance(cluster, ClusterRecord)
            assert cluster.total_confidence > 0
            assert cluster.distinct_methods >= 1
            assert cluster.acceptance_threshold >= 0
            assert isinstance(cluster.method_names, list)
            assert set(cluster.method_names) <= {"m1", "m2", "m3"}
            assert cluster.acceptance_reason in ("above_threshold", "ruled_line_override", "rejected")

    def test_axis_trace_input_points_match(self) -> None:
        """Input points in col_trace contain all methods' boundary points."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(100.0, 0.9, "m1")),
            _hyp("m2", _at(200.0, 0.8, "m2")),
        ]

        _, trace = combine_hypotheses(hyps, ctx, trace=True)

        input_provenances = {pt.provenance for pt in trace.col_trace.input_points}
        assert "m1" in input_provenances
        assert "m2" in input_provenances

    def test_trace_cluster_has_method_names(self) -> None:
        """Single cluster from m1+m2 has both method names."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(100.0, 0.9, "m1")),
            _hyp("m2", _at(101.0, 0.8, "m2")),
        ]

        _, trace = combine_hypotheses(hyps, ctx, trace=True)

        assert len(trace.col_trace.clusters) == 1
        merged = trace.col_trace.clusters[0]
        assert set(merged.method_names) == {"m1", "m2"}

    def test_trace_has_median_method_count(self) -> None:
        """Two methods at same position: single cluster with median_method_count == 2.0."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(100.0, 0.9, "m1")),
            _hyp("m2", _at(101.0, 0.8, "m2")),
        ]

        _, trace = combine_hypotheses(hyps, ctx, trace=True)

        assert trace.col_trace.median_method_count == 2.0

    def test_acceptance_reason_values(self) -> None:
        """3 hypotheses where one cluster is rejected: both acceptance reasons present."""
        ctx = _make_ctx(ruled_line_thickness=1.0)
        hyps = [
            _hyp("m1", _at(150.0, 0.9, "m1")),
            _hyp("m2", _at(150.5, 0.9, "m2")),
            _hyp("m3", _at(500.0, 0.9, "m3")),
        ]

        _, trace = combine_hypotheses(hyps, ctx, trace=True)

        reasons = {c.acceptance_reason for c in trace.col_trace.clusters}
        assert "rejected" in reasons
        assert "above_threshold" in reasons


# ---------------------------------------------------------------------------
# TestScalePoint
# ---------------------------------------------------------------------------


class TestScalePoint:
    """Checks for ``_scale_point``."""

    def test_no_multipliers(self) -> None:
        """None multipliers returns point unchanged."""
        original = _at(100.0, 0.9, "m1")
        assert _scale_point(original, None) is original

    def test_multiplier_1_0_unchanged(self) -> None:
        """Multiplier of 1.0 returns point unchanged."""
        original = _at(100.0, 0.9, "m1")
        assert _scale_point(original, {"m1": 1.0}) is original

    def test_multiplier_scales(self) -> None:
        """Multiplier of 2.0 doubles confidence."""
        original = _at(100.0, 0.9, "m1")

        scaled = _scale_point(original, {"m1": 2.0})

        assert scaled.confidence == pytest.approx(1.8)
        assert scaled.min_pos == 100.0
        assert scaled.max_pos == 100.0
        assert scaled.provenance == "m1"

    def test_missing_method_default_1_0(self) -> None:
        """Method not in multipliers dict gets default 1.0 (unchanged)."""
        original = _at(100.0, 0.9, "m1")
        assert _scale_point(original, {"m2": 2.0}) is original


# ---------------------------------------------------------------------------
# TestSpatialPrecision
# ---------------------------------------------------------------------------


class TestSpatialPrecision:
    """Checks for ``_compute_spatial_precision``."""

    def test_ruled_lines_take_priority(self) -> None:
        """Spatial precision equals median ruled line thickness when ruled lines exist."""
        ctx = _make_ctx(ruled_line_thickness=2.0, word_gap=5.0)
        assert _compute_spatial_precision(ctx) == 2.0

    def test_word_gap_fallback(self) -> None:
        """Without ruled lines, spatial precision falls back to median word gap."""
        ctx = _make_ctx(ruled_line_thickness=None, word_gap=5.0)
        assert _compute_spatial_precision(ctx) == 5.0

    def test_word_height_fallback(self) -> None:
        """Without ruled lines or word gaps, spatial precision equals median word height."""
        ctx = _make_ctx(ruled_line_thickness=None, word_gap=0.0)
        assert _compute_spatial_precision(ctx) == 10.0

    def test_varies_with_context(self) -> None:
        """Different contexts produce different spatial precision values."""
        thin = _make_ctx(ruled_line_thickness=2.0)
        thick = _make_ctx(ruled_line_thickness=4.0)
        assert _compute_spatial_precision(thin) != _compute_spatial_precision(thick)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ccam80/zotero-chunk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_combination.py•28 KiB