Zotero Chunk RAG

test_reference_matcher.py•4.27 KiB

"""Tests for reference matcher.""" from zotero_chunk_rag._reference_matcher import match_references, get_reference_context from zotero_chunk_rag.models import Chunk, ExtractedTable, ExtractedFigure def _make_chunks(texts_with_pages): """Helper to build chunks from (text, page_num) pairs.""" chunks = [] offset = 0 for i, (text, page_num) in enumerate(texts_with_pages): chunks.append(Chunk( text=text, chunk_index=i, page_num=page_num, char_start=offset, char_end=offset + len(text), )) offset += len(text) + 1 # +1 for join newline return chunks, "\n".join(t for t, _ in texts_with_pages) def test_match_references_finds_table(): """match_references maps Table 1 to the chunk containing 'Table 1'.""" chunks, full_md = _make_chunks([ ("Introduction to the paper.", 1), ("As shown in Table 1, the results are significant.", 2), ("Conclusion of the paper.", 3), ]) tables = [ExtractedTable( page_num=2, table_index=0, bbox=(0, 0, 1, 1), headers=["A"], rows=[["1"]], caption="Table 1. Results summary", )] ref_map = match_references(full_md, chunks, tables, []) assert ("table", 1) in ref_map assert ref_map[("table", 1)] == 1 # chunk_index 1 contains "Table 1" def test_match_references_finds_figure(): """match_references maps Figure 1 to the chunk containing 'Figure 1'.""" chunks, full_md = _make_chunks([ ("Introduction.", 1), ("Figure 1 shows the architecture.", 2), ("Methods section.", 3), ]) figures = [ExtractedFigure( page_num=2, figure_index=0, bbox=(0, 0, 1, 1), caption="Figure 1. 
System architecture", )] ref_map = match_references(full_md, chunks, [], figures) assert ("figure", 1) in ref_map assert ref_map[("figure", 1)] == 1 def test_fallback_uses_page_number(): """Unreferenced items fall back to page-based chunk estimate.""" chunks, full_md = _make_chunks([ ("Page one content.", 1), ("Page two content.", 2), ("Page three content.", 3), ]) # Table on page 2, but no "Table 1" text anywhere in markdown tables = [ExtractedTable( page_num=2, table_index=0, bbox=(0, 0, 1, 1), headers=["A"], rows=[["1"]], caption="Table 1. Data", )] ref_map = match_references(full_md, chunks, tables, []) assert ("table", 1) in ref_map assert ref_map[("table", 1)] == 1 # chunk_index 1 is on page 2 def test_get_reference_context_returns_chunk_text(): """get_reference_context returns the text of the referencing chunk.""" chunks, full_md = _make_chunks([ ("Introduction.", 1), ("As shown in Table 1, results are great.", 2), ]) ref_map = {("table", 1): 1} ctx = get_reference_context(full_md, chunks, ref_map, "table", 1) assert ctx is not None assert "Table 1" in ctx def test_empty_chunks_returns_empty_map(): """Empty chunks list returns empty map.""" ref_map = match_references("some text", [], [], []) assert ref_map == {} def test_figure_searchable_text_includes_context(): """to_searchable_text includes reference_context when set.""" fig = ExtractedFigure( page_num=1, figure_index=0, bbox=(0, 0, 1, 1), caption="Figure 1. Architecture diagram", reference_context="The architecture shown in Figure 1 demonstrates the layered approach.", ) text = fig.to_searchable_text() assert "Figure 1. Architecture diagram" in text assert "layered approach" in text def test_figure_searchable_text_without_context(): """to_searchable_text works normally without reference_context.""" fig = ExtractedFigure( page_num=1, figure_index=0, bbox=(0, 0, 1, 1), caption="Figure 1. Architecture diagram", ) text = fig.to_searchable_text() assert text == "Figure 1. 
Architecture diagram" def test_table_has_reference_context_field(): """ExtractedTable should have reference_context field.""" table = ExtractedTable( page_num=1, table_index=0, bbox=(0, 0, 1, 1), headers=["A"], rows=[["1"]], caption="Table 1. Results", reference_context="Table 1 summarizes the key findings.", ) assert table.reference_context == "Table 1 summarizes the key findings."

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ccam80/zotero-chunk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_reference_matcher.py•4.27 KiB