Zotero Chunk RAG

Overview Schema Related Servers Score Discussions

zotero-chunk-mcp
src
zotero_chunk_rag

section_classifier.py•2.66 KiB

"""Lightweight section heading classification. Classifies heading text into academic paper section categories using keyword matching only. No position-based guessing. """ from __future__ import annotations from .models import SectionSpan, CONFIDENCE_FALLBACK # Category keywords mapped to labels, ordered by weight (highest first). # When multiple keywords match, highest-weighted category wins. CATEGORY_KEYWORDS: list[tuple[str, list[str], float]] = [ ("results", ["result", "findings", "outcomes"], 1.0), ("conclusion", ["conclusion", "concluding"], 1.0), ("methods", ["method", "materials", "experimental", "procedure", "protocol", "design", "participants", "subjects"], 0.85), ("abstract", ["abstract"], 0.75), ("background", ["background", "literature review", "related work"], 0.7), ("discussion", ["discussion"], 0.65), ("introduction", ["introduction"], 0.5), ("appendix", ["appendix", "supplementa", "acknowledgment", "acknowledgement", "grant", "funding", "disclosure", "conflict of interest"], 0.3), ("references", ["reference", "bibliography"], 0.1), ] # "summary" is special — matches conclusion unless combined with data words. SUMMARY_EXCLUDES = ["statistics", "table", "data", "results summary"] def categorize_heading(heading: str) -> tuple[str | None, float]: """Determine category from heading text using keyword matching. Returns (category, weight) or (None, 0) if no match. """ heading_lower = heading.lower() for category, keywords, weight in CATEGORY_KEYWORDS: for keyword in keywords: if keyword in heading_lower: return (category, weight) # Special handling for "summary" (only if no other category matched above). # Exclusion-based: an unrecognised keyword just falls through to conclusion, # rather than requiring a specific keyword for the base functionality. if "summary" in heading_lower: for exclude in SUMMARY_EXCLUDES: if exclude in heading_lower: return ("results", 1.0) return ("conclusion", 1.0) return (None, 0.0) def assign_section(char_start: int, spans: list[SectionSpan]) -> str: """Find the section label for a given character position.""" label, _ = assign_section_with_confidence(char_start, spans) return label def assign_section_with_confidence( char_start: int, spans: list[SectionSpan] ) -> tuple[str, float]: """Find section label and confidence for a character position.""" for span in spans: if span.char_start <= char_start < span.char_end: return span.label, span.confidence return "unknown", CONFIDENCE_FALLBACK

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ccam80/zotero-chunk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

section_classifier.py•2.66 KiB