KSJ MCP Server

Overview Schema Related Servers Score Discussions

ksj-mcp
src
ksj_mcp

templates.py•7.14 KiB

""" Template-aware field parsers for KSJ journal pages. Each parser receives the raw OCR text from a journal page and returns a normalized dict of the structured fields found on that template. Template IDs and their right-page sections: RC — First Impressions / Key Points / tags SYN — Breakthrough / Patterns / Connections / tags REV — Process Notes / Observations / tags DC — Dream Narrative / Symbols / Emotions / tags """ import re from typing import Any # ── Schema tag extraction ───────────────────────────────────────────────────── # Matches: #topic @source !priority ?question $insight *sensory (DC) _INLINE_TAG = re.compile( r'(?<!\w)' # not preceded by word char r'([#@!?$*])' # prefix (* = DC sensory tag) r'([\w][\w\-\.\/]*)', # value (letters, digits, hyphen, dot, slash) re.UNICODE, ) # Matches: A→B A->B (cause-effect arrows) _ARROW_TAG = re.compile( r'([\w\-\.]+)' # left side r'\s*(?:→|->)\s*' r'([\w\-\.]+)', # right side re.UNICODE, ) def extract_schema_tags(text: str) -> list[dict[str, str]]: """ Extract all schema-prefixed tags from *text*. Returns a list of dicts: [{"prefix": "#", "value": "machine-learning"}, ...] Arrow tags are stored as: {"prefix": "->", "value": "A->B"} """ tags: list[dict[str, str]] = [] seen: set[tuple[str, str]] = set() for m in _INLINE_TAG.finditer(text): prefix, value = m.group(1), m.group(2).lower() key = (prefix, value) if key not in seen: seen.add(key) tags.append({"prefix": prefix, "value": value}) for m in _ARROW_TAG.finditer(text): left, right = m.group(1).lower(), m.group(2).lower() value = f"{left}->{right}" key = ("->", value) if key not in seen: seen.add(key) tags.append({"prefix": "->", "value": value}) return tags # ── Section splitter helper ─────────────────────────────────────────────────── def _extract_section(text: str, *headers: str) -> str: """ Extract text between a section header and the next header or end of string. Case-insensitive. Returns the first matching section, stripped. 
""" for header in headers: pattern = re.compile( rf'(?i){re.escape(header)}\s*[:\-]?\s*\n(.*?)(?=\n[A-Z][A-Z ]+[:\-]|\Z)', re.DOTALL, ) m = pattern.search(text) if m: return m.group(1).strip() return "" def _build_summary(fields: dict[str, Any], max_len: int = 200) -> str: """Build a one-line summary from the most informative field.""" for key in ("first_impressions", "breakthrough", "process_notes", "dream_narrative"): val = fields.get(key, "").strip() if val: return val[:max_len].replace("\n", " ") # Fallback: join non-empty fields parts = [v for v in fields.values() if isinstance(v, str) and v.strip()] return " | ".join(parts)[:max_len] # ── Per-template parsers ────────────────────────────────────────────────────── def parse_rc(text: str) -> dict[str, Any]: """Parse a Rapid Capture (RC) page.""" return { "first_impressions": _extract_section(text, "first impressions", "impressions"), "key_points": _extract_section(text, "key points", "key point", "points"), "tags_raw": _extract_section(text, "tags", "tag"), } def parse_syn(text: str) -> dict[str, Any]: """Parse a Synthesis (SYN) page.""" return { "breakthrough": _extract_section(text, "breakthrough", "★ breakthrough", "★"), "patterns": _extract_section(text, "patterns", "pattern"), "connections_raw": _extract_section(text, "connections", "connection"), "tags_raw": _extract_section(text, "tags", "tag"), } _STATUS_PATTERN = re.compile( r'\b(needs?\s+work|solid|mastered)\b', re.IGNORECASE, ) _STATUS_SECTION = re.compile( r'(?i)knowledge\s+status\s*[:\-]?\s*(.+?)(?=\n|$)' ) _STATUS_NORMALIZE = { "needs work": "Needs Work", "need work": "Needs Work", "solid": "Solid", "mastered": "Mastered", } def _extract_knowledge_status(text: str) -> str: """ Extract the Knowledge Status value (Needs Work / Solid / Mastered) from REV page OCR text. Checks: 1. "Knowledge Status: Solid" style label + value on the same line 2. 
Any occurrence of the three status terms as a fallback Returns one of "Needs Work", "Solid", "Mastered", or "" if not found. """ # Try label + value first m = _STATUS_SECTION.search(text) if m: candidate = m.group(1).strip().lower() for key, normalized in _STATUS_NORMALIZE.items(): if key in candidate: return normalized # Fallback: first occurrence of a status keyword anywhere in the text m = _STATUS_PATTERN.search(text) if m: return _STATUS_NORMALIZE.get(m.group(0).lower().replace("needs ", "needs ").strip(), "") return "" def parse_rev(text: str) -> dict[str, Any]: """Parse a Review (REV) page.""" return { "process_notes": _extract_section(text, "process notes", "process", "notes"), "observations": _extract_section(text, "observations", "observation"), "knowledge_status": _extract_knowledge_status(text), "tags_raw": _extract_section(text, "tags", "tag"), } def parse_dc(text: str) -> dict[str, Any]: """Parse a Dream Capture (DC) page.""" return { "dream_narrative": _extract_section(text, "narrative", "dream narrative", "dream"), "symbols": _extract_section(text, "symbols", "symbol"), "emotions": _extract_section(text, "emotions", "emotion"), "tags_raw": _extract_section(text, "tags", "tag"), } # ── Dispatcher ──────────────────────────────────────────────────────────────── _PARSERS = { "RC": parse_rc, "SYN": parse_syn, "REV": parse_rev, "DC": parse_dc, } def parse_template(template_type: str, raw_text: str) -> dict[str, Any]: """ Parse *raw_text* using the appropriate template parser. 
Returns a dict with: - parsed content fields - "summary": auto-generated one-liner - "tags": list of schema tag dicts """ parser = _PARSERS.get(template_type.upper()) if parser is None: # Unknown template — return raw text as a single field fields: dict[str, Any] = {"raw": raw_text} else: fields = parser(raw_text) # Extract schema tags from the entire raw text (catches tags anywhere on the page) tags = extract_schema_tags(raw_text) summary = _build_summary(fields) return { "fields": fields, "summary": summary, "tags": tags, }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ChavezAILabs/ksj-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

templates.py•7.14 KiB