wassden

Overview Schema Related Servers Score Discussions

id_extractor.py•6.68 kB

"""ID extraction utilities for spec documents. This module provides functions to extract and parse various types of IDs from specification documents (requirements, tasks, design components, etc.). """ import re class IDExtractor: """Extractor for various ID types in spec documents.""" # ID patterns (using existing patterns from validation_common.py) REQ_ID_PATTERN = r"\bREQ-\d{2}\b" NFR_ID_PATTERN = r"\bNFR-\d{2}\b" KPI_ID_PATTERN = r"\bKPI-\d{2}\b" TR_ID_PATTERN = r"\bTR-\d{2}\b" TASK_ID_PATTERN = r"\bTASK-\d{2}(?:-\d{2}){0,2}\b" DC_PATTERN = r"\bDC-\d{2}\b" # Prefixed patterns for list items # Supports formats: "REQ-01: text", "**REQ-01**: text", "[ ] **TASK-01-01**: text" # Allow optional checkbox "[ ]" or "[x]" and optional bold markers "**" PREFIXED_REQ_PATTERN = r"^(?:\[[ x]\]\s*)?\*?\*?(REQ-\d+|NFR-\d+|KPI-\d+|TR-\d+)\*?\*?:\s*(.+)$" PREFIXED_TASK_PATTERN = r"^(?:\[[ x]\]\s*)?\*?\*?(TASK-\d+(?:-\d+){0,2})\*?\*?:\s*(.+)$" # Loose patterns for malformed IDs LOOSE_REQ_PATTERN = r"^(REQ[-A-Za-z0-9]*|TR[-A-Za-z0-9]*|NFR[-A-Za-z0-9]*|KPI[-A-Za-z0-9]*):\s*(.+)$" LOOSE_TASK_PATTERN = r"^(TASK[-A-Za-z0-9]*):\s*(.+)$" # Catch-all pattern for completely invalid requirement IDs (e.g., INVALID-01) # This matches any uppercase prefix followed by hyphen and digits INVALID_REQ_PATTERN = r"^([A-Z]+[-]\d+):\s*(.+)$" @staticmethod def extract_req_id_from_text(text: str) -> tuple[str | None, str, str]: """Extract requirement ID from text. Args: text: Text that may contain a requirement ID prefix (e.g., "REQ-01: description") Returns: Tuple of (req_id, req_text_without_id, req_type) - req_id: Extracted ID (e.g., "REQ-01") or None - req_text_without_id: Text with ID prefix stripped - req_type: Requirement type ("REQ", "NFR", "KPI", "TR") """ text = text.strip() # Try strict pattern first match = re.match(IDExtractor.PREFIXED_REQ_PATTERN, text) if match: req_id = match.group(1) req_text = match.group(2).strip() req_type = req_id.split("-")[0] return req_id, req_text, req_type # Try loose pattern for malformed IDs match = re.match(IDExtractor.LOOSE_REQ_PATTERN, text) if match: req_id = match.group(1) req_text = match.group(2).strip() req_type = req_id.split("-")[0] if "-" in req_id else "REQ" return req_id, req_text, req_type # Try catch-all pattern for completely invalid IDs (e.g., INVALID-01) match = re.match(IDExtractor.INVALID_REQ_PATTERN, text) if match: req_id = match.group(1) req_text = match.group(2).strip() req_type = req_id.split("-")[0] if "-" in req_id else "REQ" return req_id, req_text, req_type # No ID found return None, text, "REQ" @staticmethod def extract_task_id_from_text(text: str) -> tuple[str | None, str]: """Extract task ID from text. Args: text: Text that may contain a task ID prefix (e.g., "TASK-01-01: description") Returns: Tuple of (task_id, task_text_without_id) - task_id: Extracted ID (e.g., "TASK-01-01") or None - task_text_without_id: Text with ID prefix stripped """ text = text.strip() # Try strict pattern first match = re.match(IDExtractor.PREFIXED_TASK_PATTERN, text) if match: task_id = match.group(1) task_text = match.group(2).strip() return task_id, task_text # Try loose pattern for malformed IDs match = re.match(IDExtractor.LOOSE_TASK_PATTERN, text) if match: task_id = match.group(1) task_text = match.group(2).strip() return task_id, task_text # No ID found return None, text @staticmethod def extract_all_req_ids(text: str) -> set[str]: """Extract all requirement IDs from text. Args: text: Text to search Returns: Set of requirement IDs found """ ids: set[str] = set() ids.update(re.findall(IDExtractor.REQ_ID_PATTERN, text)) ids.update(re.findall(IDExtractor.NFR_ID_PATTERN, text)) ids.update(re.findall(IDExtractor.KPI_ID_PATTERN, text)) ids.update(re.findall(IDExtractor.TR_ID_PATTERN, text)) return ids @staticmethod def extract_all_task_ids(text: str) -> set[str]: """Extract all task IDs from text. Args: text: Text to search Returns: Set of task IDs found """ return set(re.findall(IDExtractor.TASK_ID_PATTERN, text)) @staticmethod def extract_all_dc_refs(text: str) -> set[str]: """Extract all design component references from text. Args: text: Text to search Returns: Set of DC references found (e.g., {"DC-01", "DC-03"}) """ return set(re.findall(IDExtractor.DC_PATTERN, text)) @staticmethod def extract_task_dependencies(text: str) -> list[str]: """Extract task dependencies from task text. Looks for patterns like "depends on TASK-01-01" or "requires TASK-02-03". Args: text: Task description text Returns: List of task IDs this task depends on """ dependencies: list[str] = [] # Pattern: "depends on TASK-XX-XX" or "requires TASK-XX-XX" dependency_patterns = [ r"depends on (TASK-\d{2}(?:-\d{2}){0,2})", r"requires (TASK-\d{2}(?:-\d{2}){0,2})", r"after (TASK-\d{2}(?:-\d{2}){0,2})", r"依存:\s*(TASK-\d{2}(?:-\d{2}){0,2})", # Japanese ] for pattern in dependency_patterns: matches = re.findall(pattern, text, re.IGNORECASE) dependencies.extend(matches) return dependencies @staticmethod def is_acceptance_criteria(text: str) -> bool: """Check if text appears to be acceptance criteria rather than a requirement. Args: text: Text to check Returns: True if text looks like acceptance criteria """ skip_patterns = [ r"受け入れ観点", r"受入観点", r"Acceptance criteria", r"テスト観点", r"Test criteria", ] return any(re.search(pattern, text, re.IGNORECASE) for pattern in skip_patterns)

Loading blob content...

Latest Blog Posts

What are Claude Skills?
By punkpeye on January 10, 2026.
mcp
skills
How to Test MCP Streamable HTTP Endpoints Using cURL
By punkpeye on January 2, 2026.
tutorial
bash
What is Streamable HTTP in MCP?
By punkpeye on January 2, 2026.
Streamable HTTP

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tokusumi/wassden-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

id_extractor.py•6.68 kB