wassden

Overview Schema Related Servers Score Discussions

validate_ears.py•13.1 KiB

"""EARS (Easy Approach to Requirements Syntax) validation module. This module validates requirements against EARS patterns. Initial version supports only Ubiquitous pattern. """ import re from typing import Any import markdown from pydantic import BaseModel, Field from wassden.i18n.core import get_i18n from wassden.language_types import Language class EARSViolation(BaseModel): """Represents an EARS pattern violation.""" line: int = Field(description="Line number where violation occurred") text: str = Field(description="Original requirement text") reason: str = Field(description="Reason for violation") class EARSValidationResult(BaseModel): """Result of EARS validation.""" pattern: str = Field(description="EARS pattern validated against") total: int = Field(description="Total number of requirements") matched: int = Field(description="Number of matched requirements") rate: float = Field(description="Match rate (0.0 - 1.0)") violations: list[EARSViolation] = Field(default_factory=list, description="List of violations found") def to_dict(self) -> dict[str, Any]: """Convert to dictionary format for compatibility.""" return { "ears": { "pattern": self.pattern, "total": self.total, "matched": self.matched, "rate": self.rate, "violations": [{"line": v.line, "reason": v.reason, "text": v.text} for v in self.violations], } } class EARSValidator: """Validates requirements against EARS patterns.""" def __init__(self, language: Language = Language.JAPANESE) -> None: """Initialize validator with language support. Args: language: Language for validation and error messages """ self.language = language.value self.i18n = get_i18n(language) # Ubiquitous pattern regex self.ubiquitous_patterns = { "ja": re.compile(r"^(システムは|本システムは).+([あ-ん]ること|すること|する)[。]?$"), "en": re.compile(r"^The system shall .+\.$", re.IGNORECASE), } def validate_ubiquitous(self, requirements: list[str]) -> EARSValidationResult: """Validate requirements against Ubiquitous pattern. Args: requirements: List of requirement texts Returns: Validation result with violations """ pattern_regex = self.ubiquitous_patterns.get(self.language, self.ubiquitous_patterns["en"]) violations = [] matched_count = 0 for i, req_text in enumerate(requirements, 1): req = req_text.strip() if not req: # Skip empty lines continue if pattern_regex.match(req): matched_count += 1 else: reason = self._get_violation_reason(req) violations.append(EARSViolation(line=i, text=req, reason=reason)) total = len([r for r in requirements if r.strip()]) rate = matched_count / total if total > 0 else 0.0 return EARSValidationResult( pattern="ubiquitous", total=total, matched=matched_count, rate=rate, violations=violations ) def _get_violation_reason(self, req: str) -> str: """Get localized violation reason for requirement. Args: req: Requirement text Returns: Localized violation reason """ if self.language == "ja": return self._get_japanese_violation_reason(req) return self._get_english_violation_reason(req) def _get_japanese_violation_reason(self, req: str) -> str: """Get Japanese violation reason.""" if not req.startswith(("システムは", "本システムは")): return str(self.i18n.t("validation_errors.ears.missing_system_prefix_ja")) if not (req.endswith(("すること。", "する。", "すること", "する")) or re.search(r"[あ-ん]ること[。]?$", req)): return str(self.i18n.t("validation_errors.ears.missing_action_suffix_ja")) return str(self.i18n.t("validation_errors.ears.pattern_mismatch_ja")) def _get_english_violation_reason(self, req: str) -> str: """Get English violation reason.""" req_lower = req.lower() if not req_lower.startswith("the system "): return str(self.i18n.t("validation_errors.ears.missing_system_prefix_en")) if not req_lower.startswith("the system shall"): return str(self.i18n.t("validation_errors.ears.missing_shall_en")) if not req.endswith("."): return str(self.i18n.t("validation_errors.ears.missing_period_en")) return str(self.i18n.t("validation_errors.ears.pattern_mismatch_en")) def extract_requirements_from_markdown(self, markdown_text: str) -> list[str]: """Extract functional requirements from markdown text using markdown parser. Args: markdown_text: Markdown content Returns: List of requirement texts """ # Use markdown parser to convert to HTML and parse it properly md = markdown.Markdown(extensions=["toc"]) html_content = md.convert(markdown_text) functional_headers = [ "機能要件（EARS）", ". 機能要件", "## 6. Functional Requirements", "機能要件", "Functional Requirements", ] # Try HTML-based extraction first requirements = self._extract_from_html(html_content, functional_headers) # Fallback to line-by-line if HTML parsing didn't work if not requirements: requirements = self._extract_from_text(markdown_text, functional_headers) return requirements def _extract_from_html(self, html_content: str, functional_headers: list[str]) -> list[str]: """Extract requirements from HTML content.""" requirements = [] for header in functional_headers: # Look for the header in HTML - be more specific to avoid substring matches if header == "Functional Requirements": # For this header, ensure it's not part of "Non-Functional Requirements" header_pattern = r"<h[1-6][^>]*>.*?(?<!Non-)Functional Requirements.*?</h[1-6]>" else: header_pattern = rf"<h[1-6][^>]*>.*?{re.escape(header)}.*?</h[1-6]>" header_match = re.search(header_pattern, html_content, re.IGNORECASE | re.DOTALL) if header_match: # Find content after this header until next header start_pos = header_match.end() next_header_pattern = r"<h[1-6][^>]*>.*?</h[1-6]>" remaining_content = html_content[start_pos:] next_header_match = re.search(next_header_pattern, remaining_content) section_content = ( remaining_content[: next_header_match.start()] if next_header_match else remaining_content ) # Extract list items from this section using HTML list_item_matches = re.findall(r"<li[^>]*>(.*?)</li>", section_content, re.DOTALL) for item_html in list_item_matches: req_text = self._clean_html_text(item_html) if req_text and self._is_requirement_item(req_text): requirements.append(req_text) break return requirements def _clean_html_text(self, html_text: str) -> str: """Clean HTML text and entities.""" # Remove HTML tags and clean up the text text = re.sub(r"<[^>]+>", "", html_text) text = re.sub(r"\s+", " ", text.strip()) # Use markdown parsing knowledge to clean up entities text = text.replace("<", "<").replace(">", ">").replace("&", "&") # Strip REQ-ID prefixes for EARS validation # First try strict pattern for valid REQ-IDs req_id_pattern = r"^(REQ-\d+|TR-\d+|NFR-\d+|KPI-\d+):\s*(.+)$" req_id_match = re.match(req_id_pattern, text) if req_id_match: text = req_id_match.group(2).strip() else: # Also handle malformed REQ-ID patterns that should be stripped # This catches cases like REQ-ABC, REQ-, REQ:, REQ-01-02, etc. loose_pattern = r"^(REQ[-A-Za-z0-9]*|TR[-A-Za-z0-9]*|NFR[-A-Za-z0-9]*|KPI[-A-Za-z0-9]*):\s*(.+)$" loose_match = re.match(loose_pattern, text) if loose_match: # Extract content after the malformed REQ-ID text = loose_match.group(2).strip() return text def _extract_from_text(self, markdown_text: str, functional_headers: list[str]) -> list[str]: """Fallback extraction using line-by-line parsing.""" requirements = [] lines = markdown_text.split("\n") in_functional_section = False current_header_level = 0 for line in lines: stripped_line = line.strip() # Check for headers if stripped_line.startswith("#"): header_level = len(stripped_line) - len(stripped_line.lstrip("#")) header_text = stripped_line.lstrip("# ").strip() # Check if we're entering functional requirements section if header_text in functional_headers: in_functional_section = True current_header_level = header_level continue # Check if we're leaving the section if in_functional_section and header_level <= current_header_level: in_functional_section = False # Extract list items in functional requirements section only if in_functional_section: req_text = self._extract_list_item(stripped_line) if req_text and self._is_requirement_item(req_text): requirements.append(req_text) return requirements def _extract_list_item(self, line: str) -> str: """Extract requirement text from a markdown list item.""" # Handle various markdown list formats list_patterns = [ r"^[-*+]\s+(.+)$", # Standard markdown lists r"^\d+\.\s+(.+)$", # Numbered lists r"^•\s+(.+)$", # Bullet points r"^\s*[-*+]\s+(.+)$", # Indented lists r"^\s*\d+\.\s+(.+)$", # Indented numbered lists ] for pattern in list_patterns: match = re.match(pattern, line) if match: req_text = match.group(1).strip() # Clean up markdown syntax using markdown knowledge req_text = re.sub(r"\*\*(.+?)\*\*", r"\1", req_text) # Bold req_text = re.sub(r"\*(.+?)\*", r"\1", req_text) # Italic req_text = re.sub(r"`(.+?)`", r"\1", req_text) # Code req_text = re.sub(r"\[(.+?)\]$.+?$", r"\1", req_text) # Links # Strip REQ-ID prefixes for EARS validation # First try strict pattern for valid REQ-IDs req_id_pattern = r"^(REQ-\d+|TR-\d+|NFR-\d+|KPI-\d+):\s*(.+)$" req_id_match = re.match(req_id_pattern, req_text) if req_id_match: req_text = req_id_match.group(2).strip() else: # Also handle malformed REQ-ID patterns that should be stripped # This catches cases like REQ-ABC, REQ-, REQ:, REQ-01-02, etc. loose_pattern = r"^(REQ[-A-Za-z0-9]*|TR[-A-Za-z0-9]*|NFR[-A-Za-z0-9]*|KPI[-A-Za-z0-9]*):\s*(.+)$" loose_match = re.match(loose_pattern, req_text) if loose_match: # Extract content after the malformed REQ-ID req_text = loose_match.group(2).strip() return req_text return "" def _is_requirement_item(self, text: str) -> bool: """Check if the text is a functional requirement (not acceptance criteria or other content).""" # Skip acceptance criteria and other non-requirement items skip_patterns = [ r"受け入れ観点", # Acceptance criteria r"受入観点", # Acceptance criteria (alternative) r"Acceptance criteria", r"テスト観点", # Test criteria r"Test criteria", r"注意事項", # Notes r"Note:", r"備考", # Remarks r"Remark:", ] return all(not re.search(pattern, text, re.IGNORECASE) for pattern in skip_patterns) def validate_ears_in_content(content: str, language: Language = Language.JAPANESE) -> EARSValidationResult: """Validate EARS patterns in markdown content. This is the main integration point with existing validation system. Args: content: Markdown content to validate language: Language for validation Returns: EARS validation result """ validator = EARSValidator(language) requirements = validator.extract_requirements_from_markdown(content) return validator.validate_ubiquitous(requirements)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tokusumi/wassden-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

validate_ears.py•13.1 KiB