Coach AI

Overview Schema Related Servers Score Discussions

coach-ai
src
coach_ai

task_parser.py•7.39 KiB

"""
Natural language task list parser

Parses various task list formats (bullets, numbers, checkboxes) and extracts
metadata.
"""

import re
from collections import Counter
from typing import Any, Optional


def _table(entries: list[tuple[str, Any]]) -> list[tuple[re.Pattern, Any]]:
    """Compile ``(pattern, payload)`` pairs once, case-insensitively."""
    return [(re.compile(pattern, re.IGNORECASE), payload) for pattern, payload in entries]


# A header is any non-empty line ending in a colon.  The whole title (including
# a trailing "(due 11/18)") is captured so extract_section_context can see it.
_SECTION_HEADER_RE = re.compile(r"^(.+?):\s*$")

# Completed checkbox, optionally preceded by any supported list prefix.
_COMPLETED_RE = re.compile(r"^(?:[-*•]|\d+\.)?\s*\[x\]", re.IGNORECASE)

_QUICK_RE = re.compile(r"\bquick\b", re.IGNORECASE)
_DUE_RE = re.compile(r"due\s+([\d/-]+)", re.IGNORECASE)

# In every table the first matching entry wins.
_PRIORITY_PATTERNS = _table([
    (r"^(!{2,}|URGENT:?)\s*", "high"),
    (r"#high\b|#urgent\b", "high"),
    (r"#low\b", "low"),
    (r"#medium\b", "medium"),
])

_TIMEFRAME_PATTERNS = _table([
    (r"\bthis\s+week\b", "this_week"),
    (r"\bnext\s+sprint\b", "next_sprint"),
    (r"\bthis\s+month\b", "this_month"),
    (r"\bthis\s+quarter\b|\bq[1-4]\b", "this_quarter"),
    (r"\bsomeday\b|\blater\b", "someday"),
])

# Payloads are converters from the match to a number of minutes.  round() is
# used instead of int() so float error (e.g. 4.1 * 60) cannot drop a minute.
_TIME_PATTERNS = _table([
    # "(2h)", "(1.5 hr)", "(2 hours)"
    (r"\((\d+(?:\.\d+)?)\s*h(?:(?:ou)?rs?)?\)", lambda m: round(float(m.group(1)) * 60)),
    # "(30 min)", "(45 minutes)"
    (r"\((\d+)\s*min(?:ute)?s?\)", lambda m: int(m.group(1))),
    (r"~(\d+)h\b", lambda m: int(m.group(1)) * 60),
    (r"~(\d+)min\b", lambda m: int(m.group(1))),
])

_ENERGY_PATTERNS = _table([
    (r"\[high\s+energy\]|\[deep\s+work\]", "high"),
    (r"\[low\s+(?:energy|effort)\]|\[easy\]", "low"),
    (r"\[medium\s+energy\]", "medium"),
])

_THEME_PATTERNS = _table([
    (r"#sprint\b", "sprint_work"),
    (r"#strategic\b", "strategic"),
    # The lookbehind keeps words such as "sysadmin" from being treated as a tag.
    (r"(?<!\w)@?#?admin\b", "admin"),
    (r"#learning\b", "learning"),
])


def _pop_marker(line: str, table: list[tuple[re.Pattern, Any]]) -> tuple[Any, str]:
    """
    Find the first pattern in ``table`` that occurs in ``line``.

    Returns ``(value, remaining_line)`` where every occurrence of the matching
    pattern has been removed from the line, or ``(None, line)`` when nothing
    matches.  A callable payload is invoked with the match to produce the value.
    """
    for regex, payload in table:
        match = regex.search(line)
        if match is None:
            continue
        value = payload(match) if callable(payload) else payload
        return value, regex.sub("", line).strip()
    return None, line


def parse_natural_language_task_list(
    text: str,
    default_priority: str = "medium",
    default_timeframe: Optional[str] = None,
) -> dict[str, Any]:
    """
    Parse natural language task list into structured todos

    Supports multiple formats and extracts metadata from inline markers.

    Args:
        text: Raw task list, one task or section header per line.
        default_priority: Priority for tasks with no marker and no section default.
        default_timeframe: Timeframe for tasks with no marker and no section default.

    Returns:
        Dict with ``parsed_todos`` (list of todo dicts), ``parse_summary``
        (human-readable string) and ``unparseable_lines`` (lines that produced
        no todo, including completed checkboxes).
    """
    parsed_todos: list[dict[str, Any]] = []
    unparseable_lines: list[str] = []
    section_context: dict[str, Any] = {}  # Defaults from the latest section header

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # A line ending in ":" is a section header: it sets context, not a task
        header = _SECTION_HEADER_RE.match(line)
        if header:
            section_context = extract_section_context(header.group(1).strip())
            continue

        parsed = parse_single_task_line(
            line, default_priority, default_timeframe, section_context
        )
        if parsed:
            parsed_todos.append(parsed)
        else:
            unparseable_lines.append(line)

    # Generate summary
    summary_parts = [f"Parsed {len(parsed_todos)} tasks"]
    if unparseable_lines:
        summary_parts.append(f"{len(unparseable_lines)} lines could not be parsed")

    # Counter keeps first-seen order, so the breakdown order is stable
    priority_counts = Counter(todo.get("priority", "medium") for todo in parsed_todos)
    if priority_counts:
        breakdown = ", ".join(f"{p}={c}" for p, c in priority_counts.items())
        summary_parts.append(f"Priority breakdown: {breakdown}")

    return {
        "parsed_todos": parsed_todos,
        "parse_summary": ". ".join(summary_parts),
        "unparseable_lines": unparseable_lines,
    }


def parse_single_task_line(
    line: str,
    default_priority: str,
    default_timeframe: Optional[str],
    section_context: dict[str, Any],
) -> Optional[dict[str, Any]]:
    """
    Parse a single task line and extract metadata

    Inline markers override values inherited from ``section_context``, which in
    turn override the supplied defaults.  Recognised markers are removed from
    the text; what is left becomes the title.

    Returns todo dict or None if unparseable (completed checkbox, or no title
    left once the markers are removed).
    """
    # Skip completed checkboxes, whatever list prefix precedes them
    if _COMPLETED_RE.match(line):
        return None

    # Remove common prefixes
    line = re.sub(r"^[-*•]\s*", "", line)  # Bullet points
    line = re.sub(r"^\d+\.\s*", "", line)  # Numbered lists
    line = re.sub(r"^\[\s*\]\s*", "", line)  # Checkboxes

    # Initialize with defaults and section context
    todo: dict[str, Any] = {
        "priority": section_context.get("priority", default_priority),
        "timeframe": section_context.get("timeframe", default_timeframe),
    }

    # Inherit theme from section context if present
    if "theme_tag" in section_context:
        todo["theme_tag"] = section_context["theme_tag"]

    # Extraction order matters: each step sees the line with earlier markers
    # already removed.
    extraction_steps = (
        ("priority", _PRIORITY_PATTERNS),
        ("timeframe", _TIMEFRAME_PATTERNS),
        ("time_estimate", _TIME_PATTERNS),
        ("energy_required", _ENERGY_PATTERNS),
        ("theme_tag", _THEME_PATTERNS),
    )
    for key, table in extraction_steps:
        value, line = _pop_marker(line, table)
        if value is not None:
            todo[key] = value

    # Extract quick marker
    if _QUICK_RE.search(line):
        todo["quick"] = True
        line = _QUICK_RE.sub("", line).strip()

    # Clean up line and use as title
    line = re.sub(r"\s+", " ", line).strip()  # Collapse whitespace
    line = re.sub(r"^[-:]+\s*", "", line)  # Remove leading separators

    if not line:
        return None  # No title left after extraction

    todo["title"] = line
    return todo


def extract_section_context(section_title: str) -> dict[str, Any]:
    """
    Extract context from section headers like "Sprint work (due 11/18):" or
    "Strategic (this month):"

    Returns dict with priority/timeframe/theme that should apply to tasks in
    this section.  Only the keys that could be inferred are present.
    """
    context: dict[str, Any] = {}
    title_lower = section_title.lower()

    # Detect theme from section title
    if "sprint" in title_lower:
        context["theme_tag"] = "sprint_work"
        context["priority"] = "high"  # Sprint work is usually high priority
    elif "strategic" in title_lower:
        context["theme_tag"] = "strategic"
    elif "admin" in title_lower or "quick" in title_lower:
        context["theme_tag"] = "admin"
        context["priority"] = "low"
    elif "learning" in title_lower:
        context["theme_tag"] = "learning"

    # Detect timeframe from section title ("week" also covers "this week")
    if "week" in title_lower:
        context["timeframe"] = "this_week"
    elif "month" in title_lower:
        context["timeframe"] = "this_month"
    elif "someday" in title_lower or "backlog" in title_lower:
        context["timeframe"] = "someday"

    # A due date overrides whatever was inferred above
    if _DUE_RE.search(section_title):
        context["timeframe"] = "this_week"  # Assume due dates are urgent
        context["priority"] = "high"

    return context

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/94aharris/coach-ai'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

task_parser.py•7.39 KiB