"""
Markdown Task Parser - Extract tasks from markdown bullet lists
"""
import re
from typing import List, Dict, Optional
class MarkdownTask:
    """A single task parsed from a markdown checkbox line.

    Attributes are plain and mutable; the parser fills them in as it reads
    the lines that follow the task's title line.
    """

    def __init__(self, title: str):
        """Create a task with the given title and empty/default metadata.

        Args:
            title: The task title text.
        """
        self.title = title
        self.tags: List[str] = []            # tag names attached to the task
        self.project: Optional[str] = None   # project name, if one was given
        self.content: List[str] = []         # content lines ('' = blank line)
        self.completed = False               # True when the checkbox is ticked

    def to_dict(self) -> Dict:
        """Convert to dictionary for API.

        Returns:
            A dict with the keys ``title``, ``tags``, ``project``,
            ``content`` and ``completed``. Content lines are joined with
            newlines (an empty string when there is no content).
        """
        return {
            'title': self.title,
            # Hand out a copy so that callers mutating the returned dict
            # cannot change this task's own tag list behind its back.
            'tags': list(self.tags),
            'project': self.project,
            'content': '\n'.join(self.content),
            'completed': self.completed,
        }

    def __repr__(self):
        return f"<MarkdownTask: {self.title} | Tags: {self.tags} | Project: {self.project}>"
class MarkdownTaskParser:
    """Parse markdown text and extract checkbox tasks.

    A task starts on a line matching ``TASK_PATTERN`` (an optional ``-`` or
    ``*`` bullet followed by ``[ ]`` / ``[x]`` and a title). Indented lines
    after a task belong to it: ``Tags:``, ``Project:`` and
    ``Description:`` / ``Desc:`` lines are read as metadata, every other
    indented line is kept as content. A non-indented, non-empty line that is
    not itself a task ends the current task.
    """

    TASK_PATTERN = re.compile(r'^[-*]?\s*\[([ xX])\]\s+(.+)$')
    METADATA_PATTERN = re.compile(r'^([A-Za-z]+):\s*(.+)$')
    # A line belongs to the current task when it starts with one of these.
    # A leading tab is accepted as well as two spaces so that tab-indented
    # files do not have every task cut short at its first detail line.
    _INDENT_PREFIXES = ('  ', '\t')

    def __init__(self):
        # Tasks found by the most recent parse_content() call.
        self.tasks: List[MarkdownTask] = []

    def parse_file(self, file_path: str) -> List["MarkdownTask"]:
        """
        Parse a markdown file and extract tasks

        The file is read as UTF-8 and handed to ``parse_content``.

        Args:
            file_path: Path to markdown file
        Returns:
            List of MarkdownTask objects
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        return self.parse_content(content)

    def parse_content(self, content: str) -> List["MarkdownTask"]:
        """
        Parse markdown content string

        Replaces ``self.tasks`` with the tasks found in ``content`` and
        returns that same list.

        Args:
            content: Markdown content as string
        Returns:
            List of MarkdownTask objects
        """
        self.tasks = []
        current_task: Optional[MarkdownTask] = None

        for raw_line in content.split('\n'):
            stripped = raw_line.strip()

            # The task pattern is matched against the raw line because its
            # optional bullet is anchored at the start of the line.
            task_match = self.TASK_PATTERN.match(raw_line)
            if task_match:
                if current_task is not None:
                    self.tasks.append(current_task)
                current_task = MarkdownTask(task_match.group(2).strip())
                current_task.completed = task_match.group(1).lower() == 'x'
                continue

            if current_task is None:
                # Text outside of any task is ignored.
                continue

            if not stripped:
                # Blank (or whitespace-only) line inside a task: keep it as a
                # paragraph break, but never as a leading blank line.
                if current_task.content:
                    current_task.content.append('')
            elif raw_line.startswith(self._INDENT_PREFIXES):
                self._apply_detail(current_task, stripped)
            else:
                # Non-indented, non-empty line - end of current task
                self.tasks.append(current_task)
                current_task = None

        if current_task is not None:
            self.tasks.append(current_task)
        return self.tasks

    def _apply_detail(self, task: "MarkdownTask", text: str) -> None:
        """Store one stripped, indented line on ``task`` as metadata or content."""
        metadata_match = self.METADATA_PATTERN.match(text)
        if metadata_match:
            key = metadata_match.group(1).lower()
            value = metadata_match.group(2).strip()
            if key == 'tags':
                task.tags = [
                    tag.strip()
                    for tag in value.split(',')
                    if tag.strip()
                ]
                return
            if key == 'project':
                task.project = value
                return
            if key in ('description', 'desc'):
                task.content.append(value)
                return
        # Either no "Key: value" shape at all, or a key this parser does not
        # know (e.g. "Note: ..." or a URL such as "https://..."). Keep the
        # whole line as content instead of silently discarding it.
        task.content.append(text)

    def get_summary(self) -> Dict:
        """Get parsing summary statistics

        Returns:
            Counts computed over ``self.tasks``: total tasks, completed
            tasks, tasks with at least one tag, tasks with a project, and
            the number of distinct tags and distinct projects.
        """
        tasks = self.tasks
        return {
            'total_tasks': len(tasks),
            'completed_tasks': sum(1 for t in tasks if t.completed),
            'tasks_with_tags': sum(1 for t in tasks if t.tags),
            'tasks_with_project': sum(1 for t in tasks if t.project),
            'unique_tags': len({tag for t in tasks for tag in t.tags}),
            'unique_projects': len({t.project for t in tasks if t.project}),
        }