# We provide all the information about MCP servers via our MCP API:
#   curl -X GET 'https://glama.ai/api/mcp/v1/servers/89jobrien/mcp-joecc'
# If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""Markdown adapter for parsing local TO-DO.md and TODO.md files.
Implements the TaskAdapter interface for local markdown task files.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from loguru import logger
from mcp_task_aggregator.adapters.base import TaskAdapter
from mcp_task_aggregator.models import (
ExternalTaskMetadata,
MarkdownMetadata,
Tag,
Todo,
TodoSource,
TodoStatus,
)
@dataclass
class MarkdownConfig:
"""Configuration for Markdown adapter."""
search_paths: list[Path]
file_patterns: list[str] = None
def __post_init__(self) -> None:
"""Set default file patterns if not provided."""
if self.file_patterns is None:
self.file_patterns = ["TO-DO.md", "TODO.md", "todo.md", "to-do.md"]
# Checkbox state mapping to internal TodoStatus.
# States not listed here fall back to TodoStatus.TODO (see map_status below).
CHECKBOX_STATUS_MAP: dict[str, TodoStatus] = {
    "[ ]": TodoStatus.TODO,
    "[x]": TodoStatus.DONE,
    "[X]": TodoStatus.DONE,  # uppercase variant of done
    "[-]": TodoStatus.CANCELLED,
    "[~]": TodoStatus.IN_PROGRESS,
    "[>]": TodoStatus.BLOCKED,
    "[?]": TodoStatus.IN_REVIEW,
}

# Regex pattern for markdown checkboxes; the state class below must stay in
# sync with the keys of CHECKBOX_STATUS_MAP.
CHECKBOX_PATTERN = re.compile(
    r"^(\s*)"  # Leading whitespace (capture group 1 - indent)
    r"[-*+]\s+"  # List marker (-, *, or +) followed by space
    r"(\[[ xX\-~>?]\])"  # Checkbox state (capture group 2)
    r"\s+"  # Space after checkbox
    r"(.+)$"  # Task content (capture group 3)
)

# Regex pattern for priority markers like (P1), (P2), etc. (digits 0-5 only)
PRIORITY_PATTERN = re.compile(r"\(P([0-5])\)")

# Regex pattern for due date markers like (due:2024-12-25), ISO date shape
DUE_DATE_PATTERN = re.compile(r"\(due:(\d{4}-\d{2}-\d{2})\)")

# Regex pattern for tags like #tag or @context; must start the line or
# follow whitespace so mid-word '#'/'@' are not picked up
TAG_PATTERN = re.compile(r"(?:^|\s)([#@]\w+)")
class MarkdownAdapter(TaskAdapter):
    """Adapter for parsing tasks from local markdown files.

    Scans the configured search paths for todo-style markdown files,
    extracts checkbox list items, and normalizes them into Todo models.
    """

    def __init__(self, config: MarkdownConfig) -> None:
        """Initialize Markdown adapter.

        Args:
            config: Configuration with search paths and file patterns.
        """
        self.config = config

    def fetch_tasks(self, **kwargs: Any) -> list[dict[str, Any]]:
        """Fetch tasks from markdown files.

        Args:
            **kwargs: Optional parameters:
                - search_paths: Override default search paths.
                - file_patterns: Override default file patterns.

        Returns:
            List of raw task data as dictionaries.
        """
        search_paths = kwargs.get("search_paths", self.config.search_paths)
        file_patterns = kwargs.get("file_patterns", self.config.file_patterns)
        logger.info(f"Searching for markdown todo files in {len(search_paths)} paths")
        tasks: list[dict[str, Any]] = []
        for search_path in search_paths:
            path = Path(search_path)
            if not path.exists():
                logger.warning(f"Search path does not exist: {path}")
                continue
            found_files = self._find_todo_files(path, file_patterns)
            logger.debug(f"Found {len(found_files)} todo files in {path}")
            for file_path in found_files:
                tasks.extend(self._parse_file(file_path))
        logger.info(f"Fetched {len(tasks)} tasks from markdown files")
        return tasks

    def _find_todo_files(self, search_path: Path, patterns: list[str]) -> list[Path]:
        """Find todo files matching patterns in a directory.

        Args:
            search_path: Directory (or direct file path) to search.
            patterns: File name patterns to match.

        Returns:
            Sorted, de-duplicated list of matching file paths.
        """
        found_files: list[Path] = []
        if search_path.is_file():
            # Direct file path provided: accept it when it matches a
            # pattern or is any markdown file.
            if search_path.name in patterns or search_path.suffix == ".md":
                found_files.append(search_path)
        else:
            for pattern in patterns:
                # A single recursive glob suffices: "**/pattern" matches the
                # top-level directory as well as all subdirectories, so the
                # previous extra non-recursive glob only produced duplicates.
                found_files.extend(search_path.glob(f"**/{pattern}"))
        # Deduplicate (overlapping patterns can match the same file) and
        # sort for deterministic ordering.
        return sorted(set(found_files))

    def _parse_file(self, file_path: Path) -> list[dict[str, Any]]:
        """Parse a markdown file for checkbox tasks.

        Args:
            file_path: Path to the markdown file.

        Returns:
            List of raw task dictionaries; tasks parsed before a read
            error are still returned.
        """
        tasks: list[dict[str, Any]] = []
        current_heading: str | None = None
        try:
            file_text = file_path.read_text(encoding="utf-8")
            for line_number, line in enumerate(file_text.splitlines(), start=1):
                # Track the most recent heading to give each task its
                # section context.
                if line.startswith("#"):
                    current_heading = line.lstrip("#").strip()
                    continue
                match = CHECKBOX_PATTERN.match(line)
                if not match:
                    continue
                # Distinct name for the matched task text -- the original
                # reused "content", clobbering the whole-file text variable.
                task_text = match.group(3)
                tasks.append(
                    {
                        "file_path": str(file_path.resolve()),
                        "line_number": line_number,
                        "checkbox_state": match.group(2),
                        "indent_level": len(match.group(1)),
                        "parent_heading": current_heading,
                        "raw_line": line,
                        "content": task_text.strip(),
                    }
                )
        except Exception as e:
            # Per-file best-effort boundary: log and return what was parsed
            # so one unreadable file does not abort the whole scan.
            logger.error(f"Error parsing {file_path}: {e}")
        return tasks

    def normalize_task(self, raw_task: dict[str, Any]) -> Todo:
        """Normalize a markdown task to the internal Todo model.

        Args:
            raw_task: Raw task data from file parsing.

        Returns:
            Normalized Todo instance.
        """
        content = raw_task["content"]
        status = self.map_status(raw_task["checkbox_state"])
        priority = self._extract_priority(content)
        due_date = self._extract_due_date(content)
        # Strip inline metadata markers so the stored content is clean.
        clean_content = self._clean_content(content)
        # "path:line" uniquely identifies a task on the local filesystem.
        source_id = f"{raw_task['file_path']}:{raw_task['line_number']}"
        # file:// URL; file_path is an absolute (resolved) path.
        source_url = f"file://{raw_task['file_path']}#L{raw_task['line_number']}"
        markdown_metadata = MarkdownMetadata(
            file_path=raw_task["file_path"],
            line_number=raw_task["line_number"],
            checkbox_state=raw_task["checkbox_state"],
            indent_level=raw_task["indent_level"],
            parent_heading=raw_task["parent_heading"],
            raw_line=raw_task["raw_line"],
        )
        external_metadata = ExternalTaskMetadata(
            markdown=markdown_metadata,
            raw_response=raw_task,
            # NOTE(review): naive local timestamp -- confirm whether the
            # model expects timezone-aware datetimes.
            fetched_at=datetime.now(),
        )
        tags = self._build_tags(raw_task)
        sync_hash = self.generate_sync_hash(raw_task)
        return Todo(
            content=clean_content,
            status=status,
            priority=priority,
            due_date=due_date,
            source_system=TodoSource.MARKDOWN,
            source_id=source_id,
            source_url=source_url,
            external_metadata=external_metadata,
            sync_hash=sync_hash,
            tags=tags,
        )

    def map_status(self, external_status: str) -> TodoStatus:
        """Map checkbox state to internal TodoStatus.

        Args:
            external_status: Checkbox state string (e.g., "[ ]", "[x]").

        Returns:
            Corresponding TodoStatus; unknown states default to TODO.
        """
        return CHECKBOX_STATUS_MAP.get(external_status, TodoStatus.TODO)

    def _extract_priority(self, content: str) -> int:
        """Extract priority from task content.

        Looks for markers like (P1), (P2), etc.

        Args:
            content: Task content string.

        Returns:
            Priority integer (0-5); 0 when no marker is present.
        """
        match = PRIORITY_PATTERN.search(content)
        return int(match.group(1)) if match else 0

    def _extract_due_date(self, content: str) -> datetime | None:
        """Extract due date from task content.

        Looks for markers like (due:2024-12-25).

        Args:
            content: Task content string.

        Returns:
            datetime if a valid date marker is found, None otherwise.
        """
        match = DUE_DATE_PATTERN.search(content)
        if match:
            try:
                return datetime.strptime(match.group(1), "%Y-%m-%d")
            except ValueError:
                # Pattern matched the ISO shape but not a real date
                # (e.g. month 13); treat as no due date.
                pass
        return None

    def _clean_content(self, content: str) -> str:
        """Clean task content by removing metadata markers.

        Removes priority markers like (P1) and due date markers, then
        collapses runs of whitespace.

        Args:
            content: Raw task content.

        Returns:
            Cleaned content string.
        """
        content = PRIORITY_PATTERN.sub("", content)
        content = DUE_DATE_PATTERN.sub("", content)
        return " ".join(content.split())

    def _build_tags(self, raw_task: dict[str, Any]) -> list[Tag]:
        """Build tags list from markdown task data.

        Extracts:
            - `markdown` tag for all markdown tasks
            - `file:<filename>` for the source file
            - `section:<heading>` when the task sits under a heading
            - `#tag` hashtags from content (stored without the `#`)
            - `@context` tags from content (stored as `context:<name>`)

        Args:
            raw_task: Raw task data from file parsing.

        Returns:
            List of Tag objects.
        """
        tags = [Tag(name="markdown")]
        file_path = Path(raw_task["file_path"])
        tags.append(Tag(name=f"file:{file_path.name}"))
        if raw_task.get("parent_heading"):
            # Lowercase and hyphenate the heading for a tag-safe name.
            heading_tag = raw_task["parent_heading"].lower().replace(" ", "-")
            tags.append(Tag(name=f"section:{heading_tag}"))
        content = raw_task["content"]
        for match in TAG_PATTERN.finditer(content):
            tag_name = match.group(1)
            if tag_name.startswith("@"):
                tags.append(Tag(name=f"context:{tag_name[1:]}"))
            elif tag_name.startswith("#"):
                tags.append(Tag(name=tag_name[1:]))  # Remove # prefix
        return tags