Email Processing MCP Server

by Cam10001110101
Verified
  • src
from dataclasses import dataclass, field, asdict from typing import List, Optional, Dict, Any from datetime import datetime import re import json def validate_json(text: str, field_name: str = "") -> bool: """Test if a string can be properly encoded as JSON.""" try: json.dumps(text) return True except (TypeError, ValueError): return False def sanitize_text(text: str | None) -> str: """Sanitize text for JSON encoding.""" if text is None: return "" # Convert to string if not already text = str(text) # Remove control characters and normalize text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text) # Remove control characters text = re.sub(r'\r\n|\r|\n', ' ', text) # Normalize line endings to spaces text = re.sub(r'\s+', ' ', text) # Collapse whitespace # Escape special characters text = text.replace('\\', '\\\\') text = text.replace('"', '\\"') text = text.replace('\t', ' ') return text.strip() @dataclass class EmailMetadata: AccountName: str Entry_ID: str Folder: str Subject: str SenderName: str SenderEmailAddress: str ReceivedTime: datetime SentOn: Optional[datetime] To: str Body: str Attachments: List[str] IsMarkedAsTask: bool UnRead: bool Categories: str GeneratedCategories: Optional[List[str]] = field(default_factory=list) embedding: Optional[List[float]] = field(default_factory=list) def to_dict(self) -> Dict[str, Any]: """Convert email metadata to a dictionary with validation.""" try: # Create initial dictionary with validation raw_data = {} # Required fields required_fields = ['AccountName', 'Entry_ID', 'Folder', 'Subject', 'ReceivedTime'] for field in required_fields: value = getattr(self, field, None) if value is None: raise ValueError(f"Missing required field: {field}") if field in ['ReceivedTime', 'SentOn']: raw_data[field] = value.isoformat() if value else None else: raw_data[field] = sanitize_text(str(value)) # Optional fields raw_data.update({ "SenderName": sanitize_text(getattr(self, 'SenderName', '')), "SenderEmailAddress": sanitize_text(getattr(self, 'SenderEmailAddress', '')), "SentOn": self.SentOn.isoformat() if getattr(self, 'SentOn', None) else None, "To": sanitize_text(getattr(self, 'To', '')), "Body": sanitize_text(getattr(self, 'Body', '')), "Attachments": ', '.join(sanitize_text(att) for att in getattr(self, 'Attachments', [])), "IsMarkedAsTask": bool(getattr(self, 'IsMarkedAsTask', False)), "UnRead": bool(getattr(self, 'UnRead', False)), "Categories": sanitize_text(getattr(self, 'Categories', '')), "GeneratedCategories": ', '.join(sanitize_text(cat) for cat in (getattr(self, 'GeneratedCategories', []) or [])), "embedding": self.embedding if isinstance(getattr(self, 'embedding', None), list) else [] }) # Sanitize the data data = { "AccountName": sanitize_text(raw_data["AccountName"]), "Entry_ID": sanitize_text(raw_data["Entry_ID"]), "Folder": sanitize_text(raw_data["Folder"]), "Subject": sanitize_text(raw_data["Subject"]), "SenderName": sanitize_text(raw_data["SenderName"]), "SenderEmailAddress": sanitize_text(raw_data["SenderEmailAddress"]), "ReceivedTime": raw_data["ReceivedTime"], "SentOn": raw_data["SentOn"], "To": sanitize_text(raw_data["To"]), "Body": sanitize_text(raw_data["Body"]), "Attachments": raw_data["Attachments"], "IsMarkedAsTask": bool(raw_data["IsMarkedAsTask"]), "UnRead": bool(raw_data["UnRead"]), "Categories": sanitize_text(raw_data["Categories"]), "GeneratedCategories": raw_data["GeneratedCategories"], "embedding": raw_data["embedding"] if isinstance(raw_data["embedding"], list) else [] } # Validate each field can be properly encoded as JSON for key, value in data.items(): if not validate_json(value, key): raise ValueError(f"Field {key} contains invalid JSON data") # Validate the entire object can be encoded as JSON try: json.dumps(data) return data except (TypeError, ValueError) as e: raise ValueError(f"Email metadata cannot be encoded as JSON: {str(e)}") except Exception as e: raise