"""
Attachment Helper - Shared utilities for attachment processing
Provides:
- File storage with unique tokens
- Markdown conversion via markitdown
- Public URL generation
"""
import base64
import hashlib
import html
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional
from urllib.parse import quote
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Directory on disk where saved attachments are written.
ATTACHMENTS_DIR = "/app/attachments"

# Public base URL used to build download links. Read from the
# ATTACHMENTS_BASE_URL environment variable (configured in .env); the
# fallback uses the /iris/ prefix, which nginx proxies to iris-app:8001/mcp/.
BASE_DOWNLOAD_URL = os.environ.get(
    "ATTACHMENTS_BASE_URL",
    "https://trustypa.brainaihub.tech/iris/attachments",
)
def generate_token(content: bytes) -> str:
    """Build a storage token of the form ``<YYYYMMDD_HHMMSS>_<hash prefix>``.

    The prefix is the current local time (naive ``datetime.now()``) at
    one-second resolution; the suffix is the first 16 hex characters of the
    SHA-256 digest of ``content``.
    """
    digest_prefix = hashlib.sha256(content).hexdigest()[:16]
    return f"{datetime.now():%Y%m%d_%H%M%S}_{digest_prefix}"
def _sanitize_attachment_name(filename: str) -> str:
    """Reduce a caller-supplied filename to one non-empty path component."""
    # Treat both "/" and "\" as separators, since the name may originate from
    # a client on another OS. NUL bytes are dropped because open() rejects them.
    flattened = (filename or "").replace("\\", "/").replace("\x00", "")
    return os.path.basename(flattened) or "attachment"


def save_attachment(
    content: bytes,
    filename: str,
    token: Optional[str] = None
) -> Dict[str, str]:
    """
    Save attachment to filesystem with unique token.

    The file is written to ``ATTACHMENTS_DIR`` (created if missing) under the
    name ``<token>_<basename of filename>``. Only the final path component of
    ``filename`` is used for the on-disk name, so directory separators in an
    attachment name cannot redirect the write. The stored name is
    percent-encoded in the download URL, and both the URL and the visible
    link text are HTML-escaped in the generated anchor.

    Args:
        content: Binary file content
        filename: Original filename (used as the link text; its basename is
            used for the stored file)
        token: Optional custom token (auto-generated from the content and
            current time if None or empty). It is used as given and is not
            sanitized.

    Returns:
        {
            "file_path": "/app/attachments/token_filename.pdf",
            "token": "20231124_abc123",
            "download_url": "https://.../download/token_filename.pdf",
            "download_link_html": "<a href='...' target='_blank'>filename.pdf</a>"
        }

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Create attachments directory
    os.makedirs(ATTACHMENTS_DIR, exist_ok=True)

    # Generate token if not provided
    if not token:
        token = generate_token(content)

    # Build safe filename: keep only the last path component of the input
    clean_name = _sanitize_attachment_name(filename)
    safe_filename = f"{token}_{clean_name}"
    file_path = os.path.join(ATTACHMENTS_DIR, safe_filename)

    # Save file
    with open(file_path, 'wb') as f:
        f.write(content)
    logger.info(f"💾 Saved attachment: {file_path} ({len(content)} bytes)")

    # Generate download URL; percent-encode so spaces, "#", "?" etc. in the
    # name do not change the meaning of the URL
    encoded_name = quote(safe_filename, safe="")
    download_url = f"{BASE_DOWNLOAD_URL}/download/{encoded_name}"

    # Generate HTML link that opens in new tab; escape both the attribute
    # value and the visible text so the filename cannot inject markup
    href = html.escape(download_url, quote=True)
    link_text = html.escape(filename or clean_name)
    download_link_html = f'<a href="{href}" target="_blank">{link_text}</a>'

    return {
        "file_path": file_path,
        "token": token,
        "download_url": download_url,
        "download_link_html": download_link_html
    }
def convert_to_markdown(file_path: str) -> Optional[str]:
    """
    Run markitdown on a file and return the extracted markdown text.

    Conversion is best-effort: any exception raised while importing
    markitdown, converting the file, or logging the result is caught, logged
    as an error, and reported to the caller as ``None``.

    Args:
        file_path: Path to file

    Returns:
        The converter's ``text_content``, or None if conversion fails
    """
    try:
        # Imported lazily, so a missing markitdown package is reported
        # through the same failure path as a failed conversion.
        from markitdown import MarkItDown

        markdown_content = MarkItDown().convert(file_path).text_content
        logger.info(f"✅ Converted to markdown: {file_path} ({len(markdown_content)} chars)")
    except Exception as e:
        logger.error(f"❌ Failed to convert {file_path} to markdown: {e}")
        return None
    return markdown_content
def process_attachment_for_analysis(
    content: bytes,
    filename: str,
    content_type: Optional[str] = None
) -> Dict:
    """
    Store an attachment and attempt to convert it to markdown.

    The content is first written to disk via ``save_attachment``; the stored
    file is then passed to ``convert_to_markdown``. A failed conversion does
    not raise: it yields ``markdown_content`` of None and
    ``markdown_available`` of False.

    Args:
        content: Binary file content
        filename: Original filename
        content_type: MIME type (optional; reported as
            "application/octet-stream" when missing or empty)

    Returns:
        {
            "filename": "document.pdf",
            "content_type": "application/pdf",
            "size": 123456,
            "file_path": "/app/attachments/20231124_abc123_document.pdf",
            "download_url": "https://.../download/20231124_abc123_document.pdf",
            "download_link_html": "<a href='...' target='_blank'>document.pdf</a>",
            "markdown_content": "# Document\n\nContent here...",
            "markdown_available": true
        }
    """
    stored = save_attachment(content, filename)
    markdown = convert_to_markdown(stored["file_path"])

    # Describe the input first, then copy over the storage details, then
    # append the conversion outcome.
    summary = {
        "filename": filename,
        "content_type": content_type or "application/octet-stream",
        "size": len(content),
    }
    for key in ("file_path", "download_url", "download_link_html"):
        summary[key] = stored[key]
    summary["markdown_content"] = markdown
    summary["markdown_available"] = markdown is not None
    return summary