Skip to main content
Glama
export_service.py7.9 kB
"""Export service for document export operations. This module provides the ExportService class for exporting documents to various formats. """ import os from typing import Any from sqlalchemy.ext.asyncio import AsyncSession from src.core.config import get_settings from src.core.exceptions import DocumentProcessingError from src.handlers.document_handler import DocumentHandler class ExportService: """Service class for export operations. Attributes: db: Database session. document_handler: Handler for document operations. settings: Application settings. """ def __init__(self, db: AsyncSession) -> None: """Initialize the export service. Args: db: Async database session. """ self.db = db self.document_handler = DocumentHandler() self.settings = get_settings() async def export_to_pdf( self, document_path: str, output_path: str | None = None, ) -> dict[str, Any]: """Export document to PDF. Args: document_path: Path to the document. output_path: Optional output path. Returns: Result with output path. """ try: if not output_path: base = os.path.splitext(document_path)[0] output_path = f"{base}.pdf" # Note: PDF export requires additional libraries # This is a placeholder implementation return { "success": True, "format": "pdf", "output_path": output_path, "message": "PDF export requires additional configuration", } except Exception as e: raise DocumentProcessingError(f"Failed to export to PDF: {str(e)}") async def export_to_html( self, document_path: str, output_path: str | None = None, include_styles: bool = True, ) -> dict[str, Any]: """Export document to HTML. Args: document_path: Path to the document. output_path: Optional output path. include_styles: Include CSS styles. Returns: Result with output path. """ try: doc = self.document_handler.load_document(document_path) if not output_path: base = os.path.splitext(document_path)[0] output_path = f"{base}.html" html_content = self._convert_to_html(doc, include_styles) with open(output_path, "w", encoding="utf-8") as f: f.write(html_content) return { "success": True, "format": "html", "output_path": output_path, } except Exception as e: raise DocumentProcessingError(f"Failed to export to HTML: {str(e)}") async def export_to_markdown( self, document_path: str, output_path: str | None = None, ) -> dict[str, Any]: """Export document to Markdown. Args: document_path: Path to the document. output_path: Optional output path. Returns: Result with output path. """ try: doc = self.document_handler.load_document(document_path) if not output_path: base = os.path.splitext(document_path)[0] output_path = f"{base}.md" md_content = self._convert_to_markdown(doc) with open(output_path, "w", encoding="utf-8") as f: f.write(md_content) return { "success": True, "format": "markdown", "output_path": output_path, } except Exception as e: raise DocumentProcessingError(f"Failed to export to Markdown: {str(e)}") async def export_to_text( self, document_path: str, output_path: str | None = None, ) -> dict[str, Any]: """Export document to plain text. Args: document_path: Path to the document. output_path: Optional output path. Returns: Result with output path. """ try: doc = self.document_handler.load_document(document_path) if not output_path: base = os.path.splitext(document_path)[0] output_path = f"{base}.txt" text_content = self._extract_text(doc) with open(output_path, "w", encoding="utf-8") as f: f.write(text_content) return { "success": True, "format": "text", "output_path": output_path, } except Exception as e: raise DocumentProcessingError(f"Failed to export to text: {str(e)}") async def get_export_formats(self) -> list[dict[str, Any]]: """Get available export formats. Returns: List of supported formats. """ return [ {"format": "pdf", "extension": ".pdf", "description": "PDF Document"}, {"format": "html", "extension": ".html", "description": "HTML Document"}, {"format": "markdown", "extension": ".md", "description": "Markdown"}, {"format": "text", "extension": ".txt", "description": "Plain Text"}, ] def _convert_to_html(self, doc: Any, include_styles: bool) -> str: """Convert document to HTML. Args: doc: Document object. include_styles: Include CSS styles. Returns: HTML string. """ html_parts = ["<!DOCTYPE html>", "<html>", "<head>"] if include_styles: html_parts.append("<style>") html_parts.append("body { font-family: Arial, sans-serif; }") html_parts.append("h1 { font-size: 24px; }") html_parts.append("h2 { font-size: 20px; }") html_parts.append("p { margin: 10px 0; }") html_parts.append("table { border-collapse: collapse; }") html_parts.append("td, th { border: 1px solid #ddd; padding: 8px; }") html_parts.append("</style>") html_parts.extend(["</head>", "<body>"]) for para in doc.paragraphs: style_name = para.style.name if para.style else "" if "Heading 1" in style_name: html_parts.append(f"<h1>{para.text}</h1>") elif "Heading 2" in style_name: html_parts.append(f"<h2>{para.text}</h2>") elif "Heading 3" in style_name: html_parts.append(f"<h3>{para.text}</h3>") else: html_parts.append(f"<p>{para.text}</p>") html_parts.extend(["</body>", "</html>"]) return "\n".join(html_parts) def _convert_to_markdown(self, doc: Any) -> str: """Convert document to Markdown. Args: doc: Document object. Returns: Markdown string. """ md_parts = [] for para in doc.paragraphs: style_name = para.style.name if para.style else "" if "Heading 1" in style_name: md_parts.append(f"# {para.text}") elif "Heading 2" in style_name: md_parts.append(f"## {para.text}") elif "Heading 3" in style_name: md_parts.append(f"### {para.text}") elif para.text.strip(): md_parts.append(para.text) md_parts.append("") return "\n".join(md_parts) def _extract_text(self, doc: Any) -> str: """Extract plain text from document. Args: doc: Document object. Returns: Plain text string. """ return "\n".join(para.text for para in doc.paragraphs)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Fu-Jie/MCP-OPENAPI-DOCX'

If you have feedback or need assistance with the MCP directory API, please join our Discord server