get_bedesten_document_markdown
Retrieve full text of Turkish court decisions in clean Markdown format using document IDs from Bedesten search results.
Instructions
Use this when retrieving full text of any Bedesten-supported court decision. Returns clean Markdown format.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| documentId | Yes | Document ID from Bedesten search results | |
Implementation Reference
# bedesten_mcp_module/client.py:83-147 (handler)
# Core handler function that implements the get_bedesten_document_markdown
# tool logic: fetches document by ID from Bedesten API, handles base64
# decoding, supports HTML and PDF mime types, converts to markdown using
# MarkItDown, and structures the response.
async def get_document_as_markdown(self, document_id: str) -> BedestenDocumentMarkdown:
    """Fetch a document by ID and return its content converted to Markdown.

    The document is requested by POSTing to ``self.DOCUMENT_ENDPOINT``; the
    base64-encoded ``content`` field of the response is decoded and then
    converted according to its ``mimeType``. HTML (``text/html``) and PDF
    (``application/pdf``) are supported. Any other type yields a placeholder
    message instead of converted content.

    Args:
        document_id: Document ID taken from Bedesten search results.

    Returns:
        A ``BedestenDocumentMarkdown`` carrying the document ID, the converted
        Markdown (or a placeholder/error message), a source URL built from
        ``self.BASE_URL``, and the mime type exactly as reported by the API.

    Raises:
        ValueError: If the response lacks ``data``, ``content`` or
            ``mimeType``, or if the content is not valid base64.
        httpx.HTTPError: On transport failures or non-success HTTP status
            codes (re-raised after logging).
    """
    logger.info(
        "BedestenApiClient: Fetching document for markdown conversion (ID: %s)",
        document_id,
    )

    try:
        # Prepare request
        doc_request = BedestenDocumentRequest(
            data=BedestenDocumentRequestData(documentId=document_id)
        )

        # Get document
        response = await self.http_client.post(
            self.DOCUMENT_ENDPOINT,
            json=doc_request.model_dump(),
        )
        response.raise_for_status()
        doc_response = BedestenDocumentResponse(**response.json())

        # Null-safety checks: each required piece must be present and non-None.
        data = getattr(doc_response, "data", None)
        if data is None:
            raise ValueError("Document response does not contain data")

        content = getattr(data, "content", None)
        if content is None:
            raise ValueError("Document data does not contain content")

        mime_type = getattr(data, "mimeType", None)
        if mime_type is None:
            raise ValueError("Document data does not contain mimeType")

        # Decode base64 content; chain the cause so the original decoding
        # error stays visible in the traceback.
        try:
            content_bytes = base64.b64decode(content)
        except Exception as e:
            raise ValueError(f"Failed to decode base64 content: {str(e)}") from e

        logger.info("BedestenApiClient: Document mime type: %s", mime_type)

        # Media types are case-insensitive and may carry parameters
        # (e.g. "text/html; charset=utf-8"), so dispatch on the bare,
        # lower-cased type while still reporting the original value.
        normalized_mime = mime_type.split(";", 1)[0].strip().lower()

        # Convert to markdown based on mime type
        if normalized_mime == "text/html":
            html_content = content_bytes.decode("utf-8")
            markdown_content = self._convert_html_to_markdown(html_content)
        elif normalized_mime == "application/pdf":
            markdown_content = self._convert_pdf_to_markdown(content_bytes)
        else:
            logger.warning("Unsupported mime type: %s", mime_type)
            markdown_content = (
                f"Unsupported content type: {mime_type}. "
                "Unable to convert to markdown."
            )

        return BedestenDocumentMarkdown(
            documentId=document_id,
            markdown_content=markdown_content,
            source_url=f"{self.BASE_URL}/document/{document_id}",
            mime_type=mime_type,
        )

    except httpx.HTTPError as e:
        # HTTPError is the common base of RequestError (transport problems)
        # and HTTPStatusError (raised by raise_for_status), so both kinds of
        # HTTP failure are logged as such before being re-raised unchanged.
        logger.error(
            "BedestenApiClient: HTTP error fetching document %s: %s",
            document_id,
            e,
        )
        raise
    except Exception as e:
        logger.error(
            "BedestenApiClient: Error processing document %s: %s",
            document_id,
            e,
        )
        raise
# bedesten_mcp_module/models.py:87-91 (schema)
# Pydantic schema defining the output structure of the tool response.
#
# Fields:
#   documentId       - required; the Bedesten document ID.
#   markdown_content - optional; the decision text converted to Markdown.
#   source_url       - required; URL the document was sourced from.
#   mime_type        - optional; content type of the original document.
class BedestenDocumentMarkdown(BaseModel):
    documentId: str = Field(
        ...,
        description="The document ID (Belge Kimliği) from Bedesten",
    )
    markdown_content: Optional[str] = Field(
        default=None,
        description="The decision content (Karar İçeriği) converted to Markdown",
    )
    source_url: str = Field(
        ...,
        description="The source URL (Kaynak URL) of the document",
    )
    mime_type: Optional[str] = Field(
        default=None,
        description="Original content type (İçerik Türü) (text/html or application/pdf)",
    )
- Helper function for converting HTML content to markdown.def _convert_html_to_markdown(self, html_content: str) -> Optional[str]: """Convert HTML to Markdown using MarkItDown""" if not html_content: return None try: # Convert HTML string to bytes and create BytesIO stream html_bytes = html_content.encode('utf-8') html_stream = io.BytesIO(html_bytes) # Pass BytesIO stream to MarkItDown to avoid temp file creation md_converter = MarkItDown() result = md_converter.convert(html_stream) markdown_content = result.text_content logger.info("Successfully converted HTML to Markdown") return markdown_content except Exception as e: logger.error(f"Error converting HTML to Markdown: {e}") return f"Error converting HTML content: {str(e)}"
- Helper function for converting PDF content to markdown.def _convert_pdf_to_markdown(self, pdf_bytes: bytes) -> Optional[str]: """Convert PDF to Markdown using MarkItDown""" if not pdf_bytes: return None try: # Create BytesIO stream from PDF bytes pdf_stream = io.BytesIO(pdf_bytes) # Pass BytesIO stream to MarkItDown to avoid temp file creation md_converter = MarkItDown() result = md_converter.convert(pdf_stream) markdown_content = result.text_content logger.info("Successfully converted PDF to Markdown") return markdown_content except Exception as e: logger.error(f"Error converting PDF to Markdown: {e}") return f"Error converting PDF content: {str(e)}. The document may be corrupted or in an unsupported format."
# bedesten_mcp_module/models.py:71-77 (schema)
# Input schema models used by the handler for API requests.


# Inner payload of a document request: carries the ID of the document to fetch.
class BedestenDocumentRequestData(BaseModel):
    documentId: str


# Request envelope: wraps the payload under `data` and adds an
# `applicationName` field that defaults to "UyapMevzuat".
class BedestenDocumentRequest(BaseModel):
    data: BedestenDocumentRequestData
    applicationName: str = "UyapMevzuat"