get_kvkk_document_markdown
Retrieve full text of KVKK data protection decisions in paginated Markdown format with metadata for legal research and analysis.
Instructions
Use this when retrieving full text of a KVKK data protection decision. Returns paginated Markdown with metadata.
Input Schema
Parameters (JSON Schema)
| Name | Required | Description | Default |
|---|---|---|---|
| decision_url | Yes | KVKK decision URL from search results | |
| page_number | No | Page number for paginated Markdown content (1-indexed, accepts int). | 1 (first 5,000 characters) |
Implementation Reference
# kvkk_mcp_module/client.py:279-367 (handler)
# Core handler for the get_kvkk_document_markdown tool. Fetches the KVKK
# decision webpage, extracts metadata and HTML content via helper methods,
# converts the HTML to Markdown, and returns one fixed-size character page
# of it as a typed KvkkDocumentMarkdown response, handling errors gracefully.
async def get_decision_document(self, decision_url: str, page_number: int = 1) -> KvkkDocumentMarkdown:
    """Retrieve and convert a KVKK decision document to paginated Markdown.

    Args:
        decision_url: URL of the KVKK decision page (from search results).
        page_number: 1-indexed page of the Markdown content to return;
            values outside the valid range are clamped.

    Returns:
        KvkkDocumentMarkdown with metadata, one Markdown chunk, and
        pagination state; on failure, an instance whose ``error_message``
        is set and whose ``markdown_chunk`` is None.
    """
    logger.info(f"KvkkApiClient: Getting decision document from: {decision_url}, page: {page_number}")

    def _failure(message: str) -> KvkkDocumentMarkdown:
        # Shared shape of every error response: no content, zero pages.
        return KvkkDocumentMarkdown(
            source_url=HttpUrl(decision_url),
            title=None,
            decision_date=None,
            decision_number=None,
            subject_summary=None,
            markdown_chunk=None,
            current_page=page_number,
            total_pages=0,
            is_paginated=False,
            error_message=message,
        )

    try:
        # Fetch the decision page.
        response = await self.http_client.get(decision_url)
        response.raise_for_status()

        # Pull title/date/number/summary plus the raw content HTML.
        extracted = self._extract_decision_content_from_html(response.text, decision_url)

        markdown_text = None
        if extracted["html_content"]:
            markdown_text = self._convert_html_to_markdown(extracted["html_content"])

        if not markdown_text:
            # Conversion failed; still surface whatever metadata was parsed.
            return KvkkDocumentMarkdown(
                source_url=HttpUrl(decision_url),
                title=extracted["title"],
                decision_date=extracted["decision_date"],
                decision_number=extracted["decision_number"],
                subject_summary=extracted["subject_summary"],
                markdown_chunk=None,
                current_page=page_number,
                total_pages=0,
                is_paginated=False,
                error_message="Could not convert document content to Markdown",
            )

        # Pagination over fixed-size character chunks.
        chunk_size = self.DOCUMENT_MARKDOWN_CHUNK_SIZE
        total_pages = max(1, math.ceil(len(markdown_text) / chunk_size))
        # Clamp the requested page into [1, total_pages].
        current_page = min(max(page_number, 1), total_pages)
        start = (current_page - 1) * chunk_size

        return KvkkDocumentMarkdown(
            source_url=HttpUrl(decision_url),
            title=extracted["title"],
            decision_date=extracted["decision_date"],
            decision_number=extracted["decision_number"],
            subject_summary=extracted["subject_summary"],
            markdown_chunk=markdown_text[start:start + chunk_size],
            current_page=current_page,
            total_pages=total_pages,
            is_paginated=(total_pages > 1),
            error_message=None,
        )
    except httpx.HTTPStatusError as e:
        error_msg = f"HTTP error {e.response.status_code} when fetching decision document"
        logger.error(f"KvkkApiClient: {error_msg}")
        return _failure(error_msg)
    except Exception as e:
        error_msg = f"Unexpected error when fetching decision document: {str(e)}"
        logger.error(f"KvkkApiClient: {error_msg}")
        return _failure(error_msg)
# kvkk_mcp_module/models.py:33-49 (schema)
# Pydantic model defining the tool's response: decision metadata, one
# 5,000-character Markdown chunk, pagination state, and error reporting.
class KvkkDocumentMarkdown(BaseModel):
    """Model for KVKK decision document content converted to paginated Markdown."""
    source_url: HttpUrl = Field(description="URL of the original KVKK decision page.")
    title: Optional[str] = Field(None, description="Title of the KVKK decision.")
    decision_date: Optional[str] = Field(None, description="Decision date (Karar Tarihi).")
    decision_number: Optional[str] = Field(None, description="Decision number (Karar No).")
    subject_summary: Optional[str] = Field(None, description="Subject summary (Konu Özeti).")
    markdown_chunk: Optional[str] = Field(None, description="A 5,000 character chunk of the Markdown content.")
    current_page: int = Field(description="The current page number of the markdown chunk (1-indexed).")
    total_pages: int = Field(description="Total number of pages for the full markdown content.")
    is_paginated: bool = Field(description="True if the full markdown content is split into multiple pages.")
    # Fix: description was the placeholder "Value".
    error_message: Optional[str] = Field(None, description="Error message if the document could not be fetched or converted; None on success.")

    class Config:
        # Serialize HttpUrl values as plain strings in JSON output.
        json_encoders = {
            HttpUrl: str
        }
- kvkk_mcp_module/client.py:261-278 (helper)Supporting utility that converts extracted HTML content to Markdown format using the MarkItDown library, handling UTF-8 encoding via BytesIO to prevent file path issues.def _convert_html_to_markdown(self, html_content: str) -> Optional[str]: """Convert HTML content to Markdown using MarkItDown with BytesIO to avoid filename length issues.""" if not html_content: return None try: # Convert HTML string to bytes and create BytesIO stream html_bytes = html_content.encode('utf-8') html_stream = io.BytesIO(html_bytes) # Pass BytesIO stream to MarkItDown to avoid temp file creation md_converter = MarkItDown(enable_plugins=False) result = md_converter.convert(html_stream) return result.text_content except Exception as e: logger.error(f"Error converting HTML to Markdown: {e}") return None
- kvkk_mcp_module/client.py:194-260 (helper)Key helper function that parses the KVKK decision HTML using BeautifulSoup to extract title, metadata (date, number, summary) from structured table, and full content div for markdown conversion.def _extract_decision_content_from_html(self, html: str, url: str) -> Dict[str, Any]: """Extract decision content from KVKK decision page HTML.""" try: soup = BeautifulSoup(html, 'html.parser') # Extract title title = None title_element = soup.find('h3', class_='blog-post-title') if title_element: title = title_element.get_text(strip=True) elif soup.title: title = soup.title.get_text(strip=True) # Extract decision content from the main content div content_div = soup.find('div', class_='blog-post-inner') if not content_div: # Fallback to other possible content containers content_div = soup.find('div', style='text-align:justify;') if not content_div: logger.warning(f"Could not find decision content div in {url}") return { "title": title, "decision_date": None, "decision_number": None, "subject_summary": None, "html_content": None } # Extract decision metadata from table decision_date = None decision_number = None subject_summary = None table = content_div.find('table') if table: rows = table.find_all('tr') for row in rows: cells = row.find_all('td') if len(cells) >= 3: field_name = cells[0].get_text(strip=True) field_value = cells[2].get_text(strip=True) if 'Karar Tarihi' in field_name: decision_date = field_value elif 'Karar No' in field_name: decision_number = field_value elif 'Konu Özeti' in field_name: subject_summary = field_value return { "title": title, "decision_date": decision_date, "decision_number": decision_number, "subject_summary": subject_summary, "html_content": str(content_div) } except Exception as e: logger.error(f"Error extracting content from HTML for {url}: {e}") return { "title": None, "decision_date": None, "decision_number": None, "subject_summary": None, "html_content": None }