Skip to main content
Glama
saidsurucu

Yargı MCP

by saidsurucu

get_kvkk_document_markdown

Retrieve full text of KVKK data protection decisions in paginated Markdown format with metadata for legal research and analysis.

Instructions

Use this when retrieving full text of a KVKK data protection decision. Returns paginated Markdown with metadata.

Input Schema

Parameters:

- decision_url (string, required): KVKK decision URL from search results.
- page_number (integer, optional, default 1): Page number for the paginated Markdown content (1-indexed). Page 1 returns the first 5,000 characters.

Implementation Reference

  • The get_decision_document method is the core handler for the get_kvkk_document_markdown tool. It asynchronously fetches the KVKK decision webpage and extracts structured metadata and HTML content using BeautifulSoup. The HTML is then converted with MarkItDown into paginated Markdown chunks of 5,000 characters each, and the method returns a typed KvkkDocumentMarkdown response, handling errors gracefully.
    async def get_decision_document(self, decision_url: str, page_number: int = 1) -> KvkkDocumentMarkdown:
        """Retrieve a KVKK decision page and convert it to paginated Markdown.

        Args:
            decision_url: KVKK decision URL (as returned by search results).
            page_number: 1-indexed page of the fixed-size Markdown chunks
                (chunk size is ``self.DOCUMENT_MARKDOWN_CHUNK_SIZE``).

        Returns:
            KvkkDocumentMarkdown with metadata and the requested chunk. On
            failure, ``markdown_chunk`` is None, ``total_pages`` is 0 and
            ``error_message`` describes the problem; the method never raises.
        """
        logger.info(f"KvkkApiClient: Getting decision document from: {decision_url}, page: {page_number}")

        def _error_document(error_msg: str, extracted: Optional[Dict[str, Any]] = None) -> KvkkDocumentMarkdown:
            # Single builder for all error responses; reuses any metadata that
            # was successfully extracted before the failure occurred.
            data = extracted or {}
            return KvkkDocumentMarkdown(
                source_url=HttpUrl(decision_url),
                title=data.get("title"),
                decision_date=data.get("decision_date"),
                decision_number=data.get("decision_number"),
                subject_summary=data.get("subject_summary"),
                markdown_chunk=None,
                current_page=page_number,
                total_pages=0,
                is_paginated=False,
                error_message=error_msg
            )

        try:
            # Fetch the decision page
            response = await self.http_client.get(decision_url)
            response.raise_for_status()

            # Extract title/metadata and the content HTML from the page
            extracted_data = self._extract_decision_content_from_html(response.text, decision_url)

            # Convert HTML content to Markdown (None when no content was found)
            full_markdown_content = None
            if extracted_data["html_content"]:
                full_markdown_content = self._convert_html_to_markdown(extracted_data["html_content"])

            if not full_markdown_content:
                return _error_document("Could not convert document content to Markdown", extracted_data)

            # Pagination over fixed-size character chunks; at least one page,
            # since full_markdown_content is non-empty here.
            content_length = len(full_markdown_content)
            total_pages = max(1, math.ceil(content_length / self.DOCUMENT_MARKDOWN_CHUNK_SIZE))

            # Clamp the requested page into the valid range instead of erroring
            current_page_clamped = max(1, min(page_number, total_pages))

            # Extract the requested chunk
            start_index = (current_page_clamped - 1) * self.DOCUMENT_MARKDOWN_CHUNK_SIZE
            markdown_chunk = full_markdown_content[start_index:start_index + self.DOCUMENT_MARKDOWN_CHUNK_SIZE]

            return KvkkDocumentMarkdown(
                source_url=HttpUrl(decision_url),
                title=extracted_data["title"],
                decision_date=extracted_data["decision_date"],
                decision_number=extracted_data["decision_number"],
                subject_summary=extracted_data["subject_summary"],
                markdown_chunk=markdown_chunk,
                current_page=current_page_clamped,
                total_pages=total_pages,
                is_paginated=(total_pages > 1),
                error_message=None
            )

        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP error {e.response.status_code} when fetching decision document"
            logger.error(f"KvkkApiClient: {error_msg}")
            return _error_document(error_msg)
        except Exception as e:
            error_msg = f"Unexpected error when fetching decision document: {str(e)}"
            # logger.exception records the traceback, which logger.error loses
            logger.exception(f"KvkkApiClient: {error_msg}")
            return _error_document(error_msg)
  • Pydantic BaseModel defining the input/output schema for the tool response, including pagination fields, metadata, markdown content chunk, and error handling.
    class KvkkDocumentMarkdown(BaseModel):
        """Model for KVKK decision document content converted to paginated Markdown.

        Metadata fields are None when they could not be extracted from the
        decision page; error_message is set (and markdown_chunk is None)
        when retrieval or conversion failed.
        """
        source_url: HttpUrl = Field(description="URL of the original KVKK decision page.")
        title: Optional[str] = Field(None, description="Title of the KVKK decision.")
        decision_date: Optional[str] = Field(None, description="Decision date (Karar Tarihi).")
        decision_number: Optional[str] = Field(None, description="Decision number (Karar No).")
        subject_summary: Optional[str] = Field(None, description="Subject summary (Konu Özeti).")
        markdown_chunk: Optional[str] = Field(None, description="A 5,000 character chunk of the Markdown content.")
        current_page: int = Field(description="The current page number of the markdown chunk (1-indexed).")
        total_pages: int = Field(description="Total number of pages for the full markdown content.")
        is_paginated: bool = Field(description="True if the full markdown content is split into multiple pages.")
        # Was description="Value" — a placeholder that leaked into the tool schema.
        error_message: Optional[str] = Field(None, description="Error message if the document could not be retrieved or converted.")

        class Config:
            # Serialize HttpUrl fields as plain strings in JSON output.
            json_encoders = {
                HttpUrl: str
            }
  • Supporting utility that converts extracted HTML content to Markdown format using the MarkItDown library, handling UTF-8 encoding via BytesIO to prevent file path issues.
    def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
        """Convert an HTML fragment to Markdown text.

        The HTML is fed to MarkItDown through an in-memory BytesIO stream,
        which sidesteps temporary-file creation (and over-long filenames).
        Returns None for empty input or when conversion fails.
        """
        if not html_content:
            return None

        try:
            # Encode to UTF-8 and wrap in an in-memory binary stream
            stream = io.BytesIO(html_content.encode('utf-8'))
            converter = MarkItDown(enable_plugins=False)
            return converter.convert(stream).text_content
        except Exception as exc:
            logger.error(f"Error converting HTML to Markdown: {exc}")
            return None
  • Key helper function that parses the KVKK decision HTML using BeautifulSoup to extract title, metadata (date, number, summary) from structured table, and full content div for markdown conversion.
    def _extract_decision_content_from_html(self, html: str, url: str) -> Dict[str, Any]:
        """Parse a KVKK decision page and extract title, metadata and content HTML.

        Args:
            html: Raw HTML of the decision page.
            url: Source URL, used only in log messages.

        Returns:
            Dict with keys "title", "decision_date", "decision_number",
            "subject_summary" and "html_content". Each value is None when the
            corresponding piece could not be found; on any parse failure the
            whole dict is all-None.
        """
        # Single result template, filled in progressively; replaces the three
        # duplicated dict literals the original returned on each path.
        result: Dict[str, Any] = {
            "title": None,
            "decision_date": None,
            "decision_number": None,
            "subject_summary": None,
            "html_content": None,
        }
        try:
            soup = BeautifulSoup(html, 'html.parser')

            # Title: prefer the blog post heading, fall back to <title>
            title_element = soup.find('h3', class_='blog-post-title')
            if title_element:
                result["title"] = title_element.get_text(strip=True)
            elif soup.title:
                result["title"] = soup.title.get_text(strip=True)

            # Main content container, with a looser fallback selector
            content_div = soup.find('div', class_='blog-post-inner')
            if not content_div:
                content_div = soup.find('div', style='text-align:justify;')
            if not content_div:
                logger.warning(f"Could not find decision content div in {url}")
                return result  # title may be set; everything else stays None

            # Metadata table: field name in cell 0, value in cell 2
            # (cell 1 is presumably a separator — confirm against live pages)
            table = content_div.find('table')
            if table:
                for row in table.find_all('tr'):
                    cells = row.find_all('td')
                    if len(cells) < 3:
                        continue
                    field_name = cells[0].get_text(strip=True)
                    field_value = cells[2].get_text(strip=True)

                    if 'Karar Tarihi' in field_name:
                        result["decision_date"] = field_value
                    elif 'Karar No' in field_name:
                        result["decision_number"] = field_value
                    elif 'Konu Özeti' in field_name:
                        result["subject_summary"] = field_value

            result["html_content"] = str(content_div)
            return result

        except Exception as e:
            logger.error(f"Error extracting content from HTML for {url}: {e}")
            # Match the original contract: an all-None dict on parse failure,
            # even if the title had already been extracted.
            return {
                "title": None,
                "decision_date": None,
                "decision_number": None,
                "subject_summary": None,
                "html_content": None,
            }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/saidsurucu/yargi-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server