get_rekabet_kurumu_document
Retrieve full text of Turkish Competition Authority decisions in paginated Markdown format using the decision's unique identifier for legal research and analysis.
Instructions
Use this tool to retrieve the full text of a Competition Authority decision. The text is returned as paginated Markdown.
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| karar_id | Yes | GUID (kararId) of the Rekabet Kurumu decision. This ID is obtained from search results. | |
| page_number | No | Requested page number for the Markdown content converted from PDF (1-indexed, accepts int). | 1 |
Implementation Reference
- rekabet_mcp_module/client.py:307-403 (handler)The main handler logic for retrieving a specific Rekabet Kurumu decision document by karar_id. Downloads the PDF from the landing page, extracts the requested page, converts to Markdown, and returns structured data including error handling and pagination support.async def get_decision_document(self, karar_id: str, page_number: int = 1) -> RekabetDocument: if not karar_id: return RekabetDocument( source_landing_page_url=HttpUrl(f"{self.BASE_URL}"), karar_id=karar_id or "UNKNOWN_KARAR_ID", error_message="karar_id is required.", current_page=1, total_pages=0, is_paginated=False ) decision_url_path = f"{self.DECISION_LANDING_PATH_TEMPLATE}?kararId={karar_id}" full_landing_page_url = urljoin(self.BASE_URL, decision_url_path) logger.info(f"RekabetKurumuApiClient: Getting decision document: {full_landing_page_url}, Requested PDF Page: {page_number}") pdf_url_to_report: Optional[HttpUrl] = None title_to_report: Optional[str] = f"Rekabet Kurumu Kararı {karar_id}" # Default error_message: Optional[str] = None markdown_for_requested_page: Optional[str] = None total_pdf_pages: int = 0 try: async with self.http_client.stream("GET", full_landing_page_url) as response: response.raise_for_status() content_type = response.headers.get("content-type", "").lower() final_url_of_response = HttpUrl(str(response.url)) original_pdf_bytes: Optional[bytes] = None if "application/pdf" in content_type: logger.info(f"URL {final_url_of_response} is a direct PDF. Processing content.") pdf_url_to_report = final_url_of_response original_pdf_bytes = await response.aread() elif "text/html" in content_type: logger.info(f"URL {final_url_of_response} is an HTML landing page. 
Looking for PDF link.") landing_page_html_bytes = await response.aread() detected_charset = response.charset_encoding or 'utf-8' try: landing_page_html = landing_page_html_bytes.decode(detected_charset) except UnicodeDecodeError: landing_page_html = landing_page_html_bytes.decode('utf-8', errors='replace') if landing_page_html.strip(): landing_page_data = self._extract_pdf_url_and_landing_page_metadata(karar_id, landing_page_html, str(final_url_of_response)) pdf_url_str_from_html = landing_page_data.get("pdf_url") if landing_page_data.get("title_on_landing_page"): title_to_report = landing_page_data.get("title_on_landing_page") if pdf_url_str_from_html: pdf_url_to_report = HttpUrl(pdf_url_str_from_html) original_pdf_bytes = await self._download_pdf_bytes(str(pdf_url_to_report)) else: error_message = (error_message or "") + " PDF URL not found on HTML landing page." else: error_message = "Decision landing page content is empty." else: error_message = f"Unexpected content type ({content_type}) for URL: {final_url_of_response}" if original_pdf_bytes: single_page_pdf_bytes, total_pdf_pages_from_extraction = self._extract_single_pdf_page_as_pdf_bytes(original_pdf_bytes, page_number) total_pdf_pages = total_pdf_pages_from_extraction if single_page_pdf_bytes: markdown_for_requested_page = self._convert_pdf_bytes_to_markdown(single_page_pdf_bytes, str(pdf_url_to_report or full_landing_page_url)) if not markdown_for_requested_page: error_message = (error_message or "") + f"; Could not convert page {page_number} of PDF to Markdown." elif total_pdf_pages > 0 : error_message = (error_message or "") + f"; Could not extract page {page_number} from PDF (page may be out of range or extraction failed)." else: error_message = (error_message or "") + "; PDF could not be processed or page count was zero (original PDF might be invalid)." elif not error_message: error_message = "PDF content could not be downloaded or identified." 
is_paginated = total_pdf_pages > 1 current_page_final = page_number if total_pdf_pages > 0: current_page_final = max(1, min(page_number, total_pdf_pages)) elif markdown_for_requested_page is None: current_page_final = 1 # If markdown is None but there was no specific error for markdown conversion (e.g. PDF not found first) # make sure error_message reflects that. if markdown_for_requested_page is None and pdf_url_to_report and not error_message: error_message = (error_message or "") + "; Failed to produce Markdown from PDF page." return RekabetDocument( source_landing_page_url=full_landing_page_url, karar_id=karar_id, title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report, markdown_chunk=markdown_for_requested_page, current_page=current_page_final, total_pages=total_pdf_pages, is_paginated=is_paginated, error_message=error_message.strip("; ") if error_message else None ) except httpx.HTTPStatusError as e: error_msg_detail = f"HTTP Status error {e.response.status_code} while processing decision page." except httpx.RequestError as e: error_msg_detail = f"HTTP Request error while processing decision page: {str(e)}" except Exception as e: error_msg_detail = f"General error while processing decision: {str(e)}" exc_info_flag = not isinstance(e, (httpx.HTTPStatusError, httpx.RequestError)) if 'e' in locals() else True logger.error(f"RekabetKurumuApiClient: Error processing decision {karar_id} from {full_landing_page_url}: {error_msg_detail}", exc_info=exc_info_flag) error_message = (error_message + "; " if error_message else "") + error_msg_detail return RekabetDocument( source_landing_page_url=full_landing_page_url, karar_id=karar_id, title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report, markdown_chunk=None, current_page=page_number, total_pages=0, is_paginated=False, error_message=error_message.strip("; ") if error_message else "An unexpected error occurred." )
- rekabet_mcp_module/models.py:54-71 (schema)Pydantic schema/model for the output of the document retrieval, including metadata (title, URLs, karar_id), paginated Markdown content, and error information.class RekabetDocument(BaseModel): """ Model for a Rekabet Kurumu decision document. Contains metadata from the landing page, a link to the PDF, and the PDF's content converted to paginated Markdown. """ source_landing_page_url: HttpUrl = Field(description="Source URL") karar_id: str = Field(description="ID") title_on_landing_page: Optional[str] = Field(None, description="Title") pdf_url: Optional[HttpUrl] = Field(None, description="PDF URL") markdown_chunk: Optional[str] = Field(None, description="Content") current_page: int = Field(1, description="Page") total_pages: int = Field(1, description="Total pages") is_paginated: bool = Field(False, description="Paginated") error_message: Optional[str] = Field(None, description="Error")
- rekabet_mcp_module/client.py:34-53 (helper)The API client class containing the get_decision_document handler and supporting methods for PDF extraction, Markdown conversion, and HTTP handling.class RekabetKurumuApiClient: BASE_URL = "https://www.rekabet.gov.tr" SEARCH_PATH = "/tr/Kararlar" DECISION_LANDING_PATH_TEMPLATE = "/Karar" # PDF sayfa bazlı Markdown döndürüldüğü için bu sabit artık doğrudan kullanılmıyor. # DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000 def __init__(self, request_timeout: float = 60.0): self.http_client = httpx.AsyncClient( base_url=self.BASE_URL, headers={ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" }, timeout=request_timeout, verify=True, follow_redirects=True )