Yargı MCP

by saidsurucu
MIT License
bireysel_client.py (18.3 kB)
# anayasa_mcp_module/bireysel_client.py
# This client is for Bireysel Başvuru: https://kararlarbilgibankasi.anayasa.gov.tr

import httpx
from bs4 import BeautifulSoup, Tag
from typing import Dict, Any, List, Optional, Tuple
import logging
import html
import re
import io
from urllib.parse import urlencode, urljoin, quote
from markitdown import MarkItDown
import math  # For math.ceil for pagination

from .models import (
    AnayasaBireyselReportSearchRequest,
    AnayasaBireyselReportDecisionDetail,
    AnayasaBireyselReportDecisionSummary,
    AnayasaBireyselReportSearchResult,
    AnayasaBireyselBasvuruDocumentMarkdown,  # Model for Bireysel Başvuru document
)

logger = logging.getLogger(__name__)
if not logger.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')


class AnayasaBireyselBasvuruApiClient:
    BASE_URL = "https://kararlarbilgibankasi.anayasa.gov.tr"
    SEARCH_PATH = "/Ara"
    DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000  # Character limit per page

    def __init__(self, request_timeout: float = 60.0):
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            },
            timeout=request_timeout,
            verify=True,
            follow_redirects=True
        )

    def _build_query_params_for_bireysel_report(self, params: AnayasaBireyselReportSearchRequest) -> List[Tuple[str, str]]:
        query_params: List[Tuple[str, str]] = []
        query_params.append(("KararBulteni", "1"))  # Specific to this report type

        if params.keywords:
            for kw in params.keywords:
                query_params.append(("KelimeAra[]", kw))

        if params.page_to_fetch and params.page_to_fetch > 1:
            query_params.append(("page", str(params.page_to_fetch)))
        return query_params

    async def search_bireysel_basvuru_report(
        self,
        params: AnayasaBireyselReportSearchRequest
    ) -> AnayasaBireyselReportSearchResult:
        final_query_params = self._build_query_params_for_bireysel_report(params)
        request_url = self.SEARCH_PATH

        logger.info(f"AnayasaBireyselBasvuruApiClient: Performing Bireysel Başvuru Report search. Path: {request_url}, Params: {final_query_params}")

        try:
            response = await self.http_client.get(request_url, params=final_query_params)
            response.raise_for_status()
            html_content = response.text
        except httpx.RequestError as e:
            logger.error(f"AnayasaBireyselBasvuruApiClient: HTTP request error during Bireysel Başvuru Report search: {e}")
            raise
        except Exception as e:
            logger.error(f"AnayasaBireyselBasvuruApiClient: Error processing Bireysel Başvuru Report search request: {e}")
            raise

        soup = BeautifulSoup(html_content, 'html.parser')

        total_records = None
        bulunan_karar_div = soup.find("div", class_="bulunankararsayisi")
        if bulunan_karar_div:
            match_records = re.search(r'(\d+)\s*Karar Bulundu', bulunan_karar_div.get_text(strip=True))
            if match_records:
                total_records = int(match_records.group(1))

        processed_decisions: List[AnayasaBireyselReportDecisionSummary] = []

        report_content_area = soup.find("div", class_="HaberBulteni")
        if not report_content_area:
            logger.warning("HaberBulteni div not found, attempting to parse decision divs from the whole page.")
            report_content_area = soup

        decision_divs = report_content_area.find_all("div", class_="KararBulteniBirKarar")
        if not decision_divs:
            logger.warning("No KararBulteniBirKarar divs found.")

        for decision_div in decision_divs:
            title_tag = decision_div.find("h4")
            title_text = title_tag.get_text(strip=True) if title_tag and title_tag.strong else (title_tag.get_text(strip=True) if title_tag else "")

            alti_cizili_div = decision_div.find("div", class_="AltiCizili")
            ref_no, dec_type, body, app_date, dec_date, url_path = "", "", "", "", "", ""
            if alti_cizili_div:
                link_tag = alti_cizili_div.find("a", href=True)
                if link_tag:
                    ref_no = link_tag.get_text(strip=True)
                    url_path = link_tag['href']

                parts_text = alti_cizili_div.get_text(separator="|", strip=True)
                parts = [part.strip() for part in parts_text.split("|")]

                # Clean ref_no from the first part if it was extracted from link
                if ref_no and parts and parts[0].strip().startswith(ref_no):
                    parts[0] = parts[0].replace(ref_no, "").strip()
                    if not parts[0]:
                        parts.pop(0)  # Remove empty string if ref_no was the only content

                # Assign parts based on typical order, adjusting for missing ref_no at start
                current_idx = 0
                if not ref_no and len(parts) > current_idx and re.match(r"\d+/\d+", parts[current_idx]):  # Check if first part is ref_no
                    ref_no = parts[current_idx]
                    current_idx += 1

                dec_type = parts[current_idx] if len(parts) > current_idx else ""
                current_idx += 1
                body = parts[current_idx] if len(parts) > current_idx else ""
                current_idx += 1
                app_date_raw = parts[current_idx] if len(parts) > current_idx else ""
                current_idx += 1
                dec_date_raw = parts[current_idx] if len(parts) > current_idx else ""

                if app_date_raw and "Başvuru Tarihi :" in app_date_raw:
                    app_date = app_date_raw.replace("Başvuru Tarihi :", "").strip()
                elif app_date_raw:  # If label is missing but format matches
                    app_date_match = re.search(r'(\d{1,2}/\d{1,2}/\d{4})', app_date_raw)
                    if app_date_match:
                        app_date = app_date_match.group(1)

                if dec_date_raw and "Karar Tarihi :" in dec_date_raw:
                    dec_date = dec_date_raw.replace("Karar Tarihi :", "").strip()
                elif dec_date_raw:  # If label is missing but format matches
                    dec_date_match = re.search(r'(\d{1,2}/\d{1,2}/\d{4})', dec_date_raw)
                    if dec_date_match:
                        dec_date = dec_date_match.group(1)

            subject_div = decision_div.find(lambda tag: tag.name == 'div' and not tag.has_attr('class') and tag.get_text(strip=True).startswith("BAŞVURU KONUSU :"))
            subject_text = subject_div.get_text(strip=True).replace("BAŞVURU KONUSU :", "").strip() if subject_div else ""

            details_list: List[AnayasaBireyselReportDecisionDetail] = []
            karar_detaylari_div = decision_div.find_next_sibling("div", id="KararDetaylari")
            if karar_detaylari_div:
                table = karar_detaylari_div.find("table", class_="table")
                if table and table.find("tbody"):
                    for row in table.find("tbody").find_all("tr"):
                        cells = row.find_all("td")
                        if len(cells) == 4:  # Hak, Müdahale İddiası, Sonuç, Giderim
                            details_list.append(AnayasaBireyselReportDecisionDetail(
                                hak=cells[0].get_text(strip=True) or "",
                                mudahale_iddiasi=cells[1].get_text(strip=True) or "",
                                sonuc=cells[2].get_text(strip=True) or "",
                                giderim=cells[3].get_text(strip=True) or "",
                            ))

            full_decision_page_url = urljoin(self.BASE_URL, url_path) if url_path else ""

            processed_decisions.append(AnayasaBireyselReportDecisionSummary(
                title=title_text,
                decision_reference_no=ref_no,
                decision_page_url=full_decision_page_url,
                decision_type_summary=dec_type,
                decision_making_body=body,
                application_date_summary=app_date,
                decision_date_summary=dec_date,
                application_subject_summary=subject_text,
                details=details_list
            ))

        return AnayasaBireyselReportSearchResult(
            decisions=processed_decisions,
            total_records_found=total_records,
            retrieved_page_number=params.page_to_fetch
        )

    def _convert_html_to_markdown_bireysel(self, full_decision_html_content: str) -> Optional[str]:
        if not full_decision_html_content:
            return None

        processed_html = html.unescape(full_decision_html_content)
        soup = BeautifulSoup(processed_html, "html.parser")
        html_input_for_markdown = ""

        karar_tab_content = soup.find("div", id="Karar")
        if karar_tab_content:
            karar_html_span = karar_tab_content.find("span", class_="kararHtml")
            if karar_html_span:
                word_section = karar_html_span.find("div", class_="WordSection1")
                if word_section:
                    for s in word_section.select('script, style, .item.col-xs-12.col-sm-12, center:has(b)'):
                        s.decompose()
                    html_input_for_markdown = str(word_section)
                else:
                    logger.warning("AnayasaBireyselBasvuruApiClient: WordSection1 not found in span.kararHtml. Using span.kararHtml content.")
                    for s in karar_html_span.select('script, style, .item.col-xs-12.col-sm-12, center:has(b)'):
                        s.decompose()
                    html_input_for_markdown = str(karar_html_span)
            else:
                logger.warning("AnayasaBireyselBasvuruApiClient: span.kararHtml not found in div#Karar. Using div#Karar content.")
                for s in karar_tab_content.select('script, style, .item.col-xs-12.col-sm-12, center:has(b)'):
                    s.decompose()
                html_input_for_markdown = str(karar_tab_content)
        else:
            logger.warning("AnayasaBireyselBasvuruApiClient: div#Karar (KARAR tab) not found. Trying WordSection1 fallback.")
            word_section_fallback = soup.find("div", class_="WordSection1")
            if word_section_fallback:
                for s in word_section_fallback.select('script, style, .item.col-xs-12.col-sm-12, center:has(b)'):
                    s.decompose()
                html_input_for_markdown = str(word_section_fallback)
            else:
                body_tag = soup.find("body")
                if body_tag:
                    for s in body_tag.select('script, style, .item.col-xs-12.col-sm-12, center:has(b), .banner, .footer, .yazdirmaalani, .filtreler, .menu, .altmenu, .geri, .arabuton, .temizlebutonu, form#KararGetir, .TabBaslik, #KararDetaylari, .share-button-container'):
                        s.decompose()
                    html_input_for_markdown = str(body_tag)
                else:
                    html_input_for_markdown = processed_html

        markdown_text = None
        try:
            # Ensure the content is wrapped in basic HTML structure if it's not already
            if not html_input_for_markdown.strip().lower().startswith(("<html", "<!doctype")):
                html_content = f"<html><head><meta charset=\"UTF-8\"></head><body>{html_input_for_markdown}</body></html>"
            else:
                html_content = html_input_for_markdown

            # Convert HTML string to bytes and create BytesIO stream
            html_bytes = html_content.encode('utf-8')
            html_stream = io.BytesIO(html_bytes)

            # Pass BytesIO stream to MarkItDown to avoid temp file creation
            md_converter = MarkItDown()
            conversion_result = md_converter.convert(html_stream)
            markdown_text = conversion_result.text_content
        except Exception as e:
            logger.error(f"AnayasaBireyselBasvuruApiClient: MarkItDown conversion error: {e}")
        return markdown_text

    async def get_decision_document_as_markdown(
        self,
        document_url_path: str,  # e.g. /BB/2021/20295
        page_number: int = 1
    ) -> AnayasaBireyselBasvuruDocumentMarkdown:
        full_url = urljoin(self.BASE_URL, document_url_path)
        logger.info(f"AnayasaBireyselBasvuruApiClient: Fetching Bireysel Başvuru document for Markdown (page {page_number}) from URL: {full_url}")

        basvuru_no_from_page = None
        karar_tarihi_from_page = None
        basvuru_tarihi_from_page = None
        karari_veren_birim_from_page = None
        karar_turu_from_page = None
        resmi_gazete_info_from_page = None

        try:
            response = await self.http_client.get(full_url)
            response.raise_for_status()
            html_content_from_api = response.text

            if not isinstance(html_content_from_api, str) or not html_content_from_api.strip():
                logger.warning(f"AnayasaBireyselBasvuruApiClient: Received empty HTML from {full_url}.")
                return AnayasaBireyselBasvuruDocumentMarkdown(
                    source_url=full_url,
                    markdown_chunk=None,
                    current_page=page_number,
                    total_pages=0,
                    is_paginated=False
                )

            soup = BeautifulSoup(html_content_from_api, 'html.parser')

            meta_desc_tag = soup.find("meta", attrs={"name": "description"})
            if meta_desc_tag and meta_desc_tag.get("content"):
                content = meta_desc_tag["content"]
                bn_match = re.search(r"B\.\s*No:\s*([\d\/]+)", content)
                if bn_match:
                    basvuru_no_from_page = bn_match.group(1).strip()
                date_match = re.search(r"(\d{1,2}\/\d{1,2}\/\d{4}),\s*§", content)
                if date_match:
                    karar_tarihi_from_page = date_match.group(1).strip()

            karar_detaylari_tab = soup.find("div", id="KararDetaylari")
            if karar_detaylari_tab:
                table = karar_detaylari_tab.find("table", class_="table")
                if table:
                    rows = table.find_all("tr")
                    for row in rows:
                        cells = row.find_all("td")
                        if len(cells) == 2:
                            key = cells[0].get_text(strip=True)
                            value = cells[1].get_text(strip=True)
                            if "Kararı Veren Birim" in key:
                                karari_veren_birim_from_page = value
                            elif "Karar Türü (Başvuru Sonucu)" in key:
                                karar_turu_from_page = value
                            elif "Başvuru No" in key and not basvuru_no_from_page:
                                basvuru_no_from_page = value
                            elif "Başvuru Tarihi" in key:
                                basvuru_tarihi_from_page = value
                            elif "Karar Tarihi" in key and not karar_tarihi_from_page:
                                karar_tarihi_from_page = value
                            elif "Resmi Gazete Tarih / Sayı" in key:
                                resmi_gazete_info_from_page = value

            full_markdown_content = self._convert_html_to_markdown_bireysel(html_content_from_api)

            if not full_markdown_content:
                return AnayasaBireyselBasvuruDocumentMarkdown(
                    source_url=full_url,
                    basvuru_no_from_page=basvuru_no_from_page,
                    karar_tarihi_from_page=karar_tarihi_from_page,
                    basvuru_tarihi_from_page=basvuru_tarihi_from_page,
                    karari_veren_birim_from_page=karari_veren_birim_from_page,
                    karar_turu_from_page=karar_turu_from_page,
                    resmi_gazete_info_from_page=resmi_gazete_info_from_page,
                    markdown_chunk=None,
                    current_page=page_number,
                    total_pages=0,
                    is_paginated=False
                )

            content_length = len(full_markdown_content)
            total_pages = math.ceil(content_length / self.DOCUMENT_MARKDOWN_CHUNK_SIZE)
            if total_pages == 0:
                total_pages = 1

            current_page_clamped = max(1, min(page_number, total_pages))
            start_index = (current_page_clamped - 1) * self.DOCUMENT_MARKDOWN_CHUNK_SIZE
            end_index = start_index + self.DOCUMENT_MARKDOWN_CHUNK_SIZE
            markdown_chunk = full_markdown_content[start_index:end_index]

            return AnayasaBireyselBasvuruDocumentMarkdown(
                source_url=full_url,
                basvuru_no_from_page=basvuru_no_from_page,
                karar_tarihi_from_page=karar_tarihi_from_page,
                basvuru_tarihi_from_page=basvuru_tarihi_from_page,
                karari_veren_birim_from_page=karari_veren_birim_from_page,
                karar_turu_from_page=karar_turu_from_page,
                resmi_gazete_info_from_page=resmi_gazete_info_from_page,
                markdown_chunk=markdown_chunk,
                current_page=current_page_clamped,
                total_pages=total_pages,
                is_paginated=(total_pages > 1)
            )
        except httpx.RequestError as e:
            logger.error(f"AnayasaBireyselBasvuruApiClient: HTTP error fetching Bireysel Başvuru document from {full_url}: {e}")
            raise
        except Exception as e:
            logger.error(f"AnayasaBireyselBasvuruApiClient: General error processing Bireysel Başvuru document from {full_url}: {e}")
            raise

    async def close_client_session(self):
        if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
            await self.http_client.aclose()
            logger.info("AnayasaBireyselBasvuruApiClient: HTTP client session closed.")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/saidsurucu/yargi-mcp'
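The same endpoint can be queried from Python. A minimal sketch using httpx (assuming the endpoint returns JSON, which the curl example suggests but does not guarantee):

# Sketch: fetch this server's directory entry from the Glama MCP API.
import httpx

resp = httpx.get("https://glama.ai/api/mcp/v1/servers/saidsurucu/yargi-mcp")
resp.raise_for_status()
print(resp.json())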

If you have feedback or need assistance with the MCP directory API, please join our Discord server.