# lexlink_client.py
"""
LexLink API Client
Wrapper for the Korean National Law Information Center (법제처 국가법령정보) API
"""
import asyncio
import xml.etree.ElementTree as ET
from typing import Dict, Optional, List
import httpx
class LexLinkClient:
    """Async client for the Korean National Law Information Center open API.

    Every public coroutine issues one rate-limited HTTP GET against either the
    search endpoint (``lawSearch.do``) or the detail endpoint
    (``lawService.do``), asks for an XML response and returns it converted to
    plain Python containers by :meth:`_parse_xml`.

    The instance owns an ``httpx.AsyncClient``; release it with
    :meth:`close` or by using the instance as an async context manager::

        async with LexLinkClient(oc) as client:
            hits = await client.search_laws("민법")
    """

    # NOTE(review): these endpoints use plain HTTP, so the OC key travels
    # unencrypted. Confirm whether the service accepts https:// and switch.
    BASE_SEARCH_URL = "http://www.law.go.kr/DRF/lawSearch.do"
    BASE_SERVICE_URL = "http://www.law.go.kr/DRF/lawService.do"

    # Minimum spacing, in seconds, enforced between two outgoing requests.
    MIN_REQUEST_INTERVAL = 0.5

    def __init__(self, oc: str):
        """
        Initialize the LexLink API client

        Args:
            oc: Organization code (API key), sent as the ``OC`` query parameter
        """
        self.oc = oc
        self.client = httpx.AsyncClient(timeout=30.0)
        # Event-loop timestamp reserved for the most recent request.
        self._last_request_time = 0

    async def __aenter__(self):
        """Return the client itself so it can be used with ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Close the underlying HTTP client when the ``async with`` block exits."""
        await self.close()

    async def close(self):
        """Close the HTTP client"""
        await self.client.aclose()

    async def _rate_limit(self):
        """
        Wait until at least MIN_REQUEST_INTERVAL seconds separate this request
        from the previously scheduled one.

        The next slot is reserved *before* awaiting, so several coroutines
        calling this concurrently are spaced out one after another instead of
        all waking up at the same instant.
        """
        now = asyncio.get_running_loop().time()
        scheduled = max(now, self._last_request_time + self.MIN_REQUEST_INTERVAL)
        # No await between the read above and this write: the reservation is
        # atomic with respect to other coroutines on the same loop.
        self._last_request_time = scheduled
        if scheduled > now:
            await asyncio.sleep(scheduled - now)

    def _rank_search_results(self, results: List, query: str, name_field: str) -> List:
        """
        Rank search results by relevance to query.

        Priority, highest first (comparison is case-insensitive):
            1. Exact match (e.g., "민법" matches "민법")
            2. Name starts with the query (e.g., "민법" matches "민법 시행령")
            3. Query occurs at the start of the name or right after a space
            4. Query occurs anywhere in the name
        Ties are broken by name length (shorter first), then by original order.
        Entries whose name is missing, empty or not a string are placed last.

        Args:
            results: List of result dictionaries
            query: Search query
            name_field: Field name to check (e.g., '법령명한글', '사건명')

        Returns:
            A new, re-ranked list; ``results`` itself is returned unchanged
            when it is empty or the query is empty/blank.
        """
        if not results or not query:
            return results

        needle = query.lower().strip()
        if not needle:
            # A blank query would "match" every name; ranking is meaningless.
            return results

        def begins_a_word(haystack: str) -> bool:
            # Inspect every occurrence, not only the first one.
            idx = haystack.find(needle)
            while idx != -1:
                if idx == 0 or haystack[idx - 1] == ' ':
                    return True
                idx = haystack.find(needle, idx + 1)
            return False

        def sort_key(result) -> tuple:
            raw = result.get(name_field) if isinstance(result, dict) else None
            # Parsed XML can yield None (empty element) or a dict here.
            name = raw.strip() if isinstance(raw, str) else ''
            if not name:
                # Same arity as the regular key so tuple comparison is
                # well-defined; infinite length pushes these to the end.
                return (0, 0, 0, 0, float('inf'))

            name_lower = name.lower()
            exact_match = 1 if name_lower == needle else 0
            starts_with = 1 if name_lower.startswith(needle) else 0
            contains = 1 if needle in name_lower else 0
            word_match = 1 if contains and begins_a_word(name_lower) else 0

            # Negated flags: sorted() is ascending, so better matches come
            # first; the trailing length prefers shorter, more specific names.
            return (-exact_match, -starts_with, -word_match, -contains, len(name))

        return sorted(results, key=sort_key)

    def _parse_xml(self, xml_content: str) -> Dict:
        """
        Parse XML response into a dictionary

        Conversion rules, applied recursively from the root element:
            * a leaf element with non-blank text becomes the stripped text
              (its attributes, if any, are not included);
            * otherwise the element becomes a dict holding its attributes, its
              own non-blank text under ``'_text'`` and one key per child tag;
            * repeated child tags are collected into a list;
            * an element with no attributes, children or non-blank text
              becomes its raw ``.text`` (usually ``None``).

        Args:
            xml_content: XML string from API

        Returns:
            Parsed data as dictionary (or a string / ``None`` when the root
            element itself is a leaf, per the rules above)

        Raises:
            xml.etree.ElementTree.ParseError: if the content is not valid XML
        """
        root = ET.fromstring(xml_content)

        def element_to_dict(element):
            """Recursively convert XML element to dict"""
            result = {}
            if element.attrib:
                result.update(element.attrib)

            if element.text and element.text.strip():
                if len(element) == 0:  # No children: plain text leaf
                    return element.text.strip()
                result['_text'] = element.text.strip()

            for child in element:
                child_data = element_to_dict(child)
                child_tag = child.tag
                if child_tag in result:
                    # Second and later occurrences turn the value into a list.
                    if not isinstance(result[child_tag], list):
                        result[child_tag] = [result[child_tag]]
                    result[child_tag].append(child_data)
                else:
                    result[child_tag] = child_data

            return result if result else element.text

        return element_to_dict(root)

    async def _fetch(self, url: str, params: Dict) -> Dict:
        """Rate-limit, GET ``url`` with ``params`` and return the parsed XML body.

        Raises whatever ``response.raise_for_status()`` raises for error status
        codes, and ``ParseError`` for a body that is not valid XML.
        """
        await self._rate_limit()
        response = await self.client.get(url, params=params)
        response.raise_for_status()
        return self._parse_xml(response.text)

    def _rank_in_result(self, result, key: str, query: str, name_field: str) -> Optional[List]:
        """Re-rank the entries stored under ``result[key]`` in place.

        A single entry is first wrapped in a list. Returns the ranked list, or
        ``None`` (leaving ``result`` untouched) when ``result`` is not a dict,
        lacks ``key`` or has no entries there.
        """
        if not isinstance(result, dict) or key not in result:
            return None
        entries = result[key]
        if not isinstance(entries, list):
            entries = [entries] if entries else []
        if not entries:
            return None
        ranked = self._rank_search_results(entries, query, name_field)
        result[key] = ranked
        return ranked

    async def search_laws(
        self,
        query: str,
        display: int = 20,
        page: int = 1,
        sort: Optional[str] = None,
        auto_rank: bool = True
    ) -> Dict:
        """
        Search Korean laws and regulations

        Args:
            query: Search keyword
            display: Number of results per page (max 100)
            page: Page number
            sort: Sort order (optional)
            auto_rank: Automatically re-rank results by relevance (default: True)
                When enabled and ``display`` is below 20, up to 5x as many
                results (capped at 50) are requested and only the best
                ``display`` are returned.

        Returns:
            Dictionary with search results (auto-ranked by default). When
            ranking was applied, ``'law'`` is a list and ``'numOfRows'`` is
            overwritten with the number of returned entries.

        Note:
            ``page`` is sent unchanged together with the enlarged page size,
            so with over-fetching active page N covers the N-th block of the
            enlarged size rather than the N-th block of ``display`` results.
        """
        # Smart fetch: a larger candidate pool gives the ranking more to choose from.
        fetch_display = display
        if auto_rank and display < 20:
            fetch_display = min(50, display * 5)

        params = {
            'OC': self.oc,
            'target': 'law',
            'type': 'XML',
            'query': query,
            'display': str(fetch_display),
            'page': str(page)
        }
        if sort:
            params['sort'] = sort

        result = await self._fetch(self.BASE_SEARCH_URL, params)

        if auto_rank:
            ranked_laws = self._rank_in_result(result, 'law', query, '법령명한글')
            if ranked_laws is not None:
                # Return only the requested number of results
                result['law'] = ranked_laws[:display]
                # Update numOfRows to reflect actual returned count
                result['numOfRows'] = len(result['law'])
        return result

    async def get_law_details(
        self,
        mst: str,
        effective_date: Optional[str] = None
    ) -> Dict:
        """
        Get full text and details of a specific law

        Args:
            mst: Law serial number (법령일련번호)
            effective_date: Optional effective date (YYYYMMDD)

        Returns:
            Dictionary with law details
        """
        params = {
            'OC': self.oc,
            'target': 'law',
            'MST': mst,
            'type': 'XML'
        }
        if effective_date:
            params['efYd'] = effective_date
        return await self._fetch(self.BASE_SERVICE_URL, params)

    async def search_case_law(
        self,
        query: str,
        search_scope: int = 2,
        display: int = 20,
        page: int = 1,
        court_type: Optional[str] = None,
        date_range: Optional[str] = None,
        sort: Optional[str] = None,
        auto_rank: bool = True
    ) -> Dict:
        """
        Search court precedents and case law

        Args:
            query: Search keyword
            search_scope: 1=case name only, 2=full text
            display: Number of results per page (max 100)
            page: Page number
            court_type: Court type code (400201=Supreme, 400202=lower)
            date_range: Date range in format YYYYMMDD~YYYYMMDD
            sort: Sort order (optional)
            auto_rank: Automatically re-rank results by relevance (default: True)

        Returns:
            Dictionary with case law search results (auto-ranked by default)
        """
        params = {
            'OC': self.oc,
            'target': 'prec',
            'type': 'XML',
            'query': query,
            'search': str(search_scope),
            'display': str(display),
            'page': str(page)
        }
        if court_type:
            params['org'] = court_type
        if date_range:
            params['prncYd'] = date_range
        if sort:
            params['sort'] = sort

        result = await self._fetch(self.BASE_SEARCH_URL, params)
        if auto_rank:
            self._rank_in_result(result, 'prec', query, '사건명')
        return result

    async def get_case_details(self, case_id: str) -> Dict:
        """
        Get full text of a specific court case

        Args:
            case_id: Case serial number (판례일련번호)

        Returns:
            Dictionary with case details
        """
        params = {
            'OC': self.oc,
            'target': 'prec',
            'ID': case_id,
            'type': 'XML'
        }
        return await self._fetch(self.BASE_SERVICE_URL, params)

    async def search_legal_interpretations(
        self,
        query: str,
        display: int = 20,
        page: int = 1,
        search_scope: int = 1,
        date_range: Optional[str] = None,
        sort: Optional[str] = None,
        auto_rank: bool = True
    ) -> Dict:
        """
        Search official legal interpretations

        Args:
            query: Search keyword
            display: Number of results per page (max 100)
            page: Page number
            search_scope: 1=title, 2=full text
            date_range: Date range in format YYYYMMDD~YYYYMMDD
            sort: Sort order (optional)
            auto_rank: Automatically re-rank results by relevance (default: True)

        Returns:
            Dictionary with interpretation search results (auto-ranked by default)
        """
        params = {
            'OC': self.oc,
            'target': 'expc',
            'type': 'XML',
            'query': query,
            'search': str(search_scope),
            'display': str(display),
            'page': str(page)
        }
        if date_range:
            params['explYd'] = date_range
        if sort:
            params['sort'] = sort

        result = await self._fetch(self.BASE_SEARCH_URL, params)
        if auto_rank:
            self._rank_in_result(result, 'expc', query, '안건명')
        return result

    async def search_local_ordinances(
        self,
        query: str,
        display: int = 20,
        page: int = 1
    ) -> Dict:
        """
        Search local government ordinances

        Results are returned in the order supplied by the API; no re-ranking
        is applied by this method.

        Args:
            query: Search keyword
            display: Number of results per page (max 100)
            page: Page number

        Returns:
            Dictionary with ordinance search results
        """
        params = {
            'OC': self.oc,
            'target': 'ordin',
            'type': 'XML',
            'query': query,
            'display': str(display),
            'page': str(page)
        }
        return await self._fetch(self.BASE_SEARCH_URL, params)

    async def get_legal_interpretation_details(self, interp_id: str) -> Dict:
        """
        Get full text of a specific legal interpretation

        Args:
            interp_id: Legal interpretation serial number (법령해석례일련번호)

        Returns:
            Dictionary with interpretation details
        """
        params = {
            'OC': self.oc,
            'target': 'expc',
            'ID': interp_id,
            'type': 'XML'
        }
        return await self._fetch(self.BASE_SERVICE_URL, params)