MCP-Server-IETF

  • src
  • mcp_server_ietf
"""Helpers for downloading, caching, indexing and paginating IETF RFC documents."""

import logging
import os
import re
from dataclasses import dataclass
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple

import requests

# Constants
INDEX_URL = "https://www.rfc-editor.org/rfc-index.txt"
RFC_URL_TEMPLATE = "https://www.rfc-editor.org/rfc/rfc{number}.txt"
CACHE_DIR = os.path.expanduser("~/.cache/ietf-doc-server")
DEFAULT_MAX_LINES = 200  # Default pagination limit
REQUEST_TIMEOUT = 30  # Seconds before an HTTP request is abandoned

logger = logging.getLogger(__name__)

# An index entry starts at column 0 with a zero-padded 4- or 5-digit number.
# Indented lines are continuation text of the previous entry (or header
# examples) and must not be mistaken for new entries.
_ENTRY_START_RE = re.compile(r"^(\d{4,5})\s+(.+)")
# The title ends at the first period followed by whitespace or end of text,
# so dots inside tokens such as "HTTP/1.1" do not cut the title short.
_TITLE_END_RE = re.compile(r"\.(?:\s|$)")
_RFC_NUMBER_RE = re.compile(r"[0-9]+")
_PAGE_MARKER_RE = re.compile(r"\[Page\s+(\d+)\]")


@dataclass
class RFCIndexData:
    """Data structure for RFC index information"""

    index_path: str
    docs_count: int
    rfc_titles: Dict[str, str]  # Map of RFC number (no leading zeros) to title


def _download_to_cache(url: str, dest_path: str) -> None:
    """
    Fetch a URL and store its text at dest_path.

    The body is written to a temporary sibling file and then renamed, so an
    interrupted write never leaves a truncated file that later looks like a
    valid cache entry.

    Args:
        url: The URL to fetch
        dest_path: Final location of the cached file

    Raises:
        requests.RequestException: On network errors, timeouts or HTTP errors
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    tmp_path = f"{dest_path}.{os.getpid()}.part"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(response.text)
        os.replace(tmp_path, dest_path)
    finally:
        # Only still present when the write or rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def download_rfc_index(cache_dir: str = CACHE_DIR) -> str:
    """
    Download and cache the RFC index file

    Args:
        cache_dir: Directory to store cached files

    Returns:
        Path to the cached index file

    Raises:
        requests.RequestException: If the index cannot be downloaded
    """
    # Create cache directory if not exists
    os.makedirs(cache_dir, exist_ok=True)
    index_path = os.path.join(cache_dir, "rfc-index.txt")

    # Download and cache index if not present
    if not os.path.exists(index_path):
        # Log instead of print: stdout may be carrying a protocol stream.
        logger.info("Downloading RFC index from %s", INDEX_URL)
        _download_to_cache(INDEX_URL, index_path)

    return index_path


def _iter_index_entries(lines: Iterable[str]) -> Iterator[Tuple[str, str]]:
    """Yield (rfc_number, entry_text) pairs, joining wrapped continuation lines."""
    started = False
    number: Optional[str] = None
    parts: List[str] = []

    for line in lines:
        # Skip everything up to and including the first "RFC INDEX" marker
        if not started:
            started = "RFC INDEX" in line
            continue

        match = _ENTRY_START_RE.match(line)
        if match:
            if number is not None:
                yield number, " ".join(parts)
            # Remove leading zeros; an all-zero number becomes "0"
            number = match.group(1).lstrip("0") or "0"
            parts = [match.group(2).strip()]
        elif number is not None:
            text = line.strip()
            if text and line[0].isspace():
                # Indented, non-blank line: continuation of the current entry
                parts.append(text)
            else:
                # Blank or unindented non-entry line closes the entry
                yield number, " ".join(parts)
                number = None

    if number is not None:
        yield number, " ".join(parts)


def _title_from_entry(entry_text: str) -> str:
    """Return the title portion of an index entry, or "Not Issued"."""
    if "Not Issued" in entry_text:
        return "Not Issued"
    return _TITLE_END_RE.split(entry_text, maxsplit=1)[0].strip()


def parse_rfc_index(index_path: str) -> RFCIndexData:
    """
    Parse the RFC index file to extract titles and count

    Args:
        index_path: Path to the RFC index file

    Returns:
        RFCIndexData with parsed information. Keys of rfc_titles are RFC
        numbers without leading zeros; docs_count is the number of entries
        found (including "Not Issued" ones).
    """
    rfc_titles: Dict[str, str] = {}
    docs_count = 0

    with open(index_path, "r", encoding="utf-8") as f:
        for rfc_num, entry_text in _iter_index_entries(f):
            rfc_titles[rfc_num] = _title_from_entry(entry_text)
            docs_count += 1

    return RFCIndexData(
        index_path=index_path,
        docs_count=docs_count,
        rfc_titles=rfc_titles,
    )


def download_rfc(rfc_number: str, cache_dir: str = CACHE_DIR) -> str:
    """
    Download and cache a specific RFC document

    Args:
        rfc_number: The RFC number to download
        cache_dir: Directory to store cached files

    Returns:
        Path to the cached RFC document

    Raises:
        Exception: If the document cannot be downloaded (the underlying
            requests error is attached as the cause)
    """
    # Create cache directory if not exists
    os.makedirs(cache_dir, exist_ok=True)

    # Create cache path for this document
    doc_path = os.path.join(cache_dir, f"rfc{rfc_number}.txt")

    # Download if not cached
    if not os.path.exists(doc_path):
        url = RFC_URL_TEMPLATE.format(number=rfc_number)
        try:
            _download_to_cache(url, doc_path)
        except requests.RequestException as e:
            raise Exception(f"Failed to download RFC {rfc_number}: {e}") from e

    return doc_path


def get_rfc_document(
    rfc_number: str,
    start_line: int = 1,
    max_lines: int = DEFAULT_MAX_LINES,
    cache_dir: str = CACHE_DIR,
    index_data: Optional[RFCIndexData] = None,
) -> Dict[str, Any]:
    """
    Get an RFC document by its number with pagination support

    Args:
        rfc_number: The RFC number (e.g., "1234"); leading zeros are ignored
        start_line: The line number to start from (default: 1)
        max_lines: Maximum number of lines to return (default: 200)
        cache_dir: Directory to store cached files
        index_data: Optional pre-loaded index data

    Returns:
        A dictionary containing the document content and metadata, or a
        dictionary with a single "error" key describing what went wrong
    """
    # Validate input (ASCII digits only; str.isdigit() also accepts e.g. "²")
    rfc_number = str(rfc_number).strip()
    if not _RFC_NUMBER_RE.fullmatch(rfc_number):
        return {"error": "RFC number must be a number"}

    if start_line < 1:
        return {"error": "start_line must be 1 or greater"}

    if max_lines < 1:
        return {"error": "max_lines must be 1 or greater"}

    # Index keys carry no leading zeros, so "0001" must be looked up as "1"
    rfc_number = rfc_number.lstrip("0") or "0"

    # Get index data if not provided
    if index_data is None:
        index_path = download_rfc_index(cache_dir)
        index_data = parse_rfc_index(index_path)

    # Check if RFC exists in our index
    if rfc_number not in index_data.rfc_titles:
        return {"error": f"RFC {rfc_number} not found in index"}

    # Download RFC if needed
    try:
        doc_path = download_rfc(rfc_number, cache_dir)
    except Exception as e:
        return {"error": str(e)}

    # Read and paginate the document
    with open(doc_path, "r", encoding="utf-8") as f:
        all_lines = f.readlines()

    total_lines = len(all_lines)

    # Validate start_line
    if start_line > total_lines:
        return {"error": f"start_line ({start_line}) exceeds document length ({total_lines})"}

    # Calculate pagination (start_line and end_line are 1-based, inclusive)
    end_line = min(start_line + max_lines - 1, total_lines)
    paginated_content = "".join(all_lines[start_line - 1:end_line])

    # Check if truncated
    truncated = end_line < total_lines

    return {
        "content": paginated_content,
        "title": index_data.rfc_titles.get(rfc_number, "Unknown title"),
        "path": doc_path,
        "start_line": start_line,
        "end_line": end_line,
        "max_lines": max_lines,
        "total_lines": total_lines,
        "truncated": truncated,
        "truncated_at_line": end_line if truncated else None,
        "page_info": extract_page_info(paginated_content),
        "next_chunk_start": end_line + 1 if truncated else None,
    }


def extract_page_info(content: str) -> Dict[str, Any]:
    """
    Extract page numbers from RFC content if available

    Args:
        content: Text to scan for "[Page X]" markers

    Returns:
        A dictionary with "pages_found", "first_page" and "last_page"; the
        page values are None when no marker is present
    """
    page_matches = _PAGE_MARKER_RE.findall(content)
    if not page_matches:
        return {"pages_found": False, "first_page": None, "last_page": None}

    return {
        "pages_found": True,
        "first_page": int(page_matches[0]),
        "last_page": int(page_matches[-1]),
    }


def search_rfc_by_keyword(keyword: str, index_data: RFCIndexData) -> List[Dict[str, str]]:
    """
    Search for RFC documents by keyword in their titles

    Args:
        keyword: The keyword to search for (case-insensitive substring)
        index_data: Pre-loaded index data

    Returns:
        A list of matching RFCs with their numbers and titles
    """
    needle = keyword.lower()
    return [
        {"number": number, "title": title}
        for number, title in index_data.rfc_titles.items()
        if needle in title.lower()
    ]