API Docs MCP Server

parser.py•9.97 KiB

""" Parser module for extracting structured information from API documentation. """ from bs4 import BeautifulSoup from typing import Dict, List, Optional from src.storage.models import ApiEndpoint, ApiSchema class ApiDocParser: """Parser for API documentation HTML content.""" def __init__(self): """Initialize the API documentation parser.""" pass def parse_page(self, html: str, url: str) -> Dict: """ Parse an API documentation page. Args: html: The HTML content of the page url: The URL of the page Returns: Dictionary containing parsed information """ soup = BeautifulSoup(html, 'html.parser') # Extract page title title = self._extract_title(soup) # Extract endpoints endpoints = self._extract_endpoints(soup) # Extract schemas schemas = self._extract_schemas(soup) return { 'title': title, 'endpoints': endpoints, 'schemas': schemas, 'url': url } def _extract_title(self, soup: BeautifulSoup) -> str: """Extract the page title.""" title_tag = soup.find('title') return title_tag.text.strip() if title_tag else '' def _extract_endpoints(self, soup: BeautifulSoup) -> List[Dict]: """Extract API endpoints from the page.""" endpoints = [] # Look for common endpoint documentation patterns endpoint_sections = soup.find_all(['div', 'section'], class_=lambda x: x and any( cls in x.lower() for cls in ['endpoint', 'api', 'method', 'operation'] )) for section in endpoint_sections: endpoint = { 'path': self._extract_endpoint_path(section), 'method': self._extract_endpoint_method(section), 'description': self._extract_endpoint_description(section), 'parameters': self._extract_endpoint_parameters(section), 'responses': self._extract_endpoint_responses(section) } if endpoint['path']: endpoints.append(endpoint) return endpoints def _extract_schemas(self, soup: BeautifulSoup) -> List[Dict]: """Extract data schemas from the page.""" schemas = [] # Look for common schema documentation patterns schema_sections = soup.find_all(['div', 'section'], class_=lambda x: x and any( cls in x.lower() for cls in ['schema', 'model', 'type', 'object'] )) for section in schema_sections: schema = { 'name': self._extract_schema_name(section), 'description': self._extract_schema_description(section), 'properties': self._extract_schema_properties(section) } if schema['name']: schemas.append(schema) return schemas def _extract_endpoint_path(self, section: BeautifulSoup) -> str: """Extract the endpoint path.""" path_elem = section.find(['code', 'pre'], string=lambda x: x and '/' in x) return path_elem.text.strip() if path_elem else '' def _extract_endpoint_method(self, section: BeautifulSoup) -> str: """Extract the HTTP method.""" method_elem = section.find(['code', 'span'], string=lambda x: x and x.upper() in ['GET', 'POST', 'PUT', 'DELETE', 'PATCH']) return method_elem.text.strip().upper() if method_elem else '' def _extract_endpoint_description(self, section: BeautifulSoup) -> str: """Extract the endpoint description.""" desc_elem = section.find(['p', 'div'], class_=lambda x: x and 'description' in x.lower()) return desc_elem.text.strip() if desc_elem else '' def _extract_endpoint_parameters(self, section: BeautifulSoup) -> List[Dict]: """Extract endpoint parameters.""" parameters = [] param_sections = section.find_all(['div', 'table'], class_=lambda x: x and 'parameter' in x.lower()) for param_section in param_sections: param = { 'name': self._extract_parameter_name(param_section), 'type': self._extract_parameter_type(param_section), 'required': self._extract_parameter_required(param_section), 'description': self._extract_parameter_description(param_section) } if param['name']: parameters.append(param) return parameters def _extract_endpoint_responses(self, section: BeautifulSoup) -> List[Dict]: """Extract endpoint responses.""" responses = [] response_sections = section.find_all(['div', 'table'], class_=lambda x: x and 'response' in x.lower()) for response_section in response_sections: response = { 'code': self._extract_response_code(response_section), 'description': self._extract_response_description(response_section), 'schema': self._extract_response_schema(response_section) } if response['code']: responses.append(response) return responses def _extract_schema_name(self, section: BeautifulSoup) -> str: """Extract the schema name.""" name_elem = section.find(['h2', 'h3', 'h4', 'code'], string=lambda x: x and not x.isspace()) return name_elem.text.strip() if name_elem else '' def _extract_schema_description(self, section: BeautifulSoup) -> str: """Extract the schema description.""" desc_elem = section.find(['p', 'div'], class_=lambda x: x and 'description' in x.lower()) return desc_elem.text.strip() if desc_elem else '' def _extract_schema_properties(self, section: BeautifulSoup) -> List[Dict]: """Extract schema properties.""" properties = [] prop_sections = section.find_all(['div', 'table'], class_=lambda x: x and 'property' in x.lower()) for prop_section in prop_sections: prop = { 'name': self._extract_property_name(prop_section), 'type': self._extract_property_type(prop_section), 'required': self._extract_property_required(prop_section), 'description': self._extract_property_description(prop_section) } if prop['name']: properties.append(prop) return properties def _extract_parameter_name(self, section: BeautifulSoup) -> str: """Extract parameter name.""" name_elem = section.find(['code', 'td'], string=lambda x: x and not x.isspace()) return name_elem.text.strip() if name_elem else '' def _extract_parameter_type(self, section: BeautifulSoup) -> str: """Extract parameter type.""" type_elem = section.find(['code', 'td'], string=lambda x: x and any(t in x.lower() for t in ['string', 'integer', 'boolean', 'array', 'object'])) return type_elem.text.strip() if type_elem else '' def _extract_parameter_required(self, section: BeautifulSoup) -> bool: """Extract whether parameter is required.""" required_elem = section.find(string=lambda x: x and 'required' in x.lower()) return bool(required_elem) def _extract_parameter_description(self, section: BeautifulSoup) -> str: """Extract parameter description.""" desc_elem = section.find(['p', 'td'], class_=lambda x: x and 'description' in x.lower()) return desc_elem.text.strip() if desc_elem else '' def _extract_response_code(self, section: BeautifulSoup) -> str: """Extract response code.""" code_elem = section.find(['code', 'td'], string=lambda x: x and x.isdigit()) return code_elem.text.strip() if code_elem else '' def _extract_response_description(self, section: BeautifulSoup) -> str: """Extract response description.""" desc_elem = section.find(['p', 'td'], class_=lambda x: x and 'description' in x.lower()) return desc_elem.text.strip() if desc_elem else '' def _extract_response_schema(self, section: BeautifulSoup) -> Dict: """Extract response schema.""" schema_elem = section.find(['div', 'pre'], class_=lambda x: x and 'schema' in x.lower()) if not schema_elem: return {} return { 'type': self._extract_schema_type(schema_elem), 'properties': self._extract_schema_properties(schema_elem) } def _extract_property_name(self, section: BeautifulSoup) -> str: """Extract property name.""" name_elem = section.find(['code', 'td'], string=lambda x: x and not x.isspace()) return name_elem.text.strip() if name_elem else '' def _extract_property_type(self, section: BeautifulSoup) -> str: """Extract property type.""" type_elem = section.find(['code', 'td'], string=lambda x: x and any(t in x.lower() for t in ['string', 'integer', 'boolean', 'array', 'object'])) return type_elem.text.strip() if type_elem else '' def _extract_property_required(self, section: BeautifulSoup) -> bool: """Extract whether property is required.""" required_elem = section.find(string=lambda x: x and 'required' in x.lower()) return bool(required_elem) def _extract_property_description(self, section: BeautifulSoup) -> str: """Extract property description.""" desc_elem = section.find(['p', 'td'], class_=lambda x: x and 'description' in x.lower()) return desc_elem.text.strip() if desc_elem else '' def _extract_schema_type(self, section: BeautifulSoup) -> str: """Extract schema type.""" type_elem = section.find(['code', 'td'], string=lambda x: x and any(t in x.lower() for t in ['object', 'array', 'string', 'integer', 'boolean'])) return type_elem.text.strip() if type_elem else 'object'

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ShotaNagafuchi/api-docs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

parser.py•9.97 KiB