"""
Herramientas web para peticiones HTTP y manipulación de URLs.
"""
import ipaddress
import json
import urllib.parse
from typing import Any, Dict

import httpx
async def make_http_request(args: Dict[str, Any]) -> str:
    """Perform an HTTP request asynchronously and summarise the response.

    Args:
        args: Tool arguments. Recognised keys are ``url`` (required),
            ``method`` (defaults to ``"GET"``), ``headers``, ``data``
            (request body) and ``timeout`` in seconds (defaults to 30).

    Returns:
        A JSON string with the status code and phrase, response headers,
        final URL, content type, body length in bytes and the first 1000
        characters of the body; or a plain error message when ``url`` is
        missing or the request fails.
    """
    url = args.get("url")
    # ``or`` also covers keys that are present but explicitly set to None,
    # which would otherwise crash on ``None.upper()``.
    method = (args.get("method") or "GET").upper()
    headers = args.get("headers") or {}
    data = args.get("data")
    timeout = args.get("timeout", 30)

    if not url:
        return "Error: Se requiere el parámetro 'url'"

    # JSON containers are serialised by httpx; raw text/bytes are sent via
    # ``content=`` because httpx deprecates passing them through ``data=``.
    body_kwargs: Dict[str, Any] = {}
    if isinstance(data, (dict, list)):
        body_kwargs["json"] = data
    elif isinstance(data, (str, bytes)):
        body_kwargs["content"] = data

    try:
        async with httpx.AsyncClient() as client:
            response = await client.request(
                method=method,
                url=url,
                headers=headers,
                timeout=timeout,
                **body_kwargs,
            )

        body_text = response.text
        # Only a preview of the body is returned to keep the output small.
        if len(body_text) > 1000:
            body_preview = body_text[:1000] + "..."
        else:
            body_preview = body_text

        result = {
            "status_code": response.status_code,
            "status_text": response.reason_phrase,
            "headers": dict(response.headers),
            "url": str(response.url),
            "content_type": response.headers.get("content-type", ""),
            "content_length": len(response.content),
            "response_body": body_preview,
        }
        return json.dumps(result, indent=2, ensure_ascii=False)
    except httpx.TimeoutException:
        return f"Error: Timeout después de {timeout} segundos"
    except httpx.RequestError as e:
        return f"Error en la petición: {str(e)}"
    except Exception as e:
        return f"Error inesperado: {str(e)}"
def make_http_request_sync(args: Dict[str, Any]) -> str:
    """Perform an HTTP request synchronously and summarise the response.

    Args:
        args: Tool arguments. Recognised keys are ``url`` (required),
            ``method`` (defaults to ``"GET"``), ``headers``, ``data``
            (request body) and ``timeout`` in seconds (defaults to 30).

    Returns:
        A JSON string with the status code and phrase, response headers,
        final URL, content type, body length in bytes and the first 1000
        characters of the body; or a plain error message when ``url`` is
        missing or the request fails.
    """
    url = args.get("url")
    # ``or`` also covers keys that are present but explicitly set to None,
    # which would otherwise crash on ``None.upper()``.
    method = (args.get("method") or "GET").upper()
    headers = args.get("headers") or {}
    data = args.get("data")
    timeout = args.get("timeout", 30)

    if not url:
        return "Error: Se requiere el parámetro 'url'"

    # JSON containers are serialised by httpx; raw text/bytes are sent via
    # ``content=`` because httpx deprecates passing them through ``data=``.
    body_kwargs: Dict[str, Any] = {}
    if isinstance(data, (dict, list)):
        body_kwargs["json"] = data
    elif isinstance(data, (str, bytes)):
        body_kwargs["content"] = data

    try:
        with httpx.Client() as client:
            response = client.request(
                method=method,
                url=url,
                headers=headers,
                timeout=timeout,
                **body_kwargs,
            )

        body_text = response.text
        # Only a preview of the body is returned to keep the output small.
        if len(body_text) > 1000:
            body_preview = body_text[:1000] + "..."
        else:
            body_preview = body_text

        result = {
            "status_code": response.status_code,
            "status_text": response.reason_phrase,
            "headers": dict(response.headers),
            "url": str(response.url),
            "content_type": response.headers.get("content-type", ""),
            "content_length": len(response.content),
            "response_body": body_preview,
        }
        return json.dumps(result, indent=2, ensure_ascii=False)
    except httpx.TimeoutException:
        return f"Error: Timeout después de {timeout} segundos"
    except httpx.RequestError as e:
        return f"Error en la petición: {str(e)}"
    except Exception as e:
        return f"Error inesperado: {str(e)}"
def parse_url(args: Dict[str, Any]) -> str:
    """Split a URL into its components and report them as JSON.

    Args:
        args: Tool arguments; ``url`` is required.

    Returns:
        A JSON string with the scheme, netloc, host, port, path, raw query,
        fragment, parsed query parameters, two boolean flags
        (``es_url_valida``, ``es_https``) and the leading subdomain label;
        or a plain error message when ``url`` is missing or cannot be parsed.
    """
    url = args.get("url")

    if not url:
        return "Error: Se requiere el parámetro 'url'"

    try:
        parsed = urllib.parse.urlparse(url)
        query_params = urllib.parse.parse_qs(parsed.query)
        hostname = parsed.hostname

        # A subdomain only exists when the host has more than two labels
        # (``example.com`` has none) and is not an IP literal.
        subdomain = None
        if hostname:
            try:
                ipaddress.ip_address(hostname)
                is_ip = True
            except ValueError:
                is_ip = False
            labels = hostname.split(".")
            if not is_ip and len(labels) > 2:
                subdomain = labels[0]

        result = {
            "url_original": url,
            "esquema": parsed.scheme,
            "dominio": parsed.netloc,
            "host": hostname,
            "puerto": parsed.port,
            "ruta": parsed.path,
            "consulta": parsed.query,
            "fragmento": parsed.fragment,
            "parametros_consulta": query_params,
            "es_url_valida": bool(parsed.scheme and parsed.netloc),
            "es_https": parsed.scheme == "https",
            "subdominio": subdomain,
        }
        return json.dumps(result, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error parseando URL: {str(e)}"
def build_url(args: Dict[str, Any]) -> str:
    """Assemble a URL from individual components.

    Args:
        args: Tool arguments. ``host`` is required; ``scheme`` defaults to
            ``"https"``; ``port``, ``path``, ``query_params`` and
            ``fragment`` are optional.

    Returns:
        A JSON string with the built URL (``url_construida``) and an echo of
        the components used; or a plain error message when ``host`` is
        missing or the URL cannot be built.
    """
    scheme = args.get("scheme", "https")
    host = args.get("host")
    port = args.get("port")
    path = args.get("path", "")
    query_params = args.get("query_params", {})
    fragment = args.get("fragment", "")

    if not host:
        return "Error: Se requiere el parámetro 'host'"

    try:
        netloc = host
        # A bare IPv6 literal must be bracketed, otherwise its colons are
        # indistinguishable from the port separator.
        if isinstance(host, str) and ":" in host and not host.startswith("["):
            try:
                ipaddress.IPv6Address(host)
            except ValueError:
                pass  # Not an IPv6 literal; leave the host untouched.
            else:
                netloc = f"[{host}]"
        if port:
            netloc += f":{port}"

        # doseq=True expands list values into repeated keys (a=1&a=2), the
        # same shape urllib.parse.parse_qs produces, instead of encoding the
        # list's repr.
        query = urllib.parse.urlencode(query_params, doseq=True) if query_params else ""

        url = urllib.parse.urlunparse((scheme, netloc, path, "", query, fragment))

        result = {
            "url_construida": url,
            "componentes": {
                "esquema": scheme,
                "host": host,
                "puerto": port,
                "ruta": path,
                "parametros": query_params,
                "fragmento": fragment,
            },
        }
        return json.dumps(result, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error construyendo URL: {str(e)}"
def encode_decode_url(args: Dict[str, Any]) -> str:
    """Percent-encode or percent-decode text for use in a URL.

    Args:
        args: Tool arguments. ``text`` is required; ``operation`` is
            ``"encode"`` (default) or ``"decode"``; ``component`` is
            ``"query"`` (default), ``"path"`` or ``"full"``.

    Returns:
        A message containing the converted text, or a plain error message
        when ``text`` is missing or ``operation``/``component`` is not one
        of the supported values.
    """
    text = args.get("text")
    operation = args.get("operation", "encode")
    component = args.get("component", "query")

    if not text:
        return "Error: Se requiere el parámetro 'text'"

    encoders = {
        "query": urllib.parse.quote_plus,
        "path": urllib.parse.quote,
        "full": lambda value: urllib.parse.quote(value, safe=""),
    }
    # Only the query form treats "+" as a space; in path/full text a "+" is
    # a literal character and must survive decoding.
    decoders = {
        "query": urllib.parse.unquote_plus,
        "path": urllib.parse.unquote,
        "full": urllib.parse.unquote,
    }

    if operation == "encode":
        converters = encoders
    elif operation == "decode":
        converters = decoders
    else:
        return f"Error: Operación no válida: {operation}"

    try:
        if component not in converters:
            return f"Error: Componente no válido: {component}"
        result = converters[component](text)
        return f"Texto {operation}ado para URL ({component}):\n{result}"
    except Exception as e:
        return f"Error {operation}ando URL: {str(e)}"
def validate_url(args: Dict[str, Any]) -> str:
    """Check the format of a URL and, optionally, whether it responds.

    Args:
        args: Tool arguments. ``url`` is required; when
            ``check_accessibility`` is true and the format is valid, a HEAD
            request with a 10 second timeout is issued.

    Returns:
        A JSON string with the format flags and, if accessibility was
        checked, ``accesible``, ``status_code`` and either ``servidor`` (on
        success) or ``error_accesibilidad`` (on failure); or a plain error
        message when ``url`` is missing or validation itself fails.
    """
    url = args.get("url")
    check_accessibility = args.get("check_accessibility", False)

    if not url:
        return "Error: Se requiere el parámetro 'url'"

    try:
        try:
            parsed = urllib.parse.urlparse(url)
        except ValueError:
            # urlparse rejects some malformed input outright (for example
            # unbalanced IPv6 brackets). For a validator that is a "not
            # valid" verdict, so report every component as absent.
            parsed = urllib.parse.ParseResult("", "", "", "", "", "")

        validation_result = {
            "url": url,
            "formato_valido": bool(parsed.scheme and parsed.netloc),
            "esquema_valido": parsed.scheme in ["http", "https", "ftp", "ftps"],
            "tiene_dominio": bool(parsed.netloc),
            "ruta_presente": bool(parsed.path),
            "parametros_presentes": bool(parsed.query),
            "fragmento_presente": bool(parsed.fragment),
        }

        if check_accessibility and validation_result["formato_valido"]:
            try:
                with httpx.Client() as client:
                    response = client.head(url, timeout=10)
            except Exception as exc:
                # Best effort: any failure means "not accessible", but keep
                # the reason instead of discarding it.
                validation_result["accesible"] = False
                validation_result["status_code"] = None
                validation_result["error_accesibilidad"] = str(exc)
            else:
                validation_result["accesible"] = True
                validation_result["status_code"] = response.status_code
                validation_result["servidor"] = response.headers.get("server", "")

        return json.dumps(validation_result, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error validando URL: {str(e)}"
def extract_domain_info(args: Dict[str, Any]) -> str:
    """Describe the host part of a URL.

    Args:
        args: Tool arguments; ``url`` is required.

    Returns:
        A JSON string with the hostname, port, dot-separated labels,
        leading subdomain, main domain, TLD, whether the host is an IP
        address, hostname length and subdomain count; or a plain error
        message when ``url`` is missing, has no hostname or cannot be
        parsed.
    """
    url = args.get("url")

    if not url:
        return "Error: Se requiere el parámetro 'url'"

    try:
        parsed = urllib.parse.urlparse(url)
        hostname = parsed.hostname

        if not hostname:
            return "Error: URL no contiene un hostname válido"

        # Use the ipaddress module rather than a digit heuristic so IPv6
        # literals are recognised and strings such as "999.999.999.999"
        # are not.
        try:
            ipaddress.ip_address(hostname)
            is_ip = True
        except ValueError:
            is_ip = False

        parts = hostname.split(".")

        if is_ip:
            # An IP address has no DNS hierarchy to break down.
            subdomain = None
            main_domain = hostname
            tld = None
            subdomain_count = 0
        else:
            # NOTE: the main domain is simply the last two labels, so
            # multi-label public suffixes (e.g. "co.uk") are not treated
            # specially.
            subdomain = parts[0] if len(parts) > 2 else None
            main_domain = ".".join(parts[-2:]) if len(parts) >= 2 else hostname
            tld = parts[-1] if len(parts) > 1 else None
            subdomain_count = len(parts) - 2 if len(parts) > 2 else 0

        domain_info = {
            "hostname_completo": hostname,
            "puerto": parsed.port,
            "partes_dominio": parts,
            "subdominio": subdomain,
            "dominio_principal": main_domain,
            "tld": tld,
            "es_ip": is_ip,
            "longitud_hostname": len(hostname),
            "numero_subdominios": subdomain_count,
        }
        return json.dumps(domain_info, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error extrayendo información del dominio: {str(e)}"
def register_web_tools(tools: Dict[str, Dict[str, Any]]) -> None:
    """Add every web tool defined in this module to ``tools``.

    Each entry is stored under its public tool name and holds a
    description, the handler callable and a JSON-Schema-like
    ``inputSchema`` describing the accepted arguments.

    Args:
        tools: Registry to populate; it is mutated in place.
    """
    http_request_schema = {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "URL de destino"},
            "method": {
                "type": "string",
                "description": "Método HTTP",
                "enum": ["GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH"],
                "default": "GET",
            },
            "headers": {
                "type": "object",
                "description": "Headers de la petición",
                "default": {},
            },
            "data": {"description": "Datos a enviar (JSON object o string)"},
            "timeout": {
                "type": "integer",
                "description": "Timeout en segundos",
                "default": 30,
                "minimum": 1,
                "maximum": 120,
            },
        },
        "required": ["url"],
    }

    parse_url_schema = {
        "type": "object",
        "properties": {"url": {"type": "string", "description": "URL a parsear"}},
        "required": ["url"],
    }

    build_url_schema = {
        "type": "object",
        "properties": {
            "scheme": {
                "type": "string",
                "description": "Esquema de la URL",
                "default": "https",
            },
            "host": {"type": "string", "description": "Hostname o dominio"},
            "port": {"type": "integer", "description": "Puerto (opcional)"},
            "path": {"type": "string", "description": "Ruta", "default": ""},
            "query_params": {
                "type": "object",
                "description": "Parámetros de consulta",
                "default": {},
            },
            "fragment": {
                "type": "string",
                "description": "Fragmento (anchor)",
                "default": "",
            },
        },
        "required": ["host"],
    }

    encode_decode_schema = {
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Texto a codificar/decodificar",
            },
            "operation": {
                "type": "string",
                "description": "Operación a realizar",
                "enum": ["encode", "decode"],
                "default": "encode",
            },
            "component": {
                "type": "string",
                "description": "Tipo de componente URL",
                "enum": ["query", "path", "full"],
                "default": "query",
            },
        },
        "required": ["text"],
    }

    validate_url_schema = {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "URL a validar"},
            "check_accessibility": {
                "type": "boolean",
                "description": "Verificar si la URL es accesible",
                "default": False,
            },
        },
        "required": ["url"],
    }

    extract_domain_schema = {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "URL de la que extraer información del dominio",
            }
        },
        "required": ["url"],
    }

    # (tool name, description, handler, input schema), in registration order.
    catalog = (
        (
            "http_request",
            "Realizar petición HTTP a una URL",
            make_http_request_sync,
            http_request_schema,
        ),
        (
            "parse_url",
            "Parsear y analizar componentes de una URL",
            parse_url,
            parse_url_schema,
        ),
        (
            "build_url",
            "Construir URL a partir de componentes",
            build_url,
            build_url_schema,
        ),
        (
            "url_encode_decode",
            "Codificar/decodificar componentes de URL",
            encode_decode_url,
            encode_decode_schema,
        ),
        (
            "validate_url",
            "Validar formato y accesibilidad de una URL",
            validate_url,
            validate_url_schema,
        ),
        (
            "extract_domain",
            "Extraer información detallada del dominio de una URL",
            extract_domain_info,
            extract_domain_schema,
        ),
    )

    for tool_name, description, handler, input_schema in catalog:
        tools[tool_name] = {
            "description": description,
            "handler": handler,
            "inputSchema": input_schema,
        }