"""
Herramientas de procesamiento de texto.
"""
import base64
import hashlib
import json
import re
from collections import Counter
from typing import Any, Dict
def count_words(args: Dict[str, Any]) -> str:
    """Report word, line and character statistics for a text.

    Args (in *args*):
        text: Text to analyze (required).

    Returns:
        A JSON string with character, word and line counts, or an
        error message when 'text' is missing/empty.
    """
    text = args.get("text", "")
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    all_lines = text.split("\n")
    tokens = text.split()
    nonempty_lines = sum(1 for ln in all_lines if ln.strip())
    stats = {
        "total_caracteres": len(text),
        "caracteres_sin_espacios": len(text.replace(" ", "")),
        "total_palabras": len(tokens),
        "total_lineas": len(all_lines),
        "lineas_no_vacias": nonempty_lines,
        # split("\n") always yields at least one element, so this is safe
        "promedio_palabras_por_linea": round(len(tokens) / len(all_lines), 2),
    }
    return json.dumps(stats, indent=2, ensure_ascii=False)
def find_and_replace(args: Dict[str, Any]) -> str:
    """Search and replace in a text, optionally with regular expressions.

    Args (in *args*):
        text: Text to process (required).
        search: Literal string or regex pattern to find (required).
        replace: Replacement text (default "").
        use_regex: Treat *search* as a regex (default False).
        case_sensitive: Match case exactly (default True).

    Returns:
        A summary string with the replacement count and the resulting
        text, or an error message.
    """
    text = args.get("text", "")
    search = args.get("search", "")
    replace = args.get("replace", "")
    use_regex = args.get("use_regex", False)
    case_sensitive = args.get("case_sensitive", True)
    if not text or not search:
        return "Error: Se requieren los parámetros 'text' y 'search'"
    try:
        if use_regex:
            flags = 0 if case_sensitive else re.IGNORECASE
            # subn replaces and counts in one pass; the original ran
            # re.sub + re.findall, scanning the text twice.
            result_text, matches = re.subn(search, replace, text, flags=flags)
        elif case_sensitive:
            matches = text.count(search)
            result_text = text.replace(search, replace)
        else:
            # Case-insensitive literal search: locate matches in a
            # lowercased copy, but splice replacements into the original.
            needle = search.lower()
            haystack = text.lower()
            parts = []
            start = 0
            matches = 0
            while True:
                pos = haystack.find(needle, start)
                if pos == -1:
                    parts.append(text[start:])
                    break
                parts.append(text[start:pos])
                parts.append(replace)
                matches += 1
                start = pos + len(needle)
            result_text = "".join(parts)
        return f"Texto procesado. Reemplazos realizados: {matches}\n\nTexto resultante:\n{result_text}"
    except re.error as e:
        return f"Error en expresión regular: {str(e)}"
    except Exception as e:
        return f"Error procesando texto: {str(e)}"
def extract_emails(args: Dict[str, Any]) -> str:
    """Extract e-mail addresses from a text.

    Returns a JSON string with the total match count, the number of
    unique addresses, and the unique addresses in first-seen order,
    or an error message when 'text' is missing.
    """
    text = args.get("text", "")
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    # Fixed: the TLD class was [A-Z|a-z], which wrongly admitted a
    # literal '|' character inside the top-level domain.
    email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
    emails = re.findall(email_pattern, text)
    # dict.fromkeys deduplicates while preserving first-seen order
    unique_emails = list(dict.fromkeys(emails))
    result = {
        "total_emails_encontrados": len(emails),
        "emails_unicos": len(unique_emails),
        "emails": unique_emails,
    }
    return json.dumps(result, indent=2, ensure_ascii=False)
def extract_urls(args: Dict[str, Any]) -> str:
    """Extract HTTP/HTTPS URLs from a text.

    Returns a JSON string with the total match count, the number of
    unique URLs, and the unique URLs in first-seen order.
    """
    text = args.get("text", "")
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    # NOTE(review): [$-_@.&+] contains the range $..._ (which also covers
    # digits and uppercase letters); kept as-is to preserve matching.
    url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    found = re.findall(url_pattern, text)
    # Deduplicate while keeping first-seen order
    deduped = list(dict.fromkeys(found))
    payload = {
        "total_urls_encontradas": len(found),
        "urls_unicas": len(deduped),
        "urls": deduped,
    }
    return json.dumps(payload, indent=2, ensure_ascii=False)
def analyze_text(args: Dict[str, Any]) -> str:
    """Full text analysis: basic stats, frequencies and pattern counts.

    Returns a JSON string with basic statistics, the 10 most frequent
    words and alphabetic characters, and counts of e-mails, URLs and
    standalone numbers found in the text.
    """
    text = args.get("text", "")
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    # Basic statistics
    lines = text.split("\n")
    words = text.split()
    # Word frequencies: lowercase and strip surrounding punctuation.
    # Fixed: tokens that are pure punctuation strip down to "" and used
    # to pollute the top-10; they are now skipped.
    cleaned = (word.lower().strip('.,!?;:"()[]{}') for word in words)
    word_freq = Counter(w for w in cleaned if w)
    most_common_words = word_freq.most_common(10)
    # Character frequencies (alphabetic characters only)
    char_freq = Counter(char for char in text if char.isalpha())
    most_common_chars = char_freq.most_common(10)
    # Pattern counts. Fixed: the TLD class was [A-Z|a-z], which wrongly
    # admitted a literal '|' inside the top-level domain.
    emails = len(
        re.findall(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", text)
    )
    urls = len(
        re.findall(
            r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
            text,
        )
    )
    numbers = len(re.findall(r"\b\d+\b", text))
    # Average word length (punctuation included, as originally)
    word_lengths = [len(word) for word in words]
    avg_word_length = sum(word_lengths) / len(word_lengths) if word_lengths else 0
    result = {
        "estadisticas_basicas": {
            "total_caracteres": len(text),
            "total_palabras": len(words),
            "total_lineas": len(lines),
            "promedio_longitud_palabra": round(avg_word_length, 2),
        },
        "palabras_mas_frecuentes": [
            {"palabra": word, "frecuencia": freq} for word, freq in most_common_words
        ],
        "caracteres_mas_frecuentes": [
            {"caracter": char, "frecuencia": freq} for char, freq in most_common_chars
        ],
        "patrones_encontrados": {"emails": emails, "urls": urls, "numeros": numbers},
    }
    return json.dumps(result, indent=2, ensure_ascii=False)
def encode_decode_text(args: Dict[str, Any]) -> str:
    """Encode or decode a text in one of several formats.

    Args (in *args*):
        text: Text to transform (required).
        operation: "encode" or "decode" (default "encode").
        format: "base64", "url" or "html" (default "base64").

    Returns:
        A labeled result string, or an error message.
    """
    text = args.get("text", "")
    operation = args.get("operation", "encode")  # encode, decode
    format_type = args.get("format", "base64")  # base64, url, html
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    # Fixed: any unknown operation used to fall through silently into the
    # decode branch; reject it explicitly instead.
    if operation not in ("encode", "decode"):
        return f"Error: Operación no soportada: {operation}"
    try:
        if format_type == "base64":
            if operation == "encode":
                result = base64.b64encode(text.encode("utf-8")).decode("utf-8")
                return f"Texto codificado en Base64:\n{result}"
            else:
                result = base64.b64decode(text.encode("utf-8")).decode("utf-8")
                return f"Texto decodificado de Base64:\n{result}"
        elif format_type == "url":
            import urllib.parse
            if operation == "encode":
                result = urllib.parse.quote(text)
                return f"Texto codificado para URL:\n{result}"
            else:
                result = urllib.parse.unquote(text)
                return f"Texto decodificado de URL:\n{result}"
        elif format_type == "html":
            import html
            if operation == "encode":
                result = html.escape(text)
                return f"Texto codificado para HTML:\n{result}"
            else:
                result = html.unescape(text)
                return f"Texto decodificado de HTML:\n{result}"
        else:
            return f"Error: Formato no soportado: {format_type}"
    except Exception as e:
        return f"Error {operation}ando texto: {str(e)}"
def generate_hash(args: Dict[str, Any]) -> str:
    """Compute a hash digest of a text and return it as hex.

    Args (in *args*):
        text: Text to hash (required).
        algorithm: One of md5, sha1, sha256, sha512 (default md5).
    """
    text = args.get("text", "")
    algorithm = args.get("algorithm", "md5").lower()
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    if algorithm not in ["md5", "sha1", "sha256", "sha512"]:
        return "Error: Algoritmo no soportado. Use: md5, sha1, sha256, sha512"
    try:
        # hashlib.new accepts the initial data directly
        digest = hashlib.new(algorithm, text.encode("utf-8")).hexdigest()
        return f"Hash {algorithm.upper()} del texto:\n{digest}"
    except Exception as e:
        return f"Error generando hash: {str(e)}"
def split_text(args: Dict[str, Any]) -> str:
    """Split a text into fixed-size chunks or by a delimiter.

    Args (in *args*):
        text: Text to split (required).
        delimiter: Separator used when no max_length is given (default "\\n").
        max_length: If set, split into chunks of this size instead.

    Returns:
        A JSON string describing the fragments, or an error message.
    """
    text = args.get("text", "")
    delimiter = args.get("delimiter", "\n")
    max_length = args.get("max_length")
    if not text:
        return "Error: Se requiere el parámetro 'text'"
    try:
        if max_length:
            # Fixed-size slicing takes precedence over the delimiter
            pieces = [
                text[i : i + max_length] for i in range(0, len(text), max_length)
            ]
            criterion = f"longitud máxima: {max_length}"
        else:
            pieces = text.split(delimiter)
            criterion = f"delimitador: '{delimiter}'"
        payload = {
            "total_fragmentos": len(pieces),
            "criterio_division": criterion,
            "fragmentos": pieces,
        }
        return json.dumps(payload, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error dividiendo texto: {str(e)}"
def register_text_tools(tools: Dict[str, Dict[str, Any]]) -> None:
    """Register every text-processing tool in the given registry.

    Each entry maps a tool name to its description, handler callable and
    JSON input schema.
    """

    def _prop(ptype, description, **extra):
        # One JSON-schema property: type + description + optional keys
        # (default, enum, minimum...), inserted in that order.
        prop = {"type": ptype, "description": description}
        prop.update(extra)
        return prop

    def _entry(description, handler, properties, required):
        # Standard registry entry wrapper around an object schema.
        return {
            "description": description,
            "handler": handler,
            "inputSchema": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        }

    tools["count_words"] = _entry(
        "Contar palabras, líneas y caracteres en un texto",
        count_words,
        {"text": _prop("string", "Texto a analizar")},
        ["text"],
    )
    tools["find_replace"] = _entry(
        "Buscar y reemplazar texto con soporte para regex",
        find_and_replace,
        {
            "text": _prop("string", "Texto donde buscar y reemplazar"),
            "search": _prop("string", "Texto o patrón a buscar"),
            "replace": _prop("string", "Texto de reemplazo", default=""),
            "use_regex": _prop(
                "boolean", "Usar expresiones regulares", default=False
            ),
            "case_sensitive": _prop(
                "boolean", "Búsqueda sensible a mayúsculas", default=True
            ),
        },
        ["text", "search"],
    )
    tools["extract_emails"] = _entry(
        "Extraer direcciones de email de un texto",
        extract_emails,
        {"text": _prop("string", "Texto del que extraer emails")},
        ["text"],
    )
    tools["extract_urls"] = _entry(
        "Extraer URLs de un texto",
        extract_urls,
        {"text": _prop("string", "Texto del que extraer URLs")},
        ["text"],
    )
    tools["analyze_text"] = _entry(
        "Análisis completo de texto con estadísticas y patrones",
        analyze_text,
        {"text": _prop("string", "Texto a analizar")},
        ["text"],
    )
    tools["encode_decode"] = _entry(
        "Codificar/decodificar texto en diferentes formatos",
        encode_decode_text,
        {
            "text": _prop("string", "Texto a codificar/decodificar"),
            "operation": _prop(
                "string",
                "Operación a realizar",
                enum=["encode", "decode"],
                default="encode",
            ),
            "format": _prop(
                "string",
                "Formato de codificación",
                enum=["base64", "url", "html"],
                default="base64",
            ),
        },
        ["text"],
    )
    tools["generate_hash"] = _entry(
        "Generar hash de un texto",
        generate_hash,
        {
            "text": _prop("string", "Texto para generar hash"),
            "algorithm": _prop(
                "string",
                "Algoritmo de hash",
                enum=["md5", "sha1", "sha256", "sha512"],
                default="md5",
            ),
        },
        ["text"],
    )
    tools["split_text"] = _entry(
        "Dividir texto por diferentes criterios",
        split_text,
        {
            "text": _prop("string", "Texto a dividir"),
            "delimiter": _prop(
                "string",
                "Delimitador para dividir (ignorado si se usa max_length)",
                default="\n",
            ),
            "max_length": _prop(
                "integer",
                "Longitud máxima por fragmento (opcional)",
                minimum=1,
            ),
        },
        ["text"],
    )