Skip to main content
Glama
test_reprocess_pdf.py (2.6 kB)
"""Re-process a PDF document with the new extraction logic.

Manual integration script. It talks to a live Supabase project and:

1. deletes every row in ``classroom_document_chunks`` for ``document_id``,
2. re-runs ``_store_document_chunks_impl`` for that document, and
3. prints a short report on the first three chunks, flagging any that
   still contain raw PDF syntax instead of extracted text.

NOTE(review): step 1 is destructive and is not rolled back if step 2
fails -- confirm this is only ever run against a disposable document.
"""
import asyncio
import sys
from pathlib import Path

# Add src to the import path.
# NOTE(review): the imports below use the ``src.`` package prefix, which
# resolves from this file's own directory rather than from ``src/`` --
# confirm whether this path entry is still needed.
sys.path.insert(0, str(Path(__file__).parent / 'src'))

from src.config import config
from src.main import _store_document_chunks_impl
from supabase import create_client

# Supabase client
supabase = create_client(config.SUPABASE_URL, config.SUPABASE_SERVICE_ROLE_KEY)

document_id = "7c912acb-e74c-402d-9639-f8a183e1bbe7"

# Substrings that indicate raw PDF syntax leaked into a chunk.
_PDF_MARKERS = ('%PDF', '/Type', 'endobj')

# Horizontal rule used between report sections.
_SEPARATOR = '=' * 80


def _has_pdf_markers(content):
    """Return True if *content* contains any raw PDF syntax marker."""
    return any(marker in content for marker in _PDF_MARKERS)


def _count_words(content):
    """Count whitespace-separated tokens that contain at least one letter."""
    return sum(
        1 for token in content.split() if any(c.isalpha() for c in token)
    )


async def test_reprocess_pdf():
    """Delete, re-create and inspect the chunks of ``document_id``.

    Prints a human-readable report to stdout and returns ``None``.
    Nothing is asserted; the output is meant to be inspected manually.
    """
    print(f"\n{_SEPARATOR}")
    print("TEST: Re-procesar documento PDF con extracción correcta")
    print(f"{_SEPARATOR}\n")

    # Step 1: delete the old chunks
    print("🗑️ Paso 1: Eliminando chunks antiguos...")
    deleted = supabase.table("classroom_document_chunks").delete().eq(
        "classroom_document_id", document_id
    ).execute()
    print(f" ✅ Eliminados {len(deleted.data) if deleted.data else 0} chunks antiguos\n")

    # Step 2: re-process the document
    print("🔄 Paso 2: Re-procesando documento con nueva lógica...")
    result = await _store_document_chunks_impl(
        classroom_document_id=document_id,
        chunk_size=1000,
        chunk_overlap=100
    )

    print(f"\n{_SEPARATOR}")
    print("✅ RESULTADO:")
    print(f"{_SEPARATOR}")
    print(f" Success: {result.get('success')}")
    print(f" Chunks creados: {result.get('chunks_created', 0)}")
    print(f" Total caracteres: {result.get('total_characters', 0):,}")

    if result.get('error'):
        print(f" ❌ Error: {result.get('error')}")

    # Step 3: verify the new chunks
    print(f"\n{_SEPARATOR}")
    print("🔍 Paso 3: Verificando chunks nuevos...")
    print(f"{_SEPARATOR}\n")

    chunks = supabase.table("classroom_document_chunks").select(
        "chunk_index, content"
    ).eq("classroom_document_id", document_id).order("chunk_index").limit(3).execute()

    # Guard against an empty/None payload, mirroring the delete step above,
    # so a failed re-process reports cleanly instead of raising TypeError.
    rows = chunks.data or []
    if not rows:
        print(" ❌ No se encontraron chunks para verificar\n")

    for chunk in rows:
        content = chunk['content']

        print(f"📦 Chunk #{chunk['chunk_index']}")
        print(f" Longitud: {len(content)} chars")
        print(f" Palabras: {_count_words(content)}")
        print(f" Tiene marcadores PDF: {'❌ SÍ (MAL)' if _has_pdf_markers(content) else '✅ NO (BIEN)'}")
        print(f" Preview: {content[:200]}...")
        print()

    print(f"{_SEPARATOR}")


if __name__ == "__main__":
    asyncio.run(test_reprocess_pdf())

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/JpAboytes/estudIA-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.