Skip to main content
Glama
test_chunk.py (737 B)
#!/usr/bin/env python3
"""Simple chunking test.

Parses ``docs/RL.pdf``, splits the extracted text into fixed-size,
overlapping character windows, and prints how long the split took and how
many chunks it produced.
"""

import time
from pathlib import Path
from typing import List

CHUNK_SIZE = 20000  # ~5K tokens
OVERLAP = 2000  # ~500 tokens


def chunk_text(text: str, chunk_size: int = CHUNK_SIZE,
               overlap: int = OVERLAP) -> List[str]:
    """Split *text* into windows of at most *chunk_size* characters.

    Each window starts ``chunk_size - overlap`` characters after the
    previous one, so consecutive windows share *overlap* characters. The
    final window may be shorter than *chunk_size*.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk; must be positive.
        overlap: Characters shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in order. An empty *text* yields an empty list.

    Raises:
        ValueError: If *chunk_size* is not positive, or *overlap* is
            negative or not smaller than *chunk_size*.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        # overlap >= chunk_size would give a non-positive stride, i.e. the
        # window would never advance.
        raise ValueError(
            f"overlap must be in [0, chunk_size), got {overlap}"
        )

    total = len(text)
    stride = chunk_size - overlap
    chunks = []
    for start in range(0, total, stride):
        end = start + chunk_size
        chunks.append(text[start:end])
        # Stop once a window reaches the end of the text; later starts
        # would only emit suffixes already contained in this chunk.
        if end >= total:
            break
    return chunks


def main() -> None:
    """Parse the sample PDF, chunk its text, and report timing and count."""
    # Imported here rather than at module level so that merely importing
    # this file (for example when a test runner collects test_*.py files)
    # does not require the project parser.
    from src.parsers import parse_document

    # Parse the small PDF
    print("Parsing RL.pdf...")
    doc = parse_document(Path('docs/RL.pdf'))
    print(f"Parsed: {len(doc.text)} chars")

    # Simple chunking
    print("\nChunking...")
    # perf_counter is monotonic and high-resolution, which suits measuring
    # a short elapsed interval.
    started = time.perf_counter()
    chunks = chunk_text(doc.text, CHUNK_SIZE, OVERLAP)
    print(f"Chunked in {time.perf_counter() - started:.4f}s")
    print(f"Chunks: {len(chunks)}")


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dl1683/ickyMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.