# chunking.py
"""Text chunking utilities with token-aware splitting."""
from typing import List
import tiktoken
class TextChunker:
    """Token-aware text chunker using tiktoken.

    Splits text into windows of at most ``chunk_size`` tokens, with
    consecutive windows sharing ``chunk_overlap`` tokens of context.
    """

    def __init__(
        self,
        chunk_size: int = 512,
        chunk_overlap: int = 50,
        encoding_name: str = "cl100k_base"  # GPT-4 tokenizer
    ):
        """Initialize the chunker.

        Args:
            chunk_size: Maximum tokens per chunk (must be positive).
            chunk_overlap: Tokens shared between consecutive chunks
                (must be non-negative and smaller than ``chunk_size``).
            encoding_name: tiktoken encoding to use.

        Raises:
            ValueError: If the size/overlap combination is invalid.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        # Overlap >= size would make the window step (size - overlap)
        # non-positive and the chunking loop would never terminate.
        if not 0 <= chunk_overlap < chunk_size:
            raise ValueError("chunk_overlap must satisfy 0 <= overlap < chunk_size")
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.encoding = tiktoken.get_encoding(encoding_name)

    def chunk_text(self, text: str) -> List[str]:
        """Split text into overlapping chunks based on token count.

        Args:
            text: The text to chunk.

        Returns:
            List of text chunks (empty list for empty input). Each chunk
            decodes at most ``chunk_size`` tokens.
        """
        tokens = self.encoding.encode(text)
        chunks: List[str] = []
        start = 0
        while start < len(tokens):
            end = start + self.chunk_size
            # Decode the token window back to text.
            chunks.append(self.encoding.decode(tokens[start:end]))
            # Once the window reaches the end of the token stream, stop:
            # advancing again would emit a chunk that is entirely
            # contained in the tail of the one just appended.
            if end >= len(tokens):
                break
            # Advance with overlap; step is positive (validated in __init__).
            start += self.chunk_size - self.chunk_overlap
        return chunks

    def count_tokens(self, text: str) -> int:
        """Count tokens in text."""
        return len(self.encoding.encode(text))

    def needs_chunking(self, text: str) -> bool:
        """Check if text exceeds one chunk's worth of tokens."""
        return self.count_tokens(text) > self.chunk_size
class SimpleChunker:
"""Simple character-based chunker (fallback if tiktoken unavailable)."""
def __init__(
self,
chunk_size: int = 2000, # characters
chunk_overlap: int = 200
):
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
def chunk_text(self, text: str) -> List[str]:
"""Split text into overlapping chunks by character count."""
chunks = []
start = 0
while start < len(text):
end = start + self.chunk_size
chunk = text[start:end]
chunks.append(chunk)
start += self.chunk_size - self.chunk_overlap
return chunks
def needs_chunking(self, text: str) -> bool:
"""Check if text needs to be chunked."""
return len(text) > self.chunk_size