Skip to main content
Glama

PyTorch Documentation Search Tool

process.py1.51 kB
#!/usr/bin/env python3 """ Document processing script for PyTorch Documentation Search Tool. Processes documentation into chunks with code-aware boundaries. """ import argparse import sys import os # Add parent directory to path sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from ptsearch.document import DocumentProcessor from ptsearch.config import DEFAULT_CHUNKS_PATH, CHUNK_SIZE, OVERLAP_SIZE def main(): # Parse command line arguments parser = argparse.ArgumentParser(description="Process documents into chunks") parser.add_argument("--docs-dir", type=str, required=True, help="Directory containing documentation files") parser.add_argument("--output-file", type=str, default=DEFAULT_CHUNKS_PATH, help="Output JSON file to save chunks") parser.add_argument("--chunk-size", type=int, default=CHUNK_SIZE, help="Size of document chunks") parser.add_argument("--overlap", type=int, default=OVERLAP_SIZE, help="Overlap between chunks") args = parser.parse_args() # Create processor and process documents processor = DocumentProcessor(chunk_size=args.chunk_size, overlap=args.overlap) chunks = processor.process_directory(args.docs_dir, args.output_file) print(f"Processing complete! Generated {len(chunks)} chunks from {args.docs_dir}") print(f"Chunks saved to {args.output_file}") if __name__ == "__main__": main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/seanmichaelmcgee/pytorch-docs-refactored'

If you have feedback or need assistance with the MCP directory API, please join our Discord server