QuantConnect PDF MCP Server

convert_pdfs.py•5.08 KiB

#!/usr/bin/env python3
"""
Utility script to manually convert PDFs to markdown

Usage: python convert_pdfs.py [pdf_folder] [markdown_folder]

Every ``*.pdf`` directly inside ``pdf_folder`` is converted to a markdown file
of the same stem inside ``markdown_folder``.  A JSON cache
(``.pdf_cache.json`` in the markdown folder) records the MD5 hash of each
successfully converted PDF so unchanged files are skipped on later runs.
"""

import hashlib
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Tuple

try:
    import PyPDF2
except ImportError:
    # Keep the module importable; the missing dependency is reported with a
    # clear message at the point where a conversion is actually attempted.
    PyPDF2 = None


def get_file_hash(file_path: Path) -> str:
    """Return the hex MD5 digest of the file's contents.

    The digest is used only to detect content changes between runs.  The file
    is read in 4096-byte chunks so large PDFs are never loaded whole.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def _convert_pdf(pdf_path: Path) -> Tuple[str, bool]:
    """Convert a PDF to markdown and report whether the conversion succeeded.

    Returns:
        ``(markdown, succeeded)``.  When the PDF cannot be opened or parsed at
        all, ``markdown`` is a short "Error Processing" document and
        ``succeeded`` is ``False``.  A failure on an individual page is
        recorded inside the document and does not make the conversion fail.
    """
    markdown_lines = []

    try:
        if PyPDF2 is None:
            raise ImportError("PyPDF2 is not installed (pip install PyPDF2)")

        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            total_pages = len(pdf_reader.pages)

            # Document header
            markdown_lines.append(f"# {pdf_path.stem}")
            markdown_lines.append(f"\n*Source: {pdf_path.name}*")
            markdown_lines.append(f"*Total Pages: {total_pages}*")
            markdown_lines.append(f"*Processed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*")
            markdown_lines.append("\n---\n")

            for page_num in range(total_pages):
                # The page lookup stays inside the try so that one broken
                # page does not abort the rest of the document.
                try:
                    page = pdf_reader.pages[page_num]
                    # Guard against extractors that yield None for a page
                    # without text; treat it like an empty page.
                    text = (page.extract_text() or "").strip()

                    if text:
                        markdown_lines.append(f"\n## Page {page_num + 1}\n")

                        for line in text.split('\n'):
                            line = line.strip()
                            if line:
                                markdown_lines.append(line)
                                # A trailing '.' or ':' is taken as the end of
                                # a paragraph or heading: add a blank line.
                                if line.endswith(('.', ':')):
                                    markdown_lines.append("")

                    print(f" Processed page {page_num + 1}/{total_pages}", end='\r')

                except Exception as e:
                    print(f"\n Error on page {page_num + 1}: {e}")
                    markdown_lines.append(f"\n## Page {page_num + 1}\n")
                    markdown_lines.append(f"*[Error extracting page: {e}]*\n")

            print()  # New line after progress

    except Exception as e:
        print(f"Error processing PDF: {e}")
        return f"# Error Processing {pdf_path.name}\n\n{str(e)}", False

    return '\n'.join(markdown_lines), True


def pdf_to_markdown(pdf_path: Path) -> str:
    """Convert PDF to markdown format.

    Never raises: if the PDF cannot be opened or parsed, the returned string
    is an "Error Processing" document describing the failure.
    """
    markdown, _ = _convert_pdf(pdf_path)
    return markdown


def main():
    """Convert every PDF in the source folder, skipping unchanged ones.

    Folders come from ``sys.argv[1]`` (PDF folder, default
    ``./quantconnect-docs``) and ``sys.argv[2]`` (markdown folder, default
    ``<pdf_folder>/markdown``).

    Returns:
        Process exit code: 0 when the run completed, 1 when it could not start
        (missing PDF folder, or PyPDF2 unavailable while PDFs need converting).
    """
    # Get folders from arguments or use defaults
    pdf_folder = Path(sys.argv[1] if len(sys.argv) > 1 else "./quantconnect-docs")
    markdown_folder = Path(sys.argv[2] if len(sys.argv) > 2 else pdf_folder / "markdown")

    print(f"PDF folder: {pdf_folder}")
    print(f"Markdown folder: {markdown_folder}")

    if not pdf_folder.exists():
        print(f"Error: PDF folder {pdf_folder} does not exist")
        return 1

    # Create markdown folder, including missing parents for nested paths
    markdown_folder.mkdir(parents=True, exist_ok=True)

    # Load cache
    cache_file = markdown_folder / ".pdf_cache.json"
    cache = {}
    if cache_file.exists():
        try:
            with open(cache_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict):
                raise ValueError("cache file does not contain a JSON object")
            cache = loaded
        except Exception as e:
            print(f"Warning: Could not load cache: {e}")

    # Process PDFs in a deterministic order
    pdf_files = sorted(pdf_folder.glob("*.pdf"))
    print(f"\nFound {len(pdf_files)} PDF files")

    if pdf_files and PyPDF2 is None:
        print("Error: PyPDF2 is not installed (pip install PyPDF2)")
        return 1

    for i, pdf_file in enumerate(pdf_files, 1):
        print(f"\n[{i}/{len(pdf_files)}] Processing {pdf_file.name}...")

        # Hashing is inside the try so one unreadable file cannot abort the
        # run and discard the cache updates of the files already converted.
        try:
            # Check if already processed
            file_hash = get_file_hash(pdf_file)
            cached_info = cache.get(pdf_file.name, {})
            if not isinstance(cached_info, dict):
                cached_info = {}

            if cached_info.get('hash') == file_hash:
                markdown_path = markdown_folder / cached_info.get('markdown_file', f"{pdf_file.stem}.md")
                if markdown_path.exists():
                    print(" Already processed (skipping)")
                    continue

            # Convert to markdown
            markdown_content, succeeded = _convert_pdf(pdf_file)

            # Save markdown (an error document is still written on failure)
            markdown_filename = f"{pdf_file.stem}.md"
            markdown_path = markdown_folder / markdown_filename
            with open(markdown_path, 'w', encoding='utf-8') as f:
                f.write(markdown_content)

            # Update cache only for real conversions, so that a failed PDF is
            # retried on the next run instead of being skipped forever.
            if succeeded:
                cache[pdf_file.name] = {
                    'hash': file_hash,
                    'markdown_file': markdown_filename,
                    'processed_date': datetime.now().isoformat(),
                    'size': pdf_file.stat().st_size
                }

            print(f" Saved to {markdown_path}")

        except Exception as e:
            print(f" Error: {e}")

    # Save cache
    with open(cache_file, 'w', encoding='utf-8') as f:
        json.dump(cache, f, indent=2)

    print(f"\nProcessing complete. Markdown files saved to {markdown_folder}")
    return 0


if __name__ == "__main__":
    sys.exit(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lhstorm/mcp_server_quantconnect_docs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

convert_pdfs.py•5.08 KiB