"""Utility functions for file loading and data handling."""
import json
from pathlib import Path
def get_project_root() -> Path:
    """Return the project root directory (two levels above this module)."""
    return Path(__file__).parents[1]
def get_transcripts_dir() -> Path:
    """Return the directory holding raw transcript ``.txt`` files."""
    return get_project_root().joinpath("transcripts")
def get_preprocessed_dir() -> Path:
    """Return the directory holding preprocessed episode JSON files."""
    return get_project_root().joinpath("preprocessed")
def get_chroma_dir() -> Path:
    """Return the directory used for ChromaDB persistent storage."""
    return get_project_root().joinpath("chroma_db")
def load_transcript(episode_file: str) -> str | None:
    """
    Load raw transcript text for an episode.

    Args:
        episode_file: Filename like "Brian Chesky.txt"

    Returns:
        Transcript text, or None if the file does not exist
    """
    transcript_path = get_transcripts_dir() / episode_file
    # EAFP: try the read and handle the miss, rather than an exists()
    # pre-check that is race-prone (file could vanish between check and read).
    try:
        return transcript_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return None
def load_preprocessed(episode_file: str) -> dict | None:
    """
    Load preprocessed JSON data for an episode.

    Args:
        episode_file: Filename like "Brian Chesky.txt"

    Returns:
        Preprocessed data dict, or None if the JSON file does not exist
    """
    # Strip only a TRAILING ".txt". The previous str.replace(".txt", "")
    # removed every occurrence, mangling names that contain ".txt" in the
    # middle (e.g. "notes.txt.backup.txt" -> "notes.backup").
    stem = episode_file.removesuffix(".txt")
    json_path = get_preprocessed_dir() / f"{stem}.json"
    # EAFP: open directly and handle a miss instead of a racy exists() check.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
def get_transcript_segment(
    episode_file: str,
    line_start: int,
    line_end: int
) -> str | None:
    """
    Extract a range of lines from an episode transcript.

    Args:
        episode_file: Filename like "Brian Chesky.txt"
        line_start: Starting line (1-indexed)
        line_end: Ending line (1-indexed, inclusive)

    Returns:
        The requested slice of the transcript, or None if the
        transcript file is missing
    """
    text = load_transcript(episode_file)
    if text is None:
        return None
    all_lines = text.split("\n")
    # Translate 1-indexed inclusive bounds into a 0-indexed half-open
    # slice, clamped to the available line range.
    first = line_start - 1
    if first < 0:
        first = 0
    last = line_end if line_end < len(all_lines) else len(all_lines)
    return "\n".join(all_lines[first:last])
def list_available_episodes() -> list[dict]:
    """
    List all available episodes with metadata.

    Scans the transcripts directory for ``*.txt`` files and, where a
    matching preprocessed JSON file exists, enriches each entry with
    guest name, expertise tags, and a summary.

    Returns:
        List of dicts with keys "filename", "guest", "preprocessed",
        and (when preprocessed data is readable) "expertise_tags"
        and "summary"
    """
    preprocessed_dir = get_preprocessed_dir()
    episodes = []
    for transcript_path in sorted(get_transcripts_dir().glob("*.txt")):
        json_path = preprocessed_dir / f"{transcript_path.stem}.json"
        episode_info = {
            "filename": transcript_path.name,
            "guest": transcript_path.stem,
            "preprocessed": json_path.exists(),
        }
        # Best-effort metadata enrichment: a missing, unreadable, or
        # malformed JSON file must not prevent the episode from listing.
        if episode_info["preprocessed"]:
            try:
                with open(json_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if "episode" in data:
                    ep = data["episode"]
                    episode_info["guest"] = ep.get("guest", episode_info["guest"])
                    episode_info["expertise_tags"] = ep.get("expertise_tags", [])
                    episode_info["summary"] = ep.get("summary", "")
            except (OSError, ValueError, TypeError, AttributeError):
                # Narrowed from a bare `except Exception` so genuine
                # programming errors are no longer silently swallowed;
                # ValueError covers json.JSONDecodeError, TypeError /
                # AttributeError cover unexpected JSON shapes.
                pass
        episodes.append(episode_info)
    return episodes
def get_topic_by_id(episode_file: str, topic_id: str) -> dict | None:
    """
    Look up a single topic in an episode's preprocessed data.

    Args:
        episode_file: Filename like "Brian Chesky.txt"
        topic_id: Topic ID like "topic_1"

    Returns:
        Topic dict, or None if the episode or topic is not found
    """
    data = load_preprocessed(episode_file)
    if data is None:
        return None
    topics = data.get("topics", [])
    # First matching topic wins; default to None when no ID matches.
    return next(
        (entry for entry in topics if entry.get("id") == topic_id),
        None,
    )
def get_insights_for_topic(episode_file: str, topic_id: str) -> list[dict]:
    """
    Collect every insight attached to one topic of an episode.

    Args:
        episode_file: Filename like "Brian Chesky.txt"
        topic_id: Topic ID like "topic_1"

    Returns:
        List of insight dicts (empty if the episode is not preprocessed
        or the topic has no insights)
    """
    data = load_preprocessed(episode_file)
    if data is None:
        return []
    all_insights = data.get("insights", [])
    return [item for item in all_insights if item.get("topic_id") == topic_id]
def get_examples_for_topic(episode_file: str, topic_id: str) -> list[dict]:
    """
    Collect every example attached to one topic of an episode.

    Args:
        episode_file: Filename like "Brian Chesky.txt"
        topic_id: Topic ID like "topic_1"

    Returns:
        List of example dicts (empty if the episode is not preprocessed
        or the topic has no examples)
    """
    data = load_preprocessed(episode_file)
    if data is None:
        return []
    all_examples = data.get("examples", [])
    return [item for item in all_examples if item.get("topic_id") == topic_id]