get_file_content
Retrieve file content from GitHub repositories to examine source code, understand implementations, or read configuration files directly.
Instructions
Get content of a specific file from a GitHub repository.
USE THIS WHEN: You need to read the actual source code or contents of a specific file.
BEST FOR: Examining implementation details, understanding how code works, or reading configuration files.
Returns the full file content (UTF-8 text only, binary files are rejected).
Automatically handles:
- Base64 decoding from GitHub API
- UTF-8 conversion with safe truncation
- Binary file detection
Args:
repo: Repository in format "owner/repo" (e.g., "psf/requests")
path: Path to file (e.g., "requests/api.py")
max_bytes: Maximum content size (default 100KB, increase for large files)
Returns:
JSON with file content, size, truncation status, and metadata
Example: get_file_content("psf/requests", "requests/api.py") → Returns source code of api.py
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
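| repo | Yes | Repository in format "owner/repo" (e.g., "psf/requests") | |
| path | Yes | Path to file (e.g., "requests/api.py") | |
| max_bytes | No | Maximum content size in bytes (increase for large files) | 102400 |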
Implementation Reference
# src/RTFD/providers/github.py:815-852 (handler) - The primary handler function for the
# 'get_file_content' MCP tool. It validates the repository format, delegates to the private
# _get_file_content helper, and serializes the response as a CallToolResult.
#
# NOTE(review): the docstring below appears to be surfaced verbatim as the MCP tool
# description - confirm before rewording it.
async def get_file_content(repo: str, path: str, max_bytes: int = 102400) -> CallToolResult:
    """
    Get content of a specific file from a GitHub repository.

    USE THIS WHEN: You need to read the actual source code or contents of a specific file.

    BEST FOR: Examining implementation details, understanding how code works, or reading configuration files.
    Returns the full file content (UTF-8 text only, binary files are rejected).

    Automatically handles:
    - Base64 decoding from GitHub API
    - UTF-8 conversion with safe truncation
    - Binary file detection

    Args:
        repo: Repository in format "owner/repo" (e.g., "psf/requests")
        path: Path to file (e.g., "requests/api.py")
        max_bytes: Maximum content size (default 100KB, increase for large files)

    Returns:
        JSON with file content, size, truncation status, and metadata

    Example: get_file_content("psf/requests", "requests/api.py") → Returns source code of api.py
    """
    # Require exactly one "/" with a non-empty owner and repository name.
    # A plain split("/", 1) would let "owner/", "/repo" and "a/b/c" through and
    # only fail later as an opaque HTTP error from the API.
    owner, separator, repo_name = repo.partition("/")
    if not separator or not owner or not repo_name or "/" in repo_name:
        error_result = {
            "repository": repo,
            "path": path,
            "content": "",
            "error": "Invalid repo format. Use 'owner/repo'",
        }
        return serialize_response_with_meta(error_result)

    # `self` is captured from the enclosing get_tools(self) method, where this
    # coroutine is defined as a closure.
    result = await self._get_file_content(owner, repo_name, path, max_bytes)
    return serialize_response_with_meta(result)
# src/RTFD/providers/github.py:284-367 (helper) - Private helper method implementing the core
# logic: GitHub API request, base64 decoding, binary detection, UTF-8 handling, truncation,
# and error management.
async def _get_file_content(
    self, owner: str, repo: str, path: str, max_bytes: int = 102400
) -> dict[str, Any]:
    """
    Get content of a specific file from a GitHub repository.

    Never raises: every failure is reported through an "error" key in the
    returned dict, alongside an empty "content".

    Args:
        owner: Repository owner
        repo: Repository name
        path: Path to file
        max_bytes: Maximum content size in bytes (default 100KB). Values
            below zero are treated as zero.

    Returns:
        Dict with "repository", "path" and "content". On success it also
        carries "size_bytes" (size of the returned, possibly truncated,
        content), "truncated", "sha" and "url". On failure it carries "error"
        (and, for binary files, "size_bytes" and "encoding").
    """
    repository = f"{owner}/{repo}"
    try:
        headers = self._get_headers()
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"

        async with await self._http_client() as client:
            resp = await client.get(url, headers=headers)
            resp.raise_for_status()
            data = resp.json()

        # The contents endpoint answers with a JSON array when the path is a
        # directory; handle that before treating `data` as a mapping.
        if isinstance(data, list):
            return {
                "repository": repository,
                "path": path,
                "content": "",
                "error": "Path is a dir, not a file",
            }

        # Anything that is not a regular file (symlink, submodule, ...) is rejected.
        if data.get("type") != "file":
            return {
                "repository": repository,
                "path": path,
                "content": "",
                "error": f"Path is a {data.get('type')}, not a file",
            }

        # Decode base64 content; bytes that are not valid UTF-8 mean a binary file.
        try:
            content = base64.b64decode(data["content"]).decode("utf-8")
        except UnicodeDecodeError:
            return {
                "repository": repository,
                "path": path,
                "content": "",
                "error": "File appears to be binary",
                "size_bytes": data.get("size", 0),
                "encoding": data.get("encoding"),
            }

        # Truncate on a byte budget. The text is known-valid UTF-8, so the only
        # thing a byte slice can break is the final character; errors="ignore"
        # drops that partial sequence. This also covers budgets smaller than
        # the first character, where the result is an empty string.
        raw = content.encode("utf-8")
        truncated = len(raw) > max_bytes
        if truncated:
            content = raw[: max(max_bytes, 0)].decode("utf-8", errors="ignore")

        return {
            "repository": repository,
            "path": path,
            "content": content,
            "size_bytes": len(content.encode("utf-8")),
            "truncated": truncated,
            "sha": data.get("sha"),
            "url": data.get("html_url"),
        }

    except httpx.HTTPStatusError as exc:
        return {
            "repository": repository,
            "path": path,
            "content": "",
            "error": f"GitHub returned {exc.response.status_code}",
        }
    except Exception as exc:
        # Deliberate catch-all boundary: tool callers get a structured error
        # rather than an exception.
        return {
            "repository": repository,
            "path": path,
            "content": "",
            "error": f"Failed to get file content: {exc!s}",
        }
# src/RTFD/providers/github.py:623-941 (registration) - The get_tools method registers the
# get_file_content tool (conditionally if fetch is enabled) into the tools dictionary
# returned for MCP.
def get_tools(self) -> dict[str, Callable]:
    """
    Return MCP tool functions.

    Each tool is an async closure over ``self`` that delegates to a private
    provider method and wraps the result with ``serialize_response_with_meta``.
    The search and package tools are always returned; the content-fetching
    tools are added only when ``is_fetch_enabled()`` is true.

    NOTE(review): the tool docstrings appear to be surfaced as the MCP tool
    descriptions - confirm before rewording them.

    Returns:
        Mapping of tool name to its coroutine function.
    """
    invalid_repo_message = "Invalid repo format. Use 'owner/repo'"

    def _split_repo(repo: str) -> tuple[str, str] | None:
        """Split "owner/repo" into (owner, name); return None when malformed."""
        # Require exactly one "/" with non-empty text on both sides, so that
        # "owner/", "/repo" and "a/b/c" are rejected here rather than
        # surfacing later as an opaque HTTP error.
        owner, separator, repo_name = repo.partition("/")
        if not separator or not owner or not repo_name or "/" in repo_name:
            return None
        return owner, repo_name

    async def github_repo_search(
        query: str, limit: int = 5, language: str | None = "Python"
    ) -> CallToolResult:
        """
        Search for GitHub repositories by keyword or topic.

        USE THIS WHEN: You need to find repositories for a library, framework, or topic.

        BEST FOR: Discovering which repository contains a specific project.
        Returns repository names, descriptions, stars, and URLs - but NOT the code itself.

        To explore code after finding a repo, use:
        - get_repo_tree() to see all files
        - list_repo_contents() to browse directories
        - get_file_content() to read specific files

        Args:
            query: Search keywords (e.g., "machine learning", "web framework")
            limit: Maximum number of results (default 5)
            language: Filter by programming language (default "Python")

        Example: github_repo_search("requests") → Finds psf/requests repository
        """
        result = await self._search_repos(query, limit=limit, language=language)
        return serialize_response_with_meta(result)

    async def github_code_search(
        query: str, repo: str | None = None, limit: int = 5
    ) -> CallToolResult:
        """
        Search for code snippets across GitHub or within a specific repository.

        USE THIS WHEN: You need to find code examples, function definitions, or usage patterns.

        RETURNS: File paths and locations where code was found - NOT the actual file contents.
        To read the files, use get_file_content() with the returned paths.

        NOTE: Requires authentication - rate limited without GITHUB_TOKEN.

        Args:
            query: Code search query (e.g., "def parse_args", "class HTTPClient")
            repo: Optional repository filter in "owner/repo" format
            limit: Maximum number of results (default 5)

        Example: github_code_search("async def fetch", repo="psf/requests")
        """
        result = await self._search_code(query, repo=repo, limit=limit)
        return serialize_response_with_meta(result)

    async def list_github_packages(
        owner: str, package_type: str = "container"
    ) -> CallToolResult:
        """
        List packages (including GHCR images) for a GitHub user or organization.

        USE THIS WHEN: You want to find Docker images or other packages hosted on GitHub for a specific user/org.

        Note: GitHub does not support global package search; you must provide an owner.

        Args:
            owner: GitHub username or organization name (e.g. "github", "octocat")
            package_type: Type of package to list. Defaults to "container" (GHCR).
                Options: "container", "npm", "maven", "rubygems", "nuget", "docker" (legacy)

        Returns:
            JSON list of packages with metadata (name, repository, version count, etc.)
        """
        try:
            data = await self._list_github_packages(owner, package_type)
            result = {
                "owner": owner,
                "package_type": package_type,
                "packages": data,
                "count": len(data),
            }
            return serialize_response_with_meta(result)
        except Exception as exc:
            # Tool boundary: report the failure as data instead of raising.
            return serialize_response_with_meta(
                {"owner": owner, "error": f"Failed to list packages: {exc!s}"}
            )

    async def get_package_versions(
        owner: str, package_type: str, package_name: str
    ) -> CallToolResult:
        """
        Get versions for a specific GitHub package.

        USE THIS WHEN: You found a package using list_github_packages and want to see available tags/versions.

        Args:
            owner: GitHub username or organization name
            package_type: Type of package (e.g., "container")
            package_name: Name of the package (e.g., "rtfd")

        Returns:
            JSON list of versions/tags.
        """
        try:
            data = await self._get_package_versions(owner, package_type, package_name)
            result = {
                "owner": owner,
                "package_name": package_name,
                "versions": data,
                "count": len(data),
            }
            return serialize_response_with_meta(result)
        except Exception as exc:
            # Tool boundary: report the failure as data instead of raising.
            return serialize_response_with_meta(
                {
                    "owner": owner,
                    "package_name": package_name,
                    "error": f"Failed to get versions: {exc!s}",
                }
            )

    async def fetch_github_readme(repo: str, max_bytes: int = 20480) -> CallToolResult:
        """
        Fetch README file from a GitHub repository.

        USE THIS WHEN: You need the project overview, quick start, or basic documentation.

        BEST FOR: Getting a high-level understanding of a project.
        The README typically contains installation, usage examples, and project description.

        For deeper code exploration, use:
        - get_repo_tree() to see the complete file structure
        - get_file_content() to read specific source files

        Args:
            repo: Repository in "owner/repo" format (e.g., "psf/requests")
            max_bytes: Maximum content size, default 20KB

        Returns:
            JSON with README content, size, and metadata

        Example: fetch_github_readme("psf/requests") → Returns the requests README
        """
        parsed = _split_repo(repo)
        if parsed is None:
            error_result = {
                "repository": repo,
                "content": "",
                "error": invalid_repo_message,
                "size_bytes": 0,
                "source": None,
            }
            return serialize_response_with_meta(error_result)

        owner, repo_name = parsed
        result = await self._fetch_github_readme(owner, repo_name, max_bytes)
        return serialize_response_with_meta(result)

    async def list_repo_contents(repo: str, path: str = "") -> CallToolResult:
        """
        List contents of a directory in a GitHub repository.

        USE THIS WHEN: You need to browse or explore the structure of a repository directory.

        BEST FOR: Discovering what files and folders exist in a specific location.
        Returns names, paths, types (file/dir), sizes for each item.

        Common workflow:
        1. Use github_repo_search() to find the repository
        2. Use get_repo_tree() to see the overall structure
        3. Use list_repo_contents() to browse specific directories
        4. Use get_file_content() to read individual files

        Args:
            repo: Repository in format "owner/repo" (e.g., "psf/requests")
            path: Path to directory (empty string for root, e.g., "src/utils")

        Returns:
            JSON with list of files and directories with metadata

        Example: list_repo_contents("psf/requests", "requests") → Lists files in requests/ directory
        """
        parsed = _split_repo(repo)
        if parsed is None:
            error_result = {
                "repository": repo,
                "path": path,
                "contents": [],
                "error": invalid_repo_message,
            }
            return serialize_response_with_meta(error_result)

        owner, repo_name = parsed
        result = await self._list_repo_contents(owner, repo_name, path)
        return serialize_response_with_meta(result)

    async def get_file_content(repo: str, path: str, max_bytes: int = 102400) -> CallToolResult:
        """
        Get content of a specific file from a GitHub repository.

        USE THIS WHEN: You need to read the actual source code or contents of a specific file.

        BEST FOR: Examining implementation details, understanding how code works, or reading configuration files.
        Returns the full file content (UTF-8 text only, binary files are rejected).

        Automatically handles:
        - Base64 decoding from GitHub API
        - UTF-8 conversion with safe truncation
        - Binary file detection

        Args:
            repo: Repository in format "owner/repo" (e.g., "psf/requests")
            path: Path to file (e.g., "requests/api.py")
            max_bytes: Maximum content size (default 100KB, increase for large files)

        Returns:
            JSON with file content, size, truncation status, and metadata

        Example: get_file_content("psf/requests", "requests/api.py") → Returns source code of api.py
        """
        parsed = _split_repo(repo)
        if parsed is None:
            error_result = {
                "repository": repo,
                "path": path,
                "content": "",
                "error": invalid_repo_message,
            }
            return serialize_response_with_meta(error_result)

        owner, repo_name = parsed
        result = await self._get_file_content(owner, repo_name, path, max_bytes)
        return serialize_response_with_meta(result)

    async def get_repo_tree(
        repo: str, recursive: bool = False, max_items: int = 1000
    ) -> CallToolResult:
        """
        Get the full file tree of a GitHub repository.

        USE THIS WHEN: You need to see the overall structure and organization of a repository.

        BEST FOR: Understanding project layout, finding specific files, or getting a complete directory listing.
        Returns all file paths, types (file/directory), and sizes in a single call.

        Use recursive=True for complete tree (all files in all subdirectories).
        Use recursive=False for just top-level overview (faster, less data).

        After getting the tree, use:
        - get_file_content() to read specific files you identified
        - list_repo_contents() to browse specific directories in detail

        Args:
            repo: Repository in format "owner/repo" (e.g., "psf/requests")
            recursive: Whether to get full tree recursively (default False)
            max_items: Maximum number of items to return (default 1000)

        Returns:
            JSON with complete file tree structure, branch, and count

        Example: get_repo_tree("psf/requests", recursive=True) → Returns complete file listing
        """
        parsed = _split_repo(repo)
        if parsed is None:
            error_result = {
                "repository": repo,
                "tree": [],
                "error": invalid_repo_message,
            }
            return serialize_response_with_meta(error_result)

        owner, repo_name = parsed
        result = await self._get_repo_tree(owner, repo_name, recursive, max_items)
        return serialize_response_with_meta(result)

    async def get_commit_diff(repo: str, base: str, head: str) -> CallToolResult:
        """
        Get the diff between two commits, branches, or tags in a GitHub repository.

        USE THIS WHEN: You need to see what changed between two versions of code.

        BEST FOR: Analyzing changes, reviewing pull requests (by comparing branches), or checking version differences.
        Returns the raw git diff output.

        Args:
            repo: Repository in format "owner/repo" (e.g., "psf/requests")
            base: Base commit SHA, branch name, or tag (e.g., "main", "v1.0.0", "a1b2c3d")
            head: Head commit SHA, branch name, or tag (e.g., "feature-branch", "v1.1.0", "e5f6g7h")

        Returns:
            JSON with the raw git diff content.

        Example: get_commit_diff("psf/requests", "v2.28.0", "v2.28.1") → Returns diff between versions
        """
        parsed = _split_repo(repo)
        if parsed is None:
            error_result = {
                "repository": repo,
                "base": base,
                "head": head,
                "diff": "",
                "error": invalid_repo_message,
            }
            return serialize_response_with_meta(error_result)

        owner, repo_name = parsed
        result = await self._get_commit_diff(owner, repo_name, base, head)
        return serialize_response_with_meta(result)

    # Search and package tools are always exposed.
    tools = {
        "github_repo_search": github_repo_search,
        "github_code_search": github_code_search,
        "list_github_packages": list_github_packages,
        "get_package_versions": get_package_versions,
    }

    # Tools that fetch repository content are exposed only when fetching is enabled.
    if is_fetch_enabled():
        tools["fetch_github_readme"] = fetch_github_readme
        tools["list_repo_contents"] = list_repo_contents
        tools["get_file_content"] = get_file_content
        tools["get_repo_tree"] = get_repo_tree
        tools["get_commit_diff"] = get_commit_diff

    return tools