github_repo
Fetch GitHub repository metrics including stars, forks, issues, activity, and license to evaluate open source projects before using them.
Instructions
Fetch GitHub repository information and health metrics.
Returns stars, forks, issues, recent activity, language, license, and description.
Use this to evaluate open source projects before using them.
Examples:
- github_repo("microsoft/vscode", reasoning="Evaluate editor project")
- github_repo("https://github.com/facebook/react", reasoning="Research UI framework")
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
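| repo | Yes | GitHub repository (owner/repo format or full URL) | |
| reasoning | Yes | Why you're checking this repository (required for analytics) | |
| include_commits | No | Include recent commit history | true |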
Input Schema (JSON Schema)
{
"properties": {
"include_commits": {
"default": true,
"title": "Include Commits",
"type": "boolean"
},
"reasoning": {
"title": "Reasoning",
"type": "string"
},
"repo": {
"title": "Repo",
"type": "string"
}
},
"required": [
"repo",
"reasoning"
],
"type": "object"
}
Implementation Reference
# src/searxng_mcp/server.py:668-768 (handler)
# The main handler function for the 'github_repo' MCP tool. It parses the
# repository input, fetches metadata and optional recent commits using
# GitHubClient, formats the output, handles errors, and tracks usage.
def _github_http_error_message(
    repo: str,
    status_code: int,
    owner: str | None = None,
    repo_name: str | None = None,
) -> str:
    """Build the user-facing text for a failed GitHub API request.

    Args:
        repo: The repository string exactly as the caller supplied it.
        status_code: HTTP status code of the failed response.
        owner: Owner parsed from ``repo``, if parsing succeeded.
        repo_name: Repository name parsed from ``repo``, if parsing succeeded.

    Returns:
        A multi-line explanation for 404/403/301, or a one-line generic
        failure message for any other status code.
    """
    if status_code == 404:
        message = (
            f"Repository '{repo}' not found (HTTP 404).\n\n"
            "Possible reasons:\n"
            "- The repository doesn't exist or was deleted\n"
            "- The repository is private\n"
            "- There's a typo in the owner or repository name\n"
        )
        # Use the already-parsed owner/name rather than re-splitting the raw
        # input: re-splitting mangles "http://", "www." and ".git" inputs
        # (e.g. it would suggest checking the owner 'https:').
        if owner and repo_name:
            suggestions = [
                f"- Check if '{owner}' is the correct organization/user",
                "- The repository may have been renamed or deleted",
                f"- Try searching: https://github.com/search?q={repo_name}",
            ]
            message += "\nSuggestions:\n" + "\n".join(suggestions)
        return message

    if status_code == 403:
        return (
            f"Access denied to repository '{repo}' (HTTP 403).\n\n"
            "Possible reasons:\n"
            "- The repository is private\n"
            "- GitHub API rate limit exceeded\n"
            "- Set GITHUB_TOKEN environment variable for higher rate limits"
        )

    if status_code == 301:
        # This shouldn't happen anymore with our redirect handling, but just in case
        return (
            f"Repository '{repo}' has moved (HTTP 301).\n\n"
            "The repository may have been renamed or transferred.\n"
            "Try searching for the new location on GitHub."
        )

    return f"Failed to fetch repository '{repo}': HTTP {status_code}"


@mcp.tool()
async def github_repo(
    repo: Annotated[str, "GitHub repository (owner/repo format or full URL)"],
    reasoning: Annotated[str, "Why you're checking this repository (required for analytics)"],
    include_commits: Annotated[bool, "Include recent commit history"] = True,
) -> str:
    """
    Fetch GitHub repository information and health metrics.

    Returns stars, forks, issues, recent activity, language, license, and description.
    Use this to evaluate open source projects before using them.

    Examples:
    - github_repo("microsoft/vscode", reasoning="Evaluate editor project")
    - github_repo("https://github.com/facebook/react", reasoning="Research UI framework")
    """
    # NOTE: the docstring above is kept verbatim because it is published as
    # the tool's description; implementation notes live in comments instead.
    #
    # Never raises for ordinary failures: every error is converted into a
    # readable message, and usage is always recorded in the ``finally`` block.
    import logging

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    success = False
    error_msg = None
    result = ""
    owner: str | None = None
    repo_name: str | None = None

    try:
        owner, repo_name = github_client.parse_repo_url(repo)

        # Get repo info
        repo_info = await github_client.get_repo_info(owner, repo_name)

        # Optionally get recent commits
        commits = None
        if include_commits:
            try:
                commits = await github_client.get_recent_commits(owner, repo_name, count=3)
            except Exception:  # noqa: BLE001
                # Best effort: don't fail the whole request if commits fail,
                # but leave a trace for debugging instead of swallowing it.
                logging.getLogger(__name__).debug(
                    "Could not fetch recent commits for %s/%s; continuing without them",
                    owner,
                    repo_name,
                    exc_info=True,
                )

        result = clamp_text(_format_repo_info(repo_info, commits), MAX_RESPONSE_CHARS)
        success = True

    except httpx.HTTPStatusError as exc:
        status_code = exc.response.status_code
        error_msg = f"HTTP {status_code}"
        result = _github_http_error_message(repo, status_code, owner, repo_name)
    except ValueError as exc:
        # Invalid repo format - already has good error message
        error_msg = str(exc)
        result = str(exc)
    except Exception as exc:  # noqa: BLE001
        error_msg = str(exc)
        result = f"Failed to fetch repository '{repo}': {exc}"
    finally:
        # Track usage
        response_time = (time.perf_counter() - start_time) * 1000
        tracker.track_usage(
            tool_name="github_repo",
            reasoning=reasoning,
            parameters={"repo": repo, "include_commits": include_commits},
            response_time_ms=response_time,
            success=success,
            error_message=error_msg,
            response_size=len(result.encode("utf-8")),
        )

    return result
# src/searxng_mcp/github.py:88-128 (helper)
# Core helper method in GitHubClient that fetches and structures repository
# metadata from GitHub API, including stars, forks, issues, PRs, etc.
async def get_repo_info(self, owner: str, repo: str) -> RepoInfo:
    """Fetch repository information from GitHub API.

    Args:
        owner: Repository owner (user or organization) as given by the caller.
        repo: Repository name as given by the caller.

    Returns:
        A ``RepoInfo`` populated from the ``/repos/{owner}/{repo}`` response,
        plus the open pull-request count from a separate call.

    Raises:
        httpx.HTTPStatusError: If the repository request returns an error
            status (via ``response.raise_for_status()``).
    """
    # First resolve any redirects (renamed repos)
    resolved_owner, resolved_repo = await self._resolve_repo_redirect(owner, repo)

    url = f"https://api.github.com/repos/{resolved_owner}/{resolved_repo}"

    async with httpx.AsyncClient(
        timeout=self.timeout,
        headers=self._headers,
        follow_redirects=True,
    ) as client:
        response = await client.get(url)
        response.raise_for_status()
        data = response.json()

    # Get open PRs count (separate API call). Query the resolved names so a
    # renamed repository is counted under the same identity as the metadata
    # fetched above.
    open_prs = await self._get_open_prs_count(resolved_owner, resolved_repo)

    # Format last updated time
    updated_at = data.get("updated_at", "")
    last_updated = self._format_time_ago(updated_at) if updated_at else "unknown"

    # "license" is either an object or null in the payload; normalise to a
    # dict so a missing license simply yields None.
    license_info = data.get("license") or {}

    return RepoInfo(
        name=data.get("name", ""),
        full_name=data.get("full_name", ""),
        description=data.get("description") or "No description available",
        stars=data.get("stargazers_count", 0),
        forks=data.get("forks_count", 0),
        watchers=data.get("watchers_count", 0),
        license=license_info.get("name"),
        language=data.get("language"),
        last_updated=last_updated,
        open_issues=data.get("open_issues_count", 0),
        open_prs=open_prs,
        homepage=data.get("homepage"),
        # Guard against an explicit null so callers always get a list.
        topics=data.get("topics") or [],
        archived=data.get("archived", False),
        size_kb=data.get("size", 0),
    )
# src/searxng_mcp/github.py:217-310 (helper)
# Static method that parses various GitHub repo URL formats or owner/repo
# strings into owner and repo names, with validation.
def parse_repo_url(repo_input: str) -> tuple[str, str]:
    """Parse various GitHub repo input formats to (owner, repo).

    Supported formats:
    - owner/repo
    - https://github.com/owner/repo
    - https://github.com/owner/repo.git
    - https://github.com/owner/repo/tree/main
    - https://github.com/owner/repo/blob/main/file.py
    - github.com/owner/repo (scheme omitted)

    For URLs, the scheme and host are matched case-insensitively and any
    ``?query`` or ``#fragment`` suffix is ignored.

    Invalid inputs that will raise ValueError:
    - Non-GitHub URLs (e.g., https://example.com)
    - GitHub search URLs (e.g., https://github.com/search?q=...)
    - GitHub user/org pages without repo (e.g., https://github.com/microsoft)

    Args:
        repo_input: An ``owner/repo`` string or a GitHub repository URL.

    Returns:
        A ``(owner, repo)`` tuple.

    Raises:
        ValueError: If the input cannot be interpreted as a repository.
    """
    repo_input = repo_input.strip()
    name_chars = r"[a-zA-Z0-9_.-]+"

    # A scheme-less "github.com/owner/repo" would otherwise fall through to the
    # owner/repo branch and be misread as owner="github.com", repo="owner".
    url = repo_input
    if url.lower().startswith(("github.com/", "www.github.com/")):
        url = f"https://{url}"

    # Handle full URLs
    if url.lower().startswith(("https://", "http://")):
        # Must be a github.com URL
        if "github.com" not in url.lower():
            raise ValueError(
                f"Not a GitHub URL: {repo_input}. "
                f"Please provide a GitHub repository URL or use 'owner/repo' format."
            )

        # Query strings and fragments never identify the repository.
        url = re.split(r"[?#]", url, maxsplit=1)[0]

        # Reject GitHub search/explore/etc URLs. The trailing (?:/|$) makes the
        # reserved word match a whole path segment, so owners that merely start
        # with one (e.g. "newrelic") are not rejected.
        reserved_page = (
            r"https?://(?:www\.)?github\.com/"
            r"(?:search|explore|topics|trending|settings|notifications"
            r"|new|organizations|marketplace)(?:/|$)"
        )
        if re.match(reserved_page, url, re.IGNORECASE):
            raise ValueError(
                f"Invalid GitHub URL: {repo_input}. "
                f"This appears to be a GitHub search/explore page, not a repository. "
                f"Please provide a repository URL like 'https://github.com/owner/repo'."
            )

        # Parse repository URL - must have owner/repo. The repo group is lazy
        # so an optional ".git" suffix and any trailing path are excluded.
        match = re.match(
            rf"https?://(?:www\.)?github\.com/({name_chars})/({name_chars}?)(?:\.git)?(?:/.*)?$",
            url,
            re.IGNORECASE,
        )
        if match:
            return match.group(1), match.group(2)

        # Check if it's just a user/org page (no repo)
        user_match = re.match(
            rf"https?://(?:www\.)?github\.com/({name_chars})/?$",
            url,
            re.IGNORECASE,
        )
        if user_match:
            raise ValueError(
                f"Invalid GitHub URL: {repo_input}. "
                f"This appears to be a user/organization page, not a repository. "
                f"Please provide a repository URL like 'https://github.com/{user_match.group(1)}/repo-name'."
            )

        raise ValueError(
            f"Could not parse GitHub URL: {repo_input}. "
            f"Please use format 'https://github.com/owner/repo'."
        )

    # Handle owner/repo format; anything after a second "/" is ignored.
    owner, slash, remainder = repo_input.partition("/")
    if slash:
        owner = owner.strip()
        repo = remainder.split("/", 1)[0].strip()
        # Validate: must be non-empty and contain only valid characters
        if re.fullmatch(name_chars, owner) and re.fullmatch(name_chars, repo):
            return owner, repo

    raise ValueError(
        f"Invalid repository format: {repo_input}. "
        f"Use 'owner/repo' format (e.g., 'microsoft/vscode') or a full GitHub URL."
    )
# src/searxng_mcp/github.py:12-31 (schema)
# Dataclass defining the structured output schema for GitHub repository information.
@dataclass(slots=True)
class RepoInfo:
    """GitHub repository metadata.

    Populated by ``GitHubClient.get_repo_info`` from the GitHub
    ``/repos/{owner}/{repo}`` API response. Field order is part of the
    constructor signature, so new fields should be appended, not inserted.
    """

    name: str  # "name" from the API response ("" if absent)
    full_name: str  # "full_name" from the API response ("" if absent)
    description: str  # "description", or "No description available" when empty/null
    stars: int  # "stargazers_count" (0 if absent)
    forks: int  # "forks_count" (0 if absent)
    watchers: int  # "watchers_count" (0 if absent)
    license: str | None  # "license.name", or None when the repo has no license
    language: str | None  # "language" as reported by the API; may be None
    last_updated: str  # display string derived from "updated_at", or "unknown"
    open_issues: int  # "open_issues_count" (0 if absent)
    open_prs: int | None  # from a separate PR-count call; None presumably means unavailable - TODO confirm
    homepage: str | None  # "homepage" URL, if any
    topics: list[str]  # "topics" list (empty if absent)
    archived: bool  # "archived" flag (False if absent)
    size_kb: int  # "size" from the API response (0 if absent)
# src/searxng_mcp/server.py:624-665 (helper)
# Helper function in server.py that formats the RepoInfo and optional commits
# into a human-readable string response.
def _format_repo_info(info: RepoInfo, commits=None) -> str:
    """Format RepoInfo into readable text response.

    Args:
        info: Repository metadata to render.
        commits: Optional sequence of commit objects exposing ``date``,
            ``message`` and ``author`` attributes; at most the first three
            are shown. ``None`` or an empty sequence omits the section.

    Returns:
        A newline-joined, human-readable summary ending with the description.
    """
    lines = [
        f"Repository: {info.full_name}",
        "─" * 50,
        f"⭐ {info.stars:,} | 🍴 {info.forks:,} | 👁️ {info.watchers:,}",
    ]

    if info.language:
        lines.append(f"Language: {info.language}")

    if info.license:
        lines.append(f"License: {info.license}")

    lines.append(f"Last Updated: {info.last_updated}")

    # Issues and PRs share one line; the PR count is omitted when unknown.
    issue_line = f"Open Issues: {info.open_issues}"
    if info.open_prs is not None:
        issue_line += f" | Open PRs: {info.open_prs}"
    lines.append(issue_line)

    if info.archived:
        lines.append("⚠️ This repository is archived (read-only)")

    if info.topics:
        lines.append(f"Topics: {', '.join(info.topics[:5])}")  # Show first 5 topics

    lines.append("")  # blank line

    if info.homepage:
        lines.append(f"Homepage: {info.homepage}")

    if commits:
        lines.append("Recent Commits:")
        lines.extend(
            f"- [{commit.date}] {commit.message} ({commit.author})"
            for commit in commits[:3]  # Show top 3
        )

    lines.append(f"\nDescription: {info.description}")

    return "\n".join(lines)