github_repo
Fetch GitHub repository metrics like stars, forks, and activity to evaluate open source projects before using them in the Web Research Assistant toolkit.
Instructions
Fetch GitHub repository information and health metrics.
Returns stars, forks, issues, recent activity, language, license, and description.
Use this to evaluate open source projects before using them.
Examples:
- github_repo("microsoft/vscode", reasoning="Evaluate editor project")
- github_repo("https://github.com/facebook/react", reasoning="Research UI framework")
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
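| repo | Yes | GitHub repository (owner/repo format or full URL) | |
| reasoning | Yes | Why you're checking this repository (required for analytics) | |
| include_commits | No | Include recent commit history | True |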
Implementation Reference
# src/searxng_mcp/server.py:668-767 (handler)
# The main MCP tool handler for 'github_repo'. Parses input repo string, fetches
# metadata and optional recent commits via GitHubClient, formats output, handles
# various errors with helpful messages, and tracks usage.
@mcp.tool()
async def github_repo(
    repo: Annotated[str, "GitHub repository (owner/repo format or full URL)"],
    reasoning: Annotated[str, "Why you're checking this repository (required for analytics)"],
    include_commits: Annotated[bool, "Include recent commit history"] = True,
) -> str:
    """
    Fetch GitHub repository information and health metrics.

    Returns stars, forks, issues, recent activity, language, license, and description.
    Use this to evaluate open source projects before using them.

    Examples:
    - github_repo("microsoft/vscode", reasoning="Evaluate editor project")
    - github_repo("https://github.com/facebook/react", reasoning="Research UI framework")
    """
    # NOTE: the docstring above is left verbatim because it doubles as the
    # tool description shown to clients.
    #
    # Every outcome (success or failure) is returned as a plain string; no
    # exception escapes this handler. Usage is always recorded in `finally`.

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    success = False
    error_msg: str | None = None
    result = ""
    # Bound up-front so the error handlers can tell whether parsing succeeded.
    owner = repo_name = None

    try:
        owner, repo_name = github_client.parse_repo_url(repo)

        # Get repo info
        repo_info = await github_client.get_repo_info(owner, repo_name)

        # Optionally get recent commits
        commits = None
        if include_commits:
            try:
                commits = await github_client.get_recent_commits(owner, repo_name, count=3)
            except Exception:  # noqa: BLE001, S110
                # Deliberate best-effort: don't fail the whole request if
                # only the commit history is unavailable.
                pass

        result = clamp_text(_format_repo_info(repo_info, commits), MAX_RESPONSE_CHARS)
        success = True

    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        error_msg = f"HTTP {status}"

        if status == 404:
            # Build hints from the names that were actually parsed and sent to
            # the API, rather than re-deriving them from the raw input string
            # (which mis-handles http://, www. and .git forms).
            suggestions = []
            if owner and repo_name:
                suggestions = [
                    f"- Check if '{owner}' is the correct organization/user",
                    "- The repository may have been renamed or deleted",
                    f"- Try searching: https://github.com/search?q={repo_name}",
                ]

            result = (
                f"Repository '{repo}' not found (HTTP 404).\n\n"
                "Possible reasons:\n"
                "- The repository doesn't exist or was deleted\n"
                "- The repository is private\n"
                "- There's a typo in the owner or repository name\n"
            )
            if suggestions:
                result += "\nSuggestions:\n" + "\n".join(suggestions)
        elif status == 403:
            result = (
                f"Access denied to repository '{repo}' (HTTP 403).\n\n"
                "Possible reasons:\n"
                "- The repository is private\n"
                "- GitHub API rate limit exceeded\n"
                "- Set GITHUB_TOKEN environment variable for higher rate limits"
            )
        elif status == 301:
            # This shouldn't happen anymore with our redirect handling, but just in case
            result = (
                f"Repository '{repo}' has moved (HTTP 301).\n\n"
                "The repository may have been renamed or transferred.\n"
                "Try searching for the new location on GitHub."
            )
        else:
            result = f"Failed to fetch repository '{repo}': HTTP {status}"
    except ValueError as exc:
        # Invalid repo format - the parser's message is already user-facing.
        error_msg = str(exc)
        result = str(exc)
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: report the failure as text instead of raising.
        error_msg = str(exc)
        result = f"Failed to fetch repository '{repo}': {exc}"
    finally:
        # Track usage
        response_time = (time.perf_counter() - start_time) * 1000
        tracker.track_usage(
            tool_name="github_repo",
            reasoning=reasoning,
            parameters={"repo": repo, "include_commits": include_commits},
            response_time_ms=response_time,
            success=success,
            error_message=error_msg,
            response_size=len(result.encode("utf-8")),
        )

    return result
# src/searxng_mcp/github.py:88-128 (helper)
# Core helper method in GitHubClient that fetches comprehensive repository
# metadata from GitHub API, handles redirects, retrieves open PRs count, and
# constructs RepoInfo object.
async def get_repo_info(self, owner: str, repo: str) -> RepoInfo:
    """Fetch repository information from GitHub API.

    Args:
        owner: Repository owner (user or organization) as supplied by the caller.
        repo: Repository name as supplied by the caller.

    Returns:
        A RepoInfo built from the repository's JSON payload plus a separately
        fetched open pull request count.

    Raises:
        httpx.HTTPStatusError: If the repository request returns an error status.
    """
    # First resolve any redirects (renamed repos)
    resolved_owner, resolved_repo = await self._resolve_repo_redirect(owner, repo)

    url = f"https://api.github.com/repos/{resolved_owner}/{resolved_repo}"

    async with httpx.AsyncClient(
        timeout=self.timeout,
        headers=self._headers,
        follow_redirects=True,
    ) as client:
        response = await client.get(url)
        response.raise_for_status()
        data = response.json()

    # Get open PRs count (separate API call). Use the resolved names so the
    # count is taken from the same repository the metadata came from, even
    # when the caller passed a pre-rename owner/repo.
    open_prs = await self._get_open_prs_count(resolved_owner, resolved_repo)

    # Format last updated time
    updated_at = data.get("updated_at", "")
    last_updated = self._format_time_ago(updated_at) if updated_at else "unknown"

    # "license" is null for unlicensed repositories; treat that like a
    # missing key so the name lookup yields None.
    license_info = data.get("license") or {}

    return RepoInfo(
        name=data.get("name", ""),
        full_name=data.get("full_name", ""),
        description=data.get("description") or "No description available",
        stars=data.get("stargazers_count", 0),
        forks=data.get("forks_count", 0),
        watchers=data.get("watchers_count", 0),
        license=license_info.get("name"),
        language=data.get("language"),
        last_updated=last_updated,
        open_issues=data.get("open_issues_count", 0),
        open_prs=open_prs,
        homepage=data.get("homepage"),
        # Guard against an explicit null so the field is always a list.
        topics=data.get("topics") or [],
        archived=data.get("archived", False),
        size_kb=data.get("size", 0),
    )
# src/searxng_mcp/github.py:217-309 (helper)
# Static method to parse flexible GitHub repo inputs (URLs or owner/repo strings)
# into owner/repo tuple, with extensive validation for common formats and errors.
def parse_repo_url(repo_input: str) -> tuple[str, str]:
    """Parse various GitHub repo input formats to (owner, repo).

    Supported formats:
    - owner/repo
    - owner/repo.git
    - github.com/owner/repo (scheme omitted)
    - https://github.com/owner/repo
    - https://github.com/owner/repo.git
    - https://github.com/owner/repo/tree/main
    - https://github.com/owner/repo/blob/main/file.py
    - any of the URL forms followed by a ?query or #fragment

    Invalid inputs that will raise ValueError:
    - Non-GitHub URLs (e.g., https://example.com)
    - GitHub search URLs (e.g., https://github.com/search?q=...)
    - GitHub user/org pages without repo (e.g., https://github.com/microsoft)

    Args:
        repo_input: Raw user input; surrounding whitespace is ignored.

    Returns:
        A ``(owner, repo)`` tuple with any trailing ``.git`` removed from repo.

    Raises:
        ValueError: If the input cannot be interpreted as a GitHub repository.
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import urlsplit

    repo_input = repo_input.strip()

    valid_name = re.compile(r"[a-zA-Z0-9_.-]+")
    # First path segments that are GitHub site pages, never repository owners.
    # Compared as whole segments so that real owners which merely start with
    # one of these words (e.g. "searchkit", "newrelic") are not rejected.
    reserved_pages = frozenset(
        {
            "search",
            "explore",
            "topics",
            "trending",
            "settings",
            "notifications",
            "new",
            "organizations",
            "marketplace",
        }
    )

    def strip_git_suffix(name: str) -> str:
        # Drop a clone-URL ".git" suffix, but never reduce a name to nothing.
        return name.removesuffix(".git") or name

    # Accept scheme-less URLs such as "github.com/owner/repo"; without this
    # they would be misread as owner="github.com", repo="owner".
    url = repo_input
    if url.lower().startswith(("github.com/", "www.github.com/")):
        url = f"https://{url}"

    # Handle full URLs
    if url.startswith(("https://", "http://")):
        # Must be a github.com URL
        if "github.com" not in url.lower():
            raise ValueError(
                f"Not a GitHub URL: {repo_input}. "
                f"Please provide a GitHub repository URL or use 'owner/repo' format."
            )

        could_not_parse = (
            f"Could not parse GitHub URL: {repo_input}. "
            f"Please use format 'https://github.com/owner/repo'."
        )

        try:
            parsed = urlsplit(url)
            host = (parsed.hostname or "").lower()
        except ValueError:
            # urlsplit rejects malformed authorities (e.g. a broken IPv6 literal).
            raise ValueError(could_not_parse) from None

        # "github.com" appearing elsewhere (other host, path, subdomain) is
        # not a repository URL.
        if host not in ("github.com", "www.github.com"):
            raise ValueError(could_not_parse)

        # Query string and fragment are already split off by urlsplit.
        segments = [segment for segment in parsed.path.split("/") if segment]

        # Reject GitHub search/explore/etc URLs
        if segments and segments[0].lower() in reserved_pages:
            raise ValueError(
                f"Invalid GitHub URL: {repo_input}. "
                f"This appears to be a GitHub search/explore page, not a repository. "
                f"Please provide a repository URL like 'https://github.com/owner/repo'."
            )

        # Parse repository URL - must have owner/repo; deeper path parts
        # (tree/main, blob/..., issues/...) are ignored.
        if len(segments) >= 2:
            owner, repo = segments[0], strip_git_suffix(segments[1])
            if valid_name.fullmatch(owner) and valid_name.fullmatch(repo):
                return owner, repo
        elif len(segments) == 1 and valid_name.fullmatch(segments[0]):
            # It's just a user/org page (no repo)
            raise ValueError(
                f"Invalid GitHub URL: {repo_input}. "
                f"This appears to be a user/organization page, not a repository. "
                f"Please provide a repository URL like 'https://github.com/{segments[0]}/repo-name'."
            )

        raise ValueError(could_not_parse)

    # Handle owner/repo format (extra path parts after the repo are ignored)
    if "/" in repo_input:
        pieces = repo_input.split("/")
        owner, repo = pieces[0].strip(), strip_git_suffix(pieces[1].strip())
        # fullmatch fails on empty strings, so this also rejects "owner/" and "/repo".
        if valid_name.fullmatch(owner) and valid_name.fullmatch(repo):
            return owner, repo

    raise ValueError(
        f"Invalid repository format: {repo_input}. "
        f"Use 'owner/repo' format (e.g., 'microsoft/vscode') or a full GitHub URL."
    )
- src/searxng_mcp/server.py:624-665 (helper)Helper function to format the RepoInfo dataclass and optional commits into a user-friendly string with emojis, stats, and structured layout.def _format_repo_info(info: RepoInfo, commits=None) -> str: """Format RepoInfo into readable text response.""" lines = [ f"Repository: {info.full_name}", "ā" * 50, f"ā {info.stars:,} | š“ {info.forks:,} | šļø {info.watchers:,}", ] if info.language: lines.append(f"Language: {info.language}") if info.license: lines.append(f"License: {info.license}") lines.append(f"Last Updated: {info.last_updated}") # Issues and PRs issue_line = f"Open Issues: {info.open_issues}" if info.open_prs is not None: issue_line += f" | Open PRs: {info.open_prs}" lines.append(issue_line) if info.archived: lines.append("ā ļø This repository is archived (read-only)") if info.topics: lines.append(f"Topics: {', '.join(info.topics[:5])}") # Show first 5 topics lines.append("") # blank line if info.homepage: lines.append(f"Homepage: {info.homepage}") if commits: lines.append("Recent Commits:") for commit in commits[:3]: # Show top 3 lines.append(f"- [{commit.date}] {commit.message} ({commit.author})") lines.append(f"\nDescription: {info.description}") return "\n".join(lines)
# src/searxng_mcp/github.py:12-30 (schema)
# Dataclass defining the structured output schema for GitHub repository
# information used by the tool.
@dataclass(slots=True)
class RepoInfo:
    """GitHub repository metadata.

    Field order is part of the constructor interface. The per-field notes
    describe how GitHubClient.get_repo_info fills each value from the
    repository JSON payload.
    """

    name: str  # payload "name"; "" when absent
    full_name: str  # payload "full_name"; "" when absent
    description: str  # payload "description", or "No description available"
    stars: int  # payload "stargazers_count"
    forks: int  # payload "forks_count"
    watchers: int  # payload "watchers_count"
    license: str | None  # license "name"; None when the payload has no license
    language: str | None  # payload "language"
    last_updated: str  # humanized form of "updated_at", or "unknown"
    open_issues: int  # payload "open_issues_count"
    open_prs: int | None  # fetched via a separate call; formatter omits it when None
    homepage: str | None  # payload "homepage"
    topics: list[str]  # payload "topics"
    archived: bool  # payload "archived"
    size_kb: int  # payload "size"