Skip to main content
Glama
git_detector.py (10.6 kB)
import logging
import os
import re
from typing import Optional, Dict, Any, List
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


class GitDetector:
    """Detect and parse GitLab project information from git repositories.

    Everything is done by reading files under the repository's git directory
    (``config``, ``HEAD``); the ``git`` executable is never invoked.
    """

    # ``[section]`` or ``[section "subsection"]`` header at the start of a line.
    _SECTION_RE = re.compile(r'\[([^\s]+)(?:\s+"([^"]+)")?\]')

    # scp-like remote syntax ``user@host:path`` (no scheme).
    _SCP_RE = re.compile(r'^([^@/:\s]+@[^@/:\s]+):(.+)$')

    # Escape sequences that git-config recognises inside values.
    _ESCAPES = {'"': '"', "\\": "\\", "n": "\n", "t": "\t", "b": "\b"}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_dir(path: str) -> Optional[str]:
        """Return *path* as an absolute directory path, or None if it is not an existing directory."""
        try:
            resolved = os.path.abspath(os.path.expanduser(path))
        except (ValueError, OSError):
            return None
        return resolved if os.path.isdir(resolved) else None

    @staticmethod
    def _resolve_git_entry(entry: str) -> Optional[str]:
        """Return the git directory behind a ``.git`` entry, or None.

        ``entry`` is normally a directory. In submodules and linked worktrees
        it is instead a file whose first line is ``gitdir: <path>``; a relative
        ``<path>`` is resolved against the directory containing the file.
        """
        if os.path.isdir(entry):
            return entry
        if not os.path.isfile(entry):
            return None
        try:
            with open(entry, "r", encoding="utf-8") as f:
                first_line = f.readline().strip()
        except (OSError, UnicodeDecodeError):
            return None

        prefix = "gitdir:"
        if not first_line.startswith(prefix):
            return None
        target = first_line[len(prefix):].strip()
        if not target:
            return None
        # os.path.join keeps an absolute target untouched.
        target = os.path.normpath(os.path.join(os.path.dirname(entry), target))
        return target if os.path.isdir(target) else None

    @staticmethod
    def _common_git_dir(git_dir: str) -> str:
        """Return the directory holding shared files such as ``config``.

        A linked worktree's git directory contains a ``commondir`` file that
        points (relative to the git directory) at the main repository's git
        directory. Without that file, *git_dir* itself is returned.
        """
        try:
            with open(os.path.join(git_dir, "commondir"), "r", encoding="utf-8") as f:
                target = f.readline().strip()
        except (OSError, UnicodeDecodeError):
            return git_dir
        if not target:
            return git_dir
        common = os.path.normpath(os.path.join(git_dir, target))
        return common if os.path.isdir(common) else git_dir

    @staticmethod
    def _clean_config_value(raw: str) -> str:
        """Decode a raw git-config value.

        Removes double quotes, drops a trailing ``#``/``;`` comment that is
        outside quotes, decodes the recognised backslash escapes, and trims
        unquoted trailing whitespace.
        """
        chars: List[str] = []
        keep = 0  # number of leading chars that survive trailing-space trimming
        in_quotes = False
        i = 0
        while i < len(raw):
            ch = raw[i]
            if ch == "\\" and i + 1 < len(raw) and raw[i + 1] in GitDetector._ESCAPES:
                chars.append(GitDetector._ESCAPES[raw[i + 1]])
                keep = len(chars)
                i += 2
                continue
            if ch == '"':
                in_quotes = not in_quotes
                keep = len(chars)  # whitespace inside the quotes is significant
                i += 1
                continue
            if not in_quotes and ch in "#;":
                break  # rest of the line is a comment
            chars.append(ch)
            if in_quotes or not ch.isspace():
                keep = len(chars)
            i += 1
        return "".join(chars[:keep])

    @staticmethod
    def _strip_www(host: str) -> str:
        """Lower-case *host* and remove a leading ``www.`` label only."""
        host = host.lower()
        return host[4:] if host.startswith("www.") else host

    @staticmethod
    def _normalize_host(value: str) -> str:
        """Reduce a user-supplied host or URL to a bare, lower-case hostname.

        Strips scheme, path, userinfo, port and a leading ``www.``.
        """
        host = value.strip().lower()
        if "://" in host:
            host = host.split("://", 1)[1]
        host = host.split("/", 1)[0]
        host = host.rsplit("@", 1)[-1]
        if host.startswith("["):
            # Bracketed IPv6 literal, optionally followed by ``:port``.
            host = host[1:].split("]", 1)[0]
        else:
            host = host.split(":", 1)[0]
        return GitDetector._strip_www(host)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @staticmethod
    def is_git_repository(path: str = ".") -> bool:
        """Return True if *path* is an existing directory inside a git work tree.

        *path* is made absolute (with ``~`` expanded); it and each of its
        parents, including the filesystem root, are searched for a ``.git``
        entry.
        """
        return GitDetector.find_git_directory(path) is not None

    @staticmethod
    def find_git_directory(path: str = ".") -> Optional[str]:
        """Find the git directory that governs *path*.

        Args:
            path: Directory to start from. Must exist and be a directory.

        Returns:
            Path of the nearest ``.git`` directory, searching *path* and then
            its parents up to and including the filesystem root. When ``.git``
            is a ``gitdir:`` pointer file, the directory it points at is
            returned. None if nothing is found or *path* is invalid.
        """
        current = GitDetector._normalize_dir(path)
        if current is None:
            return None

        while True:
            git_dir = GitDetector._resolve_git_entry(os.path.join(current, ".git"))
            if git_dir:
                return git_dir
            parent = os.path.dirname(current)
            if parent == current:  # reached (and already checked) the root
                return None
            current = parent

    @staticmethod
    def parse_git_config(config_content: str) -> Dict[str, Dict[str, str]]:
        """Parse git config file content into a dictionary.

        Returns:
            Mapping of section key to its ``{key: value}`` pairs. The section
            key is ``"type"`` for ``[type]`` and ``"type:name"`` for
            ``[type "name"]``. Repeated headers for the same section are
            merged. Lines starting with ``#`` or ``;`` are comments. Values are
            unquoted and stripped of inline comments. Keys without ``=`` and
            pairs appearing before any section header are ignored.
        """
        config: Dict[str, Dict[str, str]] = {}
        current_section = None

        for line in config_content.split('\n'):
            line = line.strip()

            # Skip empty lines and comments
            if not line or line.startswith(('#', ';')):
                continue

            # Section header [section "name"]
            section_match = GitDetector._SECTION_RE.match(line)
            if section_match:
                section_type = section_match.group(1)
                section_name = section_match.group(2) or ""
                current_section = f"{section_type}:{section_name}" if section_name else section_type
                # setdefault: git allows the same section to appear more than once.
                config.setdefault(current_section, {})
                continue

            # Key-value pair
            if current_section and '=' in line:
                key, value = line.split('=', 1)
                config[current_section][key.strip()] = GitDetector._clean_config_value(value.strip())

        return config

    @staticmethod
    def get_remote_urls(path: str = ".") -> Dict[str, str]:
        """Get all git remote URLs by reading the repository's ``config`` file.

        Returns:
            Mapping of remote name to its ``url`` value; empty if *path* is not
            in a repository or the config cannot be read or decoded as UTF-8.
        """
        git_dir = GitDetector.find_git_directory(path)
        if not git_dir:
            return {}

        # For linked worktrees the config lives in the common git directory.
        config_file = os.path.join(GitDetector._common_git_dir(git_dir), "config")
        if not os.path.isfile(config_file):
            return {}

        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config_content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            logger.debug("Failed to read git config: %s", e)
            return {}

        remotes: Dict[str, str] = {}
        for section_name, section_data in GitDetector.parse_git_config(config_content).items():
            section_type, sep, remote_name = section_name.partition(":")
            # Section types and keys are case-insensitive in git config.
            if not sep or section_type.lower() != "remote":
                continue
            for key, value in section_data.items():
                if key.lower() == "url":
                    remotes[remote_name] = value
        return remotes

    @staticmethod
    def get_current_branch(path: str = ".") -> Optional[str]:
        """Get the current git branch name by reading the ``HEAD`` file.

        Returns:
            The branch name when HEAD is a symbolic ref under ``refs/heads/``;
            None for a detached HEAD, a missing or unreadable HEAD, or when
            *path* is not in a repository.
        """
        git_dir = GitDetector.find_git_directory(path)
        if not git_dir:
            return None

        head_file = os.path.join(git_dir, "HEAD")
        if not os.path.isfile(head_file):
            return None

        try:
            with open(head_file, 'r', encoding='utf-8') as f:
                head_content = f.read().strip()
        except (OSError, UnicodeDecodeError) as e:
            logger.debug("Failed to read HEAD file: %s", e)
            return None

        prefix = "ref: refs/heads/"
        if head_content.startswith(prefix):
            # Slice rather than replace() so only the leading prefix is removed.
            return head_content[len(prefix):]
        # Detached HEAD (raw commit hash) or a non-branch ref.
        return None

    @staticmethod
    def parse_gitlab_url(url: str) -> Optional[Dict[str, Any]]:
        """Parse a git remote URL and extract project information.

        Supports formats:
        - https://gitlab.com/group/project.git
        - https://gitlab.com/group/subgroup/project
        - git@gitlab.com:group/project.git (any ``user@host:path`` form)
        - ssh://git@gitlab.com/group/project.git

        Returns:
            Dict with ``host``, ``namespace``, ``project``, ``path`` and
            ``url``, or None when the URL has no host or fewer than two path
            segments. For scp-like input, ``url`` holds the equivalent
            ``ssh://user@host/path`` form rather than the original text.
        """
        try:
            # Rewrite scp-like user@host:path into ssh://user@host/path so
            # urlparse can handle it.
            if "://" not in url:
                scp_match = GitDetector._SCP_RE.match(url)
                if scp_match:
                    url = f"ssh://{scp_match.group(1)}/{scp_match.group(2)}"

            parsed = urlparse(url)

            host = parsed.hostname or parsed.netloc.split("@")[-1].split(":")[0]
            if not host:
                return None

            # Remove surrounding slashes and the .git suffix
            path = parsed.path.strip("/")
            if path.endswith(".git"):
                path = path[:-4]

            # A project needs at least "<namespace>/<project>".
            path_parts = path.split("/")
            if len(path_parts) < 2:
                return None

            return {
                "host": host,
                "namespace": "/".join(path_parts[:-1]),
                "project": path_parts[-1],
                "path": path,
                "url": url,
            }
        except (AttributeError, TypeError, ValueError) as e:
            # Non-string input or a URL that urlparse rejects.
            logger.debug("Failed to parse GitLab URL '%s': %s", url, e)
            return None

    @classmethod
    def detect_gitlab_project(cls, path: str = ".", preferred_remote: str = "origin") -> Optional[Dict[str, Any]]:
        """Detect GitLab project from git repository.

        The preferred remote is tried first, then the remaining remotes in
        config order; the first URL that parses wins.

        Returns:
            Dictionary with project information or None if not found
            {
                "host": "gitlab.com",
                "namespace": "group/subgroup",
                "project": "project-name",
                "path": "group/subgroup/project-name",
                "url": "remote-url (scp-like URLs rewritten to ssh://)",
                "branch": "current-branch or None"
            }
        """
        # get_remote_urls returns {} when path is not inside a repository.
        remotes = cls.get_remote_urls(path)
        if not remotes:
            return None

        candidates: List[str] = []
        if preferred_remote in remotes:
            candidates.append(remotes[preferred_remote])
        candidates.extend(
            remote_url
            for remote_name, remote_url in remotes.items()
            if remote_name != preferred_remote
        )

        for remote_url in candidates:
            parsed = cls.parse_gitlab_url(remote_url)
            if parsed:
                parsed["branch"] = cls.get_current_branch(path)
                return parsed

        return None

    @classmethod
    def is_gitlab_url(cls, url: str, gitlab_host: Optional[str] = None) -> bool:
        """Check if a URL is a GitLab URL.

        Args:
            url: The URL to check.
            gitlab_host: If provided, the URL must match this specific GitLab
                host. May be a bare hostname or a URL; scheme, path, userinfo,
                port and a leading ``www.`` are ignored. When not provided,
                the hostname must either be ``gitlab.com``, a subdomain of
                ``gitlab.com``, or include a distinct ``gitlab`` label
                (e.g., ``gitlab.example.com``).
        """
        parsed = cls.parse_gitlab_url(url)
        if not parsed:
            return False

        if gitlab_host:
            return cls._strip_www(parsed["host"]) == cls._normalize_host(gitlab_host)

        host = parsed["host"].lower()

        # Official GitLab SaaS domain
        if host == "gitlab.com" or host.endswith(".gitlab.com"):
            return True

        # Self-hosted GitLab instances: hostname starts with 'gitlab.' or
        # contains a '.gitlab.' label.
        return host.startswith("gitlab.") or ".gitlab." in host

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Vijay-Duke/mcp-gitlab'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.