Skip to main content
Glama

Rootly MCP server

Official
server.py98.7 kB
""" Rootly MCP Server - A Model Context Protocol server for Rootly API integration. This module implements a server that dynamically generates MCP tools based on the Rootly API's OpenAPI (Swagger) specification using FastMCP's OpenAPI integration. """ import json import os import logging from copy import deepcopy from pathlib import Path import requests import httpx from typing import Any, Dict, List, Optional, Annotated from fastmcp import FastMCP from pydantic import Field from .utils import sanitize_parameters_in_spec from .smart_utils import TextSimilarityAnalyzer, SolutionExtractor # Set up logger logger = logging.getLogger(__name__) class MCPError: """Enhanced error handling for MCP protocol compliance.""" @staticmethod def protocol_error(code: int, message: str, data: Optional[Dict] = None): """Create a JSON-RPC protocol-level error response.""" error_response = { "jsonrpc": "2.0", "error": { "code": code, "message": message } } if data: error_response["error"]["data"] = data return error_response @staticmethod def tool_error(error_message: str, error_type: str = "execution_error", details: Optional[Dict] = None): """Create a tool-level error response (returned as successful tool result).""" error_response = { "error": True, "error_type": error_type, "message": error_message } if details: error_response["details"] = details return error_response @staticmethod def categorize_error(exception: Exception) -> tuple[str, str]: """Categorize an exception into error type and appropriate message.""" error_str = str(exception) exception_type = type(exception).__name__ # Authentication/Authorization errors if any(keyword in error_str.lower() for keyword in ["401", "unauthorized", "authentication", "token", "forbidden"]): return "authentication_error", f"Authentication failed: {error_str}" # Network/Connection errors if any(keyword in exception_type.lower() for keyword in ["connection", "timeout", "network"]): return "network_error", f"Network error: {error_str}" # HTTP errors if "40" in error_str[:10]: # 4xx client errors return "client_error", f"Client error: {error_str}" elif "50" in error_str[:10]: # 5xx server errors return "server_error", f"Server error: {error_str}" # Validation errors if any(keyword in exception_type.lower() for keyword in ["validation", "pydantic", "field"]): return "validation_error", f"Input validation error: {error_str}" # Generic execution errors return "execution_error", f"Tool execution error: {error_str}" # Default Swagger URL SWAGGER_URL = "https://rootly-heroku.s3.amazonaws.com/swagger/v1/swagger.json" # Default allowed API paths def _generate_recommendation(solution_data: dict) -> str: """Generate a high-level recommendation based on solution analysis.""" solutions = solution_data.get("solutions", []) avg_time = solution_data.get("average_resolution_time") if not solutions: return "No similar incidents found. This may be a novel issue requiring escalation." recommendation_parts = [] # Time expectation if avg_time: if avg_time < 1: recommendation_parts.append("Similar incidents typically resolve quickly (< 1 hour).") elif avg_time > 4: recommendation_parts.append("Similar incidents typically require more time (> 4 hours).") # Top solution if solutions: top_solution = solutions[0] if top_solution.get("suggested_actions"): actions = top_solution["suggested_actions"][:2] # Top 2 actions recommendation_parts.append(f"Consider trying: {', '.join(actions)}") # Pattern insights patterns = solution_data.get("common_patterns", []) if patterns: recommendation_parts.append(f"Common patterns: {patterns[0]}") return " ".join(recommendation_parts) if recommendation_parts else "Review similar incidents above for resolution guidance." # Default allowed API paths DEFAULT_ALLOWED_PATHS = [ "/incidents/{incident_id}/alerts", "/alerts", "/alerts/{alert_id}", "/severities", "/severities/{severity_id}", "/teams", "/teams/{team_id}", "/services", "/services/{service_id}", "/functionalities", "/functionalities/{functionality_id}", # Incident types "/incident_types", "/incident_types/{incident_type_id}", # Action items (all, by id, by incident) "/incident_action_items", "/incident_action_items/{incident_action_item_id}", "/incidents/{incident_id}/action_items", # Workflows "/workflows", "/workflows/{workflow_id}", # Workflow runs "/workflow_runs", "/workflow_runs/{workflow_run_id}", # Environments "/environments", "/environments/{environment_id}", # Users "/users", "/users/{user_id}", "/users/me", # Status pages "/status_pages", "/status_pages/{status_page_id}", # On-call schedules and shifts "/schedules", "/schedules/{schedule_id}", "/schedules/{schedule_id}/shifts", "/shifts", "/schedule_rotations/{schedule_rotation_id}", "/schedule_rotations/{schedule_rotation_id}/schedule_rotation_users", "/schedule_rotations/{schedule_rotation_id}/schedule_rotation_active_days", # On-call overrides "/schedules/{schedule_id}/override_shifts", "/override_shifts/{override_shift_id}", # On-call shadows and roles "/schedules/{schedule_id}/on_call_shadows", "/on_call_shadows/{on_call_shadow_id}", "/on_call_roles", "/on_call_roles/{on_call_role_id}", ] class AuthenticatedHTTPXClient: """An HTTPX client wrapper that handles Rootly API authentication and parameter transformation.""" def __init__(self, base_url: str = "https://api.rootly.com", hosted: bool = False, parameter_mapping: Optional[Dict[str, str]] = None): self._base_url = base_url self.hosted = hosted self._api_token = None self.parameter_mapping = parameter_mapping or {} if not self.hosted: self._api_token = self._get_api_token() # Create the HTTPX client headers = { "Content-Type": "application/vnd.api+json", "Accept": "application/vnd.api+json" # Let httpx handle Accept-Encoding automatically with all supported formats } if self._api_token: headers["Authorization"] = f"Bearer {self._api_token}" self.client = httpx.AsyncClient( base_url=base_url, headers=headers, timeout=30.0, follow_redirects=True, # Ensure proper handling of compressed responses limits=httpx.Limits(max_keepalive_connections=5, max_connections=10) ) def _get_api_token(self) -> Optional[str]: """Get the API token from environment variables.""" api_token = os.getenv("ROOTLY_API_TOKEN") if not api_token: logger.warning("ROOTLY_API_TOKEN environment variable is not set") return None return api_token def _transform_params(self, params: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: """Transform sanitized parameter names back to original names.""" if not params or not self.parameter_mapping: return params transformed = {} for key, value in params.items(): # Use the original name if we have a mapping, otherwise keep the sanitized name original_key = self.parameter_mapping.get(key, key) transformed[original_key] = value if original_key != key: logger.debug(f"Transformed parameter: '{key}' -> '{original_key}'") return transformed async def request(self, method: str, url: str, **kwargs): """Override request to transform parameters.""" # Transform query parameters if 'params' in kwargs: kwargs['params'] = self._transform_params(kwargs['params']) # Call the underlying client's request method and let it handle everything return await self.client.request(method, url, **kwargs) async def get(self, url: str, **kwargs): """Proxy to request with GET method.""" return await self.request('GET', url, **kwargs) async def post(self, url: str, **kwargs): """Proxy to request with POST method.""" return await self.request('POST', url, **kwargs) async def put(self, url: str, **kwargs): """Proxy to request with PUT method.""" return await self.request('PUT', url, **kwargs) async def patch(self, url: str, **kwargs): """Proxy to request with PATCH method.""" return await self.request('PATCH', url, **kwargs) async def delete(self, url: str, **kwargs): """Proxy to request with DELETE method.""" return await self.request('DELETE', url, **kwargs) async def __aenter__(self): return self async def __aexit__(self, exc_type, exc_val, exc_tb): pass def __getattr__(self, name): # Delegate all other attributes to the underlying client, except for request methods if name in ['request', 'get', 'post', 'put', 'patch', 'delete']: # Use our overridden methods instead return getattr(self, name) return getattr(self.client, name) @property def base_url(self): return self._base_url @property def headers(self): return self.client.headers def create_rootly_mcp_server( swagger_path: Optional[str] = None, name: str = "Rootly", allowed_paths: Optional[List[str]] = None, hosted: bool = False, base_url: Optional[str] = None, ) -> FastMCP: """ Create a Rootly MCP Server using FastMCP's OpenAPI integration. Args: swagger_path: Path to the Swagger JSON file. If None, will fetch from URL. name: Name of the MCP server. allowed_paths: List of API paths to include. If None, includes default paths. hosted: Whether the server is hosted (affects authentication). base_url: Base URL for Rootly API. If None, uses ROOTLY_BASE_URL env var or default. Returns: A FastMCP server instance. """ # Set default allowed paths if none provided if allowed_paths is None: allowed_paths = DEFAULT_ALLOWED_PATHS # Add /v1 prefix to paths if not present allowed_paths_v1 = [ f"/v1{path}" if not path.startswith("/v1") else path for path in allowed_paths ] logger.info(f"Creating Rootly MCP Server with allowed paths: {allowed_paths_v1}") # Load the Swagger specification swagger_spec = _load_swagger_spec(swagger_path) logger.info(f"Loaded Swagger spec with {len(swagger_spec.get('paths', {}))} total paths") # Filter the OpenAPI spec to only include allowed paths filtered_spec = _filter_openapi_spec(swagger_spec, allowed_paths_v1) logger.info(f"Filtered spec to {len(filtered_spec.get('paths', {}))} allowed paths") # Sanitize all parameter names in the filtered spec to be MCP-compliant parameter_mapping = sanitize_parameters_in_spec(filtered_spec) logger.info(f"Sanitized parameter names for MCP compatibility (mapped {len(parameter_mapping)} parameters)") # Determine the base URL if base_url is None: base_url = os.getenv("ROOTLY_BASE_URL", "https://api.rootly.com") logger.info(f"Using Rootly API base URL: {base_url}") # Create the authenticated HTTP client with parameter mapping http_client = AuthenticatedHTTPXClient( base_url=base_url, hosted=hosted, parameter_mapping=parameter_mapping ) # Create the MCP server using OpenAPI integration # By default, all routes become tools which is what we want mcp = FastMCP.from_openapi( openapi_spec=filtered_spec, client=http_client.client, name=name, timeout=30.0, tags={"rootly", "incident-management"}, ) @mcp.custom_route("/healthz", methods=["GET"]) @mcp.custom_route("/health", methods=["GET"]) async def health_check(request): from starlette.responses import PlainTextResponse return PlainTextResponse("OK") # Add some custom tools for enhanced functionality @mcp.tool() def list_endpoints() -> list: """List all available Rootly API endpoints with their descriptions.""" endpoints = [] for path, path_item in filtered_spec.get("paths", {}).items(): for method, operation in path_item.items(): if method.lower() not in ["get", "post", "put", "delete", "patch"]: continue summary = operation.get("summary", "") description = operation.get("description", "") endpoints.append({ "path": path, "method": method.upper(), "summary": summary, "description": description, }) return endpoints async def make_authenticated_request(method: str, url: str, **kwargs): """Make an authenticated request, extracting token from MCP headers in hosted mode.""" # In hosted mode, get token from MCP request headers if hosted: try: from fastmcp.server.dependencies import get_http_headers request_headers = get_http_headers() auth_header = request_headers.get("authorization", "") if auth_header: # Add authorization header to the request if "headers" not in kwargs: kwargs["headers"] = {} kwargs["headers"]["Authorization"] = auth_header except Exception: pass # Fallback to default client behavior # Use our custom client with proper error handling instead of bypassing it return await http_client.request(method, url, **kwargs) @mcp.tool() async def search_incidents( query: Annotated[str, Field(description="Search query to filter incidents by title/summary")] = "", page_size: Annotated[int, Field(description="Number of results per page (max: 20)", ge=1, le=20)] = 10, page_number: Annotated[int, Field(description="Page number to retrieve (use 0 for all pages)", ge=0)] = 1, max_results: Annotated[int, Field(description="Maximum total results when fetching all pages (ignored if page_number > 0)", ge=1, le=10)] = 5, ) -> dict: """ Search incidents with flexible pagination control. Use page_number=0 to fetch all matching results across multiple pages up to max_results. Use page_number>0 to fetch a specific page. """ # Single page mode if page_number > 0: params = { "page[size]": page_size, # Use requested page size (already limited to max 20) "page[number]": page_number, "include": "", "fields[incidents]": "id,title,summary,status,severity,created_at,updated_at,url,started_at", } if query: params["filter[search]"] = query try: response = await make_authenticated_request("GET", "/v1/incidents", params=params) response.raise_for_status() return response.json() except Exception as e: error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error(error_message, error_type) # Multi-page mode (page_number = 0) all_incidents = [] current_page = 1 effective_page_size = page_size # Use requested page size (already limited to max 20) max_pages = 10 # Safety limit to prevent infinite loops try: while len(all_incidents) < max_results and current_page <= max_pages: params = { "page[size]": effective_page_size, "page[number]": current_page, "include": "", "fields[incidents]": "id,title,summary,status,severity,created_at,updated_at,url,started_at", } if query: params["filter[search]"] = query try: response = await make_authenticated_request("GET", "/v1/incidents", params=params) response.raise_for_status() response_data = response.json() if "data" in response_data: incidents = response_data["data"] if not incidents: # No more incidents available break # Check if we got fewer incidents than requested (last page) if len(incidents) < effective_page_size: all_incidents.extend(incidents) break all_incidents.extend(incidents) # Check metadata if available meta = response_data.get("meta", {}) current_page_meta = meta.get("current_page", current_page) total_pages = meta.get("total_pages") # If we have reliable metadata, use it if total_pages and current_page_meta >= total_pages: break current_page += 1 else: break except Exception as e: # Re-raise authentication or critical errors for immediate handling if "401" in str(e) or "Unauthorized" in str(e) or "authentication" in str(e).lower(): error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error(error_message, error_type) # For other errors, break loop and return partial results break # Limit to max_results if len(all_incidents) > max_results: all_incidents = all_incidents[:max_results] return { "data": all_incidents, "meta": { "total_fetched": len(all_incidents), "max_results": max_results, "query": query, "pages_fetched": current_page - 1, "page_size": effective_page_size } } except Exception as e: error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error(error_message, error_type) # Initialize smart analysis tools similarity_analyzer = TextSimilarityAnalyzer() solution_extractor = SolutionExtractor() @mcp.tool() async def find_related_incidents( incident_id: str = "", incident_description: str = "", similarity_threshold: Annotated[float, Field(description="Minimum similarity score (0.0-1.0)", ge=0.0, le=1.0)] = 0.15, max_results: Annotated[int, Field(description="Maximum number of related incidents to return", ge=1, le=20)] = 5, status_filter: Annotated[str, Field(description="Filter incidents by status (empty for all, 'resolved', 'investigating', etc.)")] = "" ) -> dict: """Find similar incidents to help with context and resolution strategies. Provide either incident_id OR incident_description (e.g., 'website is down', 'database timeout errors'). Use status_filter to limit to specific incident statuses or leave empty for all incidents.""" try: target_incident = {} if incident_id: # Get the target incident details by ID target_response = await make_authenticated_request("GET", f"/v1/incidents/{incident_id}") target_response.raise_for_status() target_incident_data = target_response.json() target_incident = target_incident_data.get("data", {}) if not target_incident: return MCPError.tool_error("Incident not found", "not_found") elif incident_description: # Create synthetic incident for analysis from descriptive text target_incident = { "id": "synthetic", "attributes": { "title": incident_description, "summary": incident_description, "description": incident_description } } else: return MCPError.tool_error("Must provide either incident_id or incident_description", "validation_error") # Get historical incidents for comparison params = { "page[size]": 100, # Get more incidents for better matching "page[number]": 1, "include": "" } # Only add status filter if specified if status_filter: params["filter[status]"] = status_filter historical_response = await make_authenticated_request("GET", "/v1/incidents", params=params) historical_response.raise_for_status() historical_data = historical_response.json() historical_incidents = historical_data.get("data", []) # Filter out the target incident itself if it exists if incident_id: historical_incidents = [inc for inc in historical_incidents if str(inc.get('id')) != str(incident_id)] if not historical_incidents: return { "related_incidents": [], "message": "No historical incidents found for comparison", "target_incident": { "id": incident_id or "synthetic", "title": target_incident.get("attributes", {}).get("title", incident_description) } } # Calculate similarities similar_incidents = similarity_analyzer.calculate_similarity(historical_incidents, target_incident) # Filter by threshold and limit results filtered_incidents = [ inc for inc in similar_incidents if inc.similarity_score >= similarity_threshold ][:max_results] # Format response related_incidents = [] for incident in filtered_incidents: related_incidents.append({ "incident_id": incident.incident_id, "title": incident.title, "similarity_score": round(incident.similarity_score, 3), "matched_services": incident.matched_services, "matched_keywords": incident.matched_keywords, "resolution_summary": incident.resolution_summary, "resolution_time_hours": incident.resolution_time_hours }) return { "target_incident": { "id": incident_id or "synthetic", "title": target_incident.get("attributes", {}).get("title", incident_description) }, "related_incidents": related_incidents, "total_found": len(filtered_incidents), "similarity_threshold": similarity_threshold, "analysis_summary": f"Found {len(filtered_incidents)} similar incidents out of {len(historical_incidents)} historical incidents" } except Exception as e: error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error(f"Failed to find related incidents: {error_message}", error_type) @mcp.tool() async def suggest_solutions( incident_id: str = "", incident_title: str = "", incident_description: str = "", max_solutions: Annotated[int, Field(description="Maximum number of solution suggestions", ge=1, le=10)] = 3, status_filter: Annotated[str, Field(description="Filter incidents by status (default 'resolved', empty for all, 'investigating', etc.)")] = "resolved" ) -> dict: """Suggest solutions based on similar incidents. Provide either incident_id OR title/description. Defaults to resolved incidents for solution mining, but can search all statuses.""" try: target_incident = {} if incident_id: # Get incident details by ID response = await make_authenticated_request("GET", f"/v1/incidents/{incident_id}") response.raise_for_status() incident_data = response.json() target_incident = incident_data.get("data", {}) if not target_incident: return MCPError.tool_error("Incident not found", "not_found") elif incident_title or incident_description: # Create synthetic incident for analysis target_incident = { "id": "synthetic", "attributes": { "title": incident_title, "summary": incident_description, "description": incident_description } } else: return MCPError.tool_error("Must provide either incident_id or incident_title/description", "validation_error") # Get incidents for solution mining params = { "page[size]": 150, # Get more incidents for better solution matching "page[number]": 1, "include": "" } # Only add status filter if specified if status_filter: params["filter[status]"] = status_filter historical_response = await make_authenticated_request("GET", "/v1/incidents", params=params) historical_response.raise_for_status() historical_data = historical_response.json() historical_incidents = historical_data.get("data", []) # Filter out target incident if it exists if incident_id: historical_incidents = [inc for inc in historical_incidents if str(inc.get('id')) != str(incident_id)] if not historical_incidents: status_msg = f" with status '{status_filter}'" if status_filter else "" return { "solutions": [], "message": f"No historical incidents found{status_msg} for solution mining" } # Find similar incidents similar_incidents = similarity_analyzer.calculate_similarity(historical_incidents, target_incident) # Filter to reasonably similar incidents (lower threshold for solution suggestions) relevant_incidents = [inc for inc in similar_incidents if inc.similarity_score >= 0.2][:max_solutions * 2] if not relevant_incidents: return { "solutions": [], "message": "No sufficiently similar incidents found for solution suggestions", "suggestion": "This appears to be a unique incident. Consider escalating or consulting documentation." } # Extract solutions solution_data = solution_extractor.extract_solutions(relevant_incidents) # Format response return { "target_incident": { "id": incident_id or "synthetic", "title": target_incident.get("attributes", {}).get("title", incident_title), "description": target_incident.get("attributes", {}).get("summary", incident_description) }, "solutions": solution_data["solutions"][:max_solutions], "insights": { "common_patterns": solution_data["common_patterns"], "average_resolution_time_hours": solution_data["average_resolution_time"], "total_similar_incidents": solution_data["total_similar_incidents"] }, "recommendation": _generate_recommendation(solution_data) } except Exception as e: error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error(f"Failed to suggest solutions: {error_message}", error_type) @mcp.tool() async def get_oncall_shift_metrics( start_date: Annotated[str, Field(description="Start date for metrics (ISO 8601 format, e.g., '2025-10-01' or '2025-10-01T00:00:00Z')")], end_date: Annotated[str, Field(description="End date for metrics (ISO 8601 format, e.g., '2025-10-31' or '2025-10-31T23:59:59Z')")], user_ids: Annotated[str, Field(description="Comma-separated list of user IDs to filter by (optional)")] = "", schedule_ids: Annotated[str, Field(description="Comma-separated list of schedule IDs to filter by (optional)")] = "", team_ids: Annotated[str, Field(description="Comma-separated list of team IDs to filter by (requires querying schedules first)")] = "", group_by: Annotated[str, Field(description="Group results by: 'user', 'schedule', 'team', or 'none'")] = "user" ) -> dict: """ Get on-call shift metrics for a specified time period. Returns shift counts, total hours, and other statistics grouped by user, schedule, or team. Examples: - Monthly report: start_date='2025-10-01', end_date='2025-10-31' - Specific user: start_date='2025-10-01', end_date='2025-10-31', user_ids='123,456' - Specific team: team_ids='team-1' (will query schedules for that team first) """ try: from datetime import datetime, timedelta from collections import defaultdict from typing import Any, Dict # Build query parameters params: Dict[str, Any] = { "from": start_date, "to": end_date, } # Fetch schedules (schedules don't have team relationship, they have owner_group_ids) schedules_response = await make_authenticated_request("GET", "/v1/schedules", params={"page[size]": 100}) if schedules_response is None: return MCPError.tool_error("Failed to get schedules: API request returned None", "execution_error") schedules_response.raise_for_status() schedules_data = schedules_response.json() all_schedules = schedules_data.get("data", []) # Collect all unique team IDs from schedules' owner_group_ids team_ids_set = set() for schedule in all_schedules: owner_group_ids = schedule.get("attributes", {}).get("owner_group_ids", []) team_ids_set.update(owner_group_ids) # Fetch all teams teams_map = {} if team_ids_set: teams_response = await make_authenticated_request("GET", "/v1/teams", params={"page[size]": 100}) if teams_response and teams_response.status_code == 200: teams_data = teams_response.json() for team in teams_data.get("data", []): teams_map[team.get("id")] = team # Build schedule -> team mapping schedule_to_team_map = {} for schedule in all_schedules: schedule_id = schedule.get("id") schedule_name = schedule.get("attributes", {}).get("name", "Unknown") owner_group_ids = schedule.get("attributes", {}).get("owner_group_ids", []) # Use the first owner group as the primary team if owner_group_ids: team_id = owner_group_ids[0] team_attrs = teams_map.get(team_id, {}).get("attributes", {}) team_name = team_attrs.get("name", "Unknown Team") schedule_to_team_map[schedule_id] = { "team_id": team_id, "team_name": team_name, "schedule_name": schedule_name } # Handle team filtering (requires multi-step query) target_schedule_ids = [] if team_ids: team_id_list = [tid.strip() for tid in team_ids.split(",") if tid.strip()] # Filter schedules by team for schedule_id, team_info in schedule_to_team_map.items(): if str(team_info["team_id"]) in team_id_list: target_schedule_ids.append(schedule_id) # Apply schedule filtering if schedule_ids: schedule_id_list = [sid.strip() for sid in schedule_ids.split(",") if sid.strip()] target_schedule_ids.extend(schedule_id_list) if target_schedule_ids: params["schedule_ids[]"] = target_schedule_ids # Apply user filtering if user_ids: user_id_list = [uid.strip() for uid in user_ids.split(",") if uid.strip()] params["user_ids[]"] = user_id_list # Include relationships for richer data params["include"] = "user,shift_override,on_call_role,schedule_rotation" # Query shifts try: shifts_response = await make_authenticated_request("GET", "/v1/shifts", params=params) if shifts_response is None: return MCPError.tool_error("Failed to get shifts: API request returned None", "execution_error") shifts_response.raise_for_status() shifts_data = shifts_response.json() if shifts_data is None: return MCPError.tool_error("Failed to get shifts: API returned null/empty response", "execution_error", details={"status": shifts_response.status_code}) shifts = shifts_data.get("data", []) included = shifts_data.get("included", []) except AttributeError as e: return MCPError.tool_error(f"Failed to get shifts: Response object error - {str(e)}", "execution_error", details={"params": params}) except Exception as e: return MCPError.tool_error(f"Failed to get shifts: {str(e)}", "execution_error", details={"params": params, "error_type": type(e).__name__}) # Build lookup maps for included resources users_map = {} on_call_roles_map = {} for resource in included: if resource.get("type") == "users": users_map[resource.get("id")] = resource elif resource.get("type") == "on_call_roles": on_call_roles_map[resource.get("id")] = resource # Calculate metrics metrics: Dict[str, Dict[str, Any]] = defaultdict(lambda: { "shift_count": 0, "total_hours": 0.0, "override_count": 0, "regular_count": 0, "primary_count": 0, "secondary_count": 0, "primary_hours": 0.0, "secondary_hours": 0.0, "unknown_role_count": 0, "unique_days": set(), "shifts": [] }) for shift in shifts: attrs = shift.get("attributes", {}) relationships = shift.get("relationships", {}) # Parse timestamps starts_at = attrs.get("starts_at") ends_at = attrs.get("ends_at") is_override = attrs.get("is_override", False) schedule_id = attrs.get("schedule_id") # Calculate shift duration in hours and track unique days duration_hours = 0.0 shift_days = set() if starts_at and ends_at: try: start_dt = datetime.fromisoformat(starts_at.replace("Z", "+00:00")) end_dt = datetime.fromisoformat(ends_at.replace("Z", "+00:00")) duration_hours = (end_dt - start_dt).total_seconds() / 3600 # Track all unique calendar days this shift spans shift_start_date = start_dt.date() shift_end_date = end_dt.date() while shift_start_date <= shift_end_date: shift_days.add(shift_start_date) shift_start_date += timedelta(days=1) except (ValueError, AttributeError): pass # Get user info user_rel = relationships.get("user", {}).get("data") or {} user_id = user_rel.get("id") user_name = "Unknown" user_email = "" if user_id and user_id in users_map: user_attrs = users_map[user_id].get("attributes", {}) user_name = user_attrs.get("full_name") or user_attrs.get("email", "Unknown") user_email = user_attrs.get("email", "") # Get on-call role info (primary vs secondary) role_rel = relationships.get("on_call_role", {}).get("data") or {} role_id = role_rel.get("id") role_name = "unknown" is_primary = False if role_id and role_id in on_call_roles_map: role_attrs = on_call_roles_map[role_id].get("attributes", {}) role_name = role_attrs.get("name", "").lower() # Typically primary roles contain "primary" and secondary contain "secondary" # Common patterns: "Primary", "Secondary", "L1", "L2", etc. is_primary = "primary" in role_name or role_name == "l1" or role_name == "p1" # Determine grouping key if group_by == "user": key = f"{user_id}|{user_name}" elif group_by == "schedule": schedule_info = schedule_to_team_map.get(schedule_id, {}) schedule_name = schedule_info.get("schedule_name", f"schedule_{schedule_id}") key = f"{schedule_id}|{schedule_name}" elif group_by == "team": team_info = schedule_to_team_map.get(schedule_id, {}) if team_info: team_id = team_info["team_id"] team_name = team_info["team_name"] key = f"{team_id}|{team_name}" else: key = "unknown_team|Unknown Team" else: key = "all" # Update metrics metrics[key]["shift_count"] += 1 metrics[key]["total_hours"] += duration_hours if is_override: metrics[key]["override_count"] += 1 else: metrics[key]["regular_count"] += 1 # Track primary vs secondary if role_id: if is_primary: metrics[key]["primary_count"] += 1 metrics[key]["primary_hours"] += duration_hours else: metrics[key]["secondary_count"] += 1 metrics[key]["secondary_hours"] += duration_hours else: metrics[key]["unknown_role_count"] += 1 # Track unique days metrics[key]["unique_days"].update(shift_days) metrics[key]["shifts"].append({ "shift_id": shift.get("id"), "starts_at": starts_at, "ends_at": ends_at, "duration_hours": round(duration_hours, 2), "is_override": is_override, "schedule_id": schedule_id, "user_id": user_id, "user_name": user_name, "user_email": user_email, "role_name": role_name, "is_primary": is_primary }) # Format results results = [] for key, data in metrics.items(): if group_by == "user": user_id, user_name = key.split("|", 1) result = { "user_id": user_id, "user_name": user_name, "shift_count": data["shift_count"], "days_on_call": len(data["unique_days"]), "total_hours": round(data["total_hours"], 2), "regular_shifts": data["regular_count"], "override_shifts": data["override_count"], "primary_shifts": data["primary_count"], "secondary_shifts": data["secondary_count"], "primary_hours": round(data["primary_hours"], 2), "secondary_hours": round(data["secondary_hours"], 2), "unknown_role_shifts": data["unknown_role_count"], } elif group_by == "schedule": schedule_id, schedule_name = key.split("|", 1) result = { "schedule_id": schedule_id, "schedule_name": schedule_name, "shift_count": data["shift_count"], "days_on_call": len(data["unique_days"]), "total_hours": round(data["total_hours"], 2), "regular_shifts": data["regular_count"], "override_shifts": data["override_count"], "primary_shifts": data["primary_count"], "secondary_shifts": data["secondary_count"], "primary_hours": round(data["primary_hours"], 2), "secondary_hours": round(data["secondary_hours"], 2), "unknown_role_shifts": data["unknown_role_count"], } elif group_by == "team": team_id, team_name = key.split("|", 1) result = { "team_id": team_id, "team_name": team_name, "shift_count": data["shift_count"], "days_on_call": len(data["unique_days"]), "total_hours": round(data["total_hours"], 2), "regular_shifts": data["regular_count"], "override_shifts": data["override_count"], "primary_shifts": data["primary_count"], "secondary_shifts": data["secondary_count"], "primary_hours": round(data["primary_hours"], 2), "secondary_hours": round(data["secondary_hours"], 2), "unknown_role_shifts": data["unknown_role_count"], } else: result = { "group_key": key, "shift_count": data["shift_count"], "days_on_call": len(data["unique_days"]), "total_hours": round(data["total_hours"], 2), "regular_shifts": data["regular_count"], "override_shifts": data["override_count"], "primary_shifts": data["primary_count"], "secondary_shifts": data["secondary_count"], "primary_hours": round(data["primary_hours"], 2), "secondary_hours": round(data["secondary_hours"], 2), "unknown_role_shifts": data["unknown_role_count"], } results.append(result) # Sort by shift count descending results.sort(key=lambda x: x["shift_count"], reverse=True) return { "period": { "start_date": start_date, "end_date": end_date }, "total_shifts": len(shifts), "grouped_by": group_by, "metrics": results, "summary": { "total_hours": round(sum(m["total_hours"] for m in results), 2), "total_regular_shifts": sum(m["regular_shifts"] for m in results), "total_override_shifts": sum(m["override_shifts"] for m in results), "unique_people": len(results) if group_by == "user" else None } } except Exception as e: import traceback error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error( f"Failed to get on-call shift metrics: {error_message}", error_type, details={ "params": {"start_date": start_date, "end_date": end_date}, "exception_type": type(e).__name__, "exception_str": str(e), "traceback": traceback.format_exc() } ) @mcp.tool() async def get_oncall_handoff_summary( team_ids: Annotated[str, Field(description="Comma-separated list of team IDs to filter schedules (optional)")] = "", schedule_ids: Annotated[str, Field(description="Comma-separated list of schedule IDs (optional)")] = "", timezone: Annotated[str, Field(description="Timezone to use for display and filtering (e.g., 'America/Los_Angeles', 'Europe/London', 'Asia/Tokyo'). IMPORTANT: If user mentions a city, location, or region (e.g., 'Toronto', 'APAC', 'my time'), infer the appropriate IANA timezone. Defaults to UTC if not specified.")] = "UTC", filter_by_region: Annotated[bool, Field(description="If True, only show on-call for people whose shifts are during business hours (9am-5pm) in the specified timezone. Defaults to False.")] = False, include_incidents: Annotated[bool, Field(description="If True, fetch incidents for each shift (slower). If False, only show on-call info (faster). Defaults to False for better performance.")] = False ) -> dict: """ Get current on-call handoff summary. Shows who's currently on-call and who's next. Optionally fetch incidents (set include_incidents=True, but slower). Timezone handling: If user mentions their location/timezone, infer it (e.g., "Toronto" → "America/Toronto", "my time" → ask clarifying question or use a common timezone). Regional filtering: Use timezone + filter_by_region=True to see only people on-call during business hours in that region (e.g., timezone='Asia/Tokyo', filter_by_region=True shows only APAC on-call during APAC business hours). Performance: By default, incidents are NOT fetched for faster response. Set include_incidents=True to fetch incidents for each shift (slower, may timeout with many schedules). Useful for: - Quick on-call status checks - Daily handoff meetings - Regional on-call status (APAC, EU, Americas) - Team coordination across timezones """ try: from datetime import datetime, timedelta from zoneinfo import ZoneInfo # Validate and set timezone try: tz = ZoneInfo(timezone) except Exception: tz = ZoneInfo("UTC") # Fallback to UTC if invalid timezone now = datetime.now(tz) def convert_to_timezone(iso_string: str) -> str: """Convert ISO timestamp to target timezone.""" if not iso_string: return iso_string try: dt = datetime.fromisoformat(iso_string.replace("Z", "+00:00")) dt_converted = dt.astimezone(tz) return dt_converted.isoformat() except (ValueError, AttributeError): return iso_string # Return original if conversion fails # Fetch schedules with team info (with pagination) all_schedules = [] page = 1 max_pages = 5 # Schedules shouldn't have many pages while page <= max_pages: schedules_response = await make_authenticated_request("GET", "/v1/schedules", params={"page[size]": 100, "page[number]": page}) if not schedules_response: return MCPError.tool_error("Failed to fetch schedules - no response from API", "execution_error") if schedules_response.status_code != 200: return MCPError.tool_error( f"Failed to fetch schedules - API returned status {schedules_response.status_code}", "execution_error", details={"status_code": schedules_response.status_code} ) schedules_data = schedules_response.json() page_schedules = schedules_data.get("data", []) if not page_schedules: break all_schedules.extend(page_schedules) # Check if there are more pages meta = schedules_data.get("meta", {}) total_pages = meta.get("total_pages", 1) if page >= total_pages: break page += 1 # Build team mapping team_ids_set = set() for schedule in all_schedules: owner_group_ids = schedule.get("attributes", {}).get("owner_group_ids", []) team_ids_set.update(owner_group_ids) teams_map = {} if team_ids_set: teams_response = await make_authenticated_request("GET", "/v1/teams", params={"page[size]": 100}) if teams_response and teams_response.status_code == 200: teams_data = teams_response.json() for team in teams_data.get("data", []): teams_map[team.get("id")] = team # Filter schedules target_schedules = [] team_filter = [tid.strip() for tid in team_ids.split(",") if tid.strip()] if team_ids else [] schedule_filter = [sid.strip() for sid in schedule_ids.split(",") if sid.strip()] if schedule_ids else [] for schedule in all_schedules: schedule_id = schedule.get("id") owner_group_ids = schedule.get("attributes", {}).get("owner_group_ids", []) # Apply filters if schedule_filter and schedule_id not in schedule_filter: continue if team_filter and not any(str(tgid) in team_filter for tgid in owner_group_ids): continue target_schedules.append(schedule) # Get current and upcoming shifts for each schedule handoff_data = [] for schedule in target_schedules: schedule_id = schedule.get("id") schedule_attrs = schedule.get("attributes", {}) schedule_name = schedule_attrs.get("name", "Unknown Schedule") owner_group_ids = schedule_attrs.get("owner_group_ids", []) # Get team info team_name = "No Team" if owner_group_ids: team_id = owner_group_ids[0] team_attrs = teams_map.get(team_id, {}).get("attributes", {}) team_name = team_attrs.get("name", "Unknown Team") # Query shifts for this schedule shifts_response = await make_authenticated_request( "GET", "/v1/shifts", params={ "schedule_ids[]": [schedule_id], "filter[starts_at][gte]": (now - timedelta(days=1)).isoformat(), "filter[starts_at][lte]": (now + timedelta(days=7)).isoformat(), "include": "user,on_call_role", "page[size]": 50 } ) if not shifts_response: continue shifts_data = shifts_response.json() shifts = shifts_data.get("data", []) included = shifts_data.get("included", []) # Build user and role maps users_map = {} roles_map = {} for resource in included: if resource.get("type") == "users": users_map[resource.get("id")] = resource elif resource.get("type") == "on_call_roles": roles_map[resource.get("id")] = resource # Find current and next shifts current_shift = None next_shift = None for shift in sorted(shifts, key=lambda s: s.get("attributes", {}).get("starts_at", "")): attrs = shift.get("attributes", {}) starts_at_str = attrs.get("starts_at") ends_at_str = attrs.get("ends_at") if not starts_at_str or not ends_at_str: continue try: starts_at = datetime.fromisoformat(starts_at_str.replace("Z", "+00:00")) ends_at = datetime.fromisoformat(ends_at_str.replace("Z", "+00:00")) # Current shift: ongoing now if starts_at <= now <= ends_at: current_shift = shift # Next shift: starts after now and no current shift found yet elif starts_at > now and not next_shift: next_shift = shift except (ValueError, AttributeError): continue # Build response for this schedule schedule_info = { "schedule_id": schedule_id, "schedule_name": schedule_name, "team_name": team_name, "current_oncall": None, "next_oncall": None } if current_shift: current_attrs = current_shift.get("attributes", {}) current_rels = current_shift.get("relationships", {}) user_data = (current_rels.get("user", {}).get("data") or {}) user_id = user_data.get("id") role_data = (current_rels.get("on_call_role", {}).get("data") or {}) role_id = role_data.get("id") user_name = "Unknown" if user_id and user_id in users_map: user_attrs = users_map[user_id].get("attributes", {}) user_name = user_attrs.get("full_name") or user_attrs.get("email", "Unknown") role_name = "Unknown Role" if role_id and role_id in roles_map: role_attrs = roles_map[role_id].get("attributes", {}) role_name = role_attrs.get("name", "Unknown Role") schedule_info["current_oncall"] = { "user_name": user_name, "user_id": user_id, "role": role_name, "starts_at": convert_to_timezone(current_attrs.get("starts_at")), "ends_at": convert_to_timezone(current_attrs.get("ends_at")), "is_override": current_attrs.get("is_override", False) } if next_shift: next_attrs = next_shift.get("attributes", {}) next_rels = next_shift.get("relationships", {}) user_data = (next_rels.get("user", {}).get("data") or {}) user_id = user_data.get("id") role_data = (next_rels.get("on_call_role", {}).get("data") or {}) role_id = role_data.get("id") user_name = "Unknown" if user_id and user_id in users_map: user_attrs = users_map[user_id].get("attributes", {}) user_name = user_attrs.get("full_name") or user_attrs.get("email", "Unknown") role_name = "Unknown Role" if role_id and role_id in roles_map: role_attrs = roles_map[role_id].get("attributes", {}) role_name = role_attrs.get("name", "Unknown Role") schedule_info["next_oncall"] = { "user_name": user_name, "user_id": user_id, "role": role_name, "starts_at": convert_to_timezone(next_attrs.get("starts_at")), "ends_at": convert_to_timezone(next_attrs.get("ends_at")), "is_override": next_attrs.get("is_override", False) } handoff_data.append(schedule_info) # Filter by region if requested if filter_by_region: # Define business hours (9am-5pm) in the target timezone business_start_hour = 9 business_end_hour = 17 # Create datetime objects for today's business hours in target timezone today_business_start = now.replace(hour=business_start_hour, minute=0, second=0, microsecond=0) today_business_end = now.replace(hour=business_end_hour, minute=0, second=0, microsecond=0) # Filter schedules where current shift overlaps with business hours filtered_data = [] for schedule_info in handoff_data: current_oncall = schedule_info.get("current_oncall") if current_oncall: # Parse shift times (already in target timezone) shift_start_str = current_oncall.get("starts_at") shift_end_str = current_oncall.get("ends_at") if shift_start_str and shift_end_str: try: shift_start = datetime.fromisoformat(shift_start_str.replace("Z", "+00:00")) shift_end = datetime.fromisoformat(shift_end_str.replace("Z", "+00:00")) # Check if shift overlaps with today's business hours # Shift overlaps if: shift_start < business_end AND shift_end > business_start if shift_start < today_business_end and shift_end > today_business_start: filtered_data.append(schedule_info) except (ValueError, AttributeError): # Skip if we can't parse times continue handoff_data = filtered_data # Fetch incidents for each current shift (only if requested) if include_incidents: for schedule_info in handoff_data: current_oncall = schedule_info.get("current_oncall") if current_oncall: shift_start = current_oncall["starts_at"] shift_end = current_oncall["ends_at"] incidents_result = await _fetch_shift_incidents_internal( start_time=shift_start, end_time=shift_end, schedule_ids="", severity="", status="", tags="" ) schedule_info["shift_incidents"] = incidents_result if incidents_result.get("success") else None else: schedule_info["shift_incidents"] = None else: # Skip incident fetching for better performance for schedule_info in handoff_data: schedule_info["shift_incidents"] = None return { "success": True, "timestamp": now.isoformat(), "timezone": timezone, "schedules": handoff_data, "summary": { "total_schedules": len(handoff_data), "schedules_with_current_oncall": sum(1 for s in handoff_data if s["current_oncall"]), "schedules_with_next_oncall": sum(1 for s in handoff_data if s["next_oncall"]), "total_incidents": sum( s.get("shift_incidents", {}).get("summary", {}).get("total_incidents", 0) for s in handoff_data if s.get("shift_incidents") ) } } except Exception as e: import traceback error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error( f"Failed to get on-call handoff summary: {error_message}", error_type, details={ "exception_type": type(e).__name__, "exception_str": str(e), "traceback": traceback.format_exc() } ) async def _fetch_shift_incidents_internal( start_time: str, end_time: str, schedule_ids: str = "", severity: str = "", status: str = "", tags: str = "" ) -> dict: """Internal helper to fetch incidents - used by both get_shift_incidents and get_oncall_handoff_summary.""" try: from datetime import datetime # Build query parameters # Fetch incidents that: # 1. Were created during the shift (created_at in range) # 2. OR are currently active/unresolved (started but not resolved yet) params = { "page[size]": 100, "sort": "-created_at" } # Get incidents created during shift OR still active # We'll fetch all incidents and filter in-memory for active ones params["filter[started_at][lte]"] = end_time # Started before shift ended # Add severity filter if provided if severity: params["filter[severity]"] = severity.lower() # Add status filter if provided if status: params["filter[status]"] = status.lower() # Add tags filter if provided if tags: tag_list = [t.strip() for t in tags.split(",") if t.strip()] if tag_list: params["filter[tags][]"] = tag_list # Query incidents with pagination all_incidents = [] page = 1 max_pages = 10 # Safety limit to prevent infinite loops while page <= max_pages: params["page[number]"] = page incidents_response = await make_authenticated_request("GET", "/v1/incidents", params=params) if not incidents_response: return MCPError.tool_error("Failed to fetch incidents - no response from API", "execution_error") if incidents_response.status_code != 200: return MCPError.tool_error( f"Failed to fetch incidents - API returned status {incidents_response.status_code}", "execution_error", details={"status_code": incidents_response.status_code, "time_range": f"{start_time} to {end_time}"} ) incidents_data = incidents_response.json() page_incidents = incidents_data.get("data", []) if not page_incidents: break # No more data all_incidents.extend(page_incidents) # Check if there are more pages meta = incidents_data.get("meta", {}) total_pages = meta.get("total_pages", 1) if page >= total_pages: break # Reached the last page page += 1 # Filter incidents to include: # 1. Created during shift (created_at between start_time and end_time) # 2. Currently active (started but not resolved, regardless of when created) from datetime import timezone as dt_timezone shift_start_dt = datetime.fromisoformat(start_time.replace("Z", "+00:00")) shift_end_dt = datetime.fromisoformat(end_time.replace("Z", "+00:00")) now_dt = datetime.now(dt_timezone.utc) # Format incidents for handoff summary incidents_summary = [] for incident in all_incidents: incident_id = incident.get("id") attrs = incident.get("attributes", {}) # Check if incident is relevant to this shift created_at = attrs.get("created_at") started_at = attrs.get("started_at") resolved_at = attrs.get("resolved_at") # Parse timestamps try: created_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00")) if created_at else None started_dt = datetime.fromisoformat(started_at.replace("Z", "+00:00")) if started_at else None resolved_dt = datetime.fromisoformat(resolved_at.replace("Z", "+00:00")) if resolved_at else None except (ValueError, AttributeError): continue # Skip if we can't parse dates # Include incident if: # 1. Created during shift # 2. Started during shift # 3. Resolved during shift # 4. Currently active (not resolved and started before now) include_incident = False if created_dt and shift_start_dt <= created_dt <= shift_end_dt: include_incident = True # Created during shift if started_dt and shift_start_dt <= started_dt <= shift_end_dt: include_incident = True # Started during shift if resolved_dt and shift_start_dt <= resolved_dt <= shift_end_dt: include_incident = True # Resolved during shift if not resolved_dt and started_dt and started_dt <= now_dt: include_incident = True # Currently active if not include_incident: continue # Calculate duration if resolved duration_minutes = None if started_dt and resolved_dt: duration_minutes = int((resolved_dt - started_dt).total_seconds() / 60) # Build narrative summary narrative_parts = [] # What happened title = attrs.get("title", "Untitled Incident") severity = attrs.get("severity", "unknown") narrative_parts.append(f"[{severity.upper()}] {title}") # When and duration if started_at: narrative_parts.append(f"Started at {started_at}") if resolved_at: narrative_parts.append(f"Resolved at {resolved_at}") if duration_minutes: narrative_parts.append(f"Duration: {duration_minutes} minutes") elif attrs.get("status"): narrative_parts.append(f"Status: {attrs.get('status')}") # What was the issue if attrs.get("summary"): narrative_parts.append(f"Details: {attrs.get('summary')}") # Impact if attrs.get("customer_impact_summary"): narrative_parts.append(f"Impact: {attrs.get('customer_impact_summary')}") # Resolution (if available) if attrs.get("mitigation"): narrative_parts.append(f"Resolution: {attrs.get('mitigation')}") elif attrs.get("action_items_count") and attrs.get("action_items_count") > 0: narrative_parts.append(f"Action items created: {attrs.get('action_items_count')}") narrative = " | ".join(narrative_parts) incidents_summary.append({ "incident_id": incident_id, "title": attrs.get("title", "Untitled Incident"), "severity": attrs.get("severity"), "status": attrs.get("status"), "started_at": started_at, "resolved_at": resolved_at, "duration_minutes": duration_minutes, "summary": attrs.get("summary"), "impact": attrs.get("customer_impact_summary"), "mitigation": attrs.get("mitigation"), "narrative": narrative, "incident_url": attrs.get("incident_url") }) # Group by severity by_severity = {} for inc in incidents_summary: sev = inc["severity"] or "unknown" if sev not in by_severity: by_severity[sev] = [] by_severity[sev].append(inc) # Calculate statistics total_incidents = len(incidents_summary) resolved_count = sum(1 for inc in incidents_summary if inc["resolved_at"]) ongoing_count = total_incidents - resolved_count avg_resolution_time = None durations = [inc["duration_minutes"] for inc in incidents_summary if inc["duration_minutes"]] if durations: avg_resolution_time = int(sum(durations) / len(durations)) return { "success": True, "period": { "start_time": start_time, "end_time": end_time }, "summary": { "total_incidents": total_incidents, "resolved": resolved_count, "ongoing": ongoing_count, "average_resolution_minutes": avg_resolution_time, "by_severity": {k: len(v) for k, v in by_severity.items()} }, "incidents": incidents_summary } except Exception as e: import traceback error_type, error_message = MCPError.categorize_error(e) return MCPError.tool_error( f"Failed to get shift incidents: {error_message}", error_type, details={ "params": {"start_time": start_time, "end_time": end_time}, "exception_type": type(e).__name__, "exception_str": str(e), "traceback": traceback.format_exc() } ) @mcp.tool() async def get_shift_incidents( start_time: Annotated[str, Field(description="Start time for incident search (ISO 8601 format, e.g., '2025-10-01T00:00:00Z')")], end_time: Annotated[str, Field(description="End time for incident search (ISO 8601 format, e.g., '2025-10-01T23:59:59Z')")], schedule_ids: Annotated[str, Field(description="Comma-separated list of schedule IDs to filter incidents (optional)")] = "", severity: Annotated[str, Field(description="Filter by severity: 'critical', 'high', 'medium', 'low' (optional)")] = "", status: Annotated[str, Field(description="Filter by status: 'started', 'detected', 'acknowledged', 'investigating', 'identified', 'monitoring', 'resolved', 'cancelled' (optional)")] = "", tags: Annotated[str, Field(description="Comma-separated list of tag slugs to filter incidents (optional)")] = "" ) -> dict: """ Get incidents and alerts that occurred during a specific shift or time period. Useful for: - Shift handoff summaries showing what happened during the shift - Post-shift debriefs and reporting - Incident analysis by time period - Understanding team workload during specific shifts Returns incident details including severity, status, duration, and basic summary. """ return await _fetch_shift_incidents_internal(start_time, end_time, schedule_ids, severity, status, tags) # Add MCP resources for incidents and teams @mcp.resource("incident://{incident_id}") async def get_incident_resource(incident_id: str): """Expose incident details as an MCP resource for easy reference and context.""" try: response = await make_authenticated_request("GET", f"/v1/incidents/{incident_id}") response.raise_for_status() incident_data = response.json() # Format incident data as readable text incident = incident_data.get("data", {}) attributes = incident.get("attributes", {}) text_content = f"""Incident #{incident_id} Title: {attributes.get('title', 'N/A')} Status: {attributes.get('status', 'N/A')} Severity: {attributes.get('severity', 'N/A')} Created: {attributes.get('created_at', 'N/A')} Updated: {attributes.get('updated_at', 'N/A')} Summary: {attributes.get('summary', 'N/A')} URL: {attributes.get('url', 'N/A')}""" return { "uri": f"incident://{incident_id}", "name": f"Incident #{incident_id}", "text": text_content, "mimeType": "text/plain" } except Exception as e: error_type, error_message = MCPError.categorize_error(e) return { "uri": f"incident://{incident_id}", "name": f"Incident #{incident_id} (Error)", "text": f"Error ({error_type}): {error_message}", "mimeType": "text/plain" } @mcp.resource("team://{team_id}") async def get_team_resource(team_id: str): """Expose team details as an MCP resource for easy reference and context.""" try: response = await make_authenticated_request("GET", f"/v1/teams/{team_id}") response.raise_for_status() team_data = response.json() # Format team data as readable text team = team_data.get("data", {}) attributes = team.get("attributes", {}) text_content = f"""Team #{team_id} Name: {attributes.get('name', 'N/A')} Color: {attributes.get('color', 'N/A')} Slug: {attributes.get('slug', 'N/A')} Created: {attributes.get('created_at', 'N/A')} Updated: {attributes.get('updated_at', 'N/A')}""" return { "uri": f"team://{team_id}", "name": f"Team: {attributes.get('name', team_id)}", "text": text_content, "mimeType": "text/plain" } except Exception as e: error_type, error_message = MCPError.categorize_error(e) return { "uri": f"team://{team_id}", "name": f"Team #{team_id} (Error)", "text": f"Error ({error_type}): {error_message}", "mimeType": "text/plain" } @mcp.resource("rootly://incidents") async def list_incidents_resource(): """List recent incidents as an MCP resource for quick reference.""" try: response = await make_authenticated_request("GET", "/v1/incidents", params={ "page[size]": 10, "page[number]": 1, "include": "" }) response.raise_for_status() data = response.json() incidents = data.get("data", []) text_lines = ["Recent Incidents:\n"] for incident in incidents: attrs = incident.get("attributes", {}) text_lines.append(f"• #{incident.get('id', 'N/A')} - {attrs.get('title', 'N/A')} [{attrs.get('status', 'N/A')}]") return { "uri": "rootly://incidents", "name": "Recent Incidents", "text": "\n".join(text_lines), "mimeType": "text/plain" } except Exception as e: error_type, error_message = MCPError.categorize_error(e) return { "uri": "rootly://incidents", "name": "Recent Incidents (Error)", "text": f"Error ({error_type}): {error_message}", "mimeType": "text/plain" } # Log server creation (tool count will be shown when tools are accessed) logger.info("Created Rootly MCP Server successfully") return mcp def _load_swagger_spec(swagger_path: Optional[str] = None) -> Dict[str, Any]: """ Load the Swagger specification from a file or URL. Args: swagger_path: Path to the Swagger JSON file. If None, will fetch from URL. Returns: The Swagger specification as a dictionary. """ if swagger_path: # Use the provided path logger.info(f"Using provided Swagger path: {swagger_path}") if not os.path.isfile(swagger_path): raise FileNotFoundError(f"Swagger file not found at {swagger_path}") with open(swagger_path, "r", encoding="utf-8") as f: return json.load(f) else: # First, check in the package data directory try: package_data_path = Path(__file__).parent / "data" / "swagger.json" if package_data_path.is_file(): logger.info(f"Found Swagger file in package data: {package_data_path}") with open(package_data_path, "r", encoding="utf-8") as f: return json.load(f) except Exception as e: logger.debug(f"Could not load Swagger file from package data: {e}") # Then, look for swagger.json in the current directory and parent directories logger.info("Looking for swagger.json in current directory and parent directories") current_dir = Path.cwd() # Check current directory first local_swagger_path = current_dir / "swagger.json" if local_swagger_path.is_file(): logger.info(f"Found Swagger file at {local_swagger_path}") with open(local_swagger_path, "r", encoding="utf-8") as f: return json.load(f) # Check parent directories for parent in current_dir.parents: parent_swagger_path = parent / "swagger.json" if parent_swagger_path.is_file(): logger.info(f"Found Swagger file at {parent_swagger_path}") with open(parent_swagger_path, "r", encoding="utf-8") as f: return json.load(f) # If the file wasn't found, fetch it from the URL and save it logger.info("Swagger file not found locally, fetching from URL") swagger_spec = _fetch_swagger_from_url() # Save the fetched spec to the current directory save_swagger_path = current_dir / "swagger.json" logger.info(f"Saving Swagger file to {save_swagger_path}") try: with open(save_swagger_path, "w", encoding="utf-8") as f: json.dump(swagger_spec, f) logger.info(f"Saved Swagger file to {save_swagger_path}") except Exception as e: logger.warning(f"Failed to save Swagger file: {e}") return swagger_spec def _fetch_swagger_from_url(url: str = SWAGGER_URL) -> Dict[str, Any]: """ Fetch the Swagger specification from the specified URL. Args: url: URL of the Swagger JSON file. Returns: The Swagger specification as a dictionary. """ logger.info(f"Fetching Swagger specification from {url}") try: response = requests.get(url) response.raise_for_status() return response.json() except requests.RequestException as e: logger.error(f"Failed to fetch Swagger spec: {e}") raise Exception(f"Failed to fetch Swagger specification: {e}") except json.JSONDecodeError as e: logger.error(f"Failed to parse Swagger spec: {e}") raise Exception(f"Failed to parse Swagger specification: {e}") def _filter_openapi_spec(spec: Dict[str, Any], allowed_paths: List[str]) -> Dict[str, Any]: """ Filter an OpenAPI specification to only include specified paths and clean up schema references. Args: spec: The original OpenAPI specification. allowed_paths: List of paths to include. Returns: A filtered OpenAPI specification with cleaned schema references. """ # Use deepcopy to ensure all nested structures are properly copied filtered_spec = deepcopy(spec) # Filter paths original_paths = filtered_spec.get("paths", {}) filtered_paths = { path: path_item for path, path_item in original_paths.items() if path in allowed_paths } filtered_spec["paths"] = filtered_paths # Clean up schema references that might be broken # Remove problematic schema references from request bodies and parameters for path, path_item in filtered_paths.items(): for method, operation in path_item.items(): if method.lower() not in ["get", "post", "put", "delete", "patch"]: continue # Clean request body schemas if "requestBody" in operation: request_body = operation["requestBody"] if "content" in request_body: for content_type, content_info in request_body["content"].items(): if "schema" in content_info: schema = content_info["schema"] # Remove problematic $ref references if "$ref" in schema and "incident_trigger_params" in schema["$ref"]: # Replace with a generic object schema content_info["schema"] = { "type": "object", "description": "Request parameters for this endpoint", "additionalProperties": True } # Remove response schemas to avoid validation issues # FastMCP will still return the data, just without strict validation if "responses" in operation: for status_code, response in operation["responses"].items(): if "content" in response: for content_type, content_info in response["content"].items(): if "schema" in content_info: # Replace with a simple schema that accepts any response content_info["schema"] = { "type": "object", "additionalProperties": True } # Clean parameter schemas (parameter names are already sanitized) if "parameters" in operation: for param in operation["parameters"]: if "schema" in param and "$ref" in param["schema"]: ref_path = param["schema"]["$ref"] if "incident_trigger_params" in ref_path: # Replace with a simple string schema param["schema"] = { "type": "string", "description": param.get("description", "Parameter value") } # Add/modify pagination limits to alerts and incident-related endpoints to prevent infinite loops if method.lower() == "get" and ("alerts" in path.lower() or "incident" in path.lower()): if "parameters" not in operation: operation["parameters"] = [] # Find existing pagination parameters and update them with limits page_size_param = None page_number_param = None for param in operation["parameters"]: if param.get("name") == "page[size]": page_size_param = param elif param.get("name") == "page[number]": page_number_param = param # Update or add page[size] parameter with limits if page_size_param: # Update existing parameter with limits if "schema" not in page_size_param: page_size_param["schema"] = {} page_size_param["schema"].update({ "type": "integer", "default": 10, "minimum": 1, "maximum": 20, "description": "Number of results per page (max: 20)" }) else: # Add new parameter operation["parameters"].append({ "name": "page[size]", "in": "query", "required": False, "schema": { "type": "integer", "default": 10, "minimum": 1, "maximum": 20, "description": "Number of results per page (max: 20)" } }) # Update or add page[number] parameter with defaults if page_number_param: # Update existing parameter if "schema" not in page_number_param: page_number_param["schema"] = {} page_number_param["schema"].update({ "type": "integer", "default": 1, "minimum": 1, "description": "Page number to retrieve" }) else: # Add new parameter operation["parameters"].append({ "name": "page[number]", "in": "query", "required": False, "schema": { "type": "integer", "default": 1, "minimum": 1, "description": "Page number to retrieve" } }) # Add sparse fieldsets for alerts endpoints to reduce payload size if "alert" in path.lower(): # Add fields[alerts] parameter with essential fields only - make it required with default operation["parameters"].append({ "name": "fields[alerts]", "in": "query", "required": True, "schema": { "type": "string", "default": "id,summary,status,started_at,ended_at,short_id,alert_urgency_id,source,noise", "description": "Comma-separated list of alert fields to include (reduces payload size)" } }) # Add include parameter for alerts endpoints to minimize relationships if "alert" in path.lower(): # Check if include parameter already exists include_param_exists = any(param.get("name") == "include" for param in operation["parameters"]) if not include_param_exists: operation["parameters"].append({ "name": "include", "in": "query", "required": True, "schema": { "type": "string", "default": "", "description": "Related resources to include (empty for minimal payload)" } }) # Add sparse fieldsets for incidents endpoints to reduce payload size if "incident" in path.lower(): # Add fields[incidents] parameter with essential fields only - make it required with default operation["parameters"].append({ "name": "fields[incidents]", "in": "query", "required": True, "schema": { "type": "string", "default": "id,title,summary,status,severity,created_at,updated_at,url,started_at", "description": "Comma-separated list of incident fields to include (reduces payload size)" } }) # Add include parameter for incidents endpoints to minimize relationships if "incident" in path.lower(): # Check if include parameter already exists include_param_exists = any(param.get("name") == "include" for param in operation["parameters"]) if not include_param_exists: operation["parameters"].append({ "name": "include", "in": "query", "required": True, "schema": { "type": "string", "default": "", "description": "Related resources to include (empty for minimal payload)" } }) # Also clean up any remaining broken references in components if "components" in filtered_spec and "schemas" in filtered_spec["components"]: schemas = filtered_spec["components"]["schemas"] # Remove or fix any schemas that reference missing components schemas_to_remove = [] for schema_name, schema_def in schemas.items(): if isinstance(schema_def, dict) and _has_broken_references(schema_def): schemas_to_remove.append(schema_name) for schema_name in schemas_to_remove: logger.warning(f"Removing schema with broken references: {schema_name}") del schemas[schema_name] # Clean up any operation-level references to removed schemas removed_schemas = set() if "components" in filtered_spec and "schemas" in filtered_spec["components"]: removed_schemas = {"new_workflow", "update_workflow", "workflow", "workflow_task", "workflow_response", "workflow_list", "new_workflow_task", "update_workflow_task", "workflow_task_response", "workflow_task_list"} for path, path_item in filtered_spec.get("paths", {}).items(): for method, operation in path_item.items(): if method.lower() not in ["get", "post", "put", "delete", "patch"]: continue # Clean request body references if "requestBody" in operation: request_body = operation["requestBody"] if "content" in request_body: for content_type, content_info in request_body["content"].items(): if "schema" in content_info and "$ref" in content_info["schema"]: ref_path = content_info["schema"]["$ref"] schema_name = ref_path.split("/")[-1] if schema_name in removed_schemas: # Replace with generic object schema content_info["schema"] = { "type": "object", "description": "Request data for this endpoint", "additionalProperties": True } logger.debug(f"Cleaned broken reference in {method.upper()} {path} request body: {ref_path}") # Clean response references if "responses" in operation: for status_code, response in operation["responses"].items(): if "content" in response: for content_type, content_info in response["content"].items(): if "schema" in content_info and "$ref" in content_info["schema"]: ref_path = content_info["schema"]["$ref"] schema_name = ref_path.split("/")[-1] if schema_name in removed_schemas: # Replace with generic object schema content_info["schema"] = { "type": "object", "description": "Response data from this endpoint", "additionalProperties": True } logger.debug(f"Cleaned broken reference in {method.upper()} {path} response: {ref_path}") return filtered_spec def _has_broken_references(schema_def: Dict[str, Any]) -> bool: """Check if a schema definition has broken references.""" if "$ref" in schema_def: ref_path = schema_def["$ref"] # List of known broken references in the Rootly API spec broken_refs = [ "incident_trigger_params", "new_workflow", "update_workflow", "workflow", "new_workflow_task", "update_workflow_task", "workflow_task", "workflow_task_response", "workflow_task_list", "workflow_response", "workflow_list", "workflow_custom_field_selection_response", "workflow_custom_field_selection_list", "workflow_form_field_condition_response", "workflow_form_field_condition_list", "workflow_group_response", "workflow_group_list", "workflow_run_response", "workflow_runs_list" ] if any(broken_ref in ref_path for broken_ref in broken_refs): return True # Recursively check nested schemas for key, value in schema_def.items(): if isinstance(value, dict): if _has_broken_references(value): return True elif isinstance(value, list): for item in value: if isinstance(item, dict) and _has_broken_references(item): return True return False # Legacy class for backward compatibility class RootlyMCPServer(FastMCP): """ Legacy Rootly MCP Server class for backward compatibility. This class is deprecated. Use create_rootly_mcp_server() instead. """ def __init__( self, swagger_path: Optional[str] = None, name: str = "Rootly", default_page_size: int = 10, allowed_paths: Optional[List[str]] = None, hosted: bool = False, *args, **kwargs, ): logger.warning( "RootlyMCPServer class is deprecated. Use create_rootly_mcp_server() function instead." ) # Create the server using the new function server = create_rootly_mcp_server( swagger_path=swagger_path, name=name, allowed_paths=allowed_paths, hosted=hosted ) # Copy the server's state to this instance super().__init__(name, *args, **kwargs) # For compatibility, store reference to the new server # Tools will be accessed via async methods when needed self._server = server self._tools = {} # Placeholder - tools should be accessed via async methods self._resources = getattr(server, '_resources', {}) self._prompts = getattr(server, '_prompts', {})

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Rootly-AI-Labs/Rootly-MCP-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server