# studies.py
"""
Studies endpoint module for the cBioPortal MCP server.
Contains all study-related endpoint methods:
- get_cancer_studies: List cancer studies with pagination
- search_studies: Search studies by keyword
- get_study_details: Get detailed study information
- get_multiple_studies: Fetch multiple studies concurrently
- get_cancer_types: List cancer types with pagination
"""
import asyncio
import time
from typing import Any, Dict, List, Optional
import httpx
from .base import BaseEndpoint, handle_api_errors, validate_paginated_params
from ..utils.validation import (
    validate_page_params,
    validate_sort_params,
    validate_study_id,
    validate_keyword,
)
from ..utils.pagination import collect_all_results
from ..utils.logging import get_logger
logger = get_logger(__name__)
class StudiesEndpoints(BaseEndpoint):
    """Handles all study-related endpoints for the cBioPortal MCP server."""
    def __init__(self, api_client) -> None:
        """
        Initialize the endpoint group by forwarding ``api_client`` to the parent initializer.
        Args:
            api_client: Client object handed unchanged to ``super().__init__``
        """
        # NOTE(review): the methods of this class read ``self.api_client``;
        # presumably the parent initializer stores it - confirm in BaseEndpoint.
        super().__init__(api_client)
    @handle_api_errors("get cancer studies")
    @validate_paginated_params
    async def get_cancer_studies(
        self,
        page_number: int = 0,
        page_size: int = 50,
        sort_by: Optional[str] = None,
        direction: str = "ASC",
        limit: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        List the cancer studies available in cBioPortal, one page at a time.
        The work is delegated to ``self.paginated_request`` for the "studies"
        endpoint with ``data_key="studies"``.
        Args:
            page_number: Page number to retrieve (0-based)
            page_size: Number of items per page
            sort_by: Field to sort by
            direction: Sort direction (ASC or DESC)
            limit: Maximum number of items to return across all pages (None for no limit)
        Returns:
            The dictionary produced by ``self.paginated_request``
        """
        # Gather the request options first so the delegation is a single call.
        request_options = {
            "endpoint": "studies",
            "page_number": page_number,
            "page_size": page_size,
            "sort_by": sort_by,
            "direction": direction,
            "limit": limit,
            "data_key": "studies",
        }
        listing = await self.paginated_request(**request_options)
        return listing
    @handle_api_errors("search studies")
    async def search_studies(
        self,
        keyword: str,
        page_number: int = 0,
        page_size: int = 50,
        sort_by: Optional[str] = None,
        direction: str = "ASC",
        limit: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Search for cancer studies whose name or description contains a keyword.
        Every study is fetched with a single "studies" request; the keyword
        filter, the optional sort and the pagination are then applied locally.
        Args:
            keyword: Text to look for (case-insensitive substring match)
            page_number: Page number to retrieve (0-based)
            page_size: Number of items per page
            sort_by: Study field to sort the matches by (falsy keeps the API order)
            direction: Sort direction (ASC or DESC)
            limit: Cap on the number of items returned from the requested page;
                0 returns every match and ignores paging; None applies no cap
        Returns:
            Dictionary with "studies" and "pagination" keys on success, or a
            dictionary with a single "error" key when the request fails or the
            API answers with something other than a list
        """
        # Input validation runs before any request is made.
        validate_keyword(keyword)
        validate_page_params(page_number, page_size, limit)
        validate_sort_params(sort_by, direction)

        def _searchable(study: Dict[str, Any], field: str) -> str:
            # A field can be absent or explicitly null; both count as empty
            # text instead of aborting the whole search.
            value = study.get(field)
            return "" if value is None else str(value).lower()

        def _sort_key(study: Dict[str, Any]):
            # Real numbers compare numerically (so 20 sorts before 100); every
            # other value keeps the string ordering. The leading tag keeps the
            # two groups from ever being compared with each other.
            value = study.get(sort_by, "")
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return (0, value, "")
            return (1, 0, str(value))

        try:
            all_studies = await self.api_client.make_api_request("studies")
            if not isinstance(all_studies, list):
                # The client may hand back an error payload instead of a list.
                error_message = (
                    "Unexpected response from API when fetching all studies."
                )
                if isinstance(all_studies, dict) and "error" in all_studies:
                    error_message = all_studies["error"]
                return {
                    "error": f"Failed to search studies for '{keyword}': {error_message}"
                }
            keyword_lower = keyword.lower()
            matching_studies = [
                study
                for study in all_studies
                # Skip malformed (non-dict) entries rather than failing the search.
                if isinstance(study, dict)
                and (
                    keyword_lower in _searchable(study, "name")
                    or keyword_lower in _searchable(study, "description")
                )
            ]
            if sort_by:
                matching_studies.sort(
                    key=_sort_key, reverse=direction.upper() == "DESC"
                )
            total_count = len(matching_studies)
            # limit == 0 means "everything was requested": return every match
            # so the data agrees with the page_size/has_more metadata below.
            return_all = limit == 0
            if return_all:
                selected_studies = matching_studies
                has_more = False
            else:
                start_idx = page_number * page_size
                end_idx = start_idx + page_size
                selected_studies = matching_studies[start_idx:end_idx]
                if limit is not None and limit > 0:
                    selected_studies = selected_studies[:limit]
                has_more = end_idx < total_count
            return {
                "studies": selected_studies,
                "pagination": {
                    "page": page_number,
                    "page_size": total_count if return_all else page_size,
                    "total_found": total_count,
                    "has_more": has_more,
                },
            }
        except httpx.HTTPStatusError as exc:
            return {
                "error": f"Failed to search studies for '{keyword}': API request failed with status {exc.response.status_code}"
            }
        except httpx.RequestError as exc:
            return {
                "error": f"Failed to search studies for '{keyword}': Network error - {str(exc)}"
            }
        except Exception as e:
            # Last-resort boundary: report the failure in the response
            # instead of letting it propagate.
            return {
                "error": f"Failed to search studies for '{keyword}': An unexpected error occurred - {str(e)}"
            }
    @handle_api_errors("get study details")
    async def get_study_details(self, study_id: str) -> Dict[str, Any]:
        """
        Fetch the details of a single cancer study.
        Args:
            study_id: The ID of the cancer study
        Returns:
            ``{"study": <API response>}`` when the request succeeds, otherwise
            ``{"error": <message>}`` describing the exception that was raised
        """
        # The identifier is validated before any request is made.
        validate_study_id(study_id)
        resource_path = f"studies/{study_id}"
        try:
            details = await self.api_client.make_api_request(resource_path)
        except Exception as exc:
            # Any failure of the request is reported in the response body.
            return {"error": f"Failed to get study details for {study_id}: {exc!s}"}
        return {"study": details}
    @handle_api_errors("get multiple studies")
    async def get_multiple_studies(self, study_ids: List[str]) -> Dict:
        """
        Fetch the details of several studies with concurrent requests.
        One ``studies/<id>`` request is issued per entry of ``study_ids`` and
        all of them are awaited together through ``asyncio.gather``. A failed
        request does not abort the others; its entry holds an "error" message.
        Args:
            study_ids: List of study IDs to fetch
        Returns:
            Dictionary with "studies" (study ID -> API response, or
            ``{"error": ...}`` for a failed request) and "metadata" (requested
            count, error count, elapsed seconds, concurrency flag). For an
            empty ``study_ids`` the metadata has no "execution_time" entry.
        """
        if not study_ids:
            empty_metadata = {"count": 0, "errors": 0, "concurrent": True}
            return {"studies": {}, "metadata": empty_metadata}

        async def _load(sid):
            # Yields (study id, payload, succeeded); exceptions are folded
            # into the payload so that gather() never raises for one study.
            try:
                payload = await self.api_client.make_api_request(f"studies/{sid}")
            except Exception as exc:
                return sid, {"error": str(exc)}, False
            return sid, payload, True

        pending = [_load(sid) for sid in study_ids]
        started = time.perf_counter()
        outcomes = await asyncio.gather(*pending)
        elapsed = time.perf_counter() - started
        # Later duplicates of an ID overwrite earlier ones, in request order.
        studies_by_id = {sid: payload for sid, payload, _ok in outcomes}
        failed = sum(1 for _sid, _payload, ok in outcomes if not ok)
        metadata = {
            "count": len(study_ids),
            "errors": failed,
            "execution_time": round(elapsed, 3),
            "concurrent": True,
        }
        return {"studies": studies_by_id, "metadata": metadata}
    @handle_api_errors("get cancer types")
    @validate_paginated_params
    async def get_cancer_types(
        self,
        page_number: int = 0,
        page_size: int = 50,
        sort_by: Optional[str] = None,
        direction: str = "ASC",
        limit: Optional[int] = None,
    ) -> Dict:
        """
        List the cancer types available in cBioPortal, one page at a time.
        The work is delegated to ``self.paginated_request`` for the
        "cancer-types" endpoint with ``data_key="cancer_types"``.
        Args:
            page_number: Page number (0-based)
            page_size: Number of items per page
            sort_by: Field to sort by
            direction: Sort direction (ASC or DESC)
            limit: Maximum number of items to return across all pages (None for no limit)
        Returns:
            The dictionary produced by ``self.paginated_request``
        """
        # Paging and sorting options are forwarded unchanged.
        paging_options = {
            "page_number": page_number,
            "page_size": page_size,
            "sort_by": sort_by,
            "direction": direction,
            "limit": limit,
        }
        cancer_types = await self.paginated_request(
            endpoint="cancer-types", data_key="cancer_types", **paging_options
        )
        return cancer_types