# Glama
# Semantic Scholar MCP Server

"""
Paper-related API endpoints for the Semantic Scholar API.
"""

from typing import Dict, List, Optional
from fastmcp import Context
import httpx

# Import mcp from centralized location instead of server
from ..mcp import mcp
from ..config import PaperFields, CitationReferenceFields, AuthorDetailFields, Config, ErrorType
from ..utils.http import make_request, get_api_key
from ..utils.errors import create_error_response

@mcp.tool()
async def paper_relevance_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 10
) -> Dict:
    """
    Search for papers on Semantic Scholar using relevance-based ranking.

    This endpoint is optimized for finding the most relevant papers matching a text query.
    Results are sorted by relevance score.

    Args:
        query (str): A text query to search for. The query will be matched against paper titles,
            abstracts, venue names, and author names. Surrounding whitespace is removed
            before the request is sent; a blank query is rejected.

        fields (Optional[List[str]]): List of fields to return for each paper.
            paperId and title are always returned.
            When omitted, PaperFields.DEFAULT is requested.

        publication_types (Optional[List[str]]): Filter by publication types.

        open_access_pdf (bool): If True, only include papers with a public PDF.
            Default: False

        min_citation_count (Optional[int]): Minimum number of citations required.
            Must not be negative.

        year (Optional[str]): Filter by publication year. Supports several formats:
            - Single year: "2019"
            - Year range: "2016-2020"
            - Since year: "2010-"
            - Until year: "-2015"

        venue (Optional[List[str]]): Filter by publication venues.
            Accepts full venue names or ISO4 abbreviations.

        fields_of_study (Optional[List[str]]): Filter by fields of study.

        offset (int): Number of results to skip for pagination. Must not be negative.
            Default: 0

        limit (int): Maximum number of results to return. Must be at least 1;
            values above 100 are silently reduced to 100.
            Default: 10
            Maximum: 100

    Returns:
        Dict: {
            "total": int,      # Total number of papers matching the query
            "offset": int,     # Current offset in the results
            "next": int,       # Offset for the next page of results (if available)
            "data": List[Dict] # List of papers with requested fields
        }

        A validation error response is returned instead when an argument is rejected.
    """
    # Normalize once so the validated value is the value that is sent.
    query = query.strip()
    if not query:
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Validate and prepare fields
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )

    # Reject out-of-range numeric arguments locally instead of forwarding them.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative"
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1"
        )
    if min_citation_count is not None and min_citation_count < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Minimum citation count cannot be negative"
        )

    # Oversized limits are capped rather than rejected.
    limit = min(limit, 100)
    params = {
        "query": query,
        "offset": offset,
        "limit": limit,
        "fields": ",".join(fields)
    }

    # Add optional filters
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if open_access_pdf:
        params["openAccessPdf"] = "true"
    if min_citation_count is not None:
        params["minCitationCount"] = min_citation_count
    if year:
        params["year"] = year
    if venue:
        params["venue"] = ",".join(venue)
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    return await make_request("/paper/search", params)

@mcp.tool()
async def paper_bulk_search(
    context: Context,
    query: Optional[str] = None,
    token: Optional[str] = None,
    fields: Optional[List[str]] = None,
    sort: Optional[str] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    publication_date_or_year: Optional[str] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Bulk search for papers with advanced filtering and sorting options.

    Intended for retrieving large sets of papers efficiently.

    Args:
        query (Optional[str]): Text query to match against paper title and abstract.
            Supports boolean logic with +, |, -, ", *, (), and ~N.
            Surrounding whitespace is removed; a blank query is omitted from the request.

        token (Optional[str]): Continuation token for pagination

        fields (Optional[List[str]]): Fields to return for each paper
            paperId is always returned
            Default: paperId and title only

        sort (Optional[str]): Sort order in format 'field:order'
            Fields: paperId, publicationDate, citationCount
            Order: asc (default), desc
            Default: 'paperId:asc'

        publication_types (Optional[List[str]]): Filter by publication types

        open_access_pdf (bool): Only include papers with public PDF

        min_citation_count (Optional[int]): Minimum citation threshold.
            Must not be negative.

        publication_date_or_year (Optional[str]): Date/year range filter
            Format: <startDate>:<endDate> in YYYY-MM-DD
            When given, it takes precedence and `year` is not sent.

        year (Optional[str]): Publication year filter
            Examples: '2019', '2016-2020', '2010-', '-2015'
            Only used when `publication_date_or_year` is not given.

        venue (Optional[List[str]]): Filter by publication venues

        fields_of_study (Optional[List[str]]): Filter by fields of study

    Returns:
        Dict: {
            'total': int,      # Total matching papers
            'token': str,      # Continuation token for next batch
            'data': List[Dict] # Papers with requested fields
        }

        A validation error response is returned instead when an argument is rejected.
    """
    params = {}

    # Strip before the truthiness test so a whitespace-only query is dropped
    # instead of being sent as an empty "query" parameter.
    cleaned_query = query.strip() if query else ""
    if cleaned_query:
        params["query"] = cleaned_query

    if token:
        params["token"] = token

    if fields:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    if sort:
        valid_sort_fields = ["paperId", "publicationDate", "citationCount"]
        valid_sort_orders = ["asc", "desc"]

        # Exactly one ':' is required; anything else is a format error.
        sort_parts = sort.split(":")
        if len(sort_parts) != 2:
            return create_error_response(
                ErrorType.VALIDATION,
                "Sort must be in format 'field:order'"
            )
        sort_field, sort_order = sort_parts
        if sort_field not in valid_sort_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields)}"
            )
        if sort_order not in valid_sort_orders:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid sort order. Must be one of: {', '.join(valid_sort_orders)}"
            )
        params["sort"] = sort

    if publication_types:
        valid_types = {
            "Review", "JournalArticle", "CaseReport", "ClinicalTrial",
            "Conference", "Dataset", "Editorial", "LettersAndComments",
            "MetaAnalysis", "News", "Study", "Book", "BookSection"
        }
        invalid_types = set(publication_types) - valid_types
        if invalid_types:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid publication types: {', '.join(sorted(invalid_types))}",
                {"valid_types": sorted(valid_types)}
            )
        params["publicationTypes"] = ",".join(publication_types)

    if open_access_pdf:
        params["openAccessPdf"] = "true"

    if min_citation_count is not None:
        if min_citation_count < 0:
            return create_error_response(
                ErrorType.VALIDATION,
                "Minimum citation count cannot be negative"
            )
        params["minCitationCount"] = str(min_citation_count)

    # The date/year range wins over the plain year filter when both are supplied.
    if publication_date_or_year:
        params["publicationDateOrYear"] = publication_date_or_year
    elif year:
        params["year"] = year

    if venue:
        params["venue"] = ",".join(venue)

    if fields_of_study:
        valid_study_fields = {
            "Computer Science", "Medicine", "Chemistry", "Biology",
            "Materials Science", "Physics", "Geology", "Psychology",
            "Art", "History", "Geography", "Sociology", "Business",
            "Political Science", "Economics", "Philosophy", "Mathematics",
            "Engineering", "Environmental Science", "Agricultural and Food Sciences",
            "Education", "Law", "Linguistics"
        }
        invalid_study_fields = set(fields_of_study) - valid_study_fields
        if invalid_study_fields:
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields of study: {', '.join(sorted(invalid_study_fields))}",
                {"valid_fields": sorted(valid_study_fields)}
            )
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    # Success payloads and error payloads from make_request are both returned as-is.
    return await make_request("/paper/search/bulk", params)

@mcp.tool()
async def paper_title_search(
    context: Context,
    query: str,
    fields: Optional[List[str]] = None,
    publication_types: Optional[List[str]] = None,
    open_access_pdf: bool = False,
    min_citation_count: Optional[int] = None,
    year: Optional[str] = None,
    venue: Optional[List[str]] = None,
    fields_of_study: Optional[List[str]] = None
) -> Dict:
    """
    Find a single paper by title match.

    This endpoint is optimized for finding a specific paper by its title and returns
    the best matching paper based on title similarity.

    Args:
        query (str): The title text to search for. The query will be matched against paper titles
            to find the closest match. Surrounding whitespace is removed before the
            request is sent; a blank query is rejected.

        fields (Optional[List[str]]): List of fields to return for the paper.
            paperId and title are always returned.
            When omitted, PaperFields.DEFAULT is requested.

        publication_types (Optional[List[str]]): Filter by publication types.

        open_access_pdf (bool): If True, only include papers with a public PDF.
            Default: False

        min_citation_count (Optional[int]): Minimum number of citations required.
            Must not be negative.

        year (Optional[str]): Filter by publication year. Supports several formats:
            - Single year: "2019"
            - Year range: "2016-2020"
            - Since year: "2010-"
            - Until year: "-2015"

        venue (Optional[List[str]]): Filter by publication venues.
            Accepts full venue names or ISO4 abbreviations.

        fields_of_study (Optional[List[str]]): Filter by fields of study.

    Returns:
        Dict: {
            "paperId": str,      # Semantic Scholar Paper ID
            "title": str,        # Paper title
            "matchScore": float, # Similarity score between query and matched title
            ...                  # Additional requested fields
        }

        Returns error response if no matching paper is found.
    """
    # The caller's original text is kept for error context; the stripped form is sent.
    title_query = query.strip()
    if not title_query:
        return create_error_response(
            ErrorType.VALIDATION,
            "Query string cannot be empty"
        )

    # Validate and prepare fields
    if fields is None:
        fields = PaperFields.DEFAULT
    else:
        invalid_fields = set(fields) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )

    if min_citation_count is not None and min_citation_count < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Minimum citation count cannot be negative"
        )

    params = {"query": title_query}

    # Optional parameters are only sent when they carry a value.
    if fields:
        params["fields"] = ",".join(fields)
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if open_access_pdf:
        params["openAccessPdf"] = "true"
    if min_citation_count is not None:
        params["minCitationCount"] = str(min_citation_count)
    if year:
        params["year"] = year
    if venue:
        params["venue"] = ",".join(venue)
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)

    result = await make_request("/paper/search/match", params)

    # A 404 reported in the error message is rewritten as "no match";
    # every other result, error or not, is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "No matching paper found",
                {"original_query": query}
            )

    return result

@mcp.tool()
async def paper_details(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None
) -> Dict:
    """
    Get details about a paper using various types of identifiers.

    This endpoint provides comprehensive metadata about a paper.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Surrounding whitespace is removed; a blank identifier is rejected.

        fields (Optional[List[str]]): List of fields to return.
            paperId is always returned.
            The list is forwarded without local validation.

    Returns:
        Dict: Paper details with requested fields.
            Always includes paperId.
            Returns error response if paper not found.
    """
    # Use the stripped identifier in the URL path so padding cannot cause a failed lookup.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    params = {}
    if fields:
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}", params)

    # A 404 reported in the error message is rewritten as "Paper not found";
    # every other result, error or not, is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result

@mcp.tool()
async def paper_batch_details(
    context: Context,
    paper_ids: List[str],
    fields: Optional[str] = None
) -> Dict:
    """
    Get details for multiple papers in a single batch request.

    This endpoint is optimized for efficiently retrieving details about known papers.
    The IDs are sent as a JSON body in a POST to the /paper/batch endpoint.

    Args:
        paper_ids (List[str]): List of paper identifiers. Each ID can be in any of these formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Maximum: 500 IDs per request

        fields (Optional[str]): Comma-separated list of fields to return for each paper.
            paperId is always returned.
            Whitespace around each name and empty entries are ignored,
            so "title, year" is treated as "title,year".

    Returns:
        List[Dict]: List of paper details with requested fields.
            - Results maintain the same order as input paper_ids
            - Invalid or not found paper IDs return null in the results
            - Each paper object contains the requested fields
            - paperId is always included in each paper object

        On validation, HTTP, timeout or other failures an error response
        built by create_error_response is returned instead.

        NOTE(review): the signature is annotated ``Dict`` although the success
        payload is documented as a list; the annotation is left unchanged here
        to keep the tool interface stable.
    """
    if not paper_ids:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper IDs list cannot be empty"
        )

    if len(paper_ids) > 500:
        return create_error_response(
            ErrorType.VALIDATION,
            "Cannot process more than 500 paper IDs at once",
            {"max_papers": 500, "received": len(paper_ids)}
        )

    params = {}
    if fields:
        # Trim each name and drop blanks so harmless spacing is not reported as invalid.
        field_list = [name.strip() for name in fields.split(",")]
        field_list = [name for name in field_list if name]
        invalid_fields = set(field_list) - PaperFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(PaperFields.VALID_FIELDS)}
            )
        if field_list:
            params["fields"] = ",".join(field_list)

    try:
        async with httpx.AsyncClient(timeout=Config.TIMEOUT) as client:
            # The API key header is only attached when a key is configured.
            api_key = get_api_key()
            headers = {"x-api-key": api_key} if api_key else {}

            response = await client.post(
                f"{Config.BASE_URL}/paper/batch",
                params=params,
                json={"ids": paper_ids},
                headers=headers
            )
            response.raise_for_status()
            return response.json()

    except httpx.HTTPStatusError as e:
        # 429 gets its own error type and carries the server's retry-after header.
        if e.response.status_code == 429:
            return create_error_response(
                ErrorType.RATE_LIMIT,
                "Rate limit exceeded",
                {"retry_after": e.response.headers.get("retry-after")}
            )
        return create_error_response(
            ErrorType.API_ERROR,
            f"HTTP error: {e.response.status_code}",
            {"response": e.response.text}
        )
    except httpx.TimeoutException:
        return create_error_response(
            ErrorType.TIMEOUT,
            f"Request timed out after {Config.TIMEOUT} seconds"
        )
    except Exception as e:
        # Tool boundary: any other failure is reported as an error response, not raised.
        return create_error_response(
            ErrorType.API_ERROR,
            str(e)
        )

@mcp.tool()
async def paper_authors(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get details about the authors of a paper with pagination support.

    This endpoint provides author information and their contributions.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Surrounding whitespace is removed; a blank identifier is rejected.

        fields (Optional[List[str]]): List of fields to return for each author.
            authorId is always returned.

        offset (int): Number of authors to skip for pagination. Must not be negative.
            Default: 0

        limit (int): Maximum number of authors to return. Must be between 1 and 1000.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of authors with requested fields
        }

        An error response is returned instead when an argument is rejected
        or the paper is not found.
    """
    # Use the stripped identifier in the URL path so padding cannot cause a failed lookup.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination arguments on both ends before calling the API.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative"
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        invalid_fields = set(fields) - AuthorDetailFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(AuthorDetailFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/authors", params)

    # A 404 reported in the error message is rewritten as "Paper not found";
    # every other result, error or not, is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result

@mcp.tool()
async def paper_citations(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers that cite the specified paper.

    These are the papers where this paper appears in their bibliography.
    This endpoint provides detailed citation information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Surrounding whitespace is removed; a blank identifier is rejected.

        fields (Optional[List[str]]): List of fields to return for each citing paper.
            paperId is always returned.

        offset (int): Number of citations to skip for pagination. Must not be negative.
            Default: 0

        limit (int): Maximum number of citations to return. Must be between 1 and 1000.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of citing papers with requested fields
        }

        An error response is returned instead when an argument is rejected
        or the paper is not found.
    """
    # Use the stripped identifier in the URL path so padding cannot cause a failed lookup.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination arguments on both ends before calling the API.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative"
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(CitationReferenceFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/citations", params)

    # A 404 reported in the error message is rewritten as "Paper not found";
    # every other result, error or not, is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result

@mcp.tool()
async def paper_references(
    context: Context,
    paper_id: str,
    fields: Optional[List[str]] = None,
    offset: int = 0,
    limit: int = 100
) -> Dict:
    """
    Get papers cited by the specified paper.

    These are the papers appearing in this paper's bibliography.
    This endpoint provides detailed reference information including citation contexts.

    Args:
        paper_id (str): Paper identifier in one of the following formats:
            - Semantic Scholar ID (e.g., "649def34f8be52c8b66281af98ae884c09aef38b")
            - DOI:<doi> (e.g., "DOI:10.18653/v1/N18-3011")
            - ARXIV:<id> (e.g., "ARXIV:2106.15928")
            - MAG:<id> (e.g., "MAG:112218234")
            - ACL:<id> (e.g., "ACL:W12-3903")
            - PMID:<id> (e.g., "PMID:19872477")
            - PMCID:<id> (e.g., "PMCID:2323736")
            - URL:<url> (e.g., "URL:https://arxiv.org/abs/2106.15928v1")
            Surrounding whitespace is removed; a blank identifier is rejected.

        fields (Optional[List[str]]): List of fields to return for each referenced paper.
            paperId is always returned.

        offset (int): Number of references to skip for pagination. Must not be negative.
            Default: 0

        limit (int): Maximum number of references to return. Must be between 1 and 1000.
            Default: 100
            Maximum: 1000

    Returns:
        Dict: {
            "offset": int,     # Current offset in the results
            "next": int,       # Next offset (if more results available)
            "data": List[Dict] # List of referenced papers with requested fields
        }

        An error response is returned instead when an argument is rejected
        or the paper is not found.
    """
    # Use the stripped identifier in the URL path so padding cannot cause a failed lookup.
    paper_id = paper_id.strip()
    if not paper_id:
        return create_error_response(
            ErrorType.VALIDATION,
            "Paper ID cannot be empty"
        )

    # Validate pagination arguments on both ends before calling the API.
    if offset < 0:
        return create_error_response(
            ErrorType.VALIDATION,
            "Offset cannot be negative"
        )
    if limit < 1:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit must be at least 1"
        )
    if limit > 1000:
        return create_error_response(
            ErrorType.VALIDATION,
            "Limit cannot exceed 1000",
            {"max_limit": 1000}
        )

    params = {
        "offset": offset,
        "limit": limit
    }
    if fields:
        invalid_fields = set(fields) - CitationReferenceFields.VALID_FIELDS
        if invalid_fields:
            # Sorted so the error text does not depend on set iteration order.
            return create_error_response(
                ErrorType.VALIDATION,
                f"Invalid fields: {', '.join(sorted(invalid_fields))}",
                {"valid_fields": sorted(CitationReferenceFields.VALID_FIELDS)}
            )
        params["fields"] = ",".join(fields)

    result = await make_request(f"/paper/{paper_id}/references", params)

    # A 404 reported in the error message is rewritten as "Paper not found";
    # every other result, error or not, is returned unchanged.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"].get("message", "")
        if "404" in error_msg:
            return create_error_response(
                ErrorType.VALIDATION,
                "Paper not found",
                {"paper_id": paper_id}
            )

    return result