Skip to main content
Glama
shreyaskarnik

Hugging Face MCP Server

get-dataset-info

Retrieve detailed metadata about Hugging Face datasets to understand their structure, contents, and usage requirements before downloading or processing.

Instructions

Get detailed information about a specific dataset

Input Schema

Table | JSON Schema
Name | Required | Description | Default
dataset_id | Yes | The ID of the dataset (e.g., 'squad') |

Implementation Reference

  • Handler logic for the 'get-dataset-info' tool: retrieves dataset information from the Hugging Face API using make_hf_request, formats the response including dataset card if available, and returns it as JSON.
    # Branch for the 'get-dataset-info' tool: fetch one dataset's metadata
    # and return it to the caller as a single pretty-printed JSON text item.
    elif name == "get-dataset-info":
        dataset_id = arguments.get("dataset_id")
        # A missing or empty dataset_id is reported as a text error rather
        # than raised.
        if not dataset_id:
            return [
                types.TextContent(type="text", text="Error: dataset_id is required")
            ]
    
        # NOTE(review): if quote_plus is urllib.parse.quote_plus, it also
        # percent-encodes "/", so a namespaced ID such as "user/name" is sent
        # as "user%2Fname" — confirm the API accepts that form.
        data = await make_hf_request(f"datasets/{quote_plus(dataset_id)}")
    
        # An "error" key in the response is treated as a failed request and
        # its value is relayed to the caller as text.
        if "error" in data:
            return [
                types.TextContent(
                    type="text",
                    text=f"Error retrieving dataset information: {data['error']}",
                )
            ]
    
        # Copy a fixed subset of fields out of the response, substituting a
        # default for any field that is absent.
        dataset_info = {
            "id": data.get("id", ""),
            "name": data.get("datasetId", ""),
            "author": data.get("author", ""),
            "tags": data.get("tags", []),
            "downloads": data.get("downloads", 0),
            "likes": data.get("likes", 0),
            "lastModified": data.get("lastModified", ""),
            "description": data.get("description", "No description available"),
        }
    
        # Attach the dataset card text only when the response carries a
        # non-empty "card" entry; the text is read from card["data"]["text"].
        # NOTE(review): assumes "card" and its "data" value are dicts — verify
        # against the actual API response shape.
        if "card" in data and data["card"]:
            dataset_info["dataset_card"] = (
                data["card"].get("data", {}).get("text", "No dataset card available")
            )
    
        # The result is serialized with a 2-space indent.
        return [types.TextContent(type="text", text=json.dumps(dataset_info, indent=2))]
  • Input schema and registration for the 'get-dataset-info' tool in the list_tools handler, defining the required 'dataset_id' parameter.
    # Tool definition for 'get-dataset-info'. The input schema is a JSON
    # Schema object with one string property, dataset_id, which is mandatory.
    types.Tool(
        name="get-dataset-info",
        description="Get detailed information about a specific dataset",
        inputSchema={
            "type": "object",
            "properties": {
                "dataset_id": {
                    "type": "string",
                    "description": "The ID of the dataset (e.g., 'squad')",
                },
            },
            # Listed here so schema validation rejects calls without it.
            "required": ["dataset_id"],
        },
    ),
  • The list_tools handler that registers the 'get-dataset-info' tool among others.
    @server.list_tools()
    async def handle_list_tools() -> list[types.Tool]:
        """
        Describe every Hugging Face Hub tool this server exposes.

        The definitions are returned in a fixed order (models, datasets,
        Spaces, papers, collections). Each one carries a JSON Schema object
        describing the arguments that tool accepts.
        """

        def text_arg(description):
            # JSON Schema fragment for a string-valued argument.
            return {"type": "string", "description": description}

        def limit_arg():
            # Built fresh on every call so no two schemas share one dict.
            return {
                "type": "integer",
                "description": "Maximum number of results to return",
            }

        def object_schema(properties, required=None):
            # The "required" key is emitted only for tools that have
            # mandatory arguments.
            schema = {"type": "object", "properties": properties}
            if required is not None:
                schema["required"] = required
            return schema

        def tool(name, description, properties, required=None):
            # Assemble one tool definition from its parts.
            return types.Tool(
                name=name,
                description=description,
                inputSchema=object_schema(properties, required),
            )

        model_tools = [
            tool(
                "search-models",
                "Search for models on Hugging Face Hub",
                {
                    "query": text_arg("Search term (e.g., 'bert', 'gpt')"),
                    "author": text_arg(
                        "Filter by author/organization (e.g., 'huggingface', 'google')"
                    ),
                    "tags": text_arg(
                        "Filter by tags (e.g., 'text-classification', 'translation')"
                    ),
                    "limit": limit_arg(),
                },
            ),
            tool(
                "get-model-info",
                "Get detailed information about a specific model",
                {
                    "model_id": text_arg(
                        "The ID of the model (e.g., 'google/bert-base-uncased')"
                    ),
                },
                ["model_id"],
            ),
        ]

        dataset_tools = [
            tool(
                "search-datasets",
                "Search for datasets on Hugging Face Hub",
                {
                    "query": text_arg("Search term"),
                    "author": text_arg("Filter by author/organization"),
                    "tags": text_arg("Filter by tags"),
                    "limit": limit_arg(),
                },
            ),
            tool(
                "get-dataset-info",
                "Get detailed information about a specific dataset",
                {
                    "dataset_id": text_arg("The ID of the dataset (e.g., 'squad')"),
                },
                ["dataset_id"],
            ),
        ]

        space_tools = [
            tool(
                "search-spaces",
                "Search for Spaces on Hugging Face Hub",
                {
                    "query": text_arg("Search term"),
                    "author": text_arg("Filter by author/organization"),
                    "tags": text_arg("Filter by tags"),
                    "sdk": text_arg(
                        "Filter by SDK (e.g., 'streamlit', 'gradio', 'docker')"
                    ),
                    "limit": limit_arg(),
                },
            ),
            tool(
                "get-space-info",
                "Get detailed information about a specific Space",
                {
                    "space_id": text_arg(
                        "The ID of the Space (e.g., 'huggingface/diffusers-demo')"
                    ),
                },
                ["space_id"],
            ),
        ]

        paper_tools = [
            tool(
                "get-paper-info",
                "Get information about a specific paper on Hugging Face",
                {
                    "arxiv_id": text_arg(
                        "The arXiv ID of the paper (e.g., '1810.04805')"
                    ),
                },
                ["arxiv_id"],
            ),
            tool(
                "get-daily-papers",
                "Get the list of daily papers curated by Hugging Face",
                {},
            ),
        ]

        collection_tools = [
            tool(
                "search-collections",
                "Search for collections on Hugging Face Hub",
                {
                    "owner": text_arg("Filter by owner"),
                    "item": text_arg(
                        "Filter by item (e.g., 'models/teknium/OpenHermes-2.5-Mistral-7B')"
                    ),
                    "query": text_arg("Search term for titles and descriptions"),
                    "limit": limit_arg(),
                },
            ),
            tool(
                "get-collection-info",
                "Get detailed information about a specific collection",
                {
                    "namespace": text_arg(
                        "The namespace of the collection (user or organization)"
                    ),
                    "collection_id": text_arg("The ID part of the collection"),
                },
                ["namespace", "collection_id"],
            ),
        ]

        return [
            *model_tools,
            *dataset_tools,
            *space_tools,
            *paper_tools,
            *collection_tools,
        ]
  • Helper function used by the get-dataset-info handler to make API requests to Hugging Face.
    async def make_hf_request(
        endpoint: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict:
        """Make a GET request to the Hugging Face API without ever raising.

        Args:
            endpoint: Path appended to ``HF_API_BASE`` after a ``/``
                (e.g. ``"datasets/squad"``).
            params: Optional query parameters forwarded to the HTTP client.

        Returns:
            Whatever ``response.json()`` yields on success. On any failure
            (request error, error status, undecodable body) a dict of the
            form ``{"error": <message>}`` with a non-empty message.
        """
        url = f"{HF_API_BASE}/{endpoint}"
        try:
            response = await http_client.get(url, params=params)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberate catch-all: callers check for an "error" key instead
            # of handling exceptions. Some exceptions (timeouts in particular)
            # stringify to "", so fall back to the class name to keep the
            # reported error informative.
            return {"error": str(e) or type(e).__name__}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/shreyaskarnik/huggingface-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.