# Hugging Face MCP Server
# by shreyaskarnik (verified)
# Path: huggingface-mcp-server/src/huggingface
"""
🤗 Hugging Face MCP Server 🤗
This server provides Model Context Protocol (MCP) access to the Hugging Face API,
allowing models like Claude to interact with models, datasets, spaces, and other
Hugging Face resources in a read-only manner.
"""
import asyncio
import json
from typing import Any, Dict, Optional
from urllib.parse import quote_plus
import httpx
import mcp.server.stdio
import mcp.types as types
from huggingface_hub import HfApi
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
from pydantic import AnyUrl
# Initialize the MCP server; "huggingface" is the server name advertised to clients
server = Server("huggingface")
# Initialize Hugging Face API client
# NOTE(review): hf_api is never used by the handlers below — confirm before removing
hf_api = HfApi()
# Base URL for the Hugging Face API (all endpoints below are relative to this)
HF_API_BASE = "https://huggingface.co/api"
# Shared async HTTP client for all requests; 30-second per-request timeout
http_client = httpx.AsyncClient(timeout=30.0)
# Helper Functions
async def make_hf_request(
    endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Any:
    """Make a GET request to the Hugging Face API with proper error handling.

    Args:
        endpoint: API path relative to HF_API_BASE (e.g. "models", "datasets").
        params: Optional query parameters.

    Returns:
        The decoded JSON body — a dict for detail endpoints or a list for
        search endpoints. On failure, a dict of the form
        ``{"error": "<message>"}``; callers detect failure by checking for
        the "error" key.
    """
    url = f"{HF_API_BASE}/{endpoint}"
    try:
        response = await http_client.get(url, params=params)
        response.raise_for_status()
        return response.json()
    except (httpx.HTTPError, ValueError) as e:
        # httpx.HTTPError covers transport failures and raise_for_status();
        # ValueError covers a non-JSON response body. Anything else is a
        # programming error and should propagate instead of being swallowed.
        return {"error": str(e)}
# Tool Handlers
@server.list_tools()
async def handle_list_tools() -> list[types.Tool]:
    """
    List available tools for interacting with the Hugging Face Hub.
    Each tool specifies its arguments using JSON Schema validation.

    Tool names here correspond one-to-one to the dispatch branches in
    handle_call_tool. Search tools have no required fields (all filters
    are optional); the get-* tools each require their resource identifier.
    """
    return [
        # Model Tools
        types.Tool(
            name="search-models",
            description="Search for models on Hugging Face Hub",
            inputSchema={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search term (e.g., 'bert', 'gpt')",
                    },
                    "author": {
                        "type": "string",
                        "description": "Filter by author/organization (e.g., 'huggingface', 'google')",
                    },
                    "tags": {
                        "type": "string",
                        "description": "Filter by tags (e.g., 'text-classification', 'translation')",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return",
                    },
                },
            },
        ),
        types.Tool(
            name="get-model-info",
            description="Get detailed information about a specific model",
            inputSchema={
                "type": "object",
                "properties": {
                    "model_id": {
                        "type": "string",
                        "description": "The ID of the model (e.g., 'google/bert-base-uncased')",
                    },
                },
                "required": ["model_id"],
            },
        ),
        # Dataset Tools
        types.Tool(
            name="search-datasets",
            description="Search for datasets on Hugging Face Hub",
            inputSchema={
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search term"},
                    "author": {
                        "type": "string",
                        "description": "Filter by author/organization",
                    },
                    "tags": {"type": "string", "description": "Filter by tags"},
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return",
                    },
                },
            },
        ),
        types.Tool(
            name="get-dataset-info",
            description="Get detailed information about a specific dataset",
            inputSchema={
                "type": "object",
                "properties": {
                    "dataset_id": {
                        "type": "string",
                        "description": "The ID of the dataset (e.g., 'squad')",
                    },
                },
                "required": ["dataset_id"],
            },
        ),
        # Space Tools
        types.Tool(
            name="search-spaces",
            description="Search for Spaces on Hugging Face Hub",
            inputSchema={
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search term"},
                    "author": {
                        "type": "string",
                        "description": "Filter by author/organization",
                    },
                    "tags": {"type": "string", "description": "Filter by tags"},
                    "sdk": {
                        "type": "string",
                        "description": "Filter by SDK (e.g., 'streamlit', 'gradio', 'docker')",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return",
                    },
                },
            },
        ),
        types.Tool(
            name="get-space-info",
            description="Get detailed information about a specific Space",
            inputSchema={
                "type": "object",
                "properties": {
                    "space_id": {
                        "type": "string",
                        "description": "The ID of the Space (e.g., 'huggingface/diffusers-demo')",
                    },
                },
                "required": ["space_id"],
            },
        ),
        # Papers Tools
        types.Tool(
            name="get-paper-info",
            description="Get information about a specific paper on Hugging Face",
            inputSchema={
                "type": "object",
                "properties": {
                    "arxiv_id": {
                        "type": "string",
                        "description": "The arXiv ID of the paper (e.g., '1810.04805')",
                    },
                },
                "required": ["arxiv_id"],
            },
        ),
        # get-daily-papers takes no arguments, hence the empty properties object
        types.Tool(
            name="get-daily-papers",
            description="Get the list of daily papers curated by Hugging Face",
            inputSchema={
                "type": "object",
                "properties": {},
            },
        ),
        # Collections Tools
        types.Tool(
            name="search-collections",
            description="Search for collections on Hugging Face Hub",
            inputSchema={
                "type": "object",
                "properties": {
                    "owner": {"type": "string", "description": "Filter by owner"},
                    "item": {
                        "type": "string",
                        "description": "Filter by item (e.g., 'models/teknium/OpenHermes-2.5-Mistral-7B')",
                    },
                    "query": {
                        "type": "string",
                        "description": "Search term for titles and descriptions",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return",
                    },
                },
            },
        ),
        types.Tool(
            name="get-collection-info",
            description="Get detailed information about a specific collection",
            inputSchema={
                "type": "object",
                "properties": {
                    "namespace": {
                        "type": "string",
                        "description": "The namespace of the collection (user or organization)",
                    },
                    "collection_id": {
                        "type": "string",
                        "description": "The ID part of the collection",
                    },
                },
                "required": ["namespace", "collection_id"],
            },
        ),
    ]
@server.call_tool()
async def handle_call_tool(
    name: str, arguments: dict | None
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """
    Handle tool execution requests for Hugging Face API.

    Dispatches on *name* to the matching read-only Hub endpoint, trims the
    response down to summary fields, and returns pretty-printed JSON wrapped
    in text content. API failures are reported as plain-text error messages
    rather than raised, so the client always receives a well-formed result.
    """
    if not arguments:
        arguments = {}
    if name == "search-models":
        query = arguments.get("query")
        author = arguments.get("author")
        tags = arguments.get("tags")
        limit = arguments.get("limit", 10)
        params = {"limit": limit}
        if query:
            params["search"] = query
        if author:
            params["author"] = author
        if tags:
            params["filter"] = tags
        data = await make_hf_request("models", params)
        # A successful search returns a list; make_hf_request only returns a
        # dict (containing "error") on failure.
        if "error" in data:
            return [
                types.TextContent(
                    type="text", text=f"Error searching models: {data['error']}"
                )
            ]
        # Keep only the summary fields useful for browsing results
        results = []
        for model in data:
            model_info = {
                "id": model.get("id", ""),
                "name": model.get("modelId", ""),
                "author": model.get("author", ""),
                "tags": model.get("tags", []),
                "downloads": model.get("downloads", 0),
                "likes": model.get("likes", 0),
                "lastModified": model.get("lastModified", ""),
            }
            results.append(model_info)
        return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
    elif name == "get-model-info":
        model_id = arguments.get("model_id")
        if not model_id:
            return [types.TextContent(type="text", text="Error: model_id is required")]
        # NOTE(review): quote_plus encodes the "/" in "org/model" as %2F —
        # the HF API appears to accept this, but confirm against the live API
        data = await make_hf_request(f"models/{quote_plus(model_id)}")
        if "error" in data:
            return [
                types.TextContent(
                    type="text",
                    text=f"Error retrieving model information: {data['error']}",
                )
            ]
        # Format the result
        model_info = {
            "id": data.get("id", ""),
            "name": data.get("modelId", ""),
            "author": data.get("author", ""),
            "tags": data.get("tags", []),
            "pipeline_tag": data.get("pipeline_tag", ""),
            "downloads": data.get("downloads", 0),
            "likes": data.get("likes", 0),
            "lastModified": data.get("lastModified", ""),
            "description": data.get("description", "No description available"),
        }
        # Add model card text if the response includes one
        if "card" in data and data["card"]:
            model_info["model_card"] = (
                data["card"].get("data", {}).get("text", "No model card available")
            )
        return [types.TextContent(type="text", text=json.dumps(model_info, indent=2))]
    elif name == "search-datasets":
        query = arguments.get("query")
        author = arguments.get("author")
        tags = arguments.get("tags")
        limit = arguments.get("limit", 10)
        params = {"limit": limit}
        if query:
            params["search"] = query
        if author:
            params["author"] = author
        if tags:
            params["filter"] = tags
        data = await make_hf_request("datasets", params)
        if "error" in data:
            return [
                types.TextContent(
                    type="text", text=f"Error searching datasets: {data['error']}"
                )
            ]
        # Format the results
        results = []
        for dataset in data:
            dataset_info = {
                "id": dataset.get("id", ""),
                "name": dataset.get("datasetId", ""),
                "author": dataset.get("author", ""),
                "tags": dataset.get("tags", []),
                "downloads": dataset.get("downloads", 0),
                "likes": dataset.get("likes", 0),
                "lastModified": dataset.get("lastModified", ""),
            }
            results.append(dataset_info)
        return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
    elif name == "get-dataset-info":
        dataset_id = arguments.get("dataset_id")
        if not dataset_id:
            return [
                types.TextContent(type="text", text="Error: dataset_id is required")
            ]
        data = await make_hf_request(f"datasets/{quote_plus(dataset_id)}")
        if "error" in data:
            return [
                types.TextContent(
                    type="text",
                    text=f"Error retrieving dataset information: {data['error']}",
                )
            ]
        # Format the result
        dataset_info = {
            "id": data.get("id", ""),
            "name": data.get("datasetId", ""),
            "author": data.get("author", ""),
            "tags": data.get("tags", []),
            "downloads": data.get("downloads", 0),
            "likes": data.get("likes", 0),
            "lastModified": data.get("lastModified", ""),
            "description": data.get("description", "No description available"),
        }
        # Add dataset card text if the response includes one
        if "card" in data and data["card"]:
            dataset_info["dataset_card"] = (
                data["card"].get("data", {}).get("text", "No dataset card available")
            )
        return [types.TextContent(type="text", text=json.dumps(dataset_info, indent=2))]
    elif name == "search-spaces":
        query = arguments.get("query")
        author = arguments.get("author")
        tags = arguments.get("tags")
        sdk = arguments.get("sdk")
        limit = arguments.get("limit", 10)
        params = {"limit": limit}
        if query:
            params["search"] = query
        if author:
            params["author"] = author
        if tags:
            params["filter"] = tags
        if sdk:
            # Append the SDK constraint to any existing tag filter. Join with
            # a space only when a tag filter is present — the old code always
            # prefixed a space, yielding a filter like " sdk:gradio".
            existing = params.get("filter")
            sdk_filter = f"sdk:{sdk}"
            params["filter"] = f"{existing} {sdk_filter}" if existing else sdk_filter
        data = await make_hf_request("spaces", params)
        if "error" in data:
            return [
                types.TextContent(
                    type="text", text=f"Error searching spaces: {data['error']}"
                )
            ]
        # Format the results
        results = []
        for space in data:
            space_info = {
                "id": space.get("id", ""),
                "name": space.get("spaceId", ""),
                "author": space.get("author", ""),
                "sdk": space.get("sdk", ""),
                "tags": space.get("tags", []),
                "likes": space.get("likes", 0),
                "lastModified": space.get("lastModified", ""),
            }
            results.append(space_info)
        return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
    elif name == "get-space-info":
        space_id = arguments.get("space_id")
        if not space_id:
            return [types.TextContent(type="text", text="Error: space_id is required")]
        data = await make_hf_request(f"spaces/{quote_plus(space_id)}")
        if "error" in data:
            return [
                types.TextContent(
                    type="text",
                    text=f"Error retrieving space information: {data['error']}",
                )
            ]
        # Format the result; include a browsable URL built from the raw id
        space_info = {
            "id": data.get("id", ""),
            "name": data.get("spaceId", ""),
            "author": data.get("author", ""),
            "sdk": data.get("sdk", ""),
            "tags": data.get("tags", []),
            "likes": data.get("likes", 0),
            "lastModified": data.get("lastModified", ""),
            "description": data.get("description", "No description available"),
            "url": f"https://huggingface.co/spaces/{space_id}",
        }
        return [types.TextContent(type="text", text=json.dumps(space_info, indent=2))]
    elif name == "get-paper-info":
        arxiv_id = arguments.get("arxiv_id")
        if not arxiv_id:
            return [types.TextContent(type="text", text="Error: arxiv_id is required")]
        data = await make_hf_request(f"papers/{arxiv_id}")
        if "error" in data:
            return [
                types.TextContent(
                    type="text",
                    text=f"Error retrieving paper information: {data['error']}",
                )
            ]
        # Format the result
        paper_info = {
            "arxiv_id": data.get("arxivId", ""),
            "title": data.get("title", ""),
            "authors": data.get("authors", []),
            "summary": data.get("summary", "No summary available"),
            "url": f"https://huggingface.co/papers/{arxiv_id}",
        }
        # Attach Hub repos implementing the paper; a lookup failure here is
        # non-fatal — the paper info is still returned without implementations
        implementations = await make_hf_request(f"arxiv/{arxiv_id}/repos")
        if "error" not in implementations:
            paper_info["implementations"] = implementations
        return [types.TextContent(type="text", text=json.dumps(paper_info, indent=2))]
    elif name == "get-daily-papers":
        data = await make_hf_request("daily_papers")
        if "error" in data:
            return [
                types.TextContent(
                    type="text", text=f"Error retrieving daily papers: {data['error']}"
                )
            ]
        # Format the results, truncating long abstracts to keep output compact
        results = []
        for paper in data:
            paper_info = {
                "arxiv_id": paper.get("paper", {}).get("arxivId", ""),
                "title": paper.get("paper", {}).get("title", ""),
                "authors": paper.get("paper", {}).get("authors", []),
                "summary": paper.get("paper", {}).get("summary", "")[:200] + "..."
                if len(paper.get("paper", {}).get("summary", "")) > 200
                else paper.get("paper", {}).get("summary", ""),
            }
            results.append(paper_info)
        return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
    elif name == "search-collections":
        owner = arguments.get("owner")
        item = arguments.get("item")
        query = arguments.get("query")
        limit = arguments.get("limit", 10)
        params = {"limit": limit}
        if owner:
            params["owner"] = owner
        if item:
            params["item"] = item
        if query:
            params["q"] = query
        data = await make_hf_request("collections", params)
        if "error" in data:
            return [
                types.TextContent(
                    type="text", text=f"Error searching collections: {data['error']}"
                )
            ]
        # Format the results
        results = []
        for collection in data:
            collection_info = {
                "id": collection.get("id", ""),
                "title": collection.get("title", ""),
                "owner": collection.get("owner", {}).get("name", ""),
                "description": collection.get(
                    "description", "No description available"
                ),
                "items_count": collection.get("itemsCount", 0),
                "upvotes": collection.get("upvotes", 0),
                "last_modified": collection.get("lastModified", ""),
            }
            results.append(collection_info)
        return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
    elif name == "get-collection-info":
        namespace = arguments.get("namespace")
        collection_id = arguments.get("collection_id")
        if not namespace or not collection_id:
            return [
                types.TextContent(
                    type="text", text="Error: namespace and collection_id are required"
                )
            ]
        # The collections API endpoint has the shape
        # "collections/{namespace}/{slug}-{id}"; use the caller-supplied
        # collection_id verbatim. (The previous code re-derived a slug from
        # collection_id and prepended it, producing a duplicated segment such
        # as "my-my-cool-64f9..." and thus a guaranteed 404.)
        endpoint = f"collections/{namespace}/{collection_id}"
        data = await make_hf_request(endpoint)
        if "error" in data:
            return [
                types.TextContent(
                    type="text",
                    text=f"Error retrieving collection information: {data['error']}",
                )
            ]
        # Format the result
        collection_info = {
            "id": data.get("id", ""),
            "title": data.get("title", ""),
            "owner": data.get("owner", {}).get("name", ""),
            "description": data.get("description", "No description available"),
            "upvotes": data.get("upvotes", 0),
            "last_modified": data.get("lastModified", ""),
            "items": [],
        }
        # Flatten each collection item down to its type, id, and curator note
        for item in data.get("items", []):
            item_info = {
                "type": item.get("item", {}).get("type", ""),
                "id": item.get("item", {}).get("id", ""),
                "note": item.get("note", ""),
            }
            collection_info["items"].append(item_info)
        return [
            types.TextContent(type="text", text=json.dumps(collection_info, indent=2))
        ]
    else:
        return [types.TextContent(type="text", text=f"Unknown tool: {name}")]
# Resource Handlers - Define popular models, datasets, and spaces as resources
@server.list_resources()
async def handle_list_resources() -> list[types.Resource]:
    """
    List available Hugging Face resources.
    This provides direct access to popular models, datasets, and spaces.
    """
    # Static catalog keyed by resource type; each entry is
    # (hub id, display name, description). URIs take the form
    # hf://{type}/{id}, which handle_read_resource resolves.
    catalog: dict[str, list[tuple[str, str, str]]] = {
        "model": [
            (
                "meta-llama/Llama-3-8B-Instruct",
                "Llama 3 8B Instruct",
                "Meta's Llama 3 8B Instruct model",
            ),
            (
                "mistralai/Mistral-7B-Instruct-v0.2",
                "Mistral 7B Instruct v0.2",
                "Mistral AI's 7B instruction-following model",
            ),
            (
                "openchat/openchat-3.5-0106",
                "OpenChat 3.5",
                "Open-source chatbot based on Mistral 7B",
            ),
            (
                "stabilityai/stable-diffusion-xl-base-1.0",
                "Stable Diffusion XL 1.0",
                "SDXL text-to-image model",
            ),
        ],
        "dataset": [
            (
                "databricks/databricks-dolly-15k",
                "Databricks Dolly 15k",
                "15k instruction-following examples",
            ),
            ("squad", "SQuAD", "Stanford Question Answering Dataset"),
            ("glue", "GLUE", "General Language Understanding Evaluation benchmark"),
            (
                "openai/summarize_from_feedback",
                "Summarize From Feedback",
                "OpenAI summarization dataset",
            ),
        ],
        "space": [
            (
                "huggingface/diffusers-demo",
                "Diffusers Demo",
                "Demo of Stable Diffusion models",
            ),
            ("gradio/chatbot-demo", "Chatbot Demo", "Demo of a Gradio chatbot interface"),
            (
                "prompthero/midjourney-v4-diffusion",
                "Midjourney v4 Diffusion",
                "Replica of Midjourney v4",
            ),
            ("stabilityai/stablevicuna", "StableVicuna", "Fine-tuned Vicuna with RLHF"),
        ],
    }
    return [
        types.Resource(
            uri=AnyUrl(f"hf://{resource_type}/{resource_id}"),
            name=display_name,
            description=description,
            mimeType="application/json",
        )
        for resource_type, entries in catalog.items()
        for resource_id, display_name, description in entries
    ]
@server.read_resource()
async def handle_read_resource(uri: AnyUrl) -> str:
    """
    Read a specific Hugging Face resource by its URI.

    URIs have the form "hf://{type}/{id}" where type is "model", "dataset",
    or "space" (matching handle_list_resources). Returns the raw API
    response serialized as pretty-printed JSON.

    Raises:
        ValueError: for an unsupported scheme, a malformed URI, an unknown
            resource type, or an API error.
    """
    if uri.scheme != "hf":
        raise ValueError(f"Unsupported URI scheme: {uri.scheme}")
    if uri.host:
        # URL parsers place the first segment of "hf://model/google/bert" in
        # the host component ("model"), with only "/google/bert" in the path.
        # The previous path-only parsing mistook the author for the resource
        # type, and rejected single-segment ids like "hf://dataset/squad".
        resource_type = uri.host
        resource_id = (uri.path or "").lstrip("/")
        if not resource_id:
            raise ValueError("Invalid Hugging Face resource URI")
    else:
        # Fallback for parsers that keep the entire "type/id" in the path.
        if not uri.path:
            raise ValueError("Invalid Hugging Face resource URI")
        parts = uri.path.lstrip("/").split("/", 1)
        if len(parts) != 2:
            raise ValueError("Invalid Hugging Face resource URI format")
        resource_type, resource_id = parts
    if resource_type == "model":
        data = await make_hf_request(f"models/{quote_plus(resource_id)}")
    elif resource_type == "dataset":
        data = await make_hf_request(f"datasets/{quote_plus(resource_id)}")
    elif resource_type == "space":
        data = await make_hf_request(f"spaces/{quote_plus(resource_id)}")
    else:
        raise ValueError(f"Unsupported resource type: {resource_type}")
    if "error" in data:
        raise ValueError(f"Error retrieving resource: {data['error']}")
    return json.dumps(data, indent=2)
# Prompt Handlers
@server.list_prompts()
async def handle_list_prompts() -> list[types.Prompt]:
    """
    List available prompts for Hugging Face integration.
    """
    # Each prompt here has a matching branch in handle_get_prompt.
    compare_models = types.Prompt(
        name="compare-models",
        description="Compare multiple Hugging Face models",
        arguments=[
            types.PromptArgument(
                name="model_ids",
                description="Comma-separated list of model IDs to compare",
                required=True,
            )
        ],
    )
    summarize_paper = types.Prompt(
        name="summarize-paper",
        description="Summarize an AI research paper from arXiv",
        arguments=[
            types.PromptArgument(
                name="arxiv_id",
                description="arXiv ID of the paper to summarize",
                required=True,
            ),
            types.PromptArgument(
                name="detail_level",
                description="Level of detail in the summary (brief/detailed/eli5)",
                required=False,
            ),
        ],
    )
    return [compare_models, summarize_paper]
@server.get_prompt()
async def handle_get_prompt(
    name: str, arguments: dict[str, str] | None
) -> types.GetPromptResult:
    """
    Generate a prompt related to Hugging Face resources.

    Supported prompts:
        compare-models: fetches each model in the comma-separated
            ``model_ids`` argument and builds a comparison request.
        summarize-paper: fetches paper metadata for ``arxiv_id`` and builds
            a summarization request honoring ``detail_level``
            (brief/detailed/eli5).

    Raises:
        ValueError: for an unknown prompt name, a missing required
            argument, or a paper-lookup failure.
    """
    if not arguments:
        arguments = {}
    if name == "compare-models":
        model_ids = arguments.get("model_ids", "")
        if not model_ids:
            raise ValueError("model_ids argument is required")
        model_list = [model_id.strip() for model_id in model_ids.split(",")]
        # Fetch all model records concurrently; the previous sequential loop
        # serialized the round-trips. Failed lookups are skipped so a single
        # bad id does not abort the whole comparison.
        responses = await asyncio.gather(
            *(make_hf_request(f"models/{quote_plus(m)}") for m in model_list)
        )
        model_details = [
            {
                "id": data.get("id", ""),
                "author": data.get("author", ""),
                "downloads": data.get("downloads", 0),
                "tags": data.get("tags", []),
                "description": data.get("description", "No description available"),
            }
            for data in responses
            if "error" not in data
        ]
        return types.GetPromptResult(
            description=f"Comparing models: {model_ids}",
            messages=[
                types.PromptMessage(
                    role="user",
                    content=types.TextContent(
                        type="text",
                        text="I'd like you to compare these Hugging Face models and help me understand their differences, strengths, and suitable use cases:\n\n"
                        + json.dumps(model_details, indent=2)
                        + "\n\nPlease structure your comparison with sections on architecture, performance, use cases, and limitations.",
                    ),
                )
            ],
        )
    elif name == "summarize-paper":
        arxiv_id = arguments.get("arxiv_id", "")
        if not arxiv_id:
            raise ValueError("arxiv_id argument is required")
        detail_level = arguments.get("detail_level", "detailed")
        # Map the advertised detail levels onto instructions. "eli5" was
        # documented in the prompt's argument description but previously fell
        # through to the "brief" wording; unknown values still mean "brief".
        style = {
            "detailed": "detailed",
            "eli5": "simple, beginner-friendly",
        }.get(detail_level, "brief")
        closing = {
            "detailed": "cover all key aspects including methodology, results, and implications",
            "eli5": "explain the main ideas in plain language, as if to someone new to the field",
        }.get(detail_level, "provide a concise overview of the main contributions")
        paper_data = await make_hf_request(f"papers/{arxiv_id}")
        if "error" in paper_data:
            raise ValueError(f"Error retrieving paper: {paper_data['error']}")
        # Implementations are optional context; a failure here is non-fatal
        implementations = await make_hf_request(f"arxiv/{arxiv_id}/repos")
        return types.GetPromptResult(
            description=f"Summarizing paper: {paper_data.get('title', arxiv_id)}",
            messages=[
                types.PromptMessage(
                    role="user",
                    content=types.TextContent(
                        type="text",
                        text=f"Please provide a {style} summary of this AI research paper:\n\n"
                        + f"Title: {paper_data.get('title', 'Unknown')}\n"
                        + f"Authors: {', '.join(paper_data.get('authors', []))}\n"
                        + f"Abstract: {paper_data.get('summary', 'No abstract available')}\n\n"
                        + (
                            f"Implementations on Hugging Face: {json.dumps(implementations, indent=2)}\n\n"
                            if "error" not in implementations
                            else ""
                        )
                        + f"Please {closing}.",
                    ),
                )
            ],
        )
    else:
        raise ValueError(f"Unknown prompt: {name}")
async def main():
    """Run the MCP server over stdio until the client disconnects."""
    try:
        # Run the server using stdin/stdout streams
        async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
            await server.run(
                read_stream,
                write_stream,
                InitializationOptions(
                    server_name="huggingface",
                    server_version="0.1.0",
                    capabilities=server.get_capabilities(
                        notification_options=NotificationOptions(),
                        experimental_capabilities={},
                    ),
                ),
            )
    finally:
        # Release the module-level HTTP connection pool on shutdown; it was
        # previously never closed.
        await http_client.aclose()


if __name__ == "__main__":
    asyncio.run(main())