search_datasets
Search and filter Hugging Face datasets by name, tags, author, or description to find relevant data for machine learning projects.
Instructions
Find datasets on the Hugging Face Hub by name, tag, or author
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| search | No | Query to match against dataset names and descriptions | |
| author | No | Filter by dataset owner (user or organization) | |
| filter | No | Tag filters (e.g., task_categories:text-classification, language:en) | |
| sort | No | Sort order for results | |
| direction | No | Sort direction (default: desc) | |
| limit | No | Max results to return (default: 20, max: 100) | |
Implementation Reference
- src/tools/search-datasets.ts:63-96 (handler) The handler function for the search_datasets tool, which processes arguments and fetches datasets from the Hugging Face Hub.
/**
 * Handler for the `search_datasets` tool.
 *
 * Builds the query parameters from the tool arguments: `direction` "asc" is
 * sent as "1" and "desc" as "-1", and `limit` falls back to 20 when omitted.
 * It then requests "/api/datasets" through `fetchHub` and reduces every
 * returned dataset to a small summary (id, author, description, downloads,
 * likes, trending_score, tags, last_modified, private, gated). Descriptions
 * are cut to their first 200 characters and tag lists to their first 10
 * entries. The summaries are returned as 2-space-indented JSON inside a
 * single "text" content item.
 *
 * NOTE(review): when `direction` is omitted, `undefined` is passed on, so the
 * resulting order depends on how `fetchHub` and the Hub treat a missing
 * direction. Confirm this matches the "default: desc" promised by the schema.
 */
async ({ search, author, filter, sort, direction, limit }) => { const params: Record<string, string | number | string[] | undefined> = { search, author, filter, sort, direction: direction === "asc" ? "1" : direction === "desc" ? "-1" : undefined, limit: limit ?? 20, }; const datasets = await fetchHub<DatasetInfo[]>("/api/datasets", params); const results = datasets.map((d) => ({ id: d.id, author: d.author, description: d.description?.slice(0, 200), downloads: d.downloads, likes: d.likes, trending_score: d.trendingScore, tags: d.tags?.slice(0, 10), last_modified: d.lastModified, private: d.private, gated: d.gated, })); return { content: [ { type: "text" as const, text: JSON.stringify(results, null, 2), }, ], }; } - src/tools/search-datasets.ts:26-62 (schema)Zod schema defining the input parameters for the search_datasets tool.
/**
 * Zod input schema for the `search_datasets` tool. Every field is optional.
 *
 * - `search`: string query.
 * - `author`: string naming the dataset owner.
 * - `filter`: array of tag strings.
 * - `sort`: one of "trending_score", "downloads", "likes", "created_at",
 *   "last_modified".
 * - `direction`: "asc" or "desc".
 * - `limit`: integer between 1 and 100 inclusive.
 *
 * The defaults mentioned in the descriptions are not set by this schema
 * (no `.default()` is used); they have to be applied by whoever consumes
 * the parsed arguments.
 */
{ search: z .string() .optional() .describe("Query to match against dataset names and descriptions"), author: z .string() .optional() .describe("Filter by dataset owner (user or organization)"), filter: z .array(z.string()) .optional() .describe( "Tag filters (e.g., task_categories:text-classification, language:en)" ), sort: z .enum([ "trending_score", "downloads", "likes", "created_at", "last_modified", ]) .optional() .describe("Sort order for results"), direction: z .enum(["asc", "desc"]) .optional() .describe("Sort direction (default: desc)"), limit: z .number() .int() .min(1) .max(100) .optional() .describe("Max results to return (default: 20, max: 100)"), }, - src/tools/search-datasets.ts:22-98 (registration)Registration function for the search_datasets tool.
/**
 * Registers the `search_datasets` tool on the given MCP server.
 *
 * The tool queries "/api/datasets" through `fetchHub` and answers with a
 * trimmed, JSON-formatted summary of every dataset returned.
 *
 * @param server - MCP server instance the tool is registered on.
 */
export function registerSearchDatasets(server: McpServer): void {
  // Translation from the tool's snake_case sort keys to the field names sent
  // to the Hub.
  // NOTE(review): the Hub listing endpoint is understood to name its sort
  // fields in camelCase, like the `trendingScore` / `lastModified` properties
  // read from the response below - confirm against the Hub API reference.
  const hubSortFields = {
    trending_score: "trendingScore",
    downloads: "downloads",
    likes: "likes",
    created_at: "createdAt",
    last_modified: "lastModified",
  } as const;

  server.tool(
    "search_datasets",
    "Find datasets on the Hugging Face Hub by name, tag, or author",
    {
      search: z
        .string()
        .optional()
        .describe("Query to match against dataset names and descriptions"),
      author: z
        .string()
        .optional()
        .describe("Filter by dataset owner (user or organization)"),
      filter: z
        .array(z.string())
        .optional()
        .describe(
          "Tag filters (e.g., task_categories:text-classification, language:en)"
        ),
      sort: z
        .enum([
          "trending_score",
          "downloads",
          "likes",
          "created_at",
          "last_modified",
        ])
        .optional()
        .describe("Sort order for results"),
      direction: z
        .enum(["asc", "desc"])
        .optional()
        .describe("Sort direction (default: desc)"),
      limit: z
        .number()
        .int()
        .min(1)
        .max(100)
        .optional()
        .describe("Max results to return (default: 20, max: 100)"),
    },
    async ({ search, author, filter, sort, direction, limit }) => {
      // "asc" -> "1", "desc" -> "-1". When a sort key is given without a
      // direction, send "-1" explicitly so the documented "default: desc"
      // holds instead of depending on the remote default. Without a sort key
      // no direction is sent, exactly as before.
      let hubDirection: string | undefined;
      if (direction === "asc") {
        hubDirection = "1";
      } else if (direction === "desc" || sort !== undefined) {
        hubDirection = "-1";
      }

      const params: Record<string, string | number | string[] | undefined> = {
        search,
        author,
        filter,
        sort: sort === undefined ? undefined : hubSortFields[sort],
        direction: hubDirection,
        limit: limit ?? 20,
      };

      const datasets = await fetchHub<DatasetInfo[]>("/api/datasets", params);

      // Keep the payload small: descriptions are cut to 200 characters and
      // tag lists to their first 10 entries.
      const results = datasets.map((d) => ({
        id: d.id,
        author: d.author,
        description: d.description?.slice(0, 200),
        downloads: d.downloads,
        likes: d.likes,
        trending_score: d.trendingScore,
        tags: d.tags?.slice(0, 10),
        last_modified: d.lastModified,
        private: d.private,
        gated: d.gated,
      }));

      return {
        content: [
          {
            type: "text" as const,
            text: JSON.stringify(results, null, 2),
          },
        ],
      };
    }
  );
}