Tidymodels MCP Server
by JavOrraca
Verified
#!/usr/bin/env node
import {
Server,
StdioServerTransport,
CallToolRequestSchema,
ErrorCode,
ListResourcesRequestSchema,
ListToolsRequestSchema,
McpError,
ReadResourceRequestSchema
} from '@modelcontextprotocol/sdk';
import axios from 'axios';
// GitHub token from environment variables
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
// Base tidymodels documentation URL
const TIDYMODELS_DOCS_URL = 'https://www.tidymodels.org';
class TidymodelsServer {
private server: Server;
private axiosInstance: any;
private cachedRepos: any[] = [];
private cacheExpiryMs: number = 3600000; // 1 hour
private lastCacheUpdate: number = 0;
private repoContentCache: Map<string, any> = new Map();
private documentationCache: Map<string, any> = new Map();
constructor() {
this.server = new Server(
{
name: 'tidymodels-server',
version: '0.1.0',
},
{
capabilities: {
resources: {},
tools: {},
},
}
);
this.axiosInstance = axios.create({
baseURL: 'https://api.github.com',
headers: GITHUB_TOKEN ? {
Authorization: `token ${GITHUB_TOKEN}`
} : {}
});
this.setupResourceHandlers();
this.setupToolHandlers();
// Error handling
this.server.onerror = (error) => console.error('[MCP Error]', error);
process.on('SIGINT', async () => {
await this.server.close();
process.exit(0);
});
}
// Helper methods for GitHub API
private async getRepos(forceRefresh: boolean = false): Promise<any[]> {
const now = Date.now();
if (forceRefresh || this.cachedRepos.length === 0 || (now - this.lastCacheUpdate) > this.cacheExpiryMs) {
try {
console.error('Fetching tidymodels repositories from GitHub...');
const response = await this.axiosInstance.get('/orgs/tidymodels/repos', {
params: {
per_page: 100,
sort: 'updated'
}
});
this.cachedRepos = response.data;
this.lastCacheUpdate = now;
console.error(`Cached ${this.cachedRepos.length} repositories`);
} catch (error) {
console.error('Error fetching repositories:', error);
// If there's an error, use the cached data if available
if (this.cachedRepos.length === 0) {
throw new McpError(
ErrorCode.InternalError,
`Failed to fetch repositories: ${error instanceof Error ? error.message : String(error)}`
);
}
}
}
return this.cachedRepos;
}
private async getRepoContent(repo: string, path: string = ''): Promise<any> {
const cacheKey = `${repo}:${path}`;
if (this.repoContentCache.has(cacheKey)) {
return this.repoContentCache.get(cacheKey);
}
try {
const response = await this.axiosInstance.get(`/repos/tidymodels/${repo}/contents/${path}`);
this.repoContentCache.set(cacheKey, response.data);
return response.data;
} catch (error) {
throw new McpError(
ErrorCode.InternalError,
`Failed to fetch content for ${repo}/${path}: ${error instanceof Error ? error.message : String(error)}`
);
}
}
private async getFileContent(repo: string, path: string): Promise<string> {
try {
const response = await this.axiosInstance.get(`/repos/tidymodels/${repo}/contents/${path}`);
if ('content' in response.data && 'encoding' in response.data) {
const fileData = response.data as { content: string; encoding: string };
if (fileData.encoding === 'base64') {
return Buffer.from(fileData.content, 'base64').toString('utf-8');
}
}
throw new McpError(
ErrorCode.InternalError,
`Invalid file data format for ${repo}/${path}`
);
} catch (error) {
throw new McpError(
ErrorCode.InternalError,
`Failed to fetch file content for ${repo}/${path}: ${error instanceof Error ? error.message : String(error)}`
);
}
}
private async searchCodeInOrg(query: string): Promise<any[]> {
try {
const response = await this.axiosInstance.get('/search/code', {
params: {
q: `org:tidymodels ${query}`,
per_page: 100
}
});
return response.data.items;
} catch (error) {
throw new McpError(
ErrorCode.InternalError,
`Failed to search code: ${error instanceof Error ? error.message : String(error)}`
);
}
}
private async getTidymodelsRReference(packageName?: string): Promise<any> {
try {
// First get the list of repos to find the R packages
const repos = await this.getRepos();
const rPackages = repos.filter(repo => {
// Filter by specific package if provided
if (packageName && !repo.name.includes(packageName)) return false;
// Check if this is likely an R package by looking for common files
return true; // For now assume all are R packages, could be refined
});
const packagesInfo = await Promise.all(rPackages.map(async (repo) => {
try {
// Try to get DESCRIPTION file which is standard in R packages
const descriptionContent = await this.getFileContent(repo.name, 'DESCRIPTION').catch(() => '');
// Parse basic info from DESCRIPTION
const title = descriptionContent.match(/Title: (.*)/i)?.[1] || '';
const version = descriptionContent.match(/Version: (.*)/i)?.[1] || '';
const description = descriptionContent.match(/Description: (.*)/i)?.[1] || repo.description || '';
const depends = descriptionContent.match(/Depends: (.*)/i)?.[1] || '';
const imports = descriptionContent.match(/Imports: ([\s\S]*?)(?=\n\w|$)/i)?.[1]?.replace(/\n/g, ' ') || '';
const suggests = descriptionContent.match(/Suggests: ([\s\S]*?)(?=\n\w|$)/i)?.[1]?.replace(/\n/g, ' ') || '';
// Get README for more info
const readmeContent = await this.getFileContent(repo.name, 'README.md').catch(() => '');
return {
name: repo.name,
title,
version,
description,
depends,
imports,
suggests,
stars: repo.stargazers_count,
open_issues: repo.open_issues_count,
url: repo.html_url,
language: repo.language,
updated_at: repo.updated_at,
readme_excerpt: readmeContent.substring(0, 1000) + (readmeContent.length > 1000 ? '...' : '')
};
} catch (error) {
// If we can't get package details, return basic repo info
return {
name: repo.name,
description: repo.description,
stars: repo.stargazers_count,
open_issues: repo.open_issues_count,
url: repo.html_url,
language: repo.language,
updated_at: repo.updated_at
};
}
}));
return packagesInfo;
} catch (error) {
throw new McpError(
ErrorCode.InternalError,
`Failed to get R package reference: ${error instanceof Error ? error.message : String(error)}`
);
}
}
private async searchFunctionDocumentation(query: string, packageName?: string): Promise<any> {
try {
// Use code search to find function definitions and documentation
const searchQuery = packageName
? `org:tidymodels repo:tidymodels/${packageName} ${query} path:.R`
: `org:tidymodels ${query} path:.R`;
const response = await this.axiosInstance.get('/search/code', {
params: {
q: searchQuery,
per_page: 50
}
});
const results = await Promise.all(response.data.items.map(async (item: any) => {
try {
// Get the file content to extract function documentation
const content = await this.getFileContent(item.repository.name, item.path);
// Extract roxygen documentation blocks (simplified)
const roxygenBlocks = content.match(/#'[\s\S]*?function\s*\([^)]*\)/g) || [];
// Find blocks that match our query
const matchingBlocks = roxygenBlocks.filter(block =>
block.toLowerCase().includes(query.toLowerCase())
);
return {
repository: item.repository.name,
path: item.path,
url: item.html_url,
documentation: matchingBlocks.length > 0
? matchingBlocks.join('\n\n')
: 'No documentation found'
};
} catch (error) {
return {
repository: item.repository.name,
path: item.path,
url: item.html_url,
error: `Failed to fetch content: ${error instanceof Error ? error.message : String(error)}`
};
}
}));
return results;
} catch (error) {
throw new McpError(
ErrorCode.InternalError,
`Failed to search function documentation: ${error instanceof Error ? error.message : String(error)}`
);
}
}
private async generateRCode(request: string, templateType?: string): Promise<string> {
// This function would generate R code based on the template type and request
// In a real implementation, this might call an API or use embedded templates
// For now, we'll return template examples based on the request type
let codeTemplate = '';
switch (templateType?.toLowerCase()) {
case 'recipe':
codeTemplate = `# Recipe for preprocessing data
library(tidymodels)
# Create a recipe for data preprocessing
recipe <- recipe(target ~ ., data = data) |>
step_normalize(all_numeric_predictors()) |>
step_dummy(all_nominal_predictors()) |>
step_zv(all_predictors()) |>
step_corr(all_numeric_predictors())
# Prepare the recipe on training data
recipe_prepped <- prep(recipe, training = training_data)
# Apply to data
processed_data <- bake(recipe_prepped, new_data = data)`;
break;
case 'model':
codeTemplate = `# Build a tidymodels workflow for ${request}
library(tidymodels)
# Define model specification
model_spec <-
# Choose appropriate model for your task
boost_tree() |>
set_engine("xgboost") |>
set_mode("classification") # or regression
# Create a workflow
workflow <-
workflow() |>
add_recipe(recipe) |>
add_model(model_spec)
# Fit model
fitted_model <- fit(workflow, data = training_data)
# Make predictions
predictions <- predict(fitted_model, new_data = test_data)`;
break;
case 'tune':
codeTemplate = `# Hyperparameter tuning with tidymodels
library(tidymodels)
# Define model with tuning parameters
model_spec <-
boost_tree(
trees = tune(),
min_n = tune(),
tree_depth = tune()
) |>
set_engine("xgboost") |>
set_mode("classification")
# Create workflow
workflow <-
workflow() |>
add_recipe(recipe) |>
add_model(model_spec)
# Create resamples for tuning
resamples <- vfold_cv(training_data, v = 5)
# Define tuning grid
tuning_grid <- grid_latin_hypercube(
trees(range = c(10, 2000)),
min_n(range = c(2, 40)),
tree_depth(range = c(1, 15)),
size = 20
)
# Tune model
tuning_results <-
workflow |>
tune_grid(
resamples = resamples,
grid = tuning_grid,
metrics = metric_set(roc_auc, accuracy)
)
# Select best parameters
best_params <- select_best(tuning_results, metric = "roc_auc")
# Finalize workflow
final_workflow <-
workflow |>
finalize_workflow(best_params)
# Fit final model
final_model <- fit(final_workflow, data = training_data)`;
break;
case 'evaluation':
codeTemplate = `# Model evaluation with tidymodels
library(tidymodels)
# Create test/train split
set.seed(123)
data_split <- initial_split(data, prop = 0.75, strata = outcome)
train_data <- training(data_split)
test_data <- testing(data_split)
# Fit finalized model on training data
final_fit <- fit(workflow, data = train_data)
# Predict on test data
predictions <- predict(final_fit, test_data)
predictions_with_prob <- predict(final_fit, test_data, type = "prob")
# Combine predictions with actual values
results <- bind_cols(
test_data,
predictions,
predictions_with_prob
)
# Evaluate performance
metrics <- metric_set(accuracy, roc_auc, sensitivity, specificity)
performance <- metrics(
results,
truth = outcome,
estimate = .pred_class,
.pred_yes
)
# Create confusion matrix
conf_mat(results, truth = outcome, estimate = .pred_class)
# Plot ROC curve
results |>
roc_curve(truth = outcome, .pred_yes) |>
autoplot()`;
break;
default:
// General template if no specific type given
codeTemplate = `# Tidymodels workflow for ${request}
library(tidymodels)
# Data preparation
data_split <- initial_split(data, prop = 0.75)
train_data <- training(data_split)
test_data <- testing(data_split)
# Create a recipe for data preprocessing
recipe <- recipe(outcome ~ ., data = train_data) |>
step_normalize(all_numeric_predictors()) |>
step_dummy(all_nominal_predictors())
# Define model
model_spec <-
# Choose appropriate model for your task
rand_forest() |>
set_engine("ranger") |>
set_mode("classification") # or regression
# Create workflow
workflow <-
workflow() |>
add_recipe(recipe) |>
add_model(model_spec)
# Fit model
fitted_model <- fit(workflow, data = train_data)
# Evaluate model
predictions <- predict(fitted_model, test_data)
performance <- metrics(bind_cols(test_data, predictions),
truth = outcome,
estimate = .pred_class)`;
}
return codeTemplate;
}
private setupResourceHandlers() {
// List all repositories as resources
this.server.setRequestHandler(ListResourcesRequestSchema, async () => {
const repos = await this.getRepos();
return {
resources: [
// Repository resources
...repos.map(repo => ({
uri: `tidymodels://repos/${repo.name}`,
name: repo.name,
mimeType: 'application/json',
description: repo.description || `Repository: ${repo.name}`,
})),
// Documentation resources
{
uri: 'tidymodels://docs/overview',
name: 'Tidymodels Overview',
mimeType: 'text/markdown',
description: 'Overview of the tidymodels ecosystem',
},
{
uri: 'tidymodels://docs/getting-started',
name: 'Getting Started',
mimeType: 'text/markdown',
description: 'Getting started with tidymodels',
},
// Code template resources
{
uri: 'tidymodels://templates/recipe',
name: 'Recipe Template',
mimeType: 'text/plain',
description: 'Template for creating a tidymodels recipe',
},
{
uri: 'tidymodels://templates/model',
name: 'Model Template',
mimeType: 'text/plain',
description: 'Template for creating a tidymodels model',
},
{
uri: 'tidymodels://templates/tune',
name: 'Tuning Template',
mimeType: 'text/plain',
description: 'Template for hyperparameter tuning with tidymodels',
},
{
uri: 'tidymodels://templates/evaluation',
name: 'Evaluation Template',
mimeType: 'text/plain',
description: 'Template for evaluating models with tidymodels',
}
],
};
});
// Read resource handler
this.server.setRequestHandler(
ReadResourceRequestSchema,
async (request) => {
const { uri } = request.params;
// Match repository pattern
const repoMatch = uri.match(/^tidymodels:\/\/repos\/([^\/]+)$/);
if (repoMatch) {
const repoName = repoMatch[1];
const repos = await this.getRepos();
const repo = repos.find(r => r.name === repoName);
if (!repo) {
throw new McpError(
ErrorCode.InvalidRequest,
`Repository not found: ${repoName}`
);
}
return {
contents: [
{
uri,
mimeType: 'application/json',
text: JSON.stringify(repo, null, 2),
},
],
};
}
// Match file pattern
const fileMatch = uri.match(/^tidymodels:\/\/files\/([^\/]+)\/(.+)$/);
if (fileMatch) {
const [, repoName, filePath] = fileMatch;
const content = await this.getFileContent(repoName, filePath);
return {
contents: [
{
uri,
mimeType: this.getMimeType(filePath),
text: content,
},
],
};
}
// Match documentation pattern
const docsMatch = uri.match(/^tidymodels:\/\/docs\/(.+)$/);
if (docsMatch) {
const docName = docsMatch[1];
// Get documentation content
let content = '';
if (docName === 'overview') {
content = `# Tidymodels Ecosystem Overview
Tidymodels is a collection of packages for modeling and machine learning using tidyverse principles.
## Core Packages
- **rsample**: For data splitting and resampling
- **parsnip**: Provides a unified interface to models
- **recipes**: For preprocessing and feature engineering
- **workflows**: Combining preprocessing, modeling, and postprocessing
- **tune**: For hyperparameter tuning
- **yardstick**: For measuring model performance
- **dials**: Tools for creating and managing tuning parameters
- **broom**: For converting model objects into tidy data frames
## Extended Ecosystem
- **tidyposterior**: Bayesian analysis of model performance
- **corrr**: Correlation analysis tools
- **applicable**: Checking model applicability for new data
- **spatialsample**: Spatial resampling methods
- **poissonreg**: For Poisson and negative binomial regression
- **discrim**: Models for discriminant analysis
- **embed**: For creating embeddings and learned features
For more details, visit [the tidymodels website](https://www.tidymodels.org/).`;
} else if (docName === 'getting-started') {
content = `# Getting Started with Tidymodels
## Installation
\`\`\`r
# Install the complete tidymodels package
install.packages("tidymodels")
# Or install individual packages
install.packages(c("parsnip", "recipes", "rsample", "workflows"))
\`\`\`
## Basic Workflow
\`\`\`r
library(tidymodels)
# Split data
set.seed(123)
data_split <- initial_split(mtcars, prop = 0.75)
train_data <- training(data_split)
test_data <- testing(data_split)
# Create recipe for preprocessing
car_recipe <- recipe(mpg ~ ., data = train_data) |>
step_normalize(all_predictors())
# Define model
lm_model <- linear_reg() |>
set_engine("lm")
# Create workflow
lm_workflow <- workflow() |>
add_recipe(car_recipe) |>
add_model(lm_model)
# Fit model
lm_fit <- fit(lm_workflow, data = train_data)
# Make predictions
predictions <- predict(lm_fit, test_data)
# Evaluate performance
metrics(bind_cols(test_data, predictions),
truth = mpg,
estimate = .pred)
\`\`\`
## Key Concepts
1. **Data Splitting** with rsample
2. **Preprocessing** with recipes
3. **Model Specification** with parsnip
4. **Workflow** to combine preprocessing and modeling
5. **Evaluation** with yardstick
For more examples and tutorials, visit [the tidymodels website](https://www.tidymodels.org/start/).`;
} else {
throw new McpError(
ErrorCode.InvalidRequest,
`Documentation not found: ${docName}`
);
}
return {
contents: [
{
uri,
mimeType: 'text/markdown',
text: content,
},
],
};
}
// Match template pattern
const templateMatch = uri.match(/^tidymodels:\/\/templates\/(.+)$/);
if (templateMatch) {
const templateName = templateMatch[1];
const template = await this.generateRCode('Example task', templateName);
return {
contents: [
{
uri,
mimeType: 'text/plain',
text: template,
},
],
};
}
throw new McpError(
ErrorCode.InvalidRequest,
`Invalid URI format: ${uri}`
);
}
);
}
private getMimeType(filePath: string): string {
const ext = filePath.split('.').pop()?.toLowerCase();
switch (ext) {
case 'r':
return 'text/r-script';
case 'rmd':
return 'text/markdown';
case 'md':
return 'text/markdown';
case 'json':
return 'application/json';
case 'yml':
case 'yaml':
return 'application/yaml';
case 'txt':
return 'text/plain';
default:
return 'text/plain';
}
}
private setupToolHandlers() {
this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [
{
name: 'list_tidymodels_packages',
description: 'List all packages in the tidymodels ecosystem',
inputSchema: {
type: 'object',
properties: {
refresh: {
type: 'boolean',
description: 'Force a refresh of the repository cache',
},
},
},
},
{
name: 'get_package_details',
description: 'Get detailed information about a specific tidymodels package',
inputSchema: {
type: 'object',
properties: {
package: {
type: 'string',
description: 'Package name',
},
},
required: ['package'],
},
},
{
name: 'search_r_functions',
description: 'Search for R functions in tidymodels packages',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Function name or keyword to search for',
},
package: {
type: 'string',
description: 'Limit search to a specific package (optional)',
},
},
required: ['query'],
},
},
{
name: 'generate_tidymodels_code',
description: 'Generate R code for common tidymodels tasks',
inputSchema: {
type: 'object',
properties: {
task: {
type: 'string',
description: 'Description of the task',
},
template: {
type: 'string',
description: 'Type of template (recipe, model, tune, evaluation)',
enum: ['recipe', 'model', 'tune', 'evaluation'],
},
},
required: ['task'],
},
},
{
name: 'search_issues',
description: 'Search for issues in tidymodels repositories',
inputSchema: {
type: 'object',
properties: {
query: {
type: 'string',
description: 'Search query',
},
repo: {
type: 'string',
description: 'Limit search to a specific repository (optional)',
},
state: {
type: 'string',
description: 'Issue state (open, closed, all)',
enum: ['open', 'closed', 'all'],
},
},
required: ['query'],
},
},
],
}));
this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args } = request.params;
switch (name) {
case 'list_tidymodels_packages': {
const { refresh = false } = args as { refresh?: boolean };
const repos = await this.getRepos(refresh);
// Filter to likely R packages
const rPackages = repos.filter(repo => {
// Simple heuristic - could be improved
const isRPackage =
repo.language === 'R' ||
repo.name.startsWith('r') ||
['parsnip', 'recipes', 'rsample', 'tune', 'dials', 'workflows', 'yardstick'].includes(repo.name);
return isRPackage;
});
const packageList = rPackages.map(repo => ({
name: repo.name,
description: repo.description,
stars: repo.stargazers_count,
forks: repo.forks_count,
url: repo.html_url,
updated_at: repo.updated_at,
}));
return {
content: [
{
type: 'text',
text: JSON.stringify(packageList, null, 2),
},
],
};
}
case 'get_package_details': {
const { package: packageName } = args as { package: string };
if (!packageName) {
throw new McpError(
ErrorCode.InvalidParams,
'Package name is required'
);
}
try {
// Get package details
const packageDetails = await this.getTidymodelsRReference(packageName);
if (packageDetails.length === 0) {
return {
content: [
{
type: 'text',
text: `Package "${packageName}" not found in tidymodels organization`,
},
],
isError: true,
};
}
return {
content: [
{
type: 'text',
text: JSON.stringify(packageDetails[0], null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: 'text',
text: `Error fetching package details: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
case 'search_r_functions': {
const { query, package: packageName } = args as { query: string, package?: string };
if (!query) {
throw new McpError(
ErrorCode.InvalidParams,
'Search query is required'
);
}
try {
const searchResults = await this.searchFunctionDocumentation(query, packageName);
if (searchResults.length === 0) {
return {
content: [
{
type: 'text',
text: `No functions found matching "${query}" ${packageName ? `in package "${packageName}"` : ''}`,
},
],
};
}
return {
content: [
{
type: 'text',
text: JSON.stringify(searchResults, null, 2),
},
],
};
} catch (error) {
return {
content: [
{
type: 'text',
text: `Error searching functions: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
case 'generate_tidymodels_code': {
const { task, template } = args as { task: string, template?: string };
if (!task) {
throw new McpError(
ErrorCode.InvalidParams,
'Task description is required'
);
}
try {
const code = await this.generateRCode(task, template);
return {
content: [
{
type: 'text',
text: code,
},
],
};
} catch (error) {
return {
content: [
{
type: 'text',
text: `Error generating code: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
}
case 'search_issues': {
const { query, repo, state = 'open' } = args as { query: string, repo?: string, state?: string };
if (!query) {
throw new McpError(
ErrorCode.InvalidParams,
'Search query is required'
);
}
try {
// Build the search query