Skip to main content
Glama
withNebius.py — 8.63 kB
import gradio as gr
import pandas as pd
import io
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from lazypredict.Supervised import LazyClassifier, LazyRegressor
from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
import tempfile
import requests
from openai import OpenAI  # Nebius AI Studio exposes an OpenAI-compatible endpoint


def load_data(file_input):
    """Load CSV data from either a local file upload or a public URL.

    :param file_input: a Gradio file object (anything with a ``.name`` path)
        or an http(s) URL string.
    :return: a pandas DataFrame on success, ``None`` on any failure
        (a ``gr.Warning`` is emitted for parse/download errors).
    """
    if file_input is None:
        return None
    try:
        # Gradio's gr.File yields a tempfile-like object exposing .name.
        if hasattr(file_input, 'name'):
            with open(file_input.name, 'rb') as f:
                df = pd.read_csv(io.BytesIO(f.read()))
        # Plain string input is treated as a public URL.
        elif isinstance(file_input, str) and file_input.startswith('http'):
            # timeout keeps a dead host from hanging the whole pipeline
            response = requests.get(file_input, timeout=30)
            response.raise_for_status()
            df = pd.read_csv(io.StringIO(response.text))
        else:
            return None
        return df
    except Exception as e:
        gr.Warning(f"Failed to load or parse data: {e}")
        return None


def analyze_and_model(df, target_column):
    """Run EDA, AutoML model comparison, and visualization.

    :param df: input DataFrame.
    :param target_column: name of the column to predict (must exist in df).
    :return: tuple of (EDA html path, task name, leaderboard DataFrame,
        bar-plot png path, pickled-best-model path).
    """
    # --- EDA report (minimal mode keeps it fast on large frames) ---
    profile = ProfileReport(df, title="EDA Report", minimal=True)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".html") as temp_html:
        profile.to_file(temp_html.name)
        profile_path = temp_html.name

    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Heuristic: few distinct target values => classification, else regression.
    task = "classification" if y.nunique() <= 10 else "regression"

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = (
        LazyClassifier(ignore_warnings=True, verbose=0)
        if task == "classification"
        else LazyRegressor(ignore_warnings=True, verbose=0)
    )
    models, _ = model.fit(X_train, X_test, y_train, y_test)

    # Pick the best model by the task-appropriate metric and persist it.
    sort_metric = "Accuracy" if task == "classification" else "R-Squared"
    best_model_name = models.sort_values(by=sort_metric, ascending=False).index[0]
    best_model = model.models[best_model_name]
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pkl") as temp_pkl:
        pickle.dump(best_model, temp_pkl)
        pickle_path = temp_pkl.name

    # --- Top-10 leaderboard bar chart ---
    plt.figure(figsize=(10, 6))
    plot_column = "Accuracy" if task == "classification" else "R-Squared"
    sns.barplot(x=models[plot_column].head(10), y=models.head(10).index)
    plt.title(f"Top 10 Models by {plot_column}")
    plt.tight_layout()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_png:
        plt.savefig(temp_png.name)
        plot_path = temp_png.name
    plt.close()

    # Expose the model names as a regular column for display in gr.Dataframe.
    models_reset = models.reset_index().rename(columns={'index': 'Model'})
    return profile_path, task, models_reset, plot_path, pickle_path


def run_pipeline(data_source, target_column, nebius_api_key):
    """Drive the entire application; exposed as the primary MCP tool.

    :param data_source: a local file path (from gr.File) or a URL (from gr.Textbox).
    :param target_column: the name of the target column for prediction.
    :param nebius_api_key: the API key for Nebius AI Studio (optional).
    :return: (eda_path, task, models_df, plot_path, pickle_path, llm_explanation);
        on error the failing slots are ``None`` and a message fills the task slot.
    """
    # --- 1. Input Validation ---
    if not data_source or not target_column:
        error_msg = "Error: Data source and target column must be provided."
        gr.Warning(error_msg)
        return None, error_msg, None, None, None, "Please provide all inputs."

    gr.Info("Starting analysis...")

    # --- 2. Data Loading ---
    df = load_data(data_source)
    if df is None:
        return None, "Error: Could not load data.", None, None, None, None
    if target_column not in df.columns:
        error_msg = (
            f"Error: Target column '{target_column}' not found in the dataset. "
            f"Available: {list(df.columns)}"
        )
        gr.Warning(error_msg)
        return None, error_msg, None, None, None, None

    # --- 3. Analysis and Modeling ---
    profile_path, task, models_df, plot_path, pickle_path = analyze_and_model(
        df, target_column
    )

    # --- 4. Explanation with Nebius AI Studio LLM ---
    best_model_name = models_df.iloc[0]['Model']
    llm_explanation = "AI explanation is unavailable. Please provide a Nebius AI Studio API key to enable this feature."
    if nebius_api_key:
        try:
            # Nebius AI Studio is OpenAI-compatible; only base_url differs.
            client = OpenAI(
                base_url="https://api.studio.nebius.com/v1/",
                api_key=nebius_api_key
            )
            prompt_text = (
                f"Explain the significance of the top performing model, "
                f"'{best_model_name}', for a {task} task in a data analysis "
                f"context. Keep the explanation concise and professional. "
                f"Analyse the report at profile_path: {profile_path}."
            )
            response = client.chat.completions.create(
                model="Qwen/Qwen3-4B-fast",
                messages=[
                    {"role": "system", "content": "You are a helpful AI assistant that explains data science concepts."},
                    {"role": "user", "content": prompt_text}
                ],
                temperature=0.7,
                max_tokens=500,
            )
            # BUGFIX: the original called response.to_json() (a JSON *string*)
            # and then indexed it like a dict, which raises TypeError. The SDK
            # returns a typed object — read the message content directly.
            llm_explanation = response.choices[0].message.content
        except Exception as e:
            gr.Warning(f"Failed to get AI explanation: {e}. Please check your API key or try again later.")
            llm_explanation = "An error occurred while fetching AI explanation. Please check your API key or try again later."

    gr.Info("Analysis complete!")
    return profile_path, task, models_df, plot_path, pickle_path, llm_explanation


# --- Gradio UI ---
with gr.Blocks(title="AutoML Trainer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🤖 AutoML Trainer")
    gr.Markdown(
        "Enter a CSV data source (local file or public URL) and a target "
        "column to run the analysis. This interface is now friendly for both "
        "humans and AI agents."
    )

    with gr.Row():
        with gr.Column(scale=1):
            # gr.File allows uploads and is also agent-compatible.
            file_input = gr.File(label="Upload Local CSV File")
            url_input = gr.Textbox(label="Or Enter Public CSV URL", placeholder="e.g., https://.../data.csv")
            target_column_input = gr.Textbox(label="Enter Target Column Name", placeholder="e.g., approved")
            nebius_api_key_input = gr.Textbox(
                label="Nebius AI Studio API Key (Optional)",
                type="password",
                placeholder="Enter your API key for AI explanations"
            )
            run_button = gr.Button("Run Analysis & AutoML", variant="primary")
        with gr.Column(scale=2):
            task_output = gr.Textbox(label="Detected Task", interactive=False)
            llm_output = gr.Textbox(label="AI Explanation", lines=3, interactive=False)
            metrics_output = gr.Dataframe(label="Model Performance Metrics")

    with gr.Row():
        vis_output = gr.Image(label="Top Models Comparison")
        with gr.Column():
            eda_output = gr.File(label="Download Full EDA Report")
            model_output = gr.File(label="Download Best Model (.pkl)")

    # Single click event powering the whole app: prefer the uploaded file,
    # fall back to the URL box.
    def process_inputs(file_data, url_data, target, api_key):
        data_source = file_data if file_data is not None else url_data
        return run_pipeline(data_source, target, api_key)

    run_button.click(
        fn=process_inputs,
        inputs=[file_input, url_input, target_column_input, nebius_api_key_input],
        outputs=[eda_output, task_output, metrics_output, vis_output, model_output, llm_output]
    )

# Guard launch so importing this module (e.g. for testing) has no side effects.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True,
        inbrowser=True,
        mcp_server=True
    )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/daniel-was-taken/MCP_Project'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.