Claude MCP Data Explorer
by tofunori
Verified
import fs from 'fs';
import * as path from 'path';
import Papa from 'papaparse';
import * as ss from 'simple-statistics';
// Global storage for loaded DataFrames: maps a DataFrame name to the array of
// rows parsed from its CSV file. loadCsv writes entries here; getDataFrame and
// getAllDataFrames read them.
const dataFrames: Record<string, any[]> = {};
// Next numeric suffix loadCsv uses when it has to generate a default name of
// the form `df_<n>`; it is incremented each time such a name is generated.
let dfCounter = 1;
// Interface for loadCsv arguments
interface LoadCsvArgs {
// Path of the CSV file to load. loadCsv rejects an empty value and passes the
// path through path.normalize before touching the file system.
csv_path: string;
// Optional name to store the parsed rows under. When omitted (or empty),
// loadCsv generates a name of the form `df_<n>`.
df_name?: string;
}
/**
 * Loads a CSV file into memory and returns a summary of its contents.
 *
 * The file is read synchronously as UTF-8 and parsed with Papa Parse (first
 * row is the header, dynamic typing on, empty lines skipped). On success the
 * rows are stored in the module-level `dataFrames` registry.
 *
 * @param args - `csv_path` is the file to load. `df_name` is the registry key
 *   to store the rows under; when it is omitted or empty an unused `df_<n>`
 *   name is generated. An explicit `df_name` overwrites any existing entry.
 * @returns A single-element array holding a text message: the confirmation
 *   plus data summary on success, or an `Error...` description on failure.
 *   Failures are reported through the message; the function does not throw.
 */
export async function loadCsv(args: LoadCsvArgs): Promise<{ type: string, text: string }[]> {
  const { csv_path, df_name } = args;
  if (!csv_path) {
    return [{ type: 'text', text: 'Error: csv_path is required' }];
  }

  // Normalize path for Windows
  const normalizedPath = path.normalize(csv_path);

  try {
    if (!fs.existsSync(normalizedPath)) {
      return [{ type: 'text', text: `Error: File not found: ${normalizedPath}` }];
    }

    const fileContent = fs.readFileSync(normalizedPath, 'utf8');
    const parseResult = Papa.parse(fileContent, {
      header: true,
      dynamicTyping: true,
      skipEmptyLines: true
    });

    // Papa Parse raises `UndetectableDelimiter` when it cannot guess the
    // delimiter (typically a single-column or empty file) and falls back to
    // ','. The parsed data is still valid, so that notice must not abort the
    // load; every other reported error still does.
    const [firstError] = (parseResult.errors ?? []).filter(
      (parseError) => parseError.code !== 'UndetectableDelimiter'
    );
    if (firstError) {
      return [{ type: 'text', text: `Error parsing CSV: ${firstError.message}` }];
    }

    const data = parseResult.data as any[];

    // Pick the name only once the load has succeeded, so failed loads do not
    // consume counter values, and skip generated names that are already taken
    // so an auto-named load never overwrites an existing DataFrame.
    let dataFrameName = df_name;
    if (!dataFrameName) {
      do {
        dataFrameName = `df_${dfCounter++}`;
      } while (Object.prototype.hasOwnProperty.call(dataFrames, dataFrameName));
    }

    // Store in global storage
    dataFrames[dataFrameName] = data;

    const fields = parseResult.meta.fields || [];
    const summary = generateSummary(data, fields);
    return [{
      type: 'text',
      text: `Successfully loaded ${normalizedPath} as ${dataFrameName} (${data.length} rows × ${fields.length} columns)\n\n${summary}`
    }];
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances carry a `.message`.
    const message = error instanceof Error ? error.message : String(error);
    return [{ type: 'text', text: `Error loading CSV: ${message}` }];
  }
}
/**
 * Generates a Markdown summary of the data, one bullet per column.
 *
 * @param data - Parsed rows; each row is indexed by field name.
 * @param fields - Column names to describe, in output order.
 * @returns The summary text, or a fixed notice when there are no rows or no
 *   columns.
 */
function generateSummary(data: any[], fields: string[]): string {
  if (data.length === 0 || fields.length === 0) {
    return 'No data or columns found in the file.';
  }
  const summary = ['### Data Summary', '\n#### Columns:'];
  for (const field of fields) {
    summary.push(summarizeColumn(data, field));
  }
  return summary.join('\n');
}

/** Tallies how often each distinct item occurs, keeping first-seen order. */
function countOccurrences<T>(items: readonly T[]): Map<T, number> {
  // A Map (not a plain object) so that values like "constructor" or
  // "__proto__" are counted as ordinary keys.
  const counts = new Map<T, number>();
  for (const item of items) {
    counts.set(item, (counts.get(item) ?? 0) + 1);
  }
  return counts;
}

/** Renders the `limit` most frequent entries as `label (count)`, joined by commas. */
function formatTopCounts(counts: Map<string, number>, limit: number): string {
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, limit)
    .map(([label, count]) => `${label} (${count})`)
    .join(', ');
}

/** Builds the Markdown bullet describing a single column of `data`. */
function summarizeColumn(data: any[], field: string): string {
  const values: unknown[] = data
    .map((row) => row?.[field])
    .filter((val) => val !== null && val !== undefined);
  const numValues = values.length;
  const numMissing = data.length - numValues;
  const heading = `- **${field}**:`;

  if (numValues === 0) {
    return `${heading} (All values missing)`;
  }
  const tally = `(${numValues} values, ${numMissing} missing)`;

  // The column type is decided from ALL present values, not just the first:
  // a column mixing numbers with strings such as "N/A" must not be fed to the
  // numeric statistics.
  if (values.every((v): v is number => typeof v === 'number')) {
    const min = ss.min(values);
    const max = ss.max(values);
    const mean = ss.mean(values);
    const median = ss.median(values);
    return `${heading} numeric ${tally}` +
      `\n - Range: ${min} to ${max}` +
      `\n - Mean: ${mean.toFixed(2)}, Median: ${median.toFixed(2)}`;
  }

  if (values.every((v): v is string => typeof v === 'string')) {
    const valueCounts = countOccurrences(values);
    const uniqueValues = valueCounts.size;
    let info = `${heading} string ${tally}\n - Unique values: ${uniqueValues}`;
    // Show top values if there aren't too many
    if (uniqueValues <= 10) {
      info += `\n - Top values: ${formatTopCounts(valueCounts, 5)}`;
    }
    return info;
  }

  if (values.every((v): v is boolean => typeof v === 'boolean')) {
    const trueCount = values.filter((v) => v).length;
    const falseCount = numValues - trueCount;
    return `${heading} boolean ${tally}\n - True: ${trueCount}, False: ${falseCount}`;
  }

  const typeCounts = countOccurrences<string>(values.map((v) => typeof v));
  if (typeCounts.size > 1) {
    // Heterogeneous column: report the type breakdown instead of statistics
    // that would be meaningless.
    return `${heading} mixed ${tally}\n - Types: ${formatTopCounts(typeCounts, typeCounts.size)}`;
  }

  // Single non-primitive type: counts only.
  return `${heading} ${tally}`;
}
// Export functions to access the data
/**
 * Looks up a loaded DataFrame by name.
 *
 * @param name - The name the DataFrame was stored under.
 * @returns The stored rows, or `undefined` when no DataFrame has that name.
 */
export function getDataFrame(name: string): any[] | undefined {
  // Own-property check: the registry is a plain object, so a bare index would
  // also answer for inherited keys such as "constructor" or "toString" and
  // hand back a function instead of `undefined`.
  return Object.prototype.hasOwnProperty.call(dataFrames, name)
    ? dataFrames[name]
    : undefined;
}
/**
 * Returns the registry of all loaded DataFrames, keyed by name.
 *
 * Note: the returned object is the module's own `dataFrames` registry, not a
 * copy, so entries added or removed by the caller affect later lookups.
 */
export function getAllDataFrames(): Record<string, any[]> {
return dataFrames;
}