Skip to main content
Glama
test_integration.py12.5 kB
#!/usr/bin/env python """ Test script for CSV MCP Server This script tests the core functionality of the CSV MCP Server without requiring an MCP client. """ import asyncio import sys import os import pandas as pd import numpy as np from pathlib import Path # Add src to path sys.path.insert(0, str(Path(__file__).parent)) from src.csv_editor.models.csv_session import SessionManager, get_session_manager from src.csv_editor.tools.io_operations import ( load_csv_from_content, export_csv, get_session_info, list_sessions ) from src.csv_editor.tools.transformations import ( filter_rows, sort_data, select_columns, rename_columns, add_column, remove_columns, change_column_type, fill_missing_values, remove_duplicates ) from src.csv_editor.tools.analytics import ( get_statistics, get_column_statistics, get_correlation_matrix, group_by_aggregate, get_value_counts, detect_outliers, profile_data ) from src.csv_editor.tools.validation import ( validate_schema, check_data_quality, find_anomalies ) # Test data TEST_CSV_CONTENT = """name,age,salary,department,hire_date Alice,28,55000,Engineering,2021-01-15 Bob,35,65000,Engineering,2019-06-01 Charlie,42,75000,Management,2018-03-20 Diana,31,58000,Marketing,2020-08-10 Eve,29,52000,Sales,2021-03-25 Frank,45,85000,Management,2017-11-30 Grace,26,48000,Marketing,2022-02-14 Henry,38,70000,Engineering,2019-09-15 Iris,33,62000,Sales,2020-05-20 Jack,41,78000,Management,2018-07-12 Kate,27,,Marketing,2021-11-01 Leo,36,68000,Engineering,2019-04-18 Mia,30,56000,Sales,2020-12-05 Nathan,44,82000,Management,2017-09-08 Olivia,25,45000,Marketing,2022-06-30 Peter,39,72000,Engineering,2018-10-22 Quinn,32,60000,Sales,2020-03-15 Rachel,28,54000,Marketing,2021-07-20 Sam,37,69000,Engineering,2019-02-28 Tina,34,64000,Sales,2020-01-10 """ class Colors: """ANSI color codes for terminal output""" HEADER = '\033[95m' BLUE = '\033[94m' CYAN = '\033[96m' GREEN = '\033[92m' WARNING = '\033[93m' FAIL = '\033[91m' ENDC = '\033[0m' BOLD = '\033[1m' def print_test(name: str): """Print test header""" print(f"\n{Colors.HEADER}{Colors.BOLD}Testing: {name}{Colors.ENDC}") def print_success(msg: str): """Print success message""" print(f"{Colors.GREEN}✓ {msg}{Colors.ENDC}") def print_error(msg: str): """Print error message""" print(f"{Colors.FAIL}✗ {msg}{Colors.ENDC}") def print_info(msg: str): """Print info message""" print(f"{Colors.CYAN}ℹ {msg}{Colors.ENDC}") def print_data(data: any, indent: int = 2): """Print data with indentation""" indent_str = " " * indent if isinstance(data, dict): for key, value in data.items(): if isinstance(value, (dict, list)) and len(str(value)) > 50: print(f"{indent_str}{Colors.BLUE}{key}:{Colors.ENDC} <{type(value).__name__} with {len(value)} items>") else: print(f"{indent_str}{Colors.BLUE}{key}:{Colors.ENDC} {value}") elif isinstance(data, pd.DataFrame): print(f"{indent_str}DataFrame shape: {data.shape}") print(data.head().to_string().replace('\n', f'\n{indent_str}')) else: print(f"{indent_str}{data}") async def test_io_operations(): """Test I/O operations""" print_test("I/O Operations") # Load CSV from content result = await load_csv_from_content( content=TEST_CSV_CONTENT, delimiter="," ) if result["success"]: session_id = result["session_id"] print_success(f"Loaded CSV with session ID: {session_id}") print_info(f"Rows: {result['rows_affected']}, Columns: {len(result['columns_affected'])}") print_info(f"Column names: {', '.join(result['columns_affected'])}") else: print_error("Failed to load CSV") return None # Get session info info = await get_session_info(session_id=session_id) if info["success"]: print_success("Retrieved session info") print_data(info.get("data", info)) # List sessions sessions = await list_sessions() if sessions["success"]: print_success(f"Listed {len(sessions.get('sessions', []))} active session(s)") return session_id async def test_transformations(session_id: str): """Test data transformation operations""" print_test("Data Transformations") # Filter rows result = await filter_rows( session_id=session_id, conditions=[ {"column": "salary", "operator": ">", "value": 60000}, {"column": "department", "operator": "in", "value": ["Engineering", "Management"]} ], mode="and" ) if result["success"]: print_success(f"Filtered rows: {result['rows_before']} → {result['rows_after']}") # Sort data result = await sort_data( session_id=session_id, columns=[ {"column": "department", "ascending": True}, {"column": "salary", "ascending": False} ] ) if result["success"]: print_success("Sorted data by department and salary") # Select columns result = await select_columns( session_id=session_id, columns=["name", "department", "salary"] ) if result["success"]: print_success(f"Selected columns: {', '.join(result['selected_columns'])}") # Add calculated column result = await add_column( session_id=session_id, name="salary_level", value=None, formula="lambda row: 'High' if row['salary'] > 65000 else 'Medium' if row['salary'] > 55000 else 'Low' if pd.notna(row['salary']) else 'Unknown'" ) if result["success"]: print_success(f"Added column 'salary_level'") # Fill missing values result = await fill_missing_values( session_id=session_id, strategy="mean", columns=["salary"] ) if result["success"]: print_success(f"Filled {result['values_filled']} missing value(s)") async def test_analytics(session_id: str): """Test analytics operations""" print_test("Analytics") # Get statistics result = await get_statistics( session_id=session_id, columns=["salary"] ) if result["success"]: print_success("Got statistics for salary column") print_data(result["statistics"]) # Get correlation matrix result = await get_correlation_matrix( session_id=session_id, method="pearson", min_correlation=0.3 ) if result["success"]: print_success("Got correlation matrix") if result["significant_correlations"]: print_info("Significant correlations found:") for corr in result["significant_correlations"]: print(f" {corr['column1']} ↔ {corr['column2']}: {corr['correlation']:.3f}") # Group by and aggregate result = await group_by_aggregate( session_id=session_id, group_by=["department"], aggregations={ "salary": ["mean", "min", "max", "count"] } ) if result["success"]: print_success("Grouped by department with aggregations") print_info("Department salary statistics:") manager = get_session_manager() session = manager.get_session(session_id) if session and session.df is not None: print(session.df.head(10).to_string()) # Detect outliers result = await detect_outliers( session_id=session_id, columns=["salary"], method="iqr", threshold=1.5 ) if result["success"]: print_success(f"Detected {result['total_outliers']} outlier(s)") if result["outliers"]: print_info("Outlier details:") for col, details in result["outliers"].items(): print(f" {col}: {details['count']} outliers") # Profile data result = await profile_data( session_id=session_id, include_correlations=True, include_outliers=True ) if result["success"]: print_success("Generated data profile") print_info(f"Profile summary: {result['profile']['summary']['total_rows']} rows, " f"{result['profile']['summary']['total_columns']} columns") async def test_validation(session_id: str): """Test validation operations""" print_test("Data Validation") # Validate schema schema = { "name": {"type": "string", "required": True}, "salary": {"type": "numeric", "min": 0, "max": 200000}, "department": {"type": "string", "allowed_values": ["Engineering", "Management", "Marketing", "Sales"]} } result = await validate_schema( session_id=session_id, schema=schema ) if result["success"]: if result["valid"]: print_success("Data validates against schema") else: print_info(f"Schema validation found {len(result['errors'])} error(s)") # Check data quality result = await check_data_quality( session_id=session_id ) if result["success"]: quality_score = result["quality_results"]["overall_score"] print_success(f"Data quality score: {quality_score:.1f}%") print_info("Quality metrics:") for metric, score in result["quality_results"]["metrics"].items(): status = "✓" if score == 100 else "⚠" if score >= 80 else "✗" print(f" {status} {metric}: {score:.1f}%") # Find anomalies result = await find_anomalies( session_id=session_id, columns=["salary"] ) if result["success"]: total_anomalies = result["summary"]["total_anomalies"] print_success(f"Found {total_anomalies} anomaly(ies)") if total_anomalies > 0: print_info("Anomaly types:") for atype, count in result["summary"]["by_type"].items(): print(f" {atype}: {count}") async def test_export(session_id: str): """Test export operations""" print_test("Export Operations") # Create test output directory output_dir = Path("test_output") output_dir.mkdir(exist_ok=True) # Export to different formats formats = ["csv", "json", "html", "markdown"] for fmt in formats: output_file = output_dir / f"test_export.{fmt if fmt != 'markdown' else 'md'}" result = await export_csv( session_id=session_id, file_path=str(output_file), format=fmt ) if result["success"]: print_success(f"Exported to {fmt.upper()}: {output_file}") else: print_error(f"Failed to export to {fmt.upper()}") async def main(): """Main test function""" print(f"\n{Colors.HEADER}{Colors.BOLD}═══════════════════════════════════════════{Colors.ENDC}") print(f"{Colors.HEADER}{Colors.BOLD} CSV MCP Server Test Suite{Colors.ENDC}") print(f"{Colors.HEADER}{Colors.BOLD}═══════════════════════════════════════════{Colors.ENDC}") try: # Test I/O operations session_id = await test_io_operations() if not session_id: print_error("Failed to create session, aborting tests") return # Test transformations await test_transformations(session_id) # Test analytics await test_analytics(session_id) # Test validation await test_validation(session_id) # Test export await test_export(session_id) print(f"\n{Colors.GREEN}{Colors.BOLD}═══════════════════════════════════════════{Colors.ENDC}") print(f"{Colors.GREEN}{Colors.BOLD} All tests completed successfully!{Colors.ENDC}") print(f"{Colors.GREEN}{Colors.BOLD}═══════════════════════════════════════════{Colors.ENDC}\n") except Exception as e: print_error(f"Test failed with error: {e}") import traceback traceback.print_exc() if __name__ == "__main__": asyncio.run(main())

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/santoshray02/csv-editor'

If you have feedback or need assistance with the MCP directory API, please join our Discord server