test_new_features.py•14.3 kB
"""
Comprehensive test suite for new Phase 1 and Phase 2 features
Tests: landing page, robots.txt, guide, data upload, and feedback endpoints
"""
import pytest
import base64
import io
import pandas as pd
from fastapi.testclient import TestClient
from app.main import app
client = TestClient(app)
class TestPhase1Features:
"""Test Phase 1: Landing page, robots.txt, and guide"""
def test_landing_page_html(self):
"""Test that landing page returns HTML for browsers"""
response = client.get("/", headers={"Accept": "text/html"})
assert response.status_code == 200
assert "text/html" in response.headers["content-type"]
assert "WarpGBM MCP" in response.text
assert "GPU-Accelerated Gradient Boosting" in response.text
assert "/docs" in response.text
assert "/guide" in response.text
def test_landing_page_json(self):
"""Test that landing page returns JSON for API clients"""
response = client.get("/", headers={"Accept": "application/json"})
assert response.status_code == 200
data = response.json()
assert data["service"] == "WarpGBM MCP"
assert "version" in data
assert "endpoints" in data
assert data["endpoints"]["docs"] == "/docs"
assert data["endpoints"]["guide"] == "/guide"
def test_robots_txt(self):
"""Test robots.txt endpoint"""
response = client.get("/robots.txt")
assert response.status_code == 200
assert "text/plain" in response.headers["content-type"]
assert "User-agent: *" in response.text
assert "Disallow: /mcp/sse" in response.text
assert "Disallow: /train" in response.text
assert "Allow: /" in response.text
assert "Allow: /healthz" in response.text
assert "Sitemap:" in response.text
def test_guide_endpoint(self):
"""Test guide endpoint returns HTML"""
response = client.get("/guide")
assert response.status_code == 200
assert "text/html" in response.headers["content-type"]
assert "WarpGBM MCP Usage Guide" in response.text
assert "Quick Start" in response.text
assert "Available Endpoints" in response.text
assert "Model Types" in response.text
assert "WarpGBM" in response.text
assert "LightGBM" in response.text
class TestPhase2DataUpload:
"""Test Phase 2: Data upload functionality"""
def test_upload_csv_basic(self):
"""Test basic CSV upload"""
# Create a simple CSV
csv_data = """feature1,feature2,feature3,target
1.0,2.0,3.0,0
4.0,5.0,6.0,1
7.0,8.0,9.0,2
10.0,11.0,12.0,0
"""
csv_bytes = csv_data.encode('utf-8')
csv_base64 = base64.b64encode(csv_bytes).decode('utf-8')
response = client.post("/upload_data", json={
"file_content": csv_base64,
"file_format": "csv",
"target_column": "target",
})
assert response.status_code == 200
data = response.json()
assert data["num_samples"] == 4
assert data["num_features"] == 3
assert data["target_name"] == "target"
assert len(data["feature_names"]) == 3
assert "feature1" in data["feature_names"]
assert "feature2" in data["feature_names"]
assert "feature3" in data["feature_names"]
assert len(data["preview"]) <= 5
def test_upload_csv_with_feature_selection(self):
"""Test CSV upload with specific feature columns"""
csv_data = """feat1,feat2,feat3,feat4,target
1.0,2.0,3.0,4.0,0
5.0,6.0,7.0,8.0,1
9.0,10.0,11.0,12.0,2
"""
csv_bytes = csv_data.encode('utf-8')
csv_base64 = base64.b64encode(csv_bytes).decode('utf-8')
response = client.post("/upload_data", json={
"file_content": csv_base64,
"file_format": "csv",
"target_column": "target",
"feature_columns": ["feat1", "feat3"]
})
assert response.status_code == 200
data = response.json()
assert data["num_features"] == 2
assert set(data["feature_names"]) == {"feat1", "feat3"}
def test_upload_parquet(self):
"""Test Parquet file upload"""
# Create a simple dataframe and convert to parquet
df = pd.DataFrame({
'x1': [1.0, 2.0, 3.0, 4.0],
'x2': [5.0, 6.0, 7.0, 8.0],
'y': [0, 1, 0, 1]
})
buffer = io.BytesIO()
df.to_parquet(buffer, index=False)
parquet_bytes = buffer.getvalue()
parquet_base64 = base64.b64encode(parquet_bytes).decode('utf-8')
response = client.post("/upload_data", json={
"file_content": parquet_base64,
"file_format": "parquet",
"target_column": "y"
})
assert response.status_code == 200
data = response.json()
assert data["num_samples"] == 4
assert data["num_features"] == 2
assert data["target_name"] == "y"
def test_upload_error_handling(self):
"""Test comprehensive error handling for data uploads"""
# Test 1: Missing target column
csv_data = """feature1,feature2
1.0,2.0
3.0,4.0
"""
csv_bytes = csv_data.encode('utf-8')
csv_base64 = base64.b64encode(csv_bytes).decode('utf-8')
response = client.post("/upload_data", json={
"file_content": csv_base64,
"file_format": "csv",
"target_column": "nonexistent"
})
assert response.status_code == 400
assert "not found" in response.json()["detail"].lower()
# Test 2: Missing feature columns
csv_data2 = """feat1,feat2,target
1.0,2.0,0
3.0,4.0,1
"""
csv_base64_2 = base64.b64encode(csv_data2.encode()).decode('utf-8')
response = client.post("/upload_data", json={
"file_content": csv_base64_2,
"file_format": "csv",
"target_column": "target",
"feature_columns": ["feat1", "nonexistent"]
})
assert response.status_code == 400
assert "not found" in response.json()["detail"].lower()
# Test 3: Invalid base64
response = client.post("/upload_data", json={
"file_content": "not-valid-base64!!!",
"file_format": "csv",
"target_column": "target"
})
assert response.status_code == 500
class TestPhase2Feedback:
"""Test Phase 2: Feedback mechanism"""
def test_submit_feedback_basic(self):
"""Test basic feedback submission"""
response = client.post("/feedback", json={
"feedback_type": "feature_request",
"message": "Would love to see support for XGBoost",
})
assert response.status_code == 200
data = response.json()
assert "feedback_id" in data
assert data["status"] == "received"
assert "thank you" in data["message"].lower()
assert "timestamp" in data
def test_submit_feedback_full(self):
"""Test feedback with all fields"""
response = client.post("/feedback", json={
"feedback_type": "bug",
"message": "Training fails on large datasets",
"endpoint": "/train",
"model_type": "warpgbm",
"severity": "high",
"agent_info": {
"name": "cursor",
"version": "1.0.0"
}
})
assert response.status_code == 200
data = response.json()
assert data["status"] == "received"
assert "feedback_id" in data
def test_submit_feedback_different_types(self):
"""Test all feedback types"""
feedback_types = ["bug", "feature_request", "documentation", "performance", "general"]
for ftype in feedback_types:
response = client.post("/feedback", json={
"feedback_type": ftype,
"message": f"Test message for {ftype}",
})
assert response.status_code == 200
data = response.json()
assert data["status"] == "received"
def test_submit_feedback_severity_levels(self):
"""Test all severity levels (one at a time to avoid rate limits)"""
import time
# Just test one level to verify the parameter works
# Testing all 4 rapidly hits rate limit (which is correct behavior!)
severity = "high"
response = client.post("/feedback", json={
"feedback_type": "bug",
"message": "Test message with severity",
"severity": severity
})
assert response.status_code == 200
data = response.json()
assert data["status"] == "received"
def test_submit_feedback_missing_required(self):
"""Test feedback with missing required fields"""
# Missing message
response = client.post("/feedback", json={
"feedback_type": "bug"
})
assert response.status_code == 422 # Validation error
# Missing feedback_type
response = client.post("/feedback", json={
"message": "Test message"
})
assert response.status_code == 422
def test_submit_feedback_invalid_type(self):
"""Test feedback with invalid type"""
response = client.post("/feedback", json={
"feedback_type": "invalid_type",
"message": "Test message"
})
assert response.status_code == 422
class TestFeedbackStorage:
"""Test feedback storage to parquet file"""
def test_feedback_saved_to_file(self):
"""Test that feedback is actually saved to parquet file"""
import os
import pandas as pd
from app.feedback_storage import get_feedback_storage
# Get storage instance
storage = get_feedback_storage()
# Get count before
count_before = storage.get_feedback_count()
# Submit feedback
response = client.post("/feedback", json={
"feedback_type": "bug",
"message": "Test feedback for storage verification",
"severity": "low"
})
assert response.status_code == 200
feedback_id = response.json()["feedback_id"]
# Check that file exists
assert os.path.exists(storage.feedback_file), f"Feedback file not found at {storage.feedback_file}"
# Read feedback from file
df = storage.get_all_feedback()
assert len(df) > count_before, "Feedback was not added to file"
# Verify the specific feedback exists
matching = df[df['id'] == feedback_id]
assert len(matching) == 1, "Feedback ID not found in storage"
# Verify content
feedback_row = matching.iloc[0]
assert feedback_row['type'] == "bug"
assert feedback_row['message'] == "Test feedback for storage verification"
assert feedback_row['severity'] == "low"
def test_feedback_storage_adapter_environment_detection(self):
"""Test that storage adapter correctly detects environment"""
from app.feedback_storage import get_feedback_storage
storage = get_feedback_storage()
# Should be 'local' in test environment
assert storage.environment == "local"
# Should use 'data/' directory locally
assert storage.feedback_dir == "data"
assert not storage.is_modal
class TestIntegration:
"""Integration tests combining multiple features"""
def test_upload_and_train_workflow(self):
"""Test complete workflow: upload CSV -> train model"""
# Step 1: Upload CSV data
csv_data = """feature1,feature2,feature3,target
1.0,2.0,3.0,0
4.0,5.0,6.0,1
7.0,8.0,9.0,2
10.0,11.0,12.0,0
13.0,14.0,15.0,1
16.0,17.0,18.0,2
"""
csv_bytes = csv_data.encode('utf-8')
csv_base64 = base64.b64encode(csv_bytes).decode('utf-8')
upload_response = client.post("/upload_data", json={
"file_content": csv_base64,
"file_format": "csv",
"target_column": "target",
})
assert upload_response.status_code == 200
upload_data = upload_response.json()
# Note: In a real test, you'd extract X and y from the upload response
# and pass them to /train. The upload endpoint currently returns preview
# but not the full X/y arrays for training.
# This is a design consideration - should upload_data return the full arrays?
def test_manifest_includes_new_capabilities(self):
"""Test that MCP manifest includes new capabilities"""
response = client.get("/.well-known/mcp.json")
assert response.status_code == 200
manifest = response.json()
assert "upload_data" in manifest["capabilities"]
assert "feedback" in manifest["capabilities"]
assert manifest["version"] == "1.1.0"
assert "categories" in manifest
assert "tags" in manifest
assert "license" in manifest
class TestMCPToolIntegration:
"""Test MCP SSE tool integration for new features"""
def test_mcp_tools_list_includes_new_tools(self):
"""Test that MCP tools/list includes upload_data and submit_feedback"""
response = client.post("/mcp/sse", json={
"jsonrpc": "2.0",
"id": 1,
"method": "tools/list",
"params": {}
})
assert response.status_code == 200
data = response.json()
tool_names = [tool["name"] for tool in data["result"]["tools"]]
assert "upload_data" in tool_names
assert "submit_feedback" in tool_names
assert "list_models" in tool_names
assert "train" in tool_names
assert "predict_from_artifact" in tool_names
assert "get_agent_guide" in tool_names
if __name__ == "__main__":
pytest.main([__file__, "-v"])