// ABOUTME: Integration tests for local LLM with Pierre fitness tools
// ABOUTME: Validates function calling and latency with Ollama/vLLM backends
//
// SPDX-License-Identifier: MIT OR Apache-2.0
// Copyright (c) 2025 Pierre Fitness Intelligence
#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
#![allow(missing_docs)]
//! # Local LLM Integration Tests
//!
//! These tests require a running local LLM server (Ollama recommended).
//!
//! ## Why Tests Are `#[ignore]`
//!
//! All tests in this file are marked `#[ignore]` because they require external
//! infrastructure that CI environments cannot provide:
//!
//! - **Ollama/vLLM server**: Must be running locally with models downloaded
//! - **GPU/CPU resources**: Large language models require significant compute
//! - **Model availability**: Specific models must be pulled (9GB+ downloads)
//!
//! These are **development validation tests**, not CI tests. They verify that
//! the local LLM integration behaves correctly during manual local testing.
//!
//! ## Latency Test Thresholds
//!
//! The latency tests use thresholds calibrated for cloud APIs (Groq/Gemini).
//! Local inference with a 14B parameter model will typically exceed these:
//!
//! | Test | Threshold | Typical Local Time |
//! |------|-----------|-------------------|
//! | Basic completion | 5s | 8-15s |
//! | First token (streaming) | 3s | 8-10s |
//! | Tool calling | 10s | 25-35s |
//!
//! **Latency test failures are expected** when running locally and do not
//! indicate broken functionality. The functional tests (tool matching) are
//! the important validation.
//!
//! ## Setup
//!
//! 1. Install Ollama: `brew install ollama` (macOS) or <https://ollama.ai/download>
//! 2. Start server: `ollama serve`
//! 3. Pull model: `ollama pull qwen2.5:14b-instruct`
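//! 4. Verify the model is available: `ollama list`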
//!
//! ## Running
//!
//! ```bash
//! # Run all local LLM tests (requires server)
//! cargo test --test llm_local_integration_test -- --ignored --nocapture
//!
//! # Run specific test
//! cargo test --test llm_local_integration_test test_pierre_fitness_tools_with_local_llm -- --ignored --nocapture
//! ```
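//!
//! ## Example Request Flow
//!
//! A minimal sketch of the call pattern these tests exercise, using the same
//! types and helpers defined below (illustrative only; not compiled as a doctest):
//!
//! ```rust,ignore
//! let provider = OpenAiCompatibleProvider::new(OpenAiCompatibleConfig::ollama("qwen2.5:14b-instruct"))?;
//! let request = ChatRequest::new(vec![ChatMessage::user("How was my last run?")]);
//! let response = provider
//!     .complete_with_tools(&request, Some(create_pierre_fitness_tools()))
//!     .await?;
//! ```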
use pierre_mcp_server::llm::{
ChatMessage, ChatRequest, FunctionDeclaration, LlmCapabilities, LlmProvider,
OpenAiCompatibleConfig, OpenAiCompatibleProvider, Tool,
};
use serde_json::json;
use std::sync::Arc;
use std::time::Instant;
// =============================================================================
// Helper Functions
// =============================================================================
/// Create a provider configured for Ollama with the recommended model
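/// (swap the model tag below to exercise the suite against a different pulled model)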
fn create_ollama_provider() -> OpenAiCompatibleProvider {
let config = OpenAiCompatibleConfig::ollama("qwen2.5:14b-instruct");
OpenAiCompatibleProvider::new(config).expect("Provider should be created")
}
/// Create Pierre fitness tool definitions for testing function calling
#[allow(clippy::too_many_lines)]
fn create_pierre_fitness_tools() -> Vec<Tool> {
vec![
Tool {
function_declarations: vec![FunctionDeclaration {
name: "calculate_metrics".to_owned(),
description: "Calculate performance metrics from activity data including pace, power, heart rate zones".to_owned(),
parameters: Some(json!({
"type": "object",
"properties": {
"activity_type": {
"type": "string",
"enum": ["running", "cycling", "swimming"],
"description": "Type of activity"
},
"distance_meters": {
"type": "number",
"description": "Total distance in meters"
},
"duration_seconds": {
"type": "number",
"description": "Total duration in seconds"
}
},
"required": ["activity_type", "distance_meters", "duration_seconds"]
})),
}],
},
Tool {
function_declarations: vec![FunctionDeclaration {
name: "analyze_training_load".to_owned(),
description: "Analyze training load metrics including TSS, TRIMP, and fatigue levels".to_owned(),
parameters: Some(json!({
"type": "object",
"properties": {
"recent_activities": {
"type": "integer",
"description": "Number of recent activities to analyze"
},
"include_hr_zones": {
"type": "boolean",
"description": "Include heart rate zone analysis"
}
},
"required": ["recent_activities"]
})),
}],
},
Tool {
function_declarations: vec![FunctionDeclaration {
name: "calculate_fitness_score".to_owned(),
description: "Calculate overall fitness score based on recent training".to_owned(),
parameters: Some(json!({
"type": "object",
"properties": {
"period_days": {
"type": "integer",
"description": "Number of days to analyze (default: 30)"
}
}
})),
}],
},
Tool {
function_declarations: vec![FunctionDeclaration {
name: "predict_performance".to_owned(),
description: "Predict race performance based on training data and VDOT".to_owned(),
parameters: Some(json!({
"type": "object",
"properties": {
"race_distance": {
"type": "string",
"enum": ["5k", "10k", "half_marathon", "marathon"],
"description": "Target race distance"
},
"target_date": {
"type": "string",
"format": "date",
"description": "Target race date (YYYY-MM-DD)"
}
},
"required": ["race_distance"]
})),
}],
},
Tool {
function_declarations: vec![FunctionDeclaration {
name: "generate_recommendations".to_owned(),
description: "Generate personalized training recommendations".to_owned(),
parameters: Some(json!({
"type": "object",
"properties": {
"focus_area": {
"type": "string",
"enum": ["endurance", "speed", "recovery", "general"],
"description": "Training focus area"
}
}
})),
}],
},
Tool {
function_declarations: vec![FunctionDeclaration {
name: "calculate_recovery_score".to_owned(),
description: "Calculate recovery score based on sleep and activity data".to_owned(),
parameters: Some(json!({
"type": "object",
"properties": {
"include_sleep": {
"type": "boolean",
"description": "Include sleep data in analysis"
},
"include_hrv": {
"type": "boolean",
"description": "Include HRV data if available"
}
}
})),
}],
},
]
}
// =============================================================================
// Server Connectivity Tests
// =============================================================================
#[tokio::test]
#[ignore = "Requires running Ollama server"]
async fn test_ollama_server_health() {
let provider = create_ollama_provider();
let result = provider.health_check().await;
assert!(
result.is_ok(),
"Ollama server should be reachable: {result:?}"
);
assert!(result.unwrap(), "Health check should return true");
}
#[tokio::test]
#[ignore = "Requires running vLLM server"]
async fn test_vllm_server_health() {
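// Assumes vLLM is already serving this model (e.g. started with `vllm serve meta-llama/Llama-3.1-8B-Instruct`)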
let config = OpenAiCompatibleConfig::vllm("meta-llama/Llama-3.1-8B-Instruct");
let provider = OpenAiCompatibleProvider::new(config).unwrap();
let result = provider.health_check().await;
assert!(
result.is_ok(),
"vLLM server should be reachable: {result:?}"
);
}
// =============================================================================
// Pierre Fitness Tools Integration Tests
// =============================================================================
#[tokio::test]
#[ignore = "Requires Ollama server with model pulled"]
async fn test_pierre_fitness_tools_with_local_llm() {
let provider = create_ollama_provider();
let tools = create_pierre_fitness_tools();
// Test prompts that should trigger specific tools
let test_cases = vec![
(
"I ran 10km in 50 minutes yesterday. Calculate my metrics.",
vec!["calculate_metrics"],
),
(
"How is my training load this week? Analyze my last 7 activities.",
vec!["analyze_training_load"],
),
(
"What's my current fitness score?",
vec!["calculate_fitness_score"],
),
(
"I'm training for a half marathon. Predict my finish time.",
vec!["predict_performance"],
),
(
"Give me training recommendations for improving my endurance.",
vec!["generate_recommendations"],
),
(
"Am I recovered enough for a hard workout? Check my recovery score.",
vec!["calculate_recovery_score"],
),
];
let mut successful_calls = 0;
let total_cases = test_cases.len();
for (prompt, expected_tools) in test_cases {
println!("\n--- Testing: {prompt} ---");
println!("Expected tools: {expected_tools:?}");
let request = ChatRequest::new(vec![ChatMessage::user(prompt)]);
let start = Instant::now();
let response = provider
.complete_with_tools(&request, Some(tools.clone()))
.await;
let elapsed = start.elapsed();
println!("Response time: {elapsed:?}");
match response {
Ok(resp) => {
if let Some(function_calls) = &resp.function_calls {
println!(
"Tool calls: {:?}",
function_calls.iter().map(|tc| &tc.name).collect::<Vec<_>>()
);
// Check if any expected tool was called
let called_any_expected = function_calls
.iter()
.any(|tc| expected_tools.contains(&tc.name.as_str()));
if called_any_expected {
successful_calls += 1;
println!("Matched expected tool!");
}
} else if let Some(content) = &resp.content {
// Truncate by characters, not bytes, so multi-byte UTF-8 output cannot panic the slice
let preview: String = content.chars().take(100).collect();
println!("No tool calls, text response: {preview}");
}
}
Err(e) => {
println!("Error: {e:?}");
}
}
}
println!("\n=== Summary ===");
println!("Successful tool matches: {successful_calls}/{total_cases}");
// Expect at least 50% success rate for function calling
assert!(
successful_calls >= total_cases / 2,
"Expected at least 50% of prompts to trigger correct tools. Got {successful_calls}/{total_cases}"
);
}
#[tokio::test]
#[ignore = "Requires Ollama server with model pulled"]
async fn test_pierre_complex_multi_tool_query() {
let provider = create_ollama_provider();
let tools = create_pierre_fitness_tools();
let request = ChatRequest::new(vec![ChatMessage::user(
"I need a complete training analysis. Check my fitness score, analyze my training load, \
and give me recommendations for the next week.",
)]);
let response = provider.complete_with_tools(&request, Some(tools)).await;
assert!(response.is_ok(), "Should handle multi-tool query");
let resp = response.unwrap();
println!("Response: {resp:?}");
// For complex queries, the model should either:
// 1. Make multiple tool calls, or
// 2. Explain that it needs to call tools sequentially
assert!(
resp.function_calls.is_some() || resp.content.is_some(),
"Should either call tools or provide explanation"
);
}
// =============================================================================
// Latency Tests
// =============================================================================
#[tokio::test]
#[ignore = "Requires Ollama server with model pulled"]
async fn test_local_llm_latency_acceptable() {
let provider = create_ollama_provider();
let simple_request =
ChatRequest::new(vec![ChatMessage::user("What is 2 + 2? Answer briefly.")]);
let start = Instant::now();
let response = provider.complete(&simple_request).await;
let elapsed = start.elapsed();
assert!(response.is_ok(), "Simple query should succeed");
println!("Simple query latency: {elapsed:?}");
// Simple queries should complete within 5 seconds on local hardware
assert!(
elapsed.as_secs() < 5,
"Simple query took too long: {elapsed:?}"
);
}
#[tokio::test]
#[ignore = "Requires Ollama server with model pulled"]
async fn test_local_llm_streaming_first_token_latency() {
use futures_util::StreamExt;
let provider = create_ollama_provider();
let request = ChatRequest::new(vec![ChatMessage::user("Count from 1 to 10.")]);
let start = Instant::now();
let stream_result = provider.complete_stream(&request).await;
assert!(stream_result.is_ok(), "Stream should start");
let mut stream = stream_result.unwrap();
// Measure time to first token
let first_token = stream.next().await;
let ttft = start.elapsed();
println!("Time to first token: {ttft:?}");
assert!(first_token.is_some(), "Should receive first token");
assert!(first_token.unwrap().is_ok(), "First token should be valid");
// TTFT should be under 3 seconds for a warmed-up local model
assert!(ttft.as_secs() < 3, "Time to first token too slow: {ttft:?}");
// Consume rest of stream
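// (start the count at 1 to include the first token consumed above)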
let mut total_tokens = 1;
while let Some(chunk) = stream.next().await {
if chunk.is_ok() {
total_tokens += 1;
}
}
let total_time = start.elapsed();
println!("Total tokens: {total_tokens}, Total time: {total_time:?}");
println!(
"Tokens/sec: {:.1}",
f64::from(total_tokens) / total_time.as_secs_f64()
);
}
#[tokio::test]
#[ignore = "Requires Ollama server with model pulled"]
async fn test_local_llm_tool_calling_latency() {
let provider = create_ollama_provider();
let tools = create_pierre_fitness_tools();
let request = ChatRequest::new(vec![ChatMessage::user(
"Calculate my running metrics for a 5km run in 25 minutes.",
)]);
let start = Instant::now();
let response = provider.complete_with_tools(&request, Some(tools)).await;
let elapsed = start.elapsed();
assert!(response.is_ok(), "Tool call should succeed");
println!("Tool calling latency: {elapsed:?}");
// Tool calling adds some overhead but should still be under 10 seconds
assert!(
elapsed.as_secs() < 10,
"Tool calling took too long: {elapsed:?}"
);
let resp = response.unwrap();
if let Some(calls) = &resp.function_calls {
println!(
"Tools called: {:?}",
calls.iter().map(|c| &c.name).collect::<Vec<_>>()
);
}
}
// =============================================================================
// Error Handling Tests
// =============================================================================
#[tokio::test]
#[ignore = "Requires Ollama server (but not the model)"]
async fn test_local_llm_missing_model_error() {
let config = OpenAiCompatibleConfig::ollama("nonexistent-model:latest");
let provider = OpenAiCompatibleProvider::new(config).unwrap();
let request = ChatRequest::new(vec![ChatMessage::user("Hello")]);
let response = provider.complete(&request).await;
// Should get an error about missing model
assert!(response.is_err(), "Should fail with missing model");
let err = response.unwrap_err();
println!("Error for missing model: {err:?}");
}
#[tokio::test]
async fn test_local_llm_server_not_running_error() {
// Use a port that definitely doesn't have a server
let config = OpenAiCompatibleConfig {
base_url: "http://localhost:59999/v1".to_owned(),
api_key: None,
default_model: "test".to_owned(),
provider_name: "test".to_owned(),
display_name: "Test".to_owned(),
capabilities: LlmCapabilities::default(),
};
let provider = OpenAiCompatibleProvider::new(config).unwrap();
let result = provider.health_check().await;
// Should fail because server is not running
assert!(result.is_err(), "Should fail when server is not running");
let err = result.unwrap_err();
println!("Error for missing server: {err:?}");
}
// =============================================================================
// Concurrent Request Tests
// =============================================================================
#[tokio::test]
#[ignore = "Requires Ollama server with model pulled"]
async fn test_local_llm_concurrent_requests() {
// Share a single provider across the spawned tasks
let provider = Arc::new(create_ollama_provider());
let requests = vec!["What is 1 + 1?", "What is 2 + 2?", "What is 3 + 3?"];
let start = Instant::now();
let handles: Vec<_> = requests
.into_iter()
.map(|prompt| {
let prov = provider.clone();
tokio::spawn(async move {
let req = ChatRequest::new(vec![ChatMessage::user(prompt)]);
prov.complete(&req).await
})
})
.collect();
let mut successes = 0;
for handle in handles {
let result = handle.await.unwrap();
if result.is_ok() {
successes += 1;
}
}
let elapsed = start.elapsed();
println!("Concurrent requests completed in {elapsed:?}");
println!("Successes: {successes}/3");
// All should succeed: Ollama accepts concurrent requests, queuing them if it cannot serve them in parallel
assert_eq!(successes, 3, "All concurrent requests should succeed");
}