ishika_mcp

test_google.py•38.8 KiB

import os import sys import types from unittest.mock import patch, MagicMock import pytest from crewai.llm import LLM from crewai.crew import Crew from crewai.agent import Agent from crewai.task import Task @pytest.fixture(autouse=True) def mock_google_api_key(): """Mock GOOGLE_API_KEY for tests only if real keys are not set.""" if "GOOGLE_API_KEY" not in os.environ and "GEMINI_API_KEY" not in os.environ: with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}): yield else: yield def test_gemini_completion_is_used_when_google_provider(): """ Test that GeminiCompletion from completion.py is used when LLM uses provider 'google' """ llm = LLM(model="google/gemini-2.0-flash-001") assert llm.__class__.__name__ == "GeminiCompletion" assert llm.provider == "gemini" assert llm.model == "gemini-2.0-flash-001" def test_gemini_completion_is_used_when_gemini_provider(): """ Test that GeminiCompletion is used when provider is 'gemini' """ llm = LLM(model="gemini/gemini-2.0-flash-001") from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) assert llm.provider == "gemini" assert llm.model == "gemini-2.0-flash-001" def test_gemini_completion_module_is_imported(): """ Test that the completion module is properly imported when using Google provider """ module_name = "crewai.llms.providers.gemini.completion" # Remove module from cache if it exists if module_name in sys.modules: del sys.modules[module_name] # Create LLM instance - this should trigger the import LLM(model="google/gemini-2.0-flash-001") # Verify the module was imported assert module_name in sys.modules completion_mod = sys.modules[module_name] assert isinstance(completion_mod, types.ModuleType) # Verify the class exists in the module assert hasattr(completion_mod, 'GeminiCompletion') def test_native_gemini_raises_error_when_initialization_fails(): """ Test that LLM raises ImportError when native Gemini completion fails. With the new behavior, when a native provider is in SUPPORTED_NATIVE_PROVIDERS but fails to instantiate, we raise an ImportError instead of silently falling back. This provides clearer error messages to users about missing dependencies. """ # Mock the _get_native_provider to return a failing class with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: class FailingCompletion: def __init__(self, *args, **kwargs): raise Exception("Native Google Gen AI SDK failed") mock_get_provider.return_value = FailingCompletion # This should raise ImportError with clear message with pytest.raises(ImportError) as excinfo: LLM(model="google/gemini-2.0-flash-001") # Verify the error message is helpful assert "Error importing native provider" in str(excinfo.value) assert "Native Google Gen AI SDK failed" in str(excinfo.value) def test_gemini_completion_initialization_parameters(): """ Test that GeminiCompletion is initialized with correct parameters """ llm = LLM( model="google/gemini-2.0-flash-001", temperature=0.7, max_output_tokens=2000, top_p=0.9, top_k=40, api_key="test-key" ) from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) assert llm.model == "gemini-2.0-flash-001" assert llm.temperature == 0.7 assert llm.max_output_tokens == 2000 assert llm.top_p == 0.9 assert llm.top_k == 40 def test_gemini_specific_parameters(): """ Test Gemini-specific parameters like stop_sequences, streaming, and safety settings """ safety_settings = { "HARM_CATEGORY_HARASSMENT": "BLOCK_MEDIUM_AND_ABOVE", "HARM_CATEGORY_HATE_SPEECH": "BLOCK_MEDIUM_AND_ABOVE" } llm = LLM( model="google/gemini-2.0-flash-001", stop_sequences=["Human:", "Assistant:"], stream=True, safety_settings=safety_settings, project="test-project", location="us-central1" ) from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) assert llm.stop_sequences == ["Human:", "Assistant:"] assert llm.stream == True assert llm.safety_settings == safety_settings assert llm.project == "test-project" assert llm.location == "us-central1" def test_gemini_completion_call(): """ Test that GeminiCompletion call method works """ llm = LLM(model="google/gemini-2.0-flash-001") # Mock the call method on the instance with patch.object(llm, 'call', return_value="Hello! I'm Gemini, ready to help.") as mock_call: result = llm.call("Hello, how are you?") assert result == "Hello! I'm Gemini, ready to help." mock_call.assert_called_once_with("Hello, how are you?") def test_gemini_completion_called_during_crew_execution(): """ Test that GeminiCompletion.call is actually invoked when running a crew """ # Create the LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") # Mock the call method on the specific instance with patch.object(gemini_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: # Create agent with explicit LLM configuration agent = Agent( role="Research Assistant", goal="Find population info", backstory="You research populations.", llm=gemini_llm, ) task = Task( description="Find Tokyo population", expected_output="Population number", agent=agent, ) crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() # Verify mock was called assert mock_call.called assert "14 million" in str(result) def test_gemini_completion_call_arguments(): """ Test that GeminiCompletion.call is invoked with correct arguments """ # Create LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") # Mock the instance method with patch.object(gemini_llm, 'call') as mock_call: mock_call.return_value = "Task completed successfully." agent = Agent( role="Test Agent", goal="Complete a simple task", backstory="You are a test agent.", llm=gemini_llm # Use same instance ) task = Task( description="Say hello world", expected_output="Hello world", agent=agent, ) crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() # Verify call was made assert mock_call.called # Check the arguments passed to the call method call_args = mock_call.call_args assert call_args is not None # The first argument should be the messages messages = call_args[0][0] # First positional argument assert isinstance(messages, (str, list)) # Verify that the task description appears in the messages if isinstance(messages, str): assert "hello world" in messages.lower() elif isinstance(messages, list): message_content = str(messages).lower() assert "hello world" in message_content def test_multiple_gemini_calls_in_crew(): """ Test that GeminiCompletion.call is invoked multiple times for multiple tasks """ # Create LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") # Mock the instance method with patch.object(gemini_llm, 'call') as mock_call: mock_call.return_value = "Task completed." agent = Agent( role="Multi-task Agent", goal="Complete multiple tasks", backstory="You can handle multiple tasks.", llm=gemini_llm # Use same instance ) task1 = Task( description="First task", expected_output="First result", agent=agent, ) task2 = Task( description="Second task", expected_output="Second result", agent=agent, ) crew = Crew( agents=[agent], tasks=[task1, task2] ) crew.kickoff() # Verify multiple calls were made assert mock_call.call_count >= 2 # At least one call per task # Verify each call had proper arguments for call in mock_call.call_args_list: assert len(call[0]) > 0 # Has positional arguments messages = call[0][0] assert messages is not None def test_gemini_completion_with_tools(): """ Test that GeminiCompletion.call is invoked with tools when agent has tools """ from crewai.tools import tool @tool def sample_tool(query: str) -> str: """A sample tool for testing""" return f"Tool result for: {query}" # Create LLM instance first gemini_llm = LLM(model="google/gemini-2.0-flash-001") # Mock the instance method with patch.object(gemini_llm, 'call') as mock_call: mock_call.return_value = "Task completed with tools." agent = Agent( role="Tool User", goal="Use tools to complete tasks", backstory="You can use tools.", llm=gemini_llm, # Use same instance tools=[sample_tool] ) task = Task( description="Use the sample tool", expected_output="Tool usage result", agent=agent, ) crew = Crew(agents=[agent], tasks=[task]) crew.kickoff() assert mock_call.called call_args = mock_call.call_args call_kwargs = call_args[1] if len(call_args) > 1 else {} if 'tools' in call_kwargs: assert call_kwargs['tools'] is not None assert len(call_kwargs['tools']) > 0 def test_gemini_raises_error_when_model_not_supported(): """Test that GeminiCompletion raises ValueError when model not supported""" # Mock the Google client to raise an error with patch('crewai.llms.providers.gemini.completion.genai') as mock_genai: mock_client = MagicMock() mock_genai.Client.return_value = mock_client from google.genai.errors import ClientError # type: ignore mock_response = MagicMock() mock_response.body_segments = [{ 'error': { 'code': 404, 'message': 'models/model-doesnt-exist is not found for API version v1beta, or is not supported for generateContent.', 'status': 'NOT_FOUND' } }] mock_response.status_code = 404 mock_client.models.generate_content.side_effect = ClientError(404, mock_response) llm = LLM(model="google/model-doesnt-exist") with pytest.raises(Exception): # Should raise some error for unsupported model llm.call("Hello") def test_gemini_vertex_ai_setup(): """ Test that Vertex AI configuration is properly handled """ with patch.dict(os.environ, { "GOOGLE_CLOUD_PROJECT": "test-project", "GOOGLE_CLOUD_LOCATION": "us-west1" }): llm = LLM( model="google/gemini-2.0-flash-001", project="test-project", location="us-west1" ) from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) assert llm.project == "test-project" assert llm.location == "us-west1" def test_gemini_api_key_configuration(): """ Test that API key configuration works for both GOOGLE_API_KEY and GEMINI_API_KEY """ # Test with GOOGLE_API_KEY with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-google-key"}): llm = LLM(model="google/gemini-2.0-flash-001") from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) assert llm.api_key == "test-google-key" # Test with GEMINI_API_KEY with patch.dict(os.environ, {"GEMINI_API_KEY": "test-gemini-key"}, clear=True): llm = LLM(model="google/gemini-2.0-flash-001") assert isinstance(llm, GeminiCompletion) assert llm.api_key == "test-gemini-key" def test_gemini_model_capabilities(): """ Test that model capabilities are correctly identified """ # Test Gemini 2.0 model llm_2_0 = LLM(model="google/gemini-2.0-flash-001") from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm_2_0, GeminiCompletion) assert llm_2_0.supports_tools == True # Test Gemini 1.5 model llm_1_5 = LLM(model="google/gemini-1.5-pro") assert isinstance(llm_1_5, GeminiCompletion) assert llm_1_5.supports_tools == True def test_gemini_generation_config(): """ Test that generation config is properly prepared """ llm = LLM( model="google/gemini-2.0-flash-001", temperature=0.7, top_p=0.9, top_k=40, max_output_tokens=1000 ) from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion) # Test config preparation config = llm._prepare_generation_config() # Verify config has the expected parameters assert hasattr(config, 'temperature') or 'temperature' in str(config) assert hasattr(config, 'top_p') or 'top_p' in str(config) assert hasattr(config, 'top_k') or 'top_k' in str(config) assert hasattr(config, 'max_output_tokens') or 'max_output_tokens' in str(config) def test_gemini_model_detection(): """ Test that various Gemini model formats are properly detected """ # Test Gemini model naming patterns that actually work with provider detection gemini_test_cases = [ "google/gemini-2.0-flash-001", "gemini/gemini-2.0-flash-001", "google/gemini-1.5-pro", "gemini/gemini-1.5-flash" ] for model_name in gemini_test_cases: llm = LLM(model=model_name) from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm, GeminiCompletion), f"Failed for model: {model_name}" def test_gemini_supports_stop_words(): """ Test that Gemini models support stop sequences """ llm = LLM(model="google/gemini-2.0-flash-001") assert llm.supports_stop_words() == True def test_gemini_context_window_size(): """ Test that Gemini models return correct context window sizes """ # Test Gemini 2.0 Flash llm_2_0 = LLM(model="google/gemini-2.0-flash-001") context_size_2_0 = llm_2_0.get_context_window_size() assert context_size_2_0 > 500000 # Should be substantial (1M tokens) # Test Gemini 1.5 Pro llm_1_5 = LLM(model="google/gemini-1.5-pro") context_size_1_5 = llm_1_5.get_context_window_size() assert context_size_1_5 > 1000000 # Should be very large (2M tokens) def test_gemini_message_formatting(): """ Test that messages are properly formatted for Gemini API """ llm = LLM(model="google/gemini-2.0-flash-001") # Test message formatting test_messages = [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}, {"role": "user", "content": "How are you?"} ] formatted_contents, system_instruction = llm._format_messages_for_gemini(test_messages) # System message should be extracted assert system_instruction == "You are a helpful assistant." # Remaining messages should be Content objects assert len(formatted_contents) >= 3 # Should have user, model, user messages # First content should be user role assert formatted_contents[0].role == "user" # Second should be model (converted from assistant) assert formatted_contents[1].role == "model" def test_gemini_streaming_parameter(): """ Test that streaming parameter is properly handled """ # Test non-streaming llm_no_stream = LLM(model="google/gemini-2.0-flash-001", stream=False) assert llm_no_stream.stream == False # Test streaming llm_stream = LLM(model="google/gemini-2.0-flash-001", stream=True) assert llm_stream.stream == True def test_gemini_tool_conversion(): """ Test that tools are properly converted to Gemini format """ llm = LLM(model="google/gemini-2.0-flash-001") # Mock tool in CrewAI format crewai_tools = [{ "type": "function", "function": { "name": "test_tool", "description": "A test tool", "parameters": { "type": "object", "properties": { "query": {"type": "string", "description": "Search query"} }, "required": ["query"] } } }] # Test tool conversion gemini_tools = llm._convert_tools_for_interference(crewai_tools) assert len(gemini_tools) == 1 # Gemini tools are Tool objects with function_declarations assert hasattr(gemini_tools[0], 'function_declarations') assert len(gemini_tools[0].function_declarations) == 1 func_decl = gemini_tools[0].function_declarations[0] assert func_decl.name == "test_tool" assert func_decl.description == "A test tool" def test_gemini_environment_variable_api_key(): """ Test that Google API key is properly loaded from environment """ with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-google-key"}): llm = LLM(model="google/gemini-2.0-flash-001") assert llm.client is not None assert hasattr(llm.client, 'models') assert llm.api_key == "test-google-key" @pytest.mark.vcr() def test_gemini_token_usage_tracking(): """ Test that token usage is properly tracked for Gemini responses """ llm = LLM(model="google/gemini-2.0-flash-001") result = llm.call("Hello") assert result.strip() == "Hi there! How can I help you today?" usage = llm.get_token_usage_summary() assert usage.successful_requests == 1 assert usage.prompt_tokens > 0 assert usage.completion_tokens > 0 assert usage.total_tokens > 0 @pytest.mark.vcr() def test_gemini_tool_returning_float(): """ Test that Gemini properly handles tools that return non-dict values like floats. This is an end-to-end test that verifies the agent can use a tool that returns a float (which gets wrapped in {"result": value} for Gemini's FunctionResponse). """ from pydantic import BaseModel, Field from typing import Type from crewai.tools import BaseTool class SumNumbersToolInput(BaseModel): a: float = Field(..., description="The first number to add") b: float = Field(..., description="The second number to add") class SumNumbersTool(BaseTool): name: str = "sum_numbers" description: str = "Add two numbers together and return the result" args_schema: Type[BaseModel] = SumNumbersToolInput def _run(self, a: float, b: float) -> float: return a + b sum_tool = SumNumbersTool() agent = Agent( role="Calculator", goal="Calculate numbers accurately", backstory="You are a calculator that adds numbers.", llm=LLM(model="google/gemini-2.0-flash-001"), tools=[sum_tool], verbose=True, ) task = Task( description="What is 10000 + 20000? Use the sum_numbers tool to calculate this.", expected_output="The sum of the two numbers", agent=agent, ) crew = Crew(agents=[agent], tasks=[task], verbose=True) result = crew.kickoff() # The result should contain 30000 (the sum) assert "30000" in result.raw def test_gemini_stop_sequences_sync(): """Test that stop and stop_sequences attributes stay synchronized.""" llm = LLM(model="google/gemini-2.0-flash-001") # Test setting stop as a list llm.stop = ["\nObservation:", "\nThought:"] assert llm.stop_sequences == ["\nObservation:", "\nThought:"] assert llm.stop == ["\nObservation:", "\nThought:"] # Test setting stop as a string llm.stop = "\nFinal Answer:" assert llm.stop_sequences == ["\nFinal Answer:"] assert llm.stop == ["\nFinal Answer:"] # Test setting stop as None llm.stop = None assert llm.stop_sequences == [] assert llm.stop == [] def test_gemini_stop_sequences_sent_to_api(): """Test that stop_sequences are properly sent to the Gemini API.""" llm = LLM(model="google/gemini-2.0-flash-001") # Set stop sequences via the stop attribute (simulating CrewAgentExecutor) llm.stop = ["\nObservation:", "\nThought:"] # Patch the API call to capture parameters without making real call with patch.object(llm.client.models, 'generate_content') as mock_generate: mock_response = MagicMock() mock_response.text = "Hello" mock_response.candidates = [] mock_response.usage_metadata = MagicMock( prompt_token_count=10, candidates_token_count=5, total_token_count=15 ) mock_generate.return_value = mock_response llm.call("Say hello in one word") # Verify stop_sequences were passed to the API in the config call_kwargs = mock_generate.call_args[1] assert "config" in call_kwargs # The config object should have stop_sequences set config = call_kwargs["config"] # Check if the config has stop_sequences attribute assert hasattr(config, 'stop_sequences') or 'stop_sequences' in config.__dict__ if hasattr(config, 'stop_sequences'): assert config.stop_sequences == ["\nObservation:", "\nThought:"] @pytest.mark.vcr() @pytest.mark.skip(reason="VCR cannot replay SSE streaming responses") def test_google_streaming_returns_usage_metrics(): """ Test that Google Gemini streaming calls return proper token usage metrics. """ agent = Agent( role="Research Assistant", goal="Find information about the capital of Japan", backstory="You are a helpful research assistant.", llm=LLM(model="gemini/gemini-2.0-flash-exp", stream=True), verbose=True, ) task = Task( description="What is the capital of Japan?", expected_output="The capital of Japan", agent=agent, ) crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() assert result.token_usage is not None assert result.token_usage.total_tokens > 0 assert result.token_usage.prompt_tokens > 0 assert result.token_usage.completion_tokens > 0 assert result.token_usage.successful_requests >= 1 @pytest.mark.vcr() def test_google_express_mode_works() -> None: """ Test Google Vertex AI Express mode with API key authentication. This tests Vertex AI Express mode (aiplatform.googleapis.com) with API key authentication. """ with patch.dict(os.environ, {"GOOGLE_GENAI_USE_VERTEXAI": "true"}): agent = Agent( role="Research Assistant", goal="Find information about the capital of Japan", backstory="You are a helpful research assistant.", llm=LLM( model="gemini/gemini-2.0-flash-exp", ), verbose=True, ) task = Task( description="What is the capital of Japan?", expected_output="The capital of Japan", agent=agent, ) crew = Crew(agents=[agent], tasks=[task]) result = crew.kickoff() assert result.token_usage is not None assert result.token_usage.total_tokens > 0 assert result.token_usage.prompt_tokens > 0 assert result.token_usage.completion_tokens > 0 assert result.token_usage.successful_requests >= 1 def test_gemini_2_0_model_detection(): """Test that Gemini 2.0 models are properly detected.""" # Test Gemini 2.0 models llm_2_0 = LLM(model="google/gemini-2.0-flash-001") from crewai.llms.providers.gemini.completion import GeminiCompletion assert isinstance(llm_2_0, GeminiCompletion) assert llm_2_0.is_gemini_2_0 is True llm_2_5 = LLM(model="google/gemini-2.5-flash") assert isinstance(llm_2_5, GeminiCompletion) assert llm_2_5.is_gemini_2_0 is True # Test non-2.0 models llm_1_5 = LLM(model="google/gemini-1.5-pro") assert isinstance(llm_1_5, GeminiCompletion) assert llm_1_5.is_gemini_2_0 is False def test_add_property_ordering_to_schema(): """Test that _add_property_ordering correctly adds propertyOrdering to schemas.""" from crewai.llms.providers.gemini.completion import GeminiCompletion # Test simple object schema simple_schema = { "type": "object", "properties": { "name": {"type": "string"}, "age": {"type": "integer"}, "email": {"type": "string"} } } result = GeminiCompletion._add_property_ordering(simple_schema) assert "propertyOrdering" in result assert result["propertyOrdering"] == ["name", "age", "email"] # Test nested object schema nested_schema = { "type": "object", "properties": { "user": { "type": "object", "properties": { "name": {"type": "string"}, "contact": { "type": "object", "properties": { "email": {"type": "string"}, "phone": {"type": "string"} } } } }, "id": {"type": "integer"} } } result = GeminiCompletion._add_property_ordering(nested_schema) assert "propertyOrdering" in result assert result["propertyOrdering"] == ["user", "id"] assert "propertyOrdering" in result["properties"]["user"] assert result["properties"]["user"]["propertyOrdering"] == ["name", "contact"] assert "propertyOrdering" in result["properties"]["user"]["properties"]["contact"] assert result["properties"]["user"]["properties"]["contact"]["propertyOrdering"] == ["email", "phone"] def test_gemini_2_0_response_model_with_property_ordering(): """Test that Gemini 2.0 models include propertyOrdering in response schemas.""" from pydantic import BaseModel, Field class TestResponse(BaseModel): """Test response model.""" name: str = Field(..., description="The name") age: int = Field(..., description="The age") email: str = Field(..., description="The email") llm = LLM(model="google/gemini-2.0-flash-001") # Prepare generation config with response model config = llm._prepare_generation_config(response_model=TestResponse) # Verify that the config has response_json_schema assert hasattr(config, 'response_json_schema') or 'response_json_schema' in config.__dict__ # Get the schema if hasattr(config, 'response_json_schema'): schema = config.response_json_schema else: schema = config.__dict__.get('response_json_schema', {}) # Verify propertyOrdering is present for Gemini 2.0 assert "propertyOrdering" in schema assert "name" in schema["propertyOrdering"] assert "age" in schema["propertyOrdering"] assert "email" in schema["propertyOrdering"] def test_gemini_1_5_response_model_uses_response_schema(): """Test that Gemini 1.5 models use response_schema parameter (not response_json_schema).""" from pydantic import BaseModel, Field class TestResponse(BaseModel): """Test response model.""" name: str = Field(..., description="The name") age: int = Field(..., description="The age") llm = LLM(model="google/gemini-1.5-pro") # Prepare generation config with response model config = llm._prepare_generation_config(response_model=TestResponse) # Verify that the config uses response_schema (not response_json_schema) assert hasattr(config, 'response_schema') or 'response_schema' in config.__dict__ assert not (hasattr(config, 'response_json_schema') and config.response_json_schema is not None) # Get the schema if hasattr(config, 'response_schema'): schema = config.response_schema else: schema = config.__dict__.get('response_schema') # For Gemini 1.5, response_schema should be the Pydantic model itself # The SDK handles conversion internally assert schema is TestResponse or isinstance(schema, type) # ============================================================================= # Agent Kickoff Structured Output Tests # ============================================================================= @pytest.mark.vcr() def test_gemini_agent_kickoff_structured_output_without_tools(): """ Test that agent kickoff returns structured output without tools. This tests native structured output handling for Gemini models. """ from pydantic import BaseModel, Field class AnalysisResult(BaseModel): """Structured output for analysis results.""" topic: str = Field(description="The topic analyzed") key_points: list[str] = Field(description="Key insights from the analysis") summary: str = Field(description="Brief summary of findings") agent = Agent( role="Analyst", goal="Provide structured analysis on topics", backstory="You are an expert analyst who provides clear, structured insights.", llm=LLM(model="google/gemini-2.0-flash-001"), tools=[], verbose=True, ) result = agent.kickoff( messages="Analyze the benefits of remote work briefly. Keep it concise.", response_format=AnalysisResult, ) assert result.pydantic is not None, "Expected pydantic output but got None" assert isinstance(result.pydantic, AnalysisResult), f"Expected AnalysisResult but got {type(result.pydantic)}" assert result.pydantic.topic, "Topic should not be empty" assert len(result.pydantic.key_points) > 0, "Should have at least one key point" assert result.pydantic.summary, "Summary should not be empty" @pytest.mark.vcr() def test_gemini_agent_kickoff_structured_output_with_tools(): """ Test that agent kickoff returns structured output after using tools. This tests post-tool-call structured output handling for Gemini models. """ from pydantic import BaseModel, Field from crewai.tools import tool class CalculationResult(BaseModel): """Structured output for calculation results.""" operation: str = Field(description="The mathematical operation performed") result: int = Field(description="The result of the calculation") explanation: str = Field(description="Brief explanation of the calculation") @tool def add_numbers(a: int, b: int) -> int: """Add two numbers together and return the sum.""" return a + b agent = Agent( role="Calculator", goal="Perform calculations using available tools", backstory="You are a calculator assistant that uses tools to compute results.", llm=LLM(model="google/gemini-2.0-flash-001"), tools=[add_numbers], verbose=True, ) result = agent.kickoff( messages="Calculate 15 + 27 using your add_numbers tool. Report the result.", response_format=CalculationResult, ) assert result.pydantic is not None, "Expected pydantic output but got None" assert isinstance(result.pydantic, CalculationResult), f"Expected CalculationResult but got {type(result.pydantic)}" assert result.pydantic.result == 42, f"Expected result 42 but got {result.pydantic.result}" assert result.pydantic.operation, "Operation should not be empty" assert result.pydantic.explanation, "Explanation should not be empty" def test_gemini_stop_words_not_applied_to_structured_output(): """ Test that stop words are NOT applied when response_model is provided. This ensures JSON responses containing stop word patterns (like "Observation:") are not truncated, which would cause JSON validation to fail. """ from pydantic import BaseModel, Field from crewai.llms.providers.gemini.completion import GeminiCompletion class ResearchResult(BaseModel): """Research result that may contain stop word patterns in string fields.""" finding: str = Field(description="The research finding") observation: str = Field(description="Observation about the finding") # Create Gemini completion instance with stop words configured # Gemini uses stop_sequences instead of stop llm = GeminiCompletion( model="gemini-2.0-flash-001", stop_sequences=["Observation:", "Final Answer:"], # Common stop words ) # JSON response that contains a stop word pattern in a string field # Without the fix, this would be truncated at "Observation:" breaking the JSON json_response = '{"finding": "The data shows growth", "observation": "Observation: This confirms the hypothesis"}' # Test the _validate_structured_output method which is used for structured output handling result = llm._validate_structured_output(json_response, ResearchResult) # Should successfully parse the full JSON without truncation assert isinstance(result, ResearchResult) assert result.finding == "The data shows growth" # The observation field should contain the full text including "Observation:" assert "Observation:" in result.observation def test_gemini_stop_words_still_applied_to_regular_responses(): """ Test that stop words ARE still applied for regular (non-structured) responses. This ensures the fix didn't break normal stop word behavior. """ from crewai.llms.providers.gemini.completion import GeminiCompletion # Create Gemini completion instance with stop words configured # Gemini uses stop_sequences instead of stop llm = GeminiCompletion( model="gemini-2.0-flash-001", stop_sequences=["Observation:", "Final Answer:"], ) # Response that contains a stop word - should be truncated response_with_stop_word = "I need to search for more information.\n\nAction: search\nObservation: Found results" # Test the _apply_stop_words method directly result = llm._apply_stop_words(response_with_stop_word) # Response should be truncated at the stop word assert "Observation:" not in result assert "Found results" not in result assert "I need to search for more information" in result def test_gemini_structured_output_preserves_json_with_stop_word_patterns(): """ Test that structured output validation preserves JSON content even when string fields contain stop word patterns. """ from pydantic import BaseModel, Field from crewai.llms.providers.gemini.completion import GeminiCompletion class AgentObservation(BaseModel): """Model with fields that might contain stop word-like text.""" action_taken: str = Field(description="What action was taken") observation_result: str = Field(description="The observation result") final_answer: str = Field(description="The final answer") # Gemini uses stop_sequences instead of stop llm = GeminiCompletion( model="gemini-2.0-flash-001", stop_sequences=["Observation:", "Final Answer:", "Action:"], ) # JSON that contains all the stop word patterns as part of the content json_with_stop_patterns = '''{ "action_taken": "Action: Searched the database", "observation_result": "Observation: Found 5 relevant results", "final_answer": "Final Answer: The data shows positive growth" }''' # Test the _validate_structured_output method - this should NOT truncate # since it's structured output result = llm._validate_structured_output(json_with_stop_patterns, AgentObservation) assert isinstance(result, AgentObservation) assert "Action:" in result.action_taken assert "Observation:" in result.observation_result assert "Final Answer:" in result.final_answer @pytest.mark.vcr() def test_gemini_cached_prompt_tokens(): """ Test that Gemini correctly extracts and tracks cached_prompt_tokens from cached_content_token_count in the usage metadata. Sends two calls with the same large prompt to trigger caching. """ padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 system_msg = f"You are a helpful assistant. {padding}" llm = LLM(model="google/gemini-2.5-flash") # First call llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say hello in one word."}, ]) # Second call: same system prompt llm.call([ {"role": "system", "content": system_msg}, {"role": "user", "content": "Say goodbye in one word."}, ]) usage = llm.get_token_usage_summary() assert usage.total_tokens > 0 assert usage.prompt_tokens > 0 assert usage.completion_tokens > 0 assert usage.successful_requests == 2 # cached_prompt_tokens should be populated (may be 0 if Gemini # doesn't cache for this particular request, but the field should exist) assert usage.cached_prompt_tokens >= 0 @pytest.mark.vcr() def test_gemini_cached_prompt_tokens_with_tools(): """ Test that Gemini correctly tracks cached_prompt_tokens when tools are used. The large system prompt should be cached across tool-calling requests. """ padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 system_msg = f"You are a helpful assistant that uses tools. {padding}" def get_weather(location: str) -> str: return f"The weather in {location} is sunny and 72°F" tools = [ { "name": "get_weather", "description": "Get the current weather for a location", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "The city name" } }, "required": ["location"], }, } ] llm = LLM(model="google/gemini-2.5-flash") # First call with tool llm.call( [ {"role": "system", "content": system_msg}, {"role": "user", "content": "What is the weather in Tokyo?"}, ], tools=tools, available_functions={"get_weather": get_weather}, ) # Second call with same system prompt + tools llm.call( [ {"role": "system", "content": system_msg}, {"role": "user", "content": "What is the weather in Paris?"}, ], tools=tools, available_functions={"get_weather": get_weather}, ) usage = llm.get_token_usage_summary() assert usage.total_tokens > 0 assert usage.prompt_tokens > 0 assert usage.successful_requests == 2 # cached_prompt_tokens should be populated (may be 0 if Gemini # doesn't cache for this particular request, but the field should exist) assert usage.cached_prompt_tokens >= 0

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/crewAIInc/crewAI'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_google.py•38.8 KiB