import os
import sys
import types
from unittest.mock import patch, MagicMock
import pytest
from crewai.llm import LLM
from crewai.crew import Crew
from crewai.agent import Agent
from crewai.task import Task
@pytest.fixture(autouse=True)
def mock_google_api_key():
    """Provide a placeholder GOOGLE_API_KEY unless a real key is already set.

    When neither GOOGLE_API_KEY nor GEMINI_API_KEY is present in the
    environment, GOOGLE_API_KEY is patched to "test-key" for the duration of
    the test. Otherwise the environment is left untouched.
    """
    real_key_present = (
        "GOOGLE_API_KEY" in os.environ or "GEMINI_API_KEY" in os.environ
    )
    if real_key_present:
        yield
        return

    with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}):
        yield
def test_gemini_completion_is_used_when_google_provider():
    """A "google/"-prefixed model produces a GeminiCompletion object.

    The provider is expected to read "gemini" and the model name is expected
    to come back without its provider prefix.
    """
    llm = LLM(model="google/gemini-2.0-flash-001")

    class_name = llm.__class__.__name__
    assert class_name == "GeminiCompletion"
    assert llm.provider == "gemini"
    assert llm.model == "gemini-2.0-flash-001"
def test_gemini_completion_is_used_when_gemini_provider():
    """A "gemini/"-prefixed model produces a GeminiCompletion instance."""
    llm = LLM(model="gemini/gemini-2.0-flash-001")

    # Imported locally, once the LLM has been constructed.
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    assert isinstance(llm, GeminiCompletion)
    assert llm.provider == "gemini"
    assert llm.model == "gemini-2.0-flash-001"
def test_gemini_completion_module_is_imported():
    """Constructing a Google-provider LLM imports the Gemini completion module.

    The module is evicted from ``sys.modules`` first, so finding it there
    afterwards shows that building the LLM triggered the import.
    """
    module_name = "crewai.llms.providers.gemini.completion"

    # Drop any cached copy; a missing entry is fine.
    sys.modules.pop(module_name, None)

    # Building the LLM is what should pull the module back in.
    LLM(model="google/gemini-2.0-flash-001")

    assert module_name in sys.modules
    completion_mod = sys.modules[module_name]
    assert isinstance(completion_mod, types.ModuleType)

    # The provider class must be exposed by the module.
    assert hasattr(completion_mod, 'GeminiCompletion')
def test_native_gemini_raises_error_when_initialization_fails():
    """LLM construction raises ImportError when the native provider fails.

    ``LLM._get_native_provider`` is patched to return a class whose
    ``__init__`` always raises. The test expects an ``ImportError`` whose
    message contains both a generic "Error importing native provider" prefix
    and the text of the underlying failure.
    """

    class FailingCompletion:
        def __init__(self, *args, **kwargs):
            raise Exception("Native Google Gen AI SDK failed")

    with patch(
        'crewai.llm.LLM._get_native_provider',
        return_value=FailingCompletion,
    ):
        with pytest.raises(ImportError) as excinfo:
            LLM(model="google/gemini-2.0-flash-001")

    # The error message must be helpful: generic prefix plus root cause.
    message = str(excinfo.value)
    assert "Error importing native provider" in message
    assert "Native Google Gen AI SDK failed" in message
def test_gemini_completion_initialization_parameters():
    """Sampling parameters passed to LLM() are stored on the completion object."""
    sampling = {
        "temperature": 0.7,
        "max_output_tokens": 2000,
        "top_p": 0.9,
        "top_k": 40,
    }
    llm = LLM(
        model="google/gemini-2.0-flash-001",
        api_key="test-key",
        **sampling,
    )
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    assert isinstance(llm, GeminiCompletion)
    assert llm.model == "gemini-2.0-flash-001"
    for attr_name, expected in sampling.items():
        assert getattr(llm, attr_name) == expected
def test_gemini_specific_parameters():
    """Gemini-specific keyword arguments are stored on the completion object.

    Covers ``stop_sequences``, ``stream``, ``safety_settings``, ``project``
    and ``location``.
    """
    safety_settings = {
        "HARM_CATEGORY_HARASSMENT": "BLOCK_MEDIUM_AND_ABOVE",
        "HARM_CATEGORY_HATE_SPEECH": "BLOCK_MEDIUM_AND_ABOVE",
    }
    llm = LLM(
        model="google/gemini-2.0-flash-001",
        stop_sequences=["Human:", "Assistant:"],
        stream=True,
        safety_settings=safety_settings,
        project="test-project",
        location="us-central1",
    )
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    assert isinstance(llm, GeminiCompletion)
    assert llm.stop_sequences == ["Human:", "Assistant:"]
    # Identity check rather than ``== True`` so a merely truthy value fails;
    # matches the ``is True`` style used by the other boolean checks in
    # this module.
    assert llm.stream is True
    assert llm.safety_settings == safety_settings
    assert llm.project == "test-project"
    assert llm.location == "us-central1"
def test_gemini_completion_call():
    """Invoke ``call`` on a Gemini LLM whose ``call`` method is patched.

    NOTE(review): ``call`` itself is replaced by a mock here, so this checks
    the patched return value and the recorded arguments rather than any real
    provider logic.
    """
    llm = LLM(model="google/gemini-2.0-flash-001")
    prompt = "Hello, how are you?"
    canned_reply = "Hello! I'm Gemini, ready to help."

    with patch.object(llm, 'call', return_value=canned_reply) as mock_call:
        assert llm.call(prompt) == canned_reply
        mock_call.assert_called_once_with(prompt)
def test_gemini_completion_called_during_crew_execution():
    """Running a crew invokes ``call`` on the agent's Gemini LLM instance."""
    gemini_llm = LLM(model="google/gemini-2.0-flash-001")

    # Patch ``call`` on this exact instance, which is then handed to the agent.
    with patch.object(
        gemini_llm, 'call', return_value="Tokyo has 14 million people."
    ) as mock_call:
        researcher = Agent(
            role="Research Assistant",
            goal="Find population info",
            backstory="You research populations.",
            llm=gemini_llm,
        )
        population_task = Task(
            description="Find Tokyo population",
            expected_output="Population number",
            agent=researcher,
        )
        result = Crew(agents=[researcher], tasks=[population_task]).kickoff()

        assert mock_call.called
        assert "14 million" in str(result)
def test_gemini_completion_call_arguments():
    """The patched ``call`` receives messages that mention the task description.

    The first positional argument of the most recent call must be a ``str``
    or ``list`` whose text contains "hello world" (case-insensitive).
    """
    gemini_llm = LLM(model="google/gemini-2.0-flash-001")

    with patch.object(
        gemini_llm, 'call', return_value="Task completed successfully."
    ) as mock_call:
        tester = Agent(
            role="Test Agent",
            goal="Complete a simple task",
            backstory="You are a test agent.",
            llm=gemini_llm,  # same instance that was patched
        )
        hello_task = Task(
            description="Say hello world",
            expected_output="Hello world",
            agent=tester,
        )
        Crew(agents=[tester], tasks=[hello_task]).kickoff()

        assert mock_call.called
        last_call = mock_call.call_args
        assert last_call is not None

        # First positional argument carries the messages.
        messages = last_call.args[0]
        assert isinstance(messages, (str, list))

        # ``str()`` is a no-op for strings and flattens a list for searching.
        assert "hello world" in str(messages).lower()
def test_multiple_gemini_calls_in_crew():
    """A two-task crew calls the patched ``call`` at least twice.

    Every recorded call must carry a non-``None`` first positional argument.
    """
    gemini_llm = LLM(model="google/gemini-2.0-flash-001")

    with patch.object(
        gemini_llm, 'call', return_value="Task completed."
    ) as mock_call:
        worker = Agent(
            role="Multi-task Agent",
            goal="Complete multiple tasks",
            backstory="You can handle multiple tasks.",
            llm=gemini_llm,  # same instance that was patched
        )
        tasks = [
            Task(
                description="First task",
                expected_output="First result",
                agent=worker,
            ),
            Task(
                description="Second task",
                expected_output="Second result",
                agent=worker,
            ),
        ]
        Crew(agents=[worker], tasks=tasks).kickoff()

        # At least one LLM call per task.
        assert mock_call.call_count >= 2

        for recorded in mock_call.call_args_list:
            positional = recorded.args
            assert len(positional) > 0
            assert positional[0] is not None
def test_gemini_completion_with_tools():
    """An agent equipped with a tool still drives the patched ``call``.

    If the most recent call was given a ``tools`` keyword argument, it must
    be a non-empty collection. No claim is made when ``tools`` is absent.
    """
    from crewai.tools import tool

    @tool
    def sample_tool(query: str) -> str:
        """A sample tool for testing"""
        return f"Tool result for: {query}"

    gemini_llm = LLM(model="google/gemini-2.0-flash-001")

    with patch.object(gemini_llm, 'call') as mock_call:
        mock_call.return_value = "Task completed with tools."
        agent = Agent(
            role="Tool User",
            goal="Use tools to complete tasks",
            backstory="You can use tools.",
            llm=gemini_llm,  # same instance that was patched
            tools=[sample_tool],
        )
        task = Task(
            description="Use the sample tool",
            expected_output="Tool usage result",
            agent=agent,
        )
        crew = Crew(agents=[agent], tasks=[task])
        crew.kickoff()

        assert mock_call.called

        # ``call_args`` is always an (args, kwargs) pair, so the keyword
        # arguments can be read directly; no length check or empty-dict
        # fallback is needed.
        call_kwargs = mock_call.call_args.kwargs
        if 'tools' in call_kwargs:
            assert call_kwargs['tools'] is not None
            assert len(call_kwargs['tools']) > 0
def test_gemini_raises_error_when_model_not_supported():
    """Calling an unknown model surfaces an error from ``llm.call``.

    The ``genai`` name in the Gemini completion module is mocked so that
    ``generate_content`` raises a 404 ``ClientError``. The test accepts any
    ``Exception`` subclass from ``llm.call``.
    """
    with patch('crewai.llms.providers.gemini.completion.genai') as mock_genai:
        mock_client = MagicMock()
        mock_genai.Client.return_value = mock_client

        from google.genai.errors import ClientError  # type: ignore

        not_found_payload = {
            'error': {
                'code': 404,
                'message': 'models/model-doesnt-exist is not found for API version v1beta, or is not supported for generateContent.',
                'status': 'NOT_FOUND'
            }
        }
        mock_response = MagicMock()
        mock_response.body_segments = [not_found_payload]
        mock_response.status_code = 404
        mock_client.models.generate_content.side_effect = ClientError(
            404, mock_response
        )

        llm = LLM(model="google/model-doesnt-exist")
        with pytest.raises(Exception):  # any error counts as a pass
            llm.call("Hello")
def test_gemini_vertex_ai_setup():
    """``project`` and ``location`` arguments are kept on the completion object.

    GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION are set in the environment
    while the LLM is constructed.
    """
    vertex_env = {
        "GOOGLE_CLOUD_PROJECT": "test-project",
        "GOOGLE_CLOUD_LOCATION": "us-west1",
    }
    with patch.dict(os.environ, vertex_env):
        llm = LLM(
            model="google/gemini-2.0-flash-001",
            project="test-project",
            location="us-west1",
        )
        from crewai.llms.providers.gemini.completion import GeminiCompletion

        assert isinstance(llm, GeminiCompletion)
        assert llm.project == "test-project"
        assert llm.location == "us-west1"
def test_gemini_api_key_configuration():
    """The API key is picked up from GOOGLE_API_KEY and from GEMINI_API_KEY."""
    # Case 1: GOOGLE_API_KEY added on top of the current environment.
    with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-google-key"}):
        google_keyed = LLM(model="google/gemini-2.0-flash-001")
        from crewai.llms.providers.gemini.completion import GeminiCompletion

        assert isinstance(google_keyed, GeminiCompletion)
        assert google_keyed.api_key == "test-google-key"

    # Case 2: environment wiped so GEMINI_API_KEY is the only variable set.
    with patch.dict(
        os.environ, {"GEMINI_API_KEY": "test-gemini-key"}, clear=True
    ):
        gemini_keyed = LLM(model="google/gemini-2.0-flash-001")

        assert isinstance(gemini_keyed, GeminiCompletion)
        assert gemini_keyed.api_key == "test-gemini-key"
def test_gemini_model_capabilities():
    """Gemini 2.0 and 1.5 models both report tool support."""
    llm_2_0 = LLM(model="google/gemini-2.0-flash-001")
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    assert isinstance(llm_2_0, GeminiCompletion)
    # Identity checks instead of ``== True`` so only a real boolean passes;
    # consistent with the ``is True`` assertions elsewhere in this module.
    assert llm_2_0.supports_tools is True

    llm_1_5 = LLM(model="google/gemini-1.5-pro")
    assert isinstance(llm_1_5, GeminiCompletion)
    assert llm_1_5.supports_tools is True
def test_gemini_generation_config():
    """The prepared generation config exposes the sampling parameters.

    Each parameter name must be either an attribute of the config or appear
    in its string representation.
    """
    llm = LLM(
        model="google/gemini-2.0-flash-001",
        temperature=0.7,
        top_p=0.9,
        top_k=40,
        max_output_tokens=1000,
    )
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    assert isinstance(llm, GeminiCompletion)

    config = llm._prepare_generation_config()

    for field in ("temperature", "top_p", "top_k", "max_output_tokens"):
        assert hasattr(config, field) or field in str(config)
def test_gemini_model_detection():
    """Both "google/" and "gemini/" prefixes resolve to GeminiCompletion."""
    # Imported once up front instead of on every loop iteration.
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    gemini_test_cases = [
        "google/gemini-2.0-flash-001",
        "gemini/gemini-2.0-flash-001",
        "google/gemini-1.5-pro",
        "gemini/gemini-1.5-flash",
    ]
    for model_name in gemini_test_cases:
        llm = LLM(model=model_name)
        assert isinstance(llm, GeminiCompletion), f"Failed for model: {model_name}"
def test_gemini_supports_stop_words():
    """``supports_stop_words()`` returns ``True`` for a Gemini model."""
    llm = LLM(model="google/gemini-2.0-flash-001")
    # Identity check instead of ``== True`` so a merely truthy return fails.
    assert llm.supports_stop_words() is True
def test_gemini_context_window_size():
    """Context window sizes exceed the minimums expected per model family.

    Gemini 2.0 Flash must report more than 500,000 tokens and Gemini 1.5 Pro
    more than 1,000,000.
    """
    flash_llm = LLM(model="google/gemini-2.0-flash-001")
    assert flash_llm.get_context_window_size() > 500000

    pro_llm = LLM(model="google/gemini-1.5-pro")
    assert pro_llm.get_context_window_size() > 1000000
def test_gemini_message_formatting():
    """Chat messages are converted to the Gemini content format.

    The system message is expected back as the system instruction. The
    remaining turns are expected as content objects, with "assistant" mapped
    to the "model" role.
    """
    llm = LLM(model="google/gemini-2.0-flash-001")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ]
    contents, system_instruction = llm._format_messages_for_gemini(conversation)

    # System message is pulled out of the turn list.
    assert system_instruction == "You are a helpful assistant."

    # user, model, user turns remain.
    assert len(contents) >= 3
    first_turn, second_turn = contents[0], contents[1]
    assert first_turn.role == "user"
    assert second_turn.role == "model"
def test_gemini_streaming_parameter():
    """The ``stream`` flag passed to LLM() is stored unchanged."""
    # Identity checks instead of ``== False`` / ``== True`` so that values
    # such as 0 or 1 cannot satisfy the assertions.
    llm_no_stream = LLM(model="google/gemini-2.0-flash-001", stream=False)
    assert llm_no_stream.stream is False

    llm_stream = LLM(model="google/gemini-2.0-flash-001", stream=True)
    assert llm_stream.stream is True
def test_gemini_tool_conversion():
    """A function-style tool spec is converted into one Gemini tool object.

    The converted tool must expose exactly one function declaration that
    carries the original name and description.
    """
    llm = LLM(model="google/gemini-2.0-flash-001")

    parameters_schema = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"}
        },
        "required": ["query"],
    }
    crewai_tools = [
        {
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test tool",
                "parameters": parameters_schema,
            },
        }
    ]

    gemini_tools = llm._convert_tools_for_interference(crewai_tools)
    assert len(gemini_tools) == 1

    converted = gemini_tools[0]
    assert hasattr(converted, 'function_declarations')
    assert len(converted.function_declarations) == 1

    declaration = converted.function_declarations[0]
    assert declaration.name == "test_tool"
    assert declaration.description == "A test tool"
def test_gemini_environment_variable_api_key():
    """GOOGLE_API_KEY from the environment ends up on the LLM and its client."""
    env_override = {"GOOGLE_API_KEY": "test-google-key"}
    with patch.dict(os.environ, env_override):
        llm = LLM(model="google/gemini-2.0-flash-001")

        client = llm.client
        assert client is not None
        assert hasattr(client, 'models')
        assert llm.api_key == "test-google-key"
@pytest.mark.vcr()
def test_gemini_token_usage_tracking():
    """One call yields the expected reply and non-zero token counters."""
    llm = LLM(model="google/gemini-2.0-flash-001")

    reply = llm.call("Hello")
    assert reply.strip() == "Hi there! How can I help you today?"

    usage = llm.get_token_usage_summary()
    assert usage.successful_requests == 1
    for counter in (
        usage.prompt_tokens,
        usage.completion_tokens,
        usage.total_tokens,
    ):
        assert counter > 0
@pytest.mark.vcr()
def test_gemini_tool_returning_float():
    """End-to-end: an agent uses a tool whose ``_run`` returns a float.

    The crew is asked to add 10000 and 20000 with the ``sum_numbers`` tool
    and the raw result must contain "30000".
    """
    from typing import Type

    from pydantic import BaseModel, Field

    from crewai.tools import BaseTool

    class SumNumbersToolInput(BaseModel):
        a: float = Field(..., description="The first number to add")
        b: float = Field(..., description="The second number to add")

    class SumNumbersTool(BaseTool):
        name: str = "sum_numbers"
        description: str = "Add two numbers together and return the result"
        args_schema: Type[BaseModel] = SumNumbersToolInput

        def _run(self, a: float, b: float) -> float:
            return a + b

    calculator = Agent(
        role="Calculator",
        goal="Calculate numbers accurately",
        backstory="You are a calculator that adds numbers.",
        llm=LLM(model="google/gemini-2.0-flash-001"),
        tools=[SumNumbersTool()],
        verbose=True,
    )
    addition_task = Task(
        description="What is 10000 + 20000? Use the sum_numbers tool to calculate this.",
        expected_output="The sum of the two numbers",
        agent=calculator,
    )
    outcome = Crew(
        agents=[calculator], tasks=[addition_task], verbose=True
    ).kickoff()

    # The sum must show up in the crew's raw output.
    assert "30000" in outcome.raw
def test_gemini_stop_sequences_sync():
    """Assigning ``stop`` keeps ``stop`` and ``stop_sequences`` identical.

    A list is stored as given, a single string becomes a one-element list,
    and ``None`` becomes an empty list.
    """
    llm = LLM(model="google/gemini-2.0-flash-001")

    # (value assigned to ``stop``, value expected on both attributes)
    scenarios = [
        (["\nObservation:", "\nThought:"], ["\nObservation:", "\nThought:"]),
        ("\nFinal Answer:", ["\nFinal Answer:"]),
        (None, []),
    ]
    for assigned, expected in scenarios:
        llm.stop = assigned
        assert llm.stop_sequences == expected
        assert llm.stop == expected
def test_gemini_stop_sequences_sent_to_api():
    """Stop sequences set via ``llm.stop`` reach ``generate_content``.

    ``generate_content`` on the client is patched, so no real request is
    made. The ``config`` keyword argument it receives must carry the
    configured stop sequences.
    """
    llm = LLM(model="google/gemini-2.0-flash-001")

    # Set through ``stop``, as an agent executor would.
    llm.stop = ["\nObservation:", "\nThought:"]

    with patch.object(llm.client.models, 'generate_content') as mock_generate:
        mock_response = MagicMock()
        mock_response.text = "Hello"
        mock_response.candidates = []
        mock_response.usage_metadata = MagicMock(
            prompt_token_count=10,
            candidates_token_count=5,
            total_token_count=15,
        )
        mock_generate.return_value = mock_response

        llm.call("Say hello in one word")

        call_kwargs = mock_generate.call_args.kwargs
        assert "config" in call_kwargs
        config = call_kwargs["config"]

        # Anything stored in the instance ``__dict__`` is also visible to
        # ``hasattr``, so one attribute check suffices and the value
        # comparison can run unconditionally.
        assert hasattr(config, 'stop_sequences')
        assert config.stop_sequences == ["\nObservation:", "\nThought:"]
@pytest.mark.vcr()
@pytest.mark.skip(reason="VCR cannot replay SSE streaming responses")
def test_google_streaming_returns_usage_metrics():
    """A streaming Gemini crew run reports populated token usage metrics."""
    streaming_llm = LLM(model="gemini/gemini-2.0-flash-exp", stream=True)
    researcher = Agent(
        role="Research Assistant",
        goal="Find information about the capital of Japan",
        backstory="You are a helpful research assistant.",
        llm=streaming_llm,
        verbose=True,
    )
    capital_task = Task(
        description="What is the capital of Japan?",
        expected_output="The capital of Japan",
        agent=researcher,
    )
    result = Crew(agents=[researcher], tasks=[capital_task]).kickoff()

    usage = result.token_usage
    assert usage is not None
    assert result.token_usage.total_tokens > 0
    assert result.token_usage.prompt_tokens > 0
    assert result.token_usage.completion_tokens > 0
    assert result.token_usage.successful_requests >= 1
@pytest.mark.vcr()
def test_google_express_mode_works() -> None:
    """A crew run with GOOGLE_GENAI_USE_VERTEXAI="true" reports token usage.

    The environment variable is set only for the duration of the run.
    """
    with patch.dict(os.environ, {"GOOGLE_GENAI_USE_VERTEXAI": "true"}):
        express_llm = LLM(
            model="gemini/gemini-2.0-flash-exp",
        )
        researcher = Agent(
            role="Research Assistant",
            goal="Find information about the capital of Japan",
            backstory="You are a helpful research assistant.",
            llm=express_llm,
            verbose=True,
        )
        capital_task = Task(
            description="What is the capital of Japan?",
            expected_output="The capital of Japan",
            agent=researcher,
        )
        result = Crew(agents=[researcher], tasks=[capital_task]).kickoff()

        assert result.token_usage is not None
        assert result.token_usage.total_tokens > 0
        assert result.token_usage.prompt_tokens > 0
        assert result.token_usage.completion_tokens > 0
        assert result.token_usage.successful_requests >= 1
def test_gemini_2_0_model_detection():
    """``is_gemini_2_0`` is True for 2.0 and 2.5 models, False for 1.5."""
    flash_2_0 = LLM(model="google/gemini-2.0-flash-001")
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    assert isinstance(flash_2_0, GeminiCompletion)
    assert flash_2_0.is_gemini_2_0 is True

    flash_2_5 = LLM(model="google/gemini-2.5-flash")
    assert isinstance(flash_2_5, GeminiCompletion)
    assert flash_2_5.is_gemini_2_0 is True

    # An older generation must not be flagged.
    pro_1_5 = LLM(model="google/gemini-1.5-pro")
    assert isinstance(pro_1_5, GeminiCompletion)
    assert pro_1_5.is_gemini_2_0 is False
def test_add_property_ordering_to_schema():
    """``_add_property_ordering`` adds ``propertyOrdering`` at every object level.

    The ordering is expected to list property names in declaration order,
    both for a flat schema and for each level of a nested one.
    """
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    # --- flat object schema ---
    flat_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age": {"type": "integer"},
            "email": {"type": "string"},
        },
    }
    flat_result = GeminiCompletion._add_property_ordering(flat_schema)
    assert "propertyOrdering" in flat_result
    assert flat_result["propertyOrdering"] == ["name", "age", "email"]

    # --- object schema nested two levels deep ---
    contact_schema = {
        "type": "object",
        "properties": {
            "email": {"type": "string"},
            "phone": {"type": "string"},
        },
    }
    user_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "contact": contact_schema,
        },
    }
    nested_schema = {
        "type": "object",
        "properties": {
            "user": user_schema,
            "id": {"type": "integer"},
        },
    }
    nested_result = GeminiCompletion._add_property_ordering(nested_schema)
    assert "propertyOrdering" in nested_result
    assert nested_result["propertyOrdering"] == ["user", "id"]

    user_result = nested_result["properties"]["user"]
    assert "propertyOrdering" in user_result
    assert user_result["propertyOrdering"] == ["name", "contact"]

    contact_result = nested_result["properties"]["user"]["properties"]["contact"]
    assert "propertyOrdering" in contact_result
    assert contact_result["propertyOrdering"] == ["email", "phone"]
def test_gemini_2_0_response_model_with_property_ordering():
    """A Gemini 2.0 config built with a response model has ``propertyOrdering``.

    The schema is read from ``response_json_schema`` on the generation
    config and must list every field of the response model.
    """
    from pydantic import BaseModel, Field

    class TestResponse(BaseModel):
        """Test response model."""
        name: str = Field(..., description="The name")
        age: int = Field(..., description="The age")
        email: str = Field(..., description="The email")

    llm = LLM(model="google/gemini-2.0-flash-001")

    config = llm._prepare_generation_config(response_model=TestResponse)

    # A key in the instance ``__dict__`` is also reachable through
    # ``hasattr``, so one attribute check replaces the former ``__dict__``
    # fallback and the if/else around the lookup.
    assert hasattr(config, 'response_json_schema')
    schema = config.response_json_schema

    assert "propertyOrdering" in schema
    assert "name" in schema["propertyOrdering"]
    assert "age" in schema["propertyOrdering"]
    assert "email" in schema["propertyOrdering"]
def test_gemini_1_5_response_model_uses_response_schema():
    """A Gemini 1.5 config uses ``response_schema``, not ``response_json_schema``.

    ``response_schema`` must be present and hold a class.
    ``response_json_schema`` must be absent or ``None``.
    """
    from pydantic import BaseModel, Field

    class TestResponse(BaseModel):
        """Test response model."""
        name: str = Field(..., description="The name")
        age: int = Field(..., description="The age")

    llm = LLM(model="google/gemini-1.5-pro")

    config = llm._prepare_generation_config(response_model=TestResponse)

    # ``hasattr`` already covers anything stored in the instance
    # ``__dict__``, so no separate ``__dict__`` fallback is needed.
    assert hasattr(config, 'response_schema')
    # Missing attribute and explicit ``None`` are both acceptable.
    assert getattr(config, 'response_json_schema', None) is None

    schema = config.response_schema
    # The test accepts the model class itself or any other class object.
    assert schema is TestResponse or isinstance(schema, type)
# =============================================================================
# Agent Kickoff Structured Output Tests
# =============================================================================
@pytest.mark.vcr()
def test_gemini_agent_kickoff_structured_output_without_tools():
    """``Agent.kickoff`` with no tools returns a populated pydantic result.

    The result's ``pydantic`` attribute must be an ``AnalysisResult`` with a
    non-empty topic, at least one key point and a non-empty summary.
    """
    from pydantic import BaseModel, Field

    class AnalysisResult(BaseModel):
        """Structured output for analysis results."""
        topic: str = Field(description="The topic analyzed")
        key_points: list[str] = Field(description="Key insights from the analysis")
        summary: str = Field(description="Brief summary of findings")

    analyst = Agent(
        role="Analyst",
        goal="Provide structured analysis on topics",
        backstory="You are an expert analyst who provides clear, structured insights.",
        llm=LLM(model="google/gemini-2.0-flash-001"),
        tools=[],
        verbose=True,
    )
    result = analyst.kickoff(
        messages="Analyze the benefits of remote work briefly. Keep it concise.",
        response_format=AnalysisResult,
    )

    assert result.pydantic is not None, "Expected pydantic output but got None"
    assert isinstance(result.pydantic, AnalysisResult), f"Expected AnalysisResult but got {type(result.pydantic)}"
    assert result.pydantic.topic, "Topic should not be empty"
    assert len(result.pydantic.key_points) > 0, "Should have at least one key point"
    assert result.pydantic.summary, "Summary should not be empty"
@pytest.mark.vcr()
def test_gemini_agent_kickoff_structured_output_with_tools():
    """``Agent.kickoff`` with a tool available returns a pydantic result.

    The agent is asked to add 15 and 27 with ``add_numbers``. The parsed
    ``CalculationResult`` must report 42 with non-empty operation and
    explanation fields.
    """
    from pydantic import BaseModel, Field

    from crewai.tools import tool

    class CalculationResult(BaseModel):
        """Structured output for calculation results."""
        operation: str = Field(description="The mathematical operation performed")
        result: int = Field(description="The result of the calculation")
        explanation: str = Field(description="Brief explanation of the calculation")

    @tool
    def add_numbers(a: int, b: int) -> int:
        """Add two numbers together and return the sum."""
        return a + b

    calculator = Agent(
        role="Calculator",
        goal="Perform calculations using available tools",
        backstory="You are a calculator assistant that uses tools to compute results.",
        llm=LLM(model="google/gemini-2.0-flash-001"),
        tools=[add_numbers],
        verbose=True,
    )
    result = calculator.kickoff(
        messages="Calculate 15 + 27 using your add_numbers tool. Report the result.",
        response_format=CalculationResult,
    )

    assert result.pydantic is not None, "Expected pydantic output but got None"
    assert isinstance(result.pydantic, CalculationResult), f"Expected CalculationResult but got {type(result.pydantic)}"
    assert result.pydantic.result == 42, f"Expected result 42 but got {result.pydantic.result}"
    assert result.pydantic.operation, "Operation should not be empty"
    assert result.pydantic.explanation, "Explanation should not be empty"
def test_gemini_stop_words_not_applied_to_structured_output():
    """Structured-output validation keeps text that matches a stop sequence.

    The JSON payload has "Observation:" inside a string field while
    "Observation:" is also configured as a stop sequence. Validation must
    return the whole model with that text intact.
    """
    from pydantic import BaseModel, Field

    from crewai.llms.providers.gemini.completion import GeminiCompletion

    class ResearchResult(BaseModel):
        """Research result that may contain stop word patterns in string fields."""
        finding: str = Field(description="The research finding")
        observation: str = Field(description="Observation about the finding")

    # Stop words are configured through ``stop_sequences`` here.
    llm = GeminiCompletion(
        model="gemini-2.0-flash-001",
        stop_sequences=["Observation:", "Final Answer:"],
    )

    # Truncating at "Observation:" would leave this JSON unparseable.
    json_response = '{"finding": "The data shows growth", "observation": "Observation: This confirms the hypothesis"}'

    parsed = llm._validate_structured_output(json_response, ResearchResult)

    assert isinstance(parsed, ResearchResult)
    assert parsed.finding == "The data shows growth"
    assert "Observation:" in parsed.observation
def test_gemini_stop_words_still_applied_to_regular_responses():
    """``_apply_stop_words`` truncates plain text at a configured stop sequence.

    Text before the stop sequence must survive. The stop sequence itself and
    everything after it must be gone.
    """
    from crewai.llms.providers.gemini.completion import GeminiCompletion

    llm = GeminiCompletion(
        model="gemini-2.0-flash-001",
        stop_sequences=["Observation:", "Final Answer:"],
    )

    raw_text = "I need to search for more information.\n\nAction: search\nObservation: Found results"
    truncated = llm._apply_stop_words(raw_text)

    assert "Observation:" not in truncated
    assert "Found results" not in truncated
    assert "I need to search for more information" in truncated
def test_gemini_structured_output_preserves_json_with_stop_word_patterns():
    """Validation keeps every field even when each one holds a stop sequence.

    All three configured stop sequences appear inside string values of the
    JSON payload, and all three must still be present after validation.
    """
    from pydantic import BaseModel, Field

    from crewai.llms.providers.gemini.completion import GeminiCompletion

    class AgentObservation(BaseModel):
        """Model with fields that might contain stop word-like text."""
        action_taken: str = Field(description="What action was taken")
        observation_result: str = Field(description="The observation result")
        final_answer: str = Field(description="The final answer")

    llm = GeminiCompletion(
        model="gemini-2.0-flash-001",
        stop_sequences=["Observation:", "Final Answer:", "Action:"],
    )

    # Payload text is kept exactly as written, including its line layout.
    json_with_stop_patterns = '''{
"action_taken": "Action: Searched the database",
"observation_result": "Observation: Found 5 relevant results",
"final_answer": "Final Answer: The data shows positive growth"
}'''

    parsed = llm._validate_structured_output(
        json_with_stop_patterns, AgentObservation
    )

    assert isinstance(parsed, AgentObservation)
    assert "Action:" in parsed.action_taken
    assert "Observation:" in parsed.observation_result
    assert "Final Answer:" in parsed.final_answer
@pytest.mark.vcr()
def test_gemini_cached_prompt_tokens():
    """Two calls sharing one large system prompt are both counted in usage.

    ``cached_prompt_tokens`` only has to exist and be non-negative; the test
    does not require that any tokens were actually served from cache.
    """
    padding = "This is padding text to ensure the prompt is large enough for caching. " * 80
    system_msg = f"You are a helpful assistant. {padding}"

    llm = LLM(model="google/gemini-2.5-flash")

    # Same system prompt both times, different user turn.
    for user_prompt in ("Say hello in one word.", "Say goodbye in one word."):
        llm.call([
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_prompt},
        ])

    usage = llm.get_token_usage_summary()
    assert usage.total_tokens > 0
    assert usage.prompt_tokens > 0
    assert usage.completion_tokens > 0
    assert usage.successful_requests == 2
    assert usage.cached_prompt_tokens >= 0
@pytest.mark.vcr()
def test_gemini_cached_prompt_tokens_with_tools():
    """Two tool-enabled calls sharing one system prompt are both counted.

    ``cached_prompt_tokens`` only has to exist and be non-negative; the test
    does not require that any tokens were actually served from cache.
    """
    padding = "This is padding text to ensure the prompt is large enough for caching. " * 80
    system_msg = f"You are a helpful assistant that uses tools. {padding}"

    def get_weather(location: str) -> str:
        return f"The weather in {location} is sunny and 72°F"

    weather_tool_spec = {
        "name": "get_weather",
        "description": "Get the current weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city name"
                }
            },
            "required": ["location"],
        },
    }
    tools = [weather_tool_spec]

    llm = LLM(model="google/gemini-2.5-flash")

    # Same system prompt and tools both times, different user question.
    for question in (
        "What is the weather in Tokyo?",
        "What is the weather in Paris?",
    ):
        llm.call(
            [
                {"role": "system", "content": system_msg},
                {"role": "user", "content": question},
            ],
            tools=tools,
            available_functions={"get_weather": get_weather},
        )

    usage = llm.get_token_usage_summary()
    assert usage.total_tokens > 0
    assert usage.prompt_tokens > 0
    assert usage.successful_requests == 2
    assert usage.cached_prompt_tokens >= 0