import os
import sys
import types
from unittest.mock import MagicMock, patch
import pytest
from crewai.llm import LLM
from crewai.crew import Crew
from crewai.agent import Agent
from crewai.task import Task
@pytest.fixture
def mock_azure_credentials():
"""Mock Azure credentials for tests that need them."""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com"
}):
yield
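# The tests below repeatedly hand-build MagicMock objects shaped like an Azure
# chat-completions response (choices -> message -> content/tool_calls, plus a
# usage block). This small reference sketch captures that pattern in one place;
# the name and defaults are illustrative only and not part of the crewai API.
def make_mock_chat_response(content=None, tool_calls=None,
                            prompt_tokens=100, completion_tokens=50):
    """Build a MagicMock shaped like an Azure chat-completions response."""
    message = MagicMock()
    message.content = content
    message.tool_calls = tool_calls
    choice = MagicMock()
    choice.message = message
    response = MagicMock()
    response.choices = [choice]
    response.usage = MagicMock(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    return response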
@pytest.mark.usefixtures("mock_azure_credentials")
def test_azure_completion_is_used_when_azure_provider():
"""
Test that AzureCompletion from completion.py is used when LLM uses provider 'azure'
"""
llm = LLM(model="azure/gpt-4")
assert llm.__class__.__name__ == "AzureCompletion"
assert llm.provider == "azure"
assert llm.model == "gpt-4"
@pytest.mark.usefixtures("mock_azure_credentials")
def test_azure_completion_is_used_when_azure_openai_provider():
"""
Test that AzureCompletion is used when provider is 'azure_openai'
"""
llm = LLM(model="azure_openai/gpt-4")
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
assert llm.provider == "azure"
assert llm.model == "gpt-4"
def test_azure_tool_use_conversation_flow():
"""
Test that the Azure completion properly handles tool use conversation flow
"""
from crewai.llms.providers.azure.completion import AzureCompletion
from azure.ai.inference.models import ChatCompletionsToolCall
# Create AzureCompletion instance
completion = AzureCompletion(
model="gpt-4",
api_key="test-key",
endpoint="https://test.openai.azure.com"
)
# Mock tool function
def mock_weather_tool(location: str) -> str:
return f"The weather in {location} is sunny and 75°F"
available_functions = {"get_weather": mock_weather_tool}
# Mock the Azure client responses
with patch.object(completion.client, 'complete') as mock_complete:
# Mock tool call in response with proper type
mock_tool_call = MagicMock(spec=ChatCompletionsToolCall)
mock_tool_call.function.name = "get_weather"
mock_tool_call.function.arguments = '{"location": "San Francisco"}'
mock_message = MagicMock()
mock_message.content = None
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)
mock_complete.return_value = mock_response
# Test the call
messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}]
result = completion.call(
messages=messages,
available_functions=available_functions
)
# Verify the tool was executed and returned the result
assert result == "The weather in San Francisco is sunny and 75°F"
# Verify that the API was called
assert mock_complete.called
@pytest.mark.usefixtures("mock_azure_credentials")
def test_azure_completion_module_is_imported():
"""
Test that the completion module is properly imported when using Azure provider
"""
module_name = "crewai.llms.providers.azure.completion"
# Remove module from cache if it exists
if module_name in sys.modules:
del sys.modules[module_name]
# Create LLM instance - this should trigger the import
LLM(model="azure/gpt-4")
# Verify the module was imported
assert module_name in sys.modules
completion_mod = sys.modules[module_name]
assert isinstance(completion_mod, types.ModuleType)
# Verify the class exists in the module
assert hasattr(completion_mod, 'AzureCompletion')
def test_native_azure_raises_error_when_initialization_fails():
"""
Test that LLM raises ImportError when native Azure completion fails to initialize.
This ensures we don't silently fall back when there's a configuration issue.
"""
# Mock the _get_native_provider to return a failing class
with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider:
class FailingCompletion:
def __init__(self, *args, **kwargs):
raise Exception("Native Azure AI Inference SDK failed")
mock_get_provider.return_value = FailingCompletion
# This should raise ImportError, not fall back to LiteLLM
with pytest.raises(ImportError) as excinfo:
LLM(model="azure/gpt-4")
assert "Error importing native provider" in str(excinfo.value)
assert "Native Azure AI Inference SDK failed" in str(excinfo.value)
def test_azure_completion_initialization_parameters():
"""
Test that AzureCompletion is initialized with correct parameters
"""
llm = LLM(
model="azure/gpt-4",
temperature=0.7,
max_tokens=2000,
top_p=0.9,
frequency_penalty=0.5,
presence_penalty=0.3,
api_key="test-key",
endpoint="https://test.openai.azure.com"
)
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
assert llm.model == "gpt-4"
assert llm.temperature == 0.7
assert llm.max_tokens == 2000
assert llm.top_p == 0.9
assert llm.frequency_penalty == 0.5
assert llm.presence_penalty == 0.3
def test_azure_specific_parameters():
"""
Test Azure-specific parameters like stop sequences, streaming, and API version
"""
llm = LLM(
model="azure/gpt-4",
stop=["Human:", "Assistant:"],
stream=True,
api_version="2024-02-01",
endpoint="https://test.openai.azure.com"
)
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
assert llm.stop == ["Human:", "Assistant:"]
    assert llm.stream is True
assert llm.api_version == "2024-02-01"
@pytest.mark.usefixtures("mock_azure_credentials")
def test_azure_completion_call():
"""
Test that AzureCompletion call method works
"""
llm = LLM(model="azure/gpt-4")
# Mock the call method on the instance
with patch.object(llm, 'call', return_value="Hello! I'm Azure OpenAI, ready to help.") as mock_call:
result = llm.call("Hello, how are you?")
assert result == "Hello! I'm Azure OpenAI, ready to help."
mock_call.assert_called_once_with("Hello, how are you?")
@pytest.mark.usefixtures("mock_azure_credentials")
def test_azure_completion_called_during_crew_execution():
"""
Test that AzureCompletion.call is actually invoked when running a crew
"""
# Create the LLM instance first
azure_llm = LLM(model="azure/gpt-4")
# Mock the call method on the specific instance
with patch.object(azure_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call:
# Create agent with explicit LLM configuration
agent = Agent(
role="Research Assistant",
goal="Find population info",
backstory="You research populations.",
llm=azure_llm,
)
task = Task(
description="Find Tokyo population",
expected_output="Population number",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
# Verify mock was called
assert mock_call.called
assert "14 million" in str(result)
@pytest.mark.usefixtures("mock_azure_credentials")
def test_azure_completion_call_arguments():
"""
Test that AzureCompletion.call is invoked with correct arguments
"""
# Create LLM instance first
azure_llm = LLM(model="azure/gpt-4")
# Mock the instance method
with patch.object(azure_llm, 'call') as mock_call:
mock_call.return_value = "Task completed successfully."
agent = Agent(
role="Test Agent",
goal="Complete a simple task",
backstory="You are a test agent.",
llm=azure_llm # Use same instance
)
task = Task(
description="Say hello world",
expected_output="Hello world",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
crew.kickoff()
# Verify call was made
assert mock_call.called
# Check the arguments passed to the call method
call_args = mock_call.call_args
assert call_args is not None
# The first argument should be the messages
messages = call_args[0][0] # First positional argument
assert isinstance(messages, (str, list))
# Verify that the task description appears in the messages
if isinstance(messages, str):
assert "hello world" in messages.lower()
elif isinstance(messages, list):
message_content = str(messages).lower()
assert "hello world" in message_content
def test_multiple_azure_calls_in_crew():
"""
Test that AzureCompletion.call is invoked multiple times for multiple tasks
"""
# Create LLM instance first
azure_llm = LLM(model="azure/gpt-4")
# Mock the instance method
with patch.object(azure_llm, 'call') as mock_call:
mock_call.return_value = "Task completed."
agent = Agent(
role="Multi-task Agent",
goal="Complete multiple tasks",
backstory="You can handle multiple tasks.",
llm=azure_llm # Use same instance
)
task1 = Task(
description="First task",
expected_output="First result",
agent=agent,
)
task2 = Task(
description="Second task",
expected_output="Second result",
agent=agent,
)
crew = Crew(
agents=[agent],
tasks=[task1, task2]
)
crew.kickoff()
# Verify multiple calls were made
assert mock_call.call_count >= 2 # At least one call per task
# Verify each call had proper arguments
for call in mock_call.call_args_list:
assert len(call[0]) > 0 # Has positional arguments
messages = call[0][0]
assert messages is not None
def test_azure_completion_with_tools():
"""
Test that AzureCompletion.call is invoked with tools when agent has tools
"""
from crewai.tools import tool
@tool
def sample_tool(query: str) -> str:
"""A sample tool for testing"""
return f"Tool result for: {query}"
# Create LLM instance first
azure_llm = LLM(model="azure/gpt-4")
# Mock the instance method
with patch.object(azure_llm, 'call') as mock_call:
mock_call.return_value = "Task completed with tools."
agent = Agent(
role="Tool User",
goal="Use tools to complete tasks",
backstory="You can use tools.",
llm=azure_llm, # Use same instance
tools=[sample_tool]
)
task = Task(
description="Use the sample tool",
expected_output="Tool usage result",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
crew.kickoff()
assert mock_call.called
call_args = mock_call.call_args
        call_kwargs = call_args.kwargs
if 'tools' in call_kwargs:
assert call_kwargs['tools'] is not None
assert len(call_kwargs['tools']) > 0
def test_azure_raises_error_when_endpoint_missing():
"""Test that AzureCompletion raises ValueError when endpoint is missing"""
from crewai.llms.providers.azure.completion import AzureCompletion
# Clear environment variables
with patch.dict(os.environ, {}, clear=True):
with pytest.raises(ValueError, match="Azure endpoint is required"):
AzureCompletion(model="gpt-4", api_key="test-key")
def test_azure_raises_error_when_api_key_missing():
"""Test that AzureCompletion raises ValueError when API key is missing"""
from crewai.llms.providers.azure.completion import AzureCompletion
# Clear environment variables
with patch.dict(os.environ, {}, clear=True):
with pytest.raises(ValueError, match="Azure API key is required"):
AzureCompletion(model="gpt-4", endpoint="https://test.openai.azure.com")
def test_azure_endpoint_configuration():
"""
Test that Azure endpoint configuration works with multiple environment variable names
"""
# Test with AZURE_ENDPOINT
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test1.openai.azure.com"
}):
llm = LLM(model="azure/gpt-4")
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
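        # For *.openai.azure.com hosts the deployment path is expected to be appended automatically.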
assert llm.endpoint == "https://test1.openai.azure.com/openai/deployments/gpt-4"
# Test with AZURE_OPENAI_ENDPOINT
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_OPENAI_ENDPOINT": "https://test2.openai.azure.com"
}, clear=True):
llm = LLM(model="azure/gpt-4")
assert isinstance(llm, AzureCompletion)
# Endpoint should be auto-constructed for Azure OpenAI
assert llm.endpoint == "https://test2.openai.azure.com/openai/deployments/gpt-4"
def test_azure_api_key_configuration():
"""
Test that API key configuration works from AZURE_API_KEY environment variable
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-azure-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com"
}):
llm = LLM(model="azure/gpt-4")
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
assert llm.api_key == "test-azure-key"
def test_azure_model_capabilities():
"""
Test that model capabilities are correctly identified
"""
# Test GPT-4 model (supports function calling)
llm_gpt4 = LLM(model="azure/gpt-4")
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm_gpt4, AzureCompletion)
    assert llm_gpt4.is_openai_model is True
    assert llm_gpt4.supports_function_calling() is True
# Test GPT-3.5 model
llm_gpt35 = LLM(model="azure/gpt-35-turbo")
assert isinstance(llm_gpt35, AzureCompletion)
    assert llm_gpt35.is_openai_model is True
    assert llm_gpt35.supports_function_calling() is True
def test_azure_completion_params_preparation():
"""
Test that completion parameters are properly prepared
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm = LLM(
model="azure/gpt-4",
temperature=0.7,
top_p=0.9,
frequency_penalty=0.5,
presence_penalty=0.3,
max_tokens=1000
)
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
messages = [{"role": "user", "content": "Hello"}]
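        # _prepare_completion_params should fold the instance settings into the request payload.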
params = llm._prepare_completion_params(messages)
assert params["model"] == "gpt-4"
assert params["temperature"] == 0.7
assert params["top_p"] == 0.9
assert params["frequency_penalty"] == 0.5
assert params["presence_penalty"] == 0.3
assert params["max_tokens"] == 1000
def test_azure_model_detection():
"""
Test that various Azure model formats are properly detected
"""
# Test Azure model naming patterns
azure_test_cases = [
"azure/gpt-4",
"azure_openai/gpt-4",
"azure/gpt-4o",
"azure/gpt-35-turbo"
]
for model_name in azure_test_cases:
llm = LLM(model=model_name)
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion), f"Failed for model: {model_name}"
def test_azure_supports_stop_words():
"""
Test that Azure models support stop sequences
"""
llm = LLM(model="azure/gpt-4")
    assert llm.supports_stop_words() is True
def test_azure_gpt5_models_do_not_support_stop_words():
"""
Test that GPT-5 family models do not support stop words.
GPT-5 models use the Responses API which doesn't support stop sequences.
See: https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure
"""
# GPT-5 base models
gpt5_models = [
"azure/gpt-5",
"azure/gpt-5-mini",
"azure/gpt-5-nano",
"azure/gpt-5-chat",
# GPT-5.1 series
"azure/gpt-5.1",
"azure/gpt-5.1-chat",
"azure/gpt-5.1-codex",
"azure/gpt-5.1-codex-mini",
# GPT-5.2 series
"azure/gpt-5.2",
"azure/gpt-5.2-chat",
]
for model_name in gpt5_models:
llm = LLM(model=model_name)
        assert llm.supports_stop_words() is False, f"Expected {model_name} to NOT support stop words"
def test_azure_o_series_models_do_not_support_stop_words():
"""
Test that o-series reasoning models do not support stop words.
"""
o_series_models = [
"azure/o1",
"azure/o1-mini",
"azure/o3",
"azure/o3-mini",
"azure/o4",
"azure/o4-mini",
]
for model_name in o_series_models:
llm = LLM(model=model_name)
        assert llm.supports_stop_words() is False, f"Expected {model_name} to NOT support stop words"
def test_azure_responses_api_models_do_not_support_stop_words():
"""
Test that models using the Responses API do not support stop words.
"""
responses_api_models = [
"azure/computer-use-preview",
]
for model_name in responses_api_models:
llm = LLM(model=model_name)
        assert llm.supports_stop_words() is False, f"Expected {model_name} to NOT support stop words"
def test_azure_stop_words_not_included_for_unsupported_models():
"""
Test that stop words are not included in completion params for models that don't support them.
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
# Test GPT-5 model - stop should NOT be included even if set
llm_gpt5 = LLM(
model="azure/gpt-5-nano",
stop=["STOP", "END"]
)
params = llm_gpt5._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
assert "stop" not in params, "stop should not be included for GPT-5 models"
# Test regular model - stop SHOULD be included
llm_gpt4 = LLM(
model="azure/gpt-4",
stop=["STOP", "END"]
)
params = llm_gpt4._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
assert "stop" in params, "stop should be included for GPT-4 models"
assert params["stop"] == ["STOP", "END"]
def test_azure_context_window_size():
"""
Test that Azure models return correct context window sizes
"""
# Test GPT-4
llm_gpt4 = LLM(model="azure/gpt-4")
context_size_gpt4 = llm_gpt4.get_context_window_size()
assert context_size_gpt4 > 0 # Should return valid context size
# Test GPT-4o
llm_gpt4o = LLM(model="azure/gpt-4o")
context_size_gpt4o = llm_gpt4o.get_context_window_size()
assert context_size_gpt4o > context_size_gpt4 # GPT-4o has larger context
def test_azure_message_formatting():
"""
Test that messages are properly formatted for Azure API
"""
llm = LLM(model="azure/gpt-4")
# Test message formatting
test_messages = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"}
]
formatted_messages = llm._format_messages_for_azure(test_messages)
# All messages should be formatted as dictionaries with content
assert len(formatted_messages) == 4
# Verify each message is a dict with content
for msg in formatted_messages:
assert isinstance(msg, dict)
assert "content" in msg
def test_azure_streaming_parameter():
"""
Test that streaming parameter is properly handled
"""
# Test non-streaming
llm_no_stream = LLM(model="azure/gpt-4", stream=False)
    assert llm_no_stream.stream is False
# Test streaming
llm_stream = LLM(model="azure/gpt-4", stream=True)
    assert llm_stream.stream is True
def test_azure_tool_conversion():
"""
Test that tools are properly converted to Azure OpenAI format
"""
llm = LLM(model="azure/gpt-4")
# Mock tool in CrewAI format
crewai_tools = [{
"type": "function",
"function": {
"name": "test_tool",
"description": "A test tool",
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query"}
},
"required": ["query"]
}
}
}]
# Test tool conversion
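    # NOTE: "interference" (sic, presumably "inference") mirrors the method name
    # as spelled in the provider implementation under test.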
azure_tools = llm._convert_tools_for_interference(crewai_tools)
assert len(azure_tools) == 1
# Azure tools should maintain the function calling format
assert azure_tools[0]["type"] == "function"
assert azure_tools[0]["function"]["name"] == "test_tool"
assert azure_tools[0]["function"]["description"] == "A test tool"
assert "parameters" in azure_tools[0]["function"]
def test_azure_environment_variable_endpoint():
"""
Test that Azure endpoint is properly loaded from environment
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com"
}):
llm = LLM(model="azure/gpt-4")
assert llm.client is not None
assert llm.endpoint == "https://test.openai.azure.com/openai/deployments/gpt-4"
def test_azure_token_usage_tracking():
"""
Test that token usage is properly tracked for Azure responses
"""
llm = LLM(model="azure/gpt-4")
# Mock the Azure response with usage information
with patch.object(llm.client, 'complete') as mock_complete:
mock_message = MagicMock()
mock_message.content = "test response"
mock_message.tool_calls = None
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock(
prompt_tokens=50,
completion_tokens=25,
total_tokens=75
)
mock_complete.return_value = mock_response
result = llm.call("Hello")
# Verify the response
assert result == "test response"
# Verify token usage was extracted
usage = llm._extract_azure_token_usage(mock_response)
assert usage["prompt_tokens"] == 50
assert usage["completion_tokens"] == 25
assert usage["total_tokens"] == 75
def test_azure_http_error_handling():
"""
Test that Azure HTTP errors are properly handled
"""
from azure.core.exceptions import HttpResponseError
llm = LLM(model="azure/gpt-4")
# Mock an HTTP error
with patch.object(llm.client, 'complete') as mock_complete:
mock_complete.side_effect = HttpResponseError(message="Rate limit exceeded", response=MagicMock(status_code=429))
with pytest.raises(HttpResponseError):
llm.call("Hello")
@pytest.mark.vcr()
def test_azure_streaming_completion():
"""
Test that streaming completions work properly
"""
llm = LLM(model="azure/gpt-4o-mini", stream=True)
result = llm.call("Say hello")
assert result is not None
assert isinstance(result, str)
assert len(result) > 0
def test_azure_api_version_default():
"""
Test that Azure API version defaults correctly
"""
llm = LLM(model="azure/gpt-4")
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
# Should use default or environment variable
assert llm.api_version is not None
def test_azure_function_calling_support():
"""
Test that function calling is supported for OpenAI models
"""
# Test with GPT-4 (supports function calling)
llm_gpt4 = LLM(model="azure/gpt-4")
    assert llm_gpt4.supports_function_calling() is True
# Test with GPT-3.5 (supports function calling)
llm_gpt35 = LLM(model="azure/gpt-35-turbo")
    assert llm_gpt35.supports_function_calling() is True
def test_azure_openai_endpoint_url_construction():
"""
Test that Azure OpenAI endpoint URLs are automatically constructed correctly
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test-resource.openai.azure.com"
}):
llm = LLM(model="azure/gpt-4o-mini")
assert "/openai/deployments/gpt-4o-mini" in llm.endpoint
assert llm.endpoint == "https://test-resource.openai.azure.com/openai/deployments/gpt-4o-mini"
        assert llm.is_azure_openai_endpoint is True
def test_azure_openai_endpoint_url_with_trailing_slash():
"""
Test that trailing slashes are handled correctly in endpoint URLs
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test-resource.openai.azure.com/" # trailing slash
}):
llm = LLM(model="azure/gpt-4o")
assert llm.endpoint == "https://test-resource.openai.azure.com/openai/deployments/gpt-4o"
assert not llm.endpoint.endswith("//")
def test_azure_openai_endpoint_already_complete():
"""
Test that already complete Azure OpenAI endpoint URLs are not modified
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test-resource.openai.azure.com/openai/deployments/my-deployment"
}):
llm = LLM(model="azure/gpt-4")
assert llm.endpoint == "https://test-resource.openai.azure.com/openai/deployments/my-deployment"
        assert llm.is_azure_openai_endpoint is True
def test_non_azure_openai_endpoint_unchanged():
"""
Test that non-Azure OpenAI endpoints are not modified
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm = LLM(model="azure/mistral-large")
assert llm.endpoint == "https://models.inference.ai.azure.com"
        assert llm.is_azure_openai_endpoint is False
def test_azure_openai_model_parameter_excluded():
"""
Test that model parameter is NOT included for Azure OpenAI endpoints
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com/openai/deployments/gpt-4"
}):
llm = LLM(model="azure/gpt-4")
# Prepare params to check model parameter handling
params = llm._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
# Model parameter should NOT be included for Azure OpenAI endpoints
assert "model" not in params
assert "messages" in params
assert params["stream"] == False
def test_non_azure_openai_model_parameter_included():
"""
Test that model parameter IS included for non-Azure OpenAI endpoints
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm = LLM(model="azure/mistral-large")
params = llm._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
assert "model" in params
assert params["model"] == "mistral-large"
def test_azure_message_formatting_with_role():
"""
Test that messages are formatted with both 'role' and 'content' fields
"""
llm = LLM(model="azure/gpt-4")
# Test with string message
formatted = llm._format_messages_for_azure("Hello world")
assert isinstance(formatted, list)
assert len(formatted) > 0
assert "role" in formatted[0]
assert "content" in formatted[0]
messages = [
{"role": "system", "content": "You are helpful"},
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there"}
]
formatted = llm._format_messages_for_azure(messages)
for msg in formatted:
assert "role" in msg
assert "content" in msg
assert msg["role"] in ["system", "user", "assistant"]
def test_azure_message_formatting_default_role():
"""
Test that messages without a role default to 'user'
"""
llm = LLM(model="azure/gpt-4")
# Test with message that has role but tests default behavior
messages = [{"role": "user", "content": "test message"}]
formatted = llm._format_messages_for_azure(messages)
assert formatted[0]["role"] == "user"
assert formatted[0]["content"] == "test message"
def test_azure_endpoint_detection_flags():
"""
Test that is_azure_openai_endpoint flag is set correctly
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com/openai/deployments/gpt-4"
}):
llm_openai = LLM(model="azure/gpt-4")
        assert llm_openai.is_azure_openai_endpoint is True
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm_other = LLM(model="azure/mistral-large")
        assert llm_other.is_azure_openai_endpoint is False
def test_azure_improved_error_messages():
"""
Test that improved error messages are provided for common HTTP errors
"""
from azure.core.exceptions import HttpResponseError
llm = LLM(model="azure/gpt-4")
with patch.object(llm.client, 'complete') as mock_complete:
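        # Exercise a few representative status codes; each should surface as HttpResponseError.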
error_401 = HttpResponseError(message="Unauthorized")
error_401.status_code = 401
mock_complete.side_effect = error_401
with pytest.raises(HttpResponseError):
llm.call("test")
error_404 = HttpResponseError(message="Not Found")
error_404.status_code = 404
mock_complete.side_effect = error_404
with pytest.raises(HttpResponseError):
llm.call("test")
error_429 = HttpResponseError(message="Rate Limited")
error_429.status_code = 429
mock_complete.side_effect = error_429
with pytest.raises(HttpResponseError):
llm.call("test")
def test_azure_api_version_properly_passed():
"""
Test that api_version is properly passed to the client
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com",
"AZURE_API_VERSION": "" # Clear env var to test default
}, clear=False):
llm = LLM(model="azure/gpt-4", api_version="2024-08-01")
assert llm.api_version == "2024-08-01"
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com"
}, clear=True):
llm_default = LLM(model="azure/gpt-4")
assert llm_default.api_version == "2024-06-01" # Current default
def test_azure_timeout_and_max_retries_stored():
"""
Test that timeout and max_retries parameters are stored
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com"
}):
llm = LLM(
model="azure/gpt-4",
timeout=60.0,
max_retries=5
)
assert llm.timeout == 60.0
assert llm.max_retries == 5
def test_azure_complete_params_include_optional_params():
"""
Test that optional parameters are included in completion params when set
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm = LLM(
model="azure/gpt-4",
temperature=0.7,
top_p=0.9,
frequency_penalty=0.5,
presence_penalty=0.3,
max_tokens=1000,
stop=["STOP", "END"]
)
params = llm._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
assert params["temperature"] == 0.7
assert params["top_p"] == 0.9
assert params["frequency_penalty"] == 0.5
assert params["presence_penalty"] == 0.3
assert params["max_tokens"] == 1000
assert params["stop"] == ["STOP", "END"]
def test_azure_endpoint_validation_with_azure_prefix():
"""
Test that 'azure/' prefix is properly stripped when constructing endpoint
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://test.openai.azure.com"
}):
llm = LLM(model="azure/gpt-4o-mini")
# Should strip 'azure/' prefix and use 'gpt-4o-mini' as deployment name
assert "gpt-4o-mini" in llm.endpoint
assert "azure/gpt-4o-mini" not in llm.endpoint
def test_azure_message_formatting_preserves_all_roles():
"""
Test that all message roles (system, user, assistant) are preserved correctly
"""
llm = LLM(model="azure/gpt-4")
messages = [
{"role": "system", "content": "System message"},
{"role": "user", "content": "User message"},
{"role": "assistant", "content": "Assistant message"},
{"role": "user", "content": "Another user message"}
]
formatted = llm._format_messages_for_azure(messages)
assert formatted[0]["role"] == "system"
assert formatted[0]["content"] == "System message"
assert formatted[1]["role"] == "user"
assert formatted[1]["content"] == "User message"
assert formatted[2]["role"] == "assistant"
assert formatted[2]["content"] == "Assistant message"
assert formatted[3]["role"] == "user"
assert formatted[3]["content"] == "Another user message"
def test_azure_deepseek_model_support():
"""
Test that DeepSeek and other non-OpenAI models work correctly with Azure AI Inference
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
# Test DeepSeek model
llm_deepseek = LLM(model="azure/deepseek-chat")
# Endpoint should not be modified for non-OpenAI endpoints
assert llm_deepseek.endpoint == "https://models.inference.ai.azure.com"
        assert llm_deepseek.is_azure_openai_endpoint is False
# Model parameter should be included in completion params
params = llm_deepseek._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
assert "model" in params
assert params["model"] == "deepseek-chat"
# Should not be detected as OpenAI model (no function calling)
        assert llm_deepseek.is_openai_model is False
        assert llm_deepseek.supports_function_calling() is False
def test_azure_mistral_and_other_models():
"""
Test that various non-OpenAI models (Mistral, Llama, etc.) work with Azure AI Inference
"""
test_models = [
"mistral-large-latest",
"llama-3-70b-instruct",
"cohere-command-r-plus"
]
for model_name in test_models:
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm = LLM(model=f"azure/{model_name}")
# Verify endpoint is not modified
assert llm.endpoint == "https://models.inference.ai.azure.com"
            assert llm.is_azure_openai_endpoint is False
# Verify model parameter is included
params = llm._prepare_completion_params(
messages=[{"role": "user", "content": "test"}]
)
assert "model" in params
assert params["model"] == model_name
def test_azure_completion_params_preparation_with_drop_params():
"""
    Test that completion parameters are properly prepared and the drop_params/additional_drop_params attributes are respected
"""
with patch.dict(os.environ, {
"AZURE_API_KEY": "test-key",
"AZURE_ENDPOINT": "https://models.inference.ai.azure.com"
}):
llm = LLM(
model="azure/o4-mini",
drop_params=True,
additional_drop_params=["stop"],
max_tokens=1000
)
from crewai.llms.providers.azure.completion import AzureCompletion
assert isinstance(llm, AzureCompletion)
messages = [{"role": "user", "content": "Hello"}]
params = llm._prepare_completion_params(messages)
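        # additional_drop_params=["stop"] should have filtered the stop sequence out of the payload.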
        assert params.get("stop") is None
@pytest.mark.vcr()
def test_azure_streaming_returns_usage_metrics():
"""
Test that Azure streaming calls return proper token usage metrics.
"""
agent = Agent(
role="Research Assistant",
goal="Find information about the capital of Spain",
backstory="You are a helpful research assistant.",
llm=LLM(model="azure/gpt-4o-mini", stream=True),
verbose=True,
)
task = Task(
description="What is the capital of Spain?",
expected_output="The capital of Spain",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
result = crew.kickoff()
assert result.token_usage is not None
assert result.token_usage.total_tokens > 0
assert result.token_usage.prompt_tokens > 0
assert result.token_usage.completion_tokens > 0
assert result.token_usage.successful_requests >= 1
# =============================================================================
# Agent Kickoff Structured Output Tests
# =============================================================================
@pytest.mark.vcr()
def test_azure_agent_kickoff_structured_output_without_tools():
"""
Test that agent kickoff returns structured output without tools.
This tests native structured output handling for Azure OpenAI models.
"""
from pydantic import BaseModel, Field
class AnalysisResult(BaseModel):
"""Structured output for analysis results."""
topic: str = Field(description="The topic analyzed")
key_points: list[str] = Field(description="Key insights from the analysis")
summary: str = Field(description="Brief summary of findings")
agent = Agent(
role="Analyst",
goal="Provide structured analysis on topics",
backstory="You are an expert analyst who provides clear, structured insights.",
llm=LLM(model="azure/gpt-4o-mini"),
tools=[],
verbose=True,
)
result = agent.kickoff(
messages="Analyze the benefits of remote work briefly. Keep it concise.",
response_format=AnalysisResult,
)
assert result.pydantic is not None, "Expected pydantic output but got None"
assert isinstance(result.pydantic, AnalysisResult), f"Expected AnalysisResult but got {type(result.pydantic)}"
assert result.pydantic.topic, "Topic should not be empty"
assert len(result.pydantic.key_points) > 0, "Should have at least one key point"
assert result.pydantic.summary, "Summary should not be empty"
@pytest.mark.vcr()
def test_azure_agent_kickoff_structured_output_with_tools():
"""
Test that agent kickoff returns structured output after using tools.
This tests post-tool-call structured output handling for Azure OpenAI models.
"""
from pydantic import BaseModel, Field
from crewai.tools import tool
class CalculationResult(BaseModel):
"""Structured output for calculation results."""
operation: str = Field(description="The mathematical operation performed")
result: int = Field(description="The result of the calculation")
explanation: str = Field(description="Brief explanation of the calculation")
@tool
def add_numbers(a: int, b: int) -> int:
"""Add two numbers together and return the sum."""
return a + b
agent = Agent(
role="Calculator",
goal="Perform calculations using available tools",
backstory="You are a calculator assistant that uses tools to compute results.",
llm=LLM(model="azure/gpt-4o-mini"),
tools=[add_numbers],
verbose=True,
)
result = agent.kickoff(
messages="Calculate 15 + 27 using your add_numbers tool. Report the result.",
response_format=CalculationResult,
)
assert result.pydantic is not None, "Expected pydantic output but got None"
assert isinstance(result.pydantic, CalculationResult), f"Expected CalculationResult but got {type(result.pydantic)}"
assert result.pydantic.result == 42, f"Expected result 42 but got {result.pydantic.result}"
assert result.pydantic.operation, "Operation should not be empty"
assert result.pydantic.explanation, "Explanation should not be empty"
def test_azure_stop_words_not_applied_to_structured_output():
"""
Test that stop words are NOT applied when response_model is provided.
This ensures JSON responses containing stop word patterns (like "Observation:")
are not truncated, which would cause JSON validation to fail.
"""
from pydantic import BaseModel, Field
from crewai.llms.providers.azure.completion import AzureCompletion
class ResearchResult(BaseModel):
"""Research result that may contain stop word patterns in string fields."""
finding: str = Field(description="The research finding")
observation: str = Field(description="Observation about the finding")
# Create AzureCompletion instance with stop words configured
llm = AzureCompletion(
model="gpt-4",
api_key="test-key",
endpoint="https://test.openai.azure.com",
stop=["Observation:", "Final Answer:"], # Common stop words
)
# JSON response that contains a stop word pattern in a string field
# Without the fix, this would be truncated at "Observation:" breaking the JSON
json_response = '{"finding": "The data shows growth", "observation": "Observation: This confirms the hypothesis"}'
with patch.object(llm.client, 'complete') as mock_complete:
mock_message = MagicMock()
mock_message.content = json_response
mock_message.tool_calls = None
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)
mock_complete.return_value = mock_response
# Call with response_model - stop words should NOT be applied
result = llm.call(
messages=[{"role": "user", "content": "Analyze the data"}],
response_model=ResearchResult,
)
# Should successfully parse the full JSON without truncation
assert isinstance(result, ResearchResult)
assert result.finding == "The data shows growth"
# The observation field should contain the full text including "Observation:"
assert "Observation:" in result.observation
def test_azure_stop_words_still_applied_to_regular_responses():
"""
Test that stop words ARE still applied for regular (non-structured) responses.
This ensures the fix didn't break normal stop word behavior.
"""
from crewai.llms.providers.azure.completion import AzureCompletion
# Create AzureCompletion instance with stop words configured
llm = AzureCompletion(
model="gpt-4",
api_key="test-key",
endpoint="https://test.openai.azure.com",
stop=["Observation:", "Final Answer:"],
)
# Response that contains a stop word - should be truncated
response_with_stop_word = "I need to search for more information.\n\nAction: search\nObservation: Found results"
with patch.object(llm.client, 'complete') as mock_complete:
mock_message = MagicMock()
mock_message.content = response_with_stop_word
mock_message.tool_calls = None
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock(
prompt_tokens=100,
completion_tokens=50,
total_tokens=150
)
mock_complete.return_value = mock_response
# Call WITHOUT response_model - stop words SHOULD be applied
result = llm.call(
messages=[{"role": "user", "content": "Search for something"}],
)
# Response should be truncated at the stop word
assert "Observation:" not in result
assert "Found results" not in result
assert "I need to search for more information" in result