"""
E2E Tests for Web Search with GPT-5 Responses API Tool
Tests the correct implementation using web_search as a tool in Responses API
"""
import pytest
import pytest_asyncio
import os
from datetime import datetime
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
from src.llm_core.client import IRISClient
from src.llm_core.providers import ProviderFactory, LLMMessage, MessageRole
@pytest.fixture
def iris_client_sync():
    """Provide an IRISClient for synchronous tests.

    The requesting test is skipped unless both ANTHROPIC_API_KEY and
    OPENAI_API_KEY are present (and non-empty) in the environment. This
    fixture does not open a Redis connection.
    """
    required_env_vars = ("ANTHROPIC_API_KEY", "OPENAI_API_KEY")
    if not all(os.getenv(var_name) for var_name in required_env_vars):
        pytest.skip("Missing API keys")

    # Anthropic is the default provider; the original author notes that the
    # client initializes its providers itself via ProviderFactory.
    return IRISClient(default_provider="anthropic")
@pytest_asyncio.fixture
async def iris_client_async():
    """Provide an IRISClient for async tests, with Redis connected up front.

    The requesting test is skipped unless both ANTHROPIC_API_KEY and
    OPENAI_API_KEY are present (and non-empty) in the environment.
    """
    if not (os.getenv("ANTHROPIC_API_KEY") and os.getenv("OPENAI_API_KEY")):
        pytest.skip("Missing API keys")

    connected_client = IRISClient(default_provider="anthropic")
    # Establish the Redis connection explicitly before handing the client to
    # tests, rather than relying on it happening on first use.
    await connected_client._ensure_redis_connected()
    return connected_client
@pytest_asyncio.fixture
async def test_session(iris_client_async):
    """Yield the id of a throwaway conversation session and delete it afterwards.

    Args:
        iris_client_async: Client fixture whose ``conversation_manager`` is
            used to create and delete the session.

    Yields:
        The ``session_id`` attribute of the session returned by
        ``conversation_manager.create_session``.

    Cleanup is best-effort: a failure to delete the session is logged as a
    warning and does not fail the test.
    """
    # Local import keeps this fixture self-contained without touching the
    # module's import block.
    import logging

    session = await iris_client_async.conversation_manager.create_session(
        user_id="test_user",
        platform="pytest",
        metadata={"test": True},
    )
    session_id = session.session_id

    yield session_id

    try:
        await iris_client_async.conversation_manager.delete_session(session_id)
    except Exception:
        # Deliberately not a bare ``except``: asyncio.CancelledError,
        # KeyboardInterrupt and SystemExit derive from BaseException and must
        # propagate so cancellation / Ctrl-C still work during teardown.
        logging.getLogger(__name__).warning(
            "Failed to delete test session %s", session_id, exc_info=True
        )
class TestWebSearchTool:
    """End-to-end checks for web search used as a GPT-5 tool.

    Covers three areas: the client's ``_should_use_web_search`` heuristic,
    a direct provider call with ``web_search=True``, and which provider
    ``chat_with_smart_routing`` reports for general vs. search-style prompts.
    """

    def test_keyword_detection_italian(self, iris_client_sync):
        """Italian search-style phrases must be flagged as needing web search."""
        italian_queries = (
            "cerca informazioni su OpenAI",
            "trova le ultime notizie",
            "qual è il meteo oggi?",
            "dimmi il prezzo di Bitcoin",
        )
        for query in italian_queries:
            needs_search = iris_client_sync._should_use_web_search(query)
            assert needs_search is True, f"Failed to detect search need in: {query}"
            print(f"✓ Detected: {query}")

    def test_keyword_detection_english(self, iris_client_sync):
        """English search-style phrases must be flagged as needing web search."""
        english_queries = (
            "search for information about AI",
            "find the latest news",
            "what's the weather today?",
            "tell me the price of Bitcoin",
        )
        for query in english_queries:
            needs_search = iris_client_sync._should_use_web_search(query)
            assert needs_search is True, f"Failed to detect search need in: {query}"
            print(f"✓ Detected: {query}")

    def test_no_search_for_general_chat(self, iris_client_sync):
        """Small talk (Italian and English) must not be flagged for web search."""
        small_talk = (
            "Ciao, come stai?",
            "Puoi aiutarmi con un progetto?",
            "Grazie mille!",
            "Hello, how are you?",
            "Can you help me?",
            "Thank you!",
        )
        for phrase in small_talk:
            needs_search = iris_client_sync._should_use_web_search(phrase)
            assert needs_search is False, f"Incorrectly triggered search for: {phrase}"
            print(f"✓ No search: {phrase}")

    @pytest.mark.asyncio
    async def test_gpt5_provider_with_web_search_tool(self):
        """A directly created OpenAI provider answers with web search enabled."""
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            pytest.skip("OPENAI_API_KEY not found")

        # Build the provider by hand (no IRISClient) against the base gpt-5 model.
        provider = ProviderFactory.create_provider("openai", api_key, model="gpt-5")

        # A prompt that calls for up-to-date information.
        conversation = [
            LLMMessage(role=MessageRole.SYSTEM, content="You are a helpful assistant."),
            LLMMessage(role=MessageRole.USER, content="What's happening in AI technology today?"),
        ]

        reply = await provider.chat(
            messages=conversation,
            web_search=True,
            tool_choice="auto",
        )

        # Basic shape of the reply.
        assert reply is not None
        assert len(reply.content) > 0
        assert reply.provider == "openai"

        # The provider must record that web search was enabled.
        assert reply.metadata.get("web_search") is True

        print(f"\n✅ GPT-5 Web Search Tool Test:")
        print(f"Model: {reply.model}")
        print(f"Tokens: {reply.usage.get('total_tokens', 'N/A')}")
        print(f"Response length: {len(reply.content)} chars")
        print(f"Content preview: {reply.content[:300]}...")

    @pytest.mark.asyncio
    async def test_smart_routing_uses_claude_default(self, iris_client_async, test_session):
        """A general knowledge question is answered by the anthropic provider."""
        reply = await iris_client_async.chat_with_smart_routing(
            session_id=test_session,
            message="Explain the concept of polymorphism in programming.",
        )

        # The default provider is expected to handle this prompt.
        assert reply is not None
        assert len(reply.content) > 0
        assert reply.provider == "anthropic"

        print(f"\n✅ Claude Response ({len(reply.content)} chars):")
        print(f"Model: {reply.model}")
        print(f"Content preview: {reply.content[:200]}...")

    @pytest.mark.asyncio
    async def test_smart_routing_uses_gpt5_for_search(self, iris_client_async, test_session):
        """A 'latest developments' question is answered by openai with web search."""
        reply = await iris_client_async.chat_with_smart_routing(
            session_id=test_session,
            message="What are the latest developments in AI technology?",
        )

        # Expect the OpenAI provider for this prompt.
        assert reply is not None
        assert len(reply.content) > 0
        assert reply.provider == "openai"

        # ...and the reply must be marked as having used web search.
        assert reply.metadata.get("web_search") is True

        print(f"\n✅ GPT-5 + Web Search Response ({len(reply.content)} chars):")
        print(f"Model: {reply.model}")
        print(f"Tokens: {reply.usage.get('total_tokens', 'N/A')}")
        print(f"Content preview: {reply.content[:300]}...")

    @pytest.mark.asyncio
    async def test_force_web_search_parameter(self, iris_client_async, test_session):
        """``force_web_search=True`` selects openai + web search for a general prompt."""
        # The prompt itself is generic; only the flag asks for web search.
        reply = await iris_client_async.chat_with_smart_routing(
            session_id=test_session,
            message="Tell me about programming",
            force_web_search=True,
        )

        assert reply is not None
        assert reply.provider == "openai"
        assert reply.metadata.get("web_search") is True

        print(f"\n✅ Forced Web Search Response:")
        print(f"Model: {reply.model}")
        print(f"Content preview: {reply.content[:200]}...")
if __name__ == "__main__":
    # Run this module's tests verbosely (-v) with output capture disabled (-s).
    # pytest.main returns the exit code; raise SystemExit with it so that
    # running this file as a script exits non-zero when tests fail.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))