ChunkHound

Overview Schema Related Servers Score Discussions

test_anthropic_thinking.py•9.27 KiB

#!/usr/bin/env python3 """Manual test script for Anthropic provider with extended thinking and Opus 4.5 features. This script tests the Anthropic provider with real API calls to verify: 1. Basic completion without thinking 2. Completion with thinking enabled 3. Structured JSON output 4. Content block handling 5. Opus 4.5 effort parameter 6. Context management Usage: export ANTHROPIC_API_KEY=sk-ant-api03-... uv run python tests/manual/test_anthropic_thinking.py """ import asyncio import os import sys import pytest from chunkhound.providers.llm.anthropic_llm_provider import ( AnthropicLLMProvider, EFFORT_SUPPORTED_MODELS, ) # Skip all tests in this file if no API key is available pytestmark = pytest.mark.skipif( not os.getenv("ANTHROPIC_API_KEY"), reason="ANTHROPIC_API_KEY not set - manual integration tests require real API key" ) async def test_basic_completion(): """Test basic completion without thinking.""" print("\n" + "=" * 80) print("TEST 1: Basic Completion (no thinking)") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-sonnet-4-5-20250929", thinking_enabled=False, ) prompt = "What is 2+2? Answer in one sentence." print(f"\nPrompt: {prompt}") response = await provider.complete(prompt, max_completion_tokens=100) print(f"\nResponse: {response.content}") print(f"Tokens used: {response.tokens_used}") print(f"Finish reason: {response.finish_reason}") return response.tokens_used > 0 async def test_thinking_completion(): """Test completion with thinking enabled.""" print("\n" + "=" * 80) print("TEST 2: Completion with Extended Thinking") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-sonnet-4-5-20250929", thinking_enabled=True, thinking_budget_tokens=5000, ) prompt = """Solve this step-by-step: If a train travels at 60 mph for 2.5 hours, then increases speed to 80 mph for 1.5 hours, how far did it travel in total?""" print(f"\nPrompt: {prompt}") response = await provider.complete(prompt, max_completion_tokens=2000) print(f"\nResponse: {response.content}") print(f"Tokens used: {response.tokens_used}") print(f"Finish reason: {response.finish_reason}") # Note: With thinking enabled, the actual thinking process is in separate blocks # that we don't include in the final text output by default print("\nNote: Extended thinking blocks are processed but not included in output by default.") return response.tokens_used > 0 async def test_structured_output(): """Test structured JSON output.""" print("\n" + "=" * 80) print("TEST 3: Structured JSON Output") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-sonnet-4-5-20250929", thinking_enabled=False, ) schema = { "type": "object", "properties": { "answer": {"type": "number"}, "explanation": {"type": "string"}, }, "required": ["answer", "explanation"], } prompt = "What is 15 * 23? Provide the answer and a brief explanation." print(f"\nPrompt: {prompt}") print(f"\nRequired schema: {schema}") response = await provider.complete_structured( prompt, schema, max_completion_tokens=200 ) print(f"\nStructured Response:") import json print(json.dumps(response, indent=2)) # Verify it matches schema assert "answer" in response assert "explanation" in response assert isinstance(response["answer"], (int, float)) return True async def test_health_check(): """Test provider health check.""" print("\n" + "=" * 80) print("TEST 4: Provider Health Check") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), thinking_enabled=True, ) health = await provider.health_check() print(f"\nHealth status: {health}") assert health["status"] == "healthy" assert health["provider"] == "anthropic" assert health["thinking_enabled"] is True return True async def test_usage_stats(): """Test usage statistics tracking.""" print("\n" + "=" * 80) print("TEST 5: Usage Statistics Tracking") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), thinking_enabled=False, ) # Make a few completions await provider.complete("Say hello", max_completion_tokens=50) await provider.complete("Say goodbye", max_completion_tokens=50) stats = provider.get_usage_stats() print(f"\nUsage Statistics:") print(f" Requests made: {stats['requests_made']}") print(f" Total tokens: {stats['total_tokens']}") print(f" Prompt tokens: {stats['prompt_tokens']}") print(f" Completion tokens: {stats['completion_tokens']}") print(f" Thinking tokens: {stats['thinking_tokens']}") assert stats["requests_made"] == 2 assert stats["total_tokens"] > 0 return True async def test_opus_45_effort(): """Test Opus 4.5 with effort parameter.""" print("\n" + "=" * 80) print("TEST 6: Opus 4.5 Effort Parameter") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-opus-4-5-20251101", thinking_enabled=True, thinking_budget_tokens=8000, effort="medium", ) print(f"\nModel: {provider.model}") print(f"Effort: {provider._effort}") print(f"Model in supported list: {provider.model in EFFORT_SUPPORTED_MODELS}") prompt = "What are three key benefits of functional programming? Be concise." print(f"\nPrompt: {prompt}") response = await provider.complete(prompt, max_completion_tokens=2000) print(f"\nResponse: {response.content[:500]}...") print(f"Tokens used: {response.tokens_used}") print(f"Finish reason: {response.finish_reason}") return response.tokens_used > 0 async def test_opus_45_full_features(): """Test Opus 4.5 with all features enabled.""" print("\n" + "=" * 80) print("TEST 7: Opus 4.5 Full Features (thinking + effort + context mgmt)") print("=" * 80) provider = AnthropicLLMProvider( api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-opus-4-5-20251101", thinking_enabled=True, thinking_budget_tokens=10000, interleaved_thinking=True, effort="high", context_management_enabled=True, clear_thinking_keep_turns=2, ) print(f"\nConfiguration:") print(f" Model: {provider.model}") print(f" Thinking: {provider._thinking_enabled}") print(f" Interleaved: {provider._interleaved_thinking}") print(f" Effort: {provider._effort}") print(f" Context Mgmt: {provider._context_management_enabled}") # Check beta headers headers = provider._get_beta_headers() print(f" Beta headers: {headers}") prompt = "Explain the difference between async/await and callbacks in 2-3 sentences." print(f"\nPrompt: {prompt}") response = await provider.complete(prompt, max_completion_tokens=3000) print(f"\nResponse: {response.content}") print(f"Tokens used: {response.tokens_used}") print(f"Finish reason: {response.finish_reason}") # Verify health check includes new fields health = await provider.health_check() print(f"\nHealth check: {health}") assert health["interleaved_thinking"] is True assert health["context_management_enabled"] is True assert health.get("effort") == "high" return response.tokens_used > 0 async def main(): """Run all tests.""" # Check for API key if not os.getenv("ANTHROPIC_API_KEY"): print("ERROR: ANTHROPIC_API_KEY environment variable not set") print("\nUsage:") print(" export ANTHROPIC_API_KEY=sk-ant-api03-...") print(" uv run python tests/manual/test_anthropic_thinking.py") sys.exit(1) print("\n" + "=" * 80) print("Anthropic Provider Extended Thinking Tests") print("=" * 80) print(f"\nAPI Key: {os.getenv('ANTHROPIC_API_KEY')[:20]}...") results = [] try: results.append(("Basic Completion", await test_basic_completion())) results.append(("Thinking Completion", await test_thinking_completion())) results.append(("Structured Output", await test_structured_output())) results.append(("Health Check", await test_health_check())) results.append(("Usage Stats", await test_usage_stats())) results.append(("Opus 4.5 Effort", await test_opus_45_effort())) results.append(("Opus 4.5 Full Features", await test_opus_45_full_features())) except Exception as e: print(f"\n❌ Test failed with error: {e}") import traceback traceback.print_exc() sys.exit(1) # Print summary print("\n" + "=" * 80) print("TEST SUMMARY") print("=" * 80) for name, passed in results: status = "✅ PASS" if passed else "❌ FAIL" print(f"{status}: {name}") if all(passed for _, passed in results): print("\n🎉 All tests passed!") sys.exit(0) else: print("\n❌ Some tests failed") sys.exit(1) if __name__ == "__main__": asyncio.run(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ofriw/chunkhound'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_anthropic_thinking.py•9.27 KiB