"""Tests for agent utility functions."""
from __future__ import annotations
import asyncio
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from pydantic import BaseModel, Field
from crewai.tools.base_tool import BaseTool
from crewai.utilities.agent_utils import (
_asummarize_chunks,
_estimate_token_count,
_extract_summary_tags,
_format_messages_for_summary,
_split_messages_into_chunks,
convert_tools_to_openai_schema,
summarize_messages,
)
class CalculatorInput(BaseModel):
"""Input schema for calculator tool."""
expression: str = Field(description="Mathematical expression to evaluate")
class CalculatorTool(BaseTool):
"""A simple calculator tool for testing."""
name: str = "calculator"
description: str = "Perform mathematical calculations"
args_schema: type[BaseModel] = CalculatorInput
def _run(self, expression: str) -> str:
"""Execute the calculation."""
try:
result = eval(expression) # noqa: S307
return str(result)
except Exception as e:
return f"Error: {e}"
class SearchInput(BaseModel):
"""Input schema for search tool."""
query: str = Field(description="Search query")
max_results: int = Field(default=10, description="Maximum number of results")
class SearchTool(BaseTool):
"""A search tool for testing."""
name: str = "web_search"
description: str = "Search the web for information"
args_schema: type[BaseModel] = SearchInput
def _run(self, query: str, max_results: int = 10) -> str:
"""Execute the search."""
return f"Search results for '{query}' (max {max_results})"
class NoSchemaTool(BaseTool):
"""A tool without an args schema for testing edge cases."""
name: str = "simple_tool"
description: str = "A simple tool with no schema"
def _run(self, **kwargs: Any) -> str:
"""Execute the tool."""
return "Simple tool executed"
class TestConvertToolsToOpenaiSchema:
"""Tests for convert_tools_to_openai_schema function."""
def test_converts_single_tool(self) -> None:
"""Test converting a single tool to OpenAI schema."""
tools = [CalculatorTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
assert len(schemas) == 1
assert len(functions) == 1
schema = schemas[0]
assert schema["type"] == "function"
assert schema["function"]["name"] == "calculator"
assert schema["function"]["description"] == "Perform mathematical calculations"
assert "properties" in schema["function"]["parameters"]
assert "expression" in schema["function"]["parameters"]["properties"]
def test_converts_multiple_tools(self) -> None:
"""Test converting multiple tools to OpenAI schema."""
tools = [CalculatorTool(), SearchTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
assert len(schemas) == 2
assert len(functions) == 2
# Check calculator
calc_schema = next(s for s in schemas if s["function"]["name"] == "calculator")
assert calc_schema["function"]["description"] == "Perform mathematical calculations"
# Check search
search_schema = next(s for s in schemas if s["function"]["name"] == "web_search")
assert search_schema["function"]["description"] == "Search the web for information"
assert "query" in search_schema["function"]["parameters"]["properties"]
assert "max_results" in search_schema["function"]["parameters"]["properties"]
def test_functions_dict_contains_callables(self) -> None:
"""Test that the functions dict maps names to callable run methods."""
tools = [CalculatorTool(), SearchTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
assert "calculator" in functions
assert "web_search" in functions
assert callable(functions["calculator"])
assert callable(functions["web_search"])
def test_function_can_be_called(self) -> None:
"""Test that the returned function can be called."""
tools = [CalculatorTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
result = functions["calculator"](expression="2 + 2")
assert result == "4"
def test_empty_tools_list(self) -> None:
"""Test with an empty tools list."""
schemas, functions = convert_tools_to_openai_schema([])
assert schemas == []
assert functions == {}
def test_schema_has_required_fields(self) -> None:
"""Test that the schema includes required fields information."""
tools = [SearchTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
schema = schemas[0]
params = schema["function"]["parameters"]
# Should have required array
assert "required" in params
assert "query" in params["required"]
def test_tool_without_args_schema(self) -> None:
"""Test converting a tool that doesn't have an args_schema."""
# Create a minimal tool without args_schema
class MinimalTool(BaseTool):
name: str = "minimal"
description: str = "A minimal tool"
def _run(self) -> str:
return "done"
tools = [MinimalTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
assert len(schemas) == 1
schema = schemas[0]
assert schema["function"]["name"] == "minimal"
        # Parameters should be an empty dict or a minimal object schema
assert isinstance(schema["function"]["parameters"], dict)
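    def test_tool_with_kwargs_only_run(self) -> None:
        """Sketch exercising the module-level NoSchemaTool (kwargs-only _run).
        Assumes convert_tools_to_openai_schema wraps a **kwargs-only tool like
        any other and returns a callable that forwards to _run.
        """
        tools = [NoSchemaTool()]
        schemas, functions = convert_tools_to_openai_schema(tools)
        assert schemas[0]["function"]["name"] == "simple_tool"
        assert functions["simple_tool"]() == "Simple tool executed"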
def test_schema_structure_matches_openai_format(self) -> None:
"""Test that the schema structure matches OpenAI's expected format."""
tools = [CalculatorTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
schema = schemas[0]
# Top level must have "type": "function"
assert schema["type"] == "function"
# Must have "function" key with nested structure
assert "function" in schema
func = schema["function"]
# Function must have name and description
assert "name" in func
assert "description" in func
assert isinstance(func["name"], str)
assert isinstance(func["description"], str)
# Parameters should be a valid JSON schema
assert "parameters" in func
params = func["parameters"]
assert isinstance(params, dict)
def test_removes_redundant_schema_fields(self) -> None:
"""Test that redundant title and description are removed from parameters."""
tools = [CalculatorTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
params = schemas[0]["function"]["parameters"]
# Title should be removed as it's redundant with function name
assert "title" not in params
def test_preserves_field_descriptions(self) -> None:
"""Test that field descriptions are preserved in the schema."""
tools = [SearchTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
params = schemas[0]["function"]["parameters"]
query_prop = params["properties"]["query"]
# Field description should be preserved
assert "description" in query_prop
assert query_prop["description"] == "Search query"
def test_preserves_default_values(self) -> None:
"""Test that default values are preserved in the schema."""
tools = [SearchTool()]
schemas, functions = convert_tools_to_openai_schema(tools)
params = schemas[0]["function"]["parameters"]
max_results_prop = params["properties"]["max_results"]
# Default value should be preserved
assert "default" in max_results_prop
assert max_results_prop["default"] == 10
def _make_mock_i18n() -> MagicMock:
"""Create a mock i18n with the new structured prompt keys."""
mock_i18n = MagicMock()
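    # Mirror the I18N prompt slices requested by summarize_messages; unknown keys fall back to "".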
mock_i18n.slice.side_effect = lambda key: {
"summarizer_system_message": "You are a precise assistant that creates structured summaries.",
"summarize_instruction": "Summarize the conversation:\n{conversation}",
"summary": "<summary>\n{merged_summary}\n</summary>\nContinue the task.",
}.get(key, "")
return mock_i18n
class TestSummarizeMessages:
"""Tests for summarize_messages function."""
def test_preserves_files_from_user_messages(self) -> None:
"""Test that files attached to user messages are preserved after summarization."""
mock_files = {"image.png": MagicMock(), "doc.pdf": MagicMock()}
messages: list[dict[str, Any]] = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Analyze this image", "files": mock_files},
{"role": "assistant", "content": "I can see the image shows..."},
{"role": "user", "content": "What about the colors?"},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>Summarized conversation about image analysis.</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
# System message preserved + summary message = 2
assert len(messages) == 2
assert messages[0]["role"] == "system"
summary_msg = messages[1]
assert summary_msg["role"] == "user"
assert "files" in summary_msg
assert summary_msg["files"] == mock_files
def test_merges_files_from_multiple_user_messages(self) -> None:
"""Test that files from multiple user messages are merged."""
file1 = MagicMock()
file2 = MagicMock()
file3 = MagicMock()
messages: list[dict[str, Any]] = [
{"role": "user", "content": "First image", "files": {"img1.png": file1}},
{"role": "assistant", "content": "I see the first image."},
{"role": "user", "content": "Second image", "files": {"img2.png": file2, "doc.pdf": file3}},
{"role": "assistant", "content": "I see the second image and document."},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>Summarized conversation.</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
assert len(messages) == 1
assert "files" in messages[0]
assert messages[0]["files"] == {
"img1.png": file1,
"img2.png": file2,
"doc.pdf": file3,
}
def test_works_without_files(self) -> None:
"""Test that summarization works when no files are attached."""
messages: list[dict[str, Any]] = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>A greeting exchange.</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
assert len(messages) == 1
assert "files" not in messages[0]
def test_modifies_original_messages_list(self) -> None:
"""Test that the original messages list is modified in-place."""
messages: list[dict[str, Any]] = [
{"role": "user", "content": "First message"},
{"role": "assistant", "content": "Response"},
{"role": "user", "content": "Second message"},
]
original_list_id = id(messages)
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>Summary</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
assert id(messages) == original_list_id
assert len(messages) == 1
def test_preserves_system_messages(self) -> None:
"""Test that system messages are preserved and not summarized."""
messages: list[dict[str, Any]] = [
{"role": "system", "content": "You are a research assistant."},
{"role": "user", "content": "Find information about AI."},
{"role": "assistant", "content": "I found several resources on AI."},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>User asked about AI, assistant found resources.</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
assert len(messages) == 2
assert messages[0]["role"] == "system"
assert messages[0]["content"] == "You are a research assistant."
assert messages[1]["role"] == "user"
def test_formats_conversation_with_role_labels(self) -> None:
"""Test that the LLM receives role-labeled conversation text."""
messages: list[dict[str, Any]] = [
{"role": "system", "content": "System prompt."},
{"role": "user", "content": "Hello there"},
{"role": "assistant", "content": "Hi! How can I help?"},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>Greeting exchange.</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
# Check what was passed to llm.call
call_args = mock_llm.call.call_args[0][0]
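        # llm.call received a [system, user] message list; index 1 is the user prompt carrying the conversation.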
user_msg_content = call_args[1]["content"]
assert "[USER]:" in user_msg_content
assert "[ASSISTANT]:" in user_msg_content
# System content should NOT appear in summarization input
assert "System prompt." not in user_msg_content
def test_extracts_summary_from_tags(self) -> None:
"""Test that <summary> tags are extracted from LLM response."""
messages: list[dict[str, Any]] = [
{"role": "user", "content": "Do something."},
{"role": "assistant", "content": "Done."},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "Here is the summary:\n<summary>The extracted summary content.</summary>\nExtra text."
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
assert "The extracted summary content." in messages[0]["content"]
def test_handles_tool_messages(self) -> None:
"""Test that tool messages are properly formatted in summarization."""
messages: list[dict[str, Any]] = [
{"role": "user", "content": "Search for Python."},
{"role": "assistant", "content": None, "tool_calls": [
{"function": {"name": "web_search", "arguments": '{"query": "Python"}'}}
]},
{"role": "tool", "content": "Python is a programming language.", "name": "web_search"},
{"role": "assistant", "content": "Python is a programming language."},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
mock_llm.call.return_value = "<summary>User searched for Python info.</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
# Verify the conversation text sent to LLM contains tool labels
call_args = mock_llm.call.call_args[0][0]
user_msg_content = call_args[1]["content"]
assert "[TOOL_RESULT (web_search)]:" in user_msg_content
def test_only_system_messages_no_op(self) -> None:
"""Test that only system messages results in no-op (no summarization)."""
messages: list[dict[str, Any]] = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "system", "content": "Additional system instructions."},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 1000
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
# No LLM call should have been made
mock_llm.call.assert_not_called()
# System messages should remain untouched
assert len(messages) == 2
assert messages[0]["content"] == "You are a helpful assistant."
assert messages[1]["content"] == "Additional system instructions."
class TestFormatMessagesForSummary:
"""Tests for _format_messages_for_summary helper."""
def test_skips_system_messages(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "system", "content": "System prompt"},
{"role": "user", "content": "Hello"},
]
result = _format_messages_for_summary(messages)
assert "System prompt" not in result
assert "[USER]: Hello" in result
def test_formats_user_and_assistant(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "user", "content": "Question"},
{"role": "assistant", "content": "Answer"},
]
result = _format_messages_for_summary(messages)
assert "[USER]: Question" in result
assert "[ASSISTANT]: Answer" in result
def test_formats_tool_messages(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "tool", "content": "Result data", "name": "search_tool"},
]
result = _format_messages_for_summary(messages)
assert "[TOOL_RESULT (search_tool)]:" in result
assert "Result data" in result
def test_handles_none_content_with_tool_calls(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "assistant", "content": None, "tool_calls": [
{"function": {"name": "calculator", "arguments": "{}"}}
]},
]
result = _format_messages_for_summary(messages)
assert "[Called tools: calculator]" in result
def test_handles_none_content_without_tool_calls(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "assistant", "content": None},
]
result = _format_messages_for_summary(messages)
assert "[ASSISTANT]:" in result
def test_handles_multimodal_content(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "user", "content": [
{"type": "text", "text": "Describe this image"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
]},
]
result = _format_messages_for_summary(messages)
assert "[USER]: Describe this image" in result
def test_empty_messages(self) -> None:
result = _format_messages_for_summary([])
assert result == ""
class TestExtractSummaryTags:
"""Tests for _extract_summary_tags helper."""
def test_extracts_content_from_tags(self) -> None:
text = "Preamble\n<summary>The actual summary.</summary>\nPostamble"
assert _extract_summary_tags(text) == "The actual summary."
def test_handles_multiline_content(self) -> None:
text = "<summary>\nLine 1\nLine 2\nLine 3\n</summary>"
result = _extract_summary_tags(text)
assert "Line 1" in result
assert "Line 2" in result
assert "Line 3" in result
def test_falls_back_when_no_tags(self) -> None:
text = "Just a plain summary without tags."
assert _extract_summary_tags(text) == text
def test_handles_empty_string(self) -> None:
assert _extract_summary_tags("") == ""
def test_extracts_first_match(self) -> None:
text = "<summary>First</summary> text <summary>Second</summary>"
assert _extract_summary_tags(text) == "First"
class TestSplitMessagesIntoChunks:
"""Tests for _split_messages_into_chunks helper."""
def test_single_chunk_when_under_limit(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi"},
]
chunks = _split_messages_into_chunks(messages, max_tokens=1000)
assert len(chunks) == 1
assert len(chunks[0]) == 2
def test_splits_at_message_boundaries(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "user", "content": "A" * 100}, # ~25 tokens
{"role": "assistant", "content": "B" * 100}, # ~25 tokens
{"role": "user", "content": "C" * 100}, # ~25 tokens
]
        # max_tokens=30 fits one ~25-token message per chunk but not two, forcing three chunks
chunks = _split_messages_into_chunks(messages, max_tokens=30)
assert len(chunks) == 3
def test_excludes_system_messages(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "system", "content": "System prompt"},
{"role": "user", "content": "Hello"},
]
chunks = _split_messages_into_chunks(messages, max_tokens=1000)
assert len(chunks) == 1
# The system message should not be in any chunk
for chunk in chunks:
for msg in chunk:
assert msg.get("role") != "system"
def test_empty_messages(self) -> None:
chunks = _split_messages_into_chunks([], max_tokens=1000)
assert chunks == []
def test_only_system_messages(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "system", "content": "System prompt"},
]
chunks = _split_messages_into_chunks(messages, max_tokens=1000)
assert chunks == []
def test_handles_none_content(self) -> None:
messages: list[dict[str, Any]] = [
{"role": "assistant", "content": None},
{"role": "user", "content": "Follow up"},
]
chunks = _split_messages_into_chunks(messages, max_tokens=1000)
assert len(chunks) == 1
assert len(chunks[0]) == 2
class TestEstimateTokenCount:
"""Tests for _estimate_token_count helper."""
def test_empty_string(self) -> None:
assert _estimate_token_count("") == 0
def test_short_string(self) -> None:
assert _estimate_token_count("hello") == 1 # 5 // 4 = 1
def test_longer_string(self) -> None:
assert _estimate_token_count("a" * 100) == 25 # 100 // 4 = 25
def test_approximation_is_conservative(self) -> None:
        # For typical English text, chars // 4 slightly overestimates the true token count, so the estimate errs on the safe side
text = "The quick brown fox jumps over the lazy dog."
estimated = _estimate_token_count(text)
assert estimated > 0
assert estimated == len(text) // 4
class TestParallelSummarization:
"""Tests for parallel chunk summarization via asyncio."""
def _make_messages_for_n_chunks(self, n: int) -> list[dict[str, Any]]:
"""Build a message list that will produce exactly *n* chunks.
        Each message is just over 400 chars (~100 estimated tokens). With the
        mock LLM reporting a 100-token context window, each message lands in
        its own chunk.
"""
msgs: list[dict[str, Any]] = []
for i in range(n):
msgs.append({"role": "user", "content": f"msg-{i} " + "x" * 400})
return msgs
def test_multiple_chunks_use_acall(self) -> None:
"""When there are multiple chunks, summarize_messages should use
llm.acall (parallel) instead of llm.call (sequential)."""
messages = self._make_messages_for_n_chunks(3)
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 100 # force multiple chunks
mock_llm.acall = AsyncMock(
side_effect=[
"<summary>Summary chunk 1</summary>",
"<summary>Summary chunk 2</summary>",
"<summary>Summary chunk 3</summary>",
]
)
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
# acall should have been awaited once per chunk
assert mock_llm.acall.await_count == 3
# sync call should NOT have been used for chunk summarization
mock_llm.call.assert_not_called()
def test_single_chunk_uses_sync_call(self) -> None:
"""When there is only one chunk, summarize_messages should use
the sync llm.call path (no async overhead)."""
messages: list[dict[str, Any]] = [
{"role": "user", "content": "Short message"},
{"role": "assistant", "content": "Short reply"},
]
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 100_000
mock_llm.call.return_value = "<summary>Short summary</summary>"
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
mock_llm.call.assert_called_once()
def test_parallel_results_preserve_order(self) -> None:
"""Summaries must appear in the same order as the original chunks,
regardless of which async call finishes first."""
messages = self._make_messages_for_n_chunks(3)
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 100
        # Simulate varying latencies: the chunks finish out of order (1 first, then 2, then 0)
async def _delayed_acall(msgs: Any, **kwargs: Any) -> str:
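            # msgs is the [system, user] prompt built for one chunk; the user turn carries the chunk text.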
user_content = msgs[1]["content"]
if "msg-0" in user_content:
await asyncio.sleep(0.05)
return "<summary>Summary-A</summary>"
elif "msg-1" in user_content:
return "<summary>Summary-B</summary>" # fastest
else:
await asyncio.sleep(0.02)
return "<summary>Summary-C</summary>"
mock_llm.acall = _delayed_acall
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
# The final summary message should have A, B, C in order
summary_content = messages[-1]["content"]
pos_a = summary_content.index("Summary-A")
pos_b = summary_content.index("Summary-B")
pos_c = summary_content.index("Summary-C")
assert pos_a < pos_b < pos_c
def test_asummarize_chunks_returns_ordered_results(self) -> None:
"""Direct test of the async helper _asummarize_chunks."""
chunk_a: list[dict[str, Any]] = [{"role": "user", "content": "Chunk A"}]
chunk_b: list[dict[str, Any]] = [{"role": "user", "content": "Chunk B"}]
mock_llm = MagicMock()
mock_llm.acall = AsyncMock(
side_effect=[
"<summary>Result A</summary>",
"<summary>Result B</summary>",
]
)
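        # asyncio.run drives the helper to completion; it gathers one acall per chunk and preserves chunk order.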
results = asyncio.run(
_asummarize_chunks(
chunks=[chunk_a, chunk_b],
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
)
assert len(results) == 2
assert results[0]["content"] == "Result A"
assert results[1]["content"] == "Result B"
@patch("crewai.utilities.agent_utils.is_inside_event_loop", return_value=True)
def test_works_inside_existing_event_loop(self, _mock_loop: Any) -> None:
"""When called from inside a running event loop (e.g. a Flow),
the ThreadPoolExecutor fallback should still work."""
messages = self._make_messages_for_n_chunks(2)
mock_llm = MagicMock()
mock_llm.get_context_window_size.return_value = 100
mock_llm.acall = AsyncMock(
side_effect=[
"<summary>Flow summary 1</summary>",
"<summary>Flow summary 2</summary>",
]
)
summarize_messages(
messages=messages,
llm=mock_llm,
callbacks=[],
i18n=_make_mock_i18n(),
)
assert mock_llm.acall.await_count == 2
# Verify the merged summary made it into messages
assert "Flow summary 1" in messages[-1]["content"]
assert "Flow summary 2" in messages[-1]["content"]
def _build_long_conversation() -> list[dict[str, Any]]:
"""Build a multi-turn conversation that produces multiple chunks at max_tokens=200.
    The six non-system messages range from roughly 35 to 135 estimated tokens
    each (about 500 in total), so a max_tokens of 200 yields roughly three
    chunks.
"""
return [
{
"role": "system",
"content": "You are a helpful research assistant.",
},
{
"role": "user",
"content": (
"Tell me about the history of the Python programming language. "
"Who created it, when was it first released, and what were the "
"main design goals? Please provide a detailed overview covering "
"the major milestones from its inception through Python 3."
),
},
{
"role": "assistant",
"content": (
"Python was created by Guido van Rossum and first released in 1991. "
"The main design goals were code readability and simplicity. Key milestones: "
"Python 1.0 (1994) introduced functional programming tools like lambda and map. "
"Python 2.0 (2000) added list comprehensions and garbage collection. "
"Python 3.0 (2008) was a major backward-incompatible release that fixed "
"fundamental design flaws. Python 2 reached end-of-life in January 2020."
),
},
{
"role": "user",
"content": (
"What about the async/await features? When were they introduced "
"and how do they compare to similar features in JavaScript and C#? "
"Also explain the Global Interpreter Lock and its implications."
),
},
{
"role": "assistant",
"content": (
"Async/await was introduced in Python 3.5 (PEP 492, 2015). "
"Unlike JavaScript which is single-threaded by design, Python's asyncio "
"is an opt-in framework. C# introduced async/await in 2012 (C# 5.0) and "
"was a major inspiration for Python's implementation. "
"The GIL (Global Interpreter Lock) is a mutex that protects access to "
"Python objects, preventing multiple threads from executing Python bytecodes "
"simultaneously. This means CPU-bound multithreaded programs don't benefit "
"from multiple cores. PEP 703 proposes making the GIL optional in CPython."
),
},
{
"role": "user",
"content": (
"Explain the Python package ecosystem. How does pip work, what is PyPI, "
"and what are virtual environments? Compare pip with conda and uv."
),
},
{
"role": "assistant",
"content": (
"PyPI (Python Package Index) is the official repository hosting 400k+ packages. "
"pip is the standard package installer that downloads from PyPI. "
"Virtual environments (venv) create isolated Python installations to avoid "
"dependency conflicts between projects. conda is a cross-language package manager "
"popular in data science that can manage non-Python dependencies. "
"uv is a new Rust-based tool that is 10-100x faster than pip and aims to replace "
"pip, pip-tools, and virtualenv with a single unified tool."
),
},
]
class TestParallelSummarizationVCR:
"""VCR-backed integration tests for parallel summarization.
These tests use a real LLM but patch get_context_window_size to force
multiple chunks, exercising the asyncio.gather + acall parallel path.
To record cassettes:
PYTEST_VCR_RECORD_MODE=all uv run pytest lib/crewai/tests/utilities/test_agent_utils.py::TestParallelSummarizationVCR -v
"""
@pytest.mark.vcr()
def test_parallel_summarize_openai(self) -> None:
"""Test that parallel summarization with gpt-4o-mini produces a valid summary."""
from crewai.llm import LLM
from crewai.utilities.i18n import I18N
llm = LLM(model="gpt-4o-mini", temperature=0)
i18n = I18N()
messages = _build_long_conversation()
original_system = messages[0]["content"]
        # Patch get_context_window_size to return 200, forcing multiple chunks
with patch.object(type(llm), "get_context_window_size", return_value=200):
# Verify we actually get multiple chunks with this window size
non_system = [m for m in messages if m.get("role") != "system"]
chunks = _split_messages_into_chunks(non_system, max_tokens=200)
assert len(chunks) > 1, f"Expected multiple chunks, got {len(chunks)}"
summarize_messages(
messages=messages,
llm=llm,
callbacks=[],
i18n=i18n,
)
# System message preserved
assert messages[0]["role"] == "system"
assert messages[0]["content"] == original_system
# Summary produced as a user message
summary_msg = messages[-1]
assert summary_msg["role"] == "user"
assert len(summary_msg["content"]) > 0
@pytest.mark.vcr()
def test_parallel_summarize_preserves_files(self) -> None:
"""Test that file references survive parallel summarization."""
from crewai.llm import LLM
from crewai.utilities.i18n import I18N
llm = LLM(model="gpt-4o-mini", temperature=0)
i18n = I18N()
messages = _build_long_conversation()
mock_file = MagicMock()
messages[1]["files"] = {"report.pdf": mock_file}
with patch.object(type(llm), "get_context_window_size", return_value=200):
summarize_messages(
messages=messages,
llm=llm,
callbacks=[],
i18n=i18n,
)
summary_msg = messages[-1]
assert summary_msg["role"] == "user"
assert "files" in summary_msg
assert "report.pdf" in summary_msg["files"]