test_cross_tool_comprehensive.pyโข15 kB
#!/usr/bin/env python3
"""
Comprehensive Cross-Tool Test
Tests file deduplication, conversation continuation, and file handling
across all available MCP tools using realistic workflows with low thinking mode.
Validates:
1. Cross-tool conversation continuation
2. File deduplication across different tools
3. Mixed file scenarios (old + new files)
4. Conversation history preservation
5. Proper tool chaining with context
"""
from .conversation_base_test import ConversationBaseTest
class CrossToolComprehensiveTest(ConversationBaseTest):
"""Comprehensive test across all MCP tools"""
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple:
"""Call an MCP tool in-process"""
# Use the new method for workflow tools
workflow_tools = ["analyze", "debug", "codereview", "precommit", "refactor", "thinkdeep"]
if tool_name in workflow_tools:
response_text, continuation_id = super().call_mcp_tool(tool_name, params)
else:
response_text, continuation_id = self.call_mcp_tool_direct(tool_name, params)
return response_text, continuation_id
@property
def test_name(self) -> str:
return "cross_tool_comprehensive"
@property
def test_description(self) -> str:
return "Comprehensive cross-tool file deduplication and continuation"
def run_test(self) -> bool:
"""Comprehensive cross-tool test with all MCP tools"""
try:
self.logger.info("๐ Test: Comprehensive cross-tool file deduplication and continuation")
# Initialize for in-process tool calling
self.setUp()
# Setup test files
self.setup_test_files()
# Create short test files for quick testing
python_code = """def login(user, pwd):
# Security issue: plain text password
if user == "admin" and pwd == "123":
return True
return False
def hash_pwd(pwd):
# Weak hashing
return str(hash(pwd))
"""
config_file = """{
"db_password": "weak123",
"debug": true,
"secret_key": "test"
}"""
auth_file = self.create_additional_test_file("auth.py", python_code)
config_file_path = self.create_additional_test_file("config.json", config_file)
# Get timestamp for log filtering
import datetime
start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
# Tool chain: chat โ analyze โ debug โ codereview โ precommit
# Each step builds on the previous with cross-tool continuation
current_continuation_id = None
responses = []
# Step 1: Start with chat tool to understand the codebase
self.logger.info(" Step 1: chat tool - Initial codebase exploration")
chat_params = {
"prompt": "List security issues in auth.py",
"files": [auth_file],
"thinking_mode": "low",
"model": "flash",
}
response1, continuation_id1 = self.call_mcp_tool("chat", chat_params)
if not response1 or not continuation_id1:
self.logger.error(" โ Step 1: chat tool failed")
return False
self.logger.info(f" โ
Step 1: chat completed with continuation_id: {continuation_id1[:8]}...")
responses.append(("chat", response1, continuation_id1))
current_continuation_id = continuation_id1
# Step 2: Use analyze tool to do deeper analysis (fresh conversation)
self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)")
analyze_params = {
"step": "Starting comprehensive code analysis to find security vulnerabilities in the authentication system",
"step_number": 1,
"total_steps": 2,
"next_step_required": True,
"findings": "Initial analysis will focus on security vulnerabilities in authentication code",
"relevant_files": [auth_file],
"thinking_mode": "low",
"model": "flash",
}
response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params)
if not response2:
self.logger.error(" โ Step 2: analyze tool failed")
return False
self.logger.info(
f" โ
Step 2: analyze completed with continuation_id: {continuation_id2[:8] if continuation_id2 else 'None'}..."
)
responses.append(("analyze", response2, continuation_id2))
# Step 3: Continue chat conversation with config file
self.logger.info(" Step 3: chat continuation - Add config file context")
chat_continue_params = {
"continuation_id": current_continuation_id,
"prompt": "Check config.json too",
"files": [auth_file, config_file_path], # Old + new file
"thinking_mode": "low",
"model": "flash",
}
response3, _ = self.call_mcp_tool("chat", chat_continue_params)
if not response3:
self.logger.error(" โ Step 3: chat continuation failed")
return False
self.logger.info(" โ
Step 3: chat continuation completed")
responses.append(("chat_continue", response3, current_continuation_id))
# Step 4: Use debug tool to identify specific issues
self.logger.info(" Step 4: debug tool - Identify specific problems")
debug_params = {
"step": "Starting debug investigation to identify and fix authentication security issues",
"step_number": 1,
"total_steps": 2,
"next_step_required": True,
"findings": "Investigating authentication vulnerabilities found in previous analysis",
"relevant_files": [auth_file, config_file_path],
"thinking_mode": "low",
"model": "flash",
}
response4, continuation_id4 = self.call_mcp_tool("debug", debug_params)
if not response4:
self.logger.error(" โ Step 4: debug tool failed")
return False
self.logger.info(
f" โ
Step 4: debug completed with continuation_id: {continuation_id4[:8] if continuation_id4 else 'None'}..."
)
responses.append(("debug", response4, continuation_id4))
# Step 5: Cross-tool continuation - continue debug with chat context
if continuation_id4:
self.logger.info(" Step 5: debug continuation - Additional analysis")
debug_continue_params = {
"step": "Continuing debug investigation to fix password hashing implementation",
"step_number": 2,
"total_steps": 2,
"next_step_required": False,
"findings": "Building on previous analysis to fix weak password hashing",
"continuation_id": continuation_id4,
"relevant_files": [auth_file, config_file_path],
"thinking_mode": "low",
"model": "flash",
}
response5, _ = self.call_mcp_tool("debug", debug_continue_params)
if response5:
self.logger.info(" โ
Step 5: debug continuation completed")
responses.append(("debug_continue", response5, continuation_id4))
# Step 6: Use codereview for comprehensive review
self.logger.info(" Step 6: codereview tool - Comprehensive code review")
codereview_params = {
"step": "Starting comprehensive security code review of authentication system",
"step_number": 1,
"total_steps": 2,
"next_step_required": True,
"findings": "Performing thorough security review of authentication code and configuration",
"relevant_files": [auth_file, config_file_path],
"thinking_mode": "low",
"model": "flash",
}
response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params)
if not response6:
self.logger.error(" โ Step 6: codereview tool failed")
return False
self.logger.info(
f" โ
Step 6: codereview completed with continuation_id: {continuation_id6[:8] if continuation_id6 else 'None'}..."
)
responses.append(("codereview", response6, continuation_id6))
# Step 7: Create improved version and use precommit
self.logger.info(" Step 7: precommit tool - Pre-commit validation")
# Create a short improved version
improved_code = """import hashlib
def secure_login(user, pwd):
# Better: hashed password check
hashed = hashlib.sha256(pwd.encode()).hexdigest()
if user == "admin" and hashed == "expected_hash":
return True
return False
"""
improved_file = self.create_additional_test_file("auth_improved.py", improved_code)
precommit_params = {
"step": "Starting pre-commit validation of improved authentication code",
"step_number": 1,
"total_steps": 2,
"next_step_required": True,
"findings": "Validating improved authentication implementation before commit",
"path": self.test_dir,
"relevant_files": [auth_file, config_file_path, improved_file],
"thinking_mode": "low",
"model": "flash",
}
response7, continuation_id7 = self.call_mcp_tool("precommit", precommit_params)
if not response7:
self.logger.error(" โ Step 7: precommit tool failed")
return False
self.logger.info(
f" โ
Step 7: precommit completed with continuation_id: {continuation_id7[:8] if continuation_id7 else 'None'}..."
)
responses.append(("precommit", response7, continuation_id7))
# Validate comprehensive results
self.logger.info(" ๐ Validating comprehensive cross-tool results...")
logs = self.get_server_logs_since(start_time)
# Validation criteria
tools_used = [r[0] for r in responses]
continuation_ids_created = [r[2] for r in responses if r[2]]
# Check for various log patterns
conversation_logs = [
line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower()
]
embedding_logs = [
line
for line in logs.split("\n")
if "๐" in line or "embedding" in line.lower() or "file" in line.lower()
]
continuation_logs = [
line for line in logs.split("\n") if "continuation" in line.lower() or "resuming" in line.lower()
]
cross_tool_logs = [
line
for line in logs.split("\n")
if any(tool in line.lower() for tool in ["chat", "analyze", "debug", "codereview", "precommit"])
]
# File mentions
auth_file_mentioned = any("auth.py" in line for line in logs.split("\n"))
config_file_mentioned = any("config.json" in line for line in logs.split("\n"))
improved_file_mentioned = any("auth_improved.py" in line for line in logs.split("\n"))
# Print comprehensive diagnostics
self.logger.info(f" Tools used: {len(tools_used)} ({', '.join(tools_used)})")
self.logger.info(f" Continuation IDs created: {len(continuation_ids_created)}")
self.logger.info(f" Conversation logs found: {len(conversation_logs)}")
self.logger.info(f" File embedding logs found: {len(embedding_logs)}")
self.logger.info(f" Continuation logs found: {len(continuation_logs)}")
self.logger.info(f" Cross-tool activity logs: {len(cross_tool_logs)}")
self.logger.info(f" Auth file mentioned: {auth_file_mentioned}")
self.logger.info(f" Config file mentioned: {config_file_mentioned}")
self.logger.info(f" Improved file mentioned: {improved_file_mentioned}")
if self.verbose:
self.logger.debug(" ๐ Sample tool activity logs:")
for log in cross_tool_logs[:10]: # Show first 10
if log.strip():
self.logger.debug(f" {log.strip()}")
self.logger.debug(" ๐ Sample continuation logs:")
for log in continuation_logs[:5]: # Show first 5
if log.strip():
self.logger.debug(f" {log.strip()}")
# Comprehensive success criteria
success_criteria = [
len(tools_used) >= 5, # Used multiple tools
len(continuation_ids_created) >= 3, # Created multiple continuation threads
len(embedding_logs) > 10, # Significant file embedding activity
len(continuation_logs) > 0, # Evidence of continuation
auth_file_mentioned, # Original file processed
config_file_mentioned, # Additional file processed
improved_file_mentioned, # New file processed
len(conversation_logs) > 5, # Conversation history activity
]
passed_criteria = sum(success_criteria)
total_criteria = len(success_criteria)
self.logger.info(f" Success criteria met: {passed_criteria}/{total_criteria}")
# Allow for slight variations in log output (7/8 is sufficient for comprehensive test)
if passed_criteria >= total_criteria - 1: # Allow 1 missing criterion
self.logger.info(" โ
Comprehensive cross-tool test: PASSED")
if passed_criteria < total_criteria:
self.logger.info(
f" โน๏ธ Note: {total_criteria - passed_criteria} criterion not met (acceptable variation)"
)
return True
else:
self.logger.warning(" โ ๏ธ Comprehensive cross-tool test: FAILED")
self.logger.warning(" ๐ก Check logs for detailed cross-tool activity")
return False
except Exception as e:
self.logger.error(f"Comprehensive cross-tool test failed: {e}")
return False
finally:
self.cleanup_test_files()