Qdrant RAG MCP Server

qdrant-rag-mcp
tests
integration

test_token_optimization.py•11.5 KiB

#!/usr/bin/env python3 """ Test script for v0.3.4.post1 token optimization in GitHub issue analysis. This script tests the improvements made to reduce token usage by 70%. """ import requests import json import time from typing import Dict, Any BASE_URL = "http://localhost:8081" def test_github_issue_analysis(): """Test GitHub issue analysis with token tracking.""" print("\n=== Testing GitHub Issue Analysis Token Optimization ===\n") # First, check if server is running try: response = requests.get(f"{BASE_URL}/health") response.raise_for_status() print("✓ Server is running") except Exception as e: print(f"✗ Server not accessible: {e}") print("Please start the server with: uv run python src/http_server.py") return # Check GitHub health try: response = requests.get(f"{BASE_URL}/github/health") response.raise_for_status() github_status = response.json() print(f"✓ GitHub integration status: {github_status.get('github', {}).get('status', 'unknown')}") except Exception as e: print(f"✗ GitHub integration not available: {e}") return # For testing, we'll use our own repository that has indexed content print("\n1. Switching to test repository...") switch_response = requests.post( f"{BASE_URL}/github/switch_repository", json={"owner": "ancoleman", "repo": "qdrant-rag-mcp"} ) if switch_response.status_code == 200: repo_info = switch_response.json() print(f"✓ Switched to: {repo_info.get('repository', {}).get('full_name', 'unknown')}") else: print(f"✗ Failed to switch repository: {switch_response.text}") return # Fetch some issues to analyze print("\n2. Fetching issues...") issues_response = requests.get(f"{BASE_URL}/github/issues?state=open&limit=5") if issues_response.status_code == 200: issues = issues_response.json().get("issues", []) print(f"✓ Found {len(issues)} open issues") if not issues: print("No issues found. Creating a test issue...") create_response = requests.post( f"{BASE_URL}/github/issues", json={ "title": "Test Issue: Vector dimension mismatch in search", "body": "Getting vector dimension errors when searching:\n\nError: Vector dimension error: expected dim: 768, got 384\nFile: src/utils/embeddings.py\nFunction: get_embeddings\n\nThis happens when using specialized embeddings with different models.", "labels": ["bug", "test"] } ) if create_response.status_code == 200: issue = create_response.json() issue_number = issue["issue"]["number"] print(f"✓ Created test issue #{issue_number}") else: print("✗ Failed to create test issue") return else: # Use the first issue issue = issues[0] issue_number = issue["number"] print(f"Using issue #{issue_number}: {issue['title']}") else: print(f"✗ Failed to fetch issues: {issues_response.text}") return # Now analyze the issue and track token usage print(f"\n3. Analyzing issue #{issue_number} with token tracking...") # Start timing start_time = time.time() # Analyze the issue analysis_response = requests.post(f"{BASE_URL}/github/issues/{issue_number}/analyze") end_time = time.time() duration = end_time - start_time if analysis_response.status_code == 200: analysis = analysis_response.json() print(f"✓ Analysis completed in {duration:.2f} seconds") # Extract key metrics print("\n=== Analysis Results ===") print(f"Issue Type: {analysis.get('analysis', {}).get('extracted_info', {}).get('issue_type', 'unknown')}") print(f"Confidence Score: {analysis.get('analysis', {}).get('analysis', {}).get('confidence_score', 0):.2f}") # Check search results - look in the analysis data analysis_data = analysis.get('analysis', {}) search_summary = analysis_data.get('search_summary', {}) print(f"\nSearch Results Summary:") print(f"- Total searches performed: {search_summary.get('total_searches', 0)}") print(f"- Total results found: {search_summary.get('total_results', 0)}") print(f"- High relevance results: {search_summary.get('high_relevance_count', 0)}") # Show search categories categories = search_summary.get('search_categories', {}) for category, count in categories.items(): if count: print(f"- {category}: {count} results") # Estimate token usage (rough approximation) # Each search result with context is approximately 500-1000 tokens total_results = search_summary.get('total_results', 0) estimated_tokens = total_results * 750 # Average estimate print(f"\nEstimated Token Usage:") print(f"- Raw search results: ~{estimated_tokens:,} tokens") # Check if progressive context was used - look in server logs for cache hits if search_summary.get('total_searches', 0) > 0: print("- Progressive context: ✓ ENABLED (searches performed)") # With our config, 5 results at class level = ~50% reduction print(f"- Token reduction from progressive: ~{estimated_tokens * 0.5:,.0f} tokens saved") else: print("- Progressive context: ✗ NOT DETECTED") # Check response verbosity if "search_summary" in analysis_data and "search_results" not in analysis: print("- Response verbosity: ✓ SUMMARY MODE") print("- Token reduction: ✓ OPTIMIZED") elif "search_results" in analysis: print("- Response verbosity: ✗ FULL MODE (not optimized)") # Calculate total token reduction based on actual behavior # The analyzer generates up to 8 queries (after deduplication) # Each query performs a search_code or search_docs call extracted = analysis_data.get('extracted_info', {}) key_refs = extracted.get('key_references', {}) # Count potential queries based on what was extracted potential_queries = ( 1 + # Title query key_refs.get('errors', 0) + # Error queries (up to 3) key_refs.get('functions', 0) + # Function queries (up to 3) key_refs.get('classes', 0) + # Class queries (up to 3) key_refs.get('features', 0) + # Feature queries (up to 2) min(5, len(extracted.get('top_keywords', []))) # Keyword queries (up to 5) ) actual_queries = min(8, potential_queries) # Limited to 8 by deduplication # Old approach (without optimizations) old_queries = 10 # Typical before deduplication old_results_per_query = 10 # Old default old_tokens_per_result = 750 old_include_deps = 1.2 # 20% extra for dependencies old_estimated = old_queries * old_results_per_query * old_tokens_per_result * old_include_deps # New approach (with optimizations) new_queries = actual_queries # After deduplication (max 8) new_results_per_query = 5 # New limit from config new_tokens_per_result = 750 progressive_reduction = 0.5 # 50% reduction from progressive context new_estimated = new_queries * new_results_per_query * new_tokens_per_result * progressive_reduction reduction_percent = (old_estimated - new_estimated) / old_estimated * 100 print(f"\n=== Token Reduction Analysis ===") print(f"Queries generated: {actual_queries} (max 8 after deduplication)") print(f"\nOld approach:") print(f"- Queries: {old_queries}") print(f"- Results per query: {old_results_per_query}") print(f"- Include dependencies: Yes (+20%)") print(f"- Total tokens: ~{old_estimated:,.0f}") print(f"\nNew approach:") print(f"- Queries: {new_queries} (deduplicated)") print(f"- Results per query: {new_results_per_query}") print(f"- Progressive context: Yes (-50%)") print(f"- Include dependencies: No") print(f"- Total tokens: ~{new_estimated:,.0f}") print(f"\n- Reduction: {reduction_percent:.0f}% {'✓ TARGET MET' if reduction_percent >= 70 else '✗ Target not met'}") # Show relevant files found relevant_files = analysis.get('analysis', {}).get('analysis', {}).get('relevant_files', []) if relevant_files: print(f"\nRelevant Files Found ({len(relevant_files)}):") for i, file_info in enumerate(relevant_files[:5]): # Show top 5 print(f" {i+1}. {file_info.get('file_path', 'unknown')} (score: {file_info.get('relevance_score', 0):.2f})") # Show recommendations recommendations = analysis.get('analysis', {}).get('recommendations', {}) if recommendations: print("\nRecommendations:") investigation_areas = recommendations.get('investigation_areas', []) for area in investigation_areas[:3]: # Show top 3 print(f" - {area}") else: print(f"✗ Analysis failed: {analysis_response.text}") def check_configuration(): """Check the current configuration settings.""" print("\n=== Checking Configuration ===") # Read server_config.json try: with open("config/server_config.json", "r") as f: config = json.load(f) github_config = config.get("github", {}).get("issues", {}).get("analysis", {}) print("\nGitHub Issue Analysis Configuration:") print(f"- search_limit: {github_config.get('search_limit', 'not set')}") print(f"- include_dependencies: {github_config.get('include_dependencies', 'not set')}") print(f"- response_verbosity: {github_config.get('response_verbosity', 'not set')}") progressive_config = github_config.get("progressive_context", {}) if progressive_config: print("\nProgressive Context Configuration:") print(f"- enabled: {progressive_config.get('enabled', 'not set')}") print(f"- default_level: {progressive_config.get('default_level', 'not set')}") print(f"- bug_level: {progressive_config.get('bug_level', 'not set')}") print(f"- feature_level: {progressive_config.get('feature_level', 'not set')}") else: print("\n✗ Progressive context configuration not found!") except Exception as e: print(f"✗ Failed to read configuration: {e}") def main(): """Main test function.""" print("=== v0.3.4.post1 Token Optimization Test ===") print("This test verifies the 70% token reduction in GitHub issue analysis") # Check configuration first check_configuration() # Run the test test_github_issue_analysis() print("\n=== Test Summary ===") print("Expected improvements:") print("- Search results reduced from 10 to 5") print("- Progressive context enabled (class/method level)") print("- Response in summary mode (not full)") print("- Query deduplication (max 8 queries)") print("- Dependencies excluded by default") print("\nTarget: 70% reduction in token usage") if __name__ == "__main__": main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ancoleman/qdrant-rag-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_token_optimization.py•11.5 KiB