Dataproc MCP Server

knowledge-comprehensive.test.js•15.4 KiB

#!/usr/bin/env node
/**
 * Comprehensive Knowledge Indexer Test
 *
 * DEPENDENCIES:
 * - ✅ Runs standalone (no MCP server required)
 * - ✅ Requires Qdrant running on localhost:6333
 * - ✅ Requires a prior build (modules are imported from ../../../build)
 *
 * USAGE:
 *   npm run build && node tests/manual/test-knowledge-indexer-comprehensive.js
 *
 * OR with Qdrant setup:
 *   docker-compose -f docker-compose.qdrant.yml up -d
 *   npm run build && node tests/manual/test-knowledge-indexer-comprehensive.js
 *
 * Tests all aspects of knowledge indexing including:
 * - Cluster configuration indexing and retrieval
 * - Job submission indexing and querying
 * - Natural language queries and semantic search
 * - Tag-based search functionality
 * - Raw document retrieval and compression analysis
 * - Error handling and data validation
 *
 * EXIT CODE:
 * - process.exitCode is set to 1 when any check fails, so shells/CI can detect failure.
 */
import { KnowledgeIndexer } from '../../../build/services/knowledge-indexer.js';

console.log('🧠 Comprehensive Knowledge Indexer Test');
console.log('='.repeat(60));

/**
 * Stores one hand-built cluster record through QdrantStorageService and reads it back.
 * The outcome is recorded in `results.integration`; errors are caught and recorded,
 * never rethrown.
 *
 * @param {object} results - Per-category tallies ({ passed, failed, tests }).
 * @returns {Promise<void>}
 */
async function verifyManualStorage(results) {
  console.log('\n🔧 Manual Storage Verification...');
  try {
    const { QdrantStorageService } = await import('../../../build/services/qdrant-storage.js');
    const storage = new QdrantStorageService({
      url: 'http://localhost:6333',
      collectionName: 'dataproc_knowledge',
      vectorSize: 384,
    });
    await storage.initialize();

    // Test manual storage
    const testData = {
      clusterName: 'manual-test-cluster',
      projectId: 'test-project',
      region: 'us-central1',
      packages: ['pandas', 'tensorflow', 'scikit-learn'],
      machineType: 'n1-standard-4',
      type: 'cluster',
    };
    const testMetadata = {
      toolName: 'manual-test',
      timestamp: new Date().toISOString(),
      projectId: 'test-project',
      region: 'us-central1',
      clusterName: 'manual-test-cluster',
      responseType: 'cluster-test',
      originalTokenCount: 100,
      filteredTokenCount: 80,
      compressionRatio: 1.0,
      type: 'cluster',
    };

    console.log(' 📝 Attempting manual storage...');
    const storedId = await storage.storeClusterData(testData, testMetadata);
    console.log(` ✅ Manual storage successful! ID: ${storedId}`);

    // Test manual retrieval - extract UUID from resource URI.
    // storedId is a resource URI like
    // "dataproc/stored/manual-test/test-project/us-central1/manual-test-cluster/UUID";
    // retrieveById needs just the trailing UUID. Fall back to the raw id if no UUID is found.
    console.log(' 🔍 Testing manual retrieval...');
    const uuidMatch = storedId.match(
      /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i,
    );
    const actualId = uuidMatch ? uuidMatch[1] : storedId;
    console.log(` 🔍 Extracted UUID for retrieval: ${actualId}`);

    const retrieved = await storage.retrieveById(actualId);
    if (retrieved && retrieved.clusterName === 'manual-test-cluster') {
      console.log(' ✅ Manual retrieval successful!');
      results.integration.passed++;
      results.integration.tests.push('✅ Manual storage/retrieval verification');
    } else {
      console.log(' ❌ Manual retrieval failed');
      results.integration.failed++;
      results.integration.tests.push('❌ Manual storage/retrieval verification');
    }
  } catch (error) {
    console.log(` ❌ Manual storage test failed: ${error.message}`);
    console.log(` 🔍 Error details:`, error);
    results.integration.failed++;
    results.integration.tests.push(`❌ Manual storage verification: ${error.message}`);
  }
}

/**
 * Runs the numbered checks (1-6) against an already-initialized indexer:
 * cluster/job indexing, natural-language queries, tag queries, enhanced features,
 * and error handling. Each check records its own outcome in `results`.
 *
 * @param {KnowledgeIndexer} indexer - Initialized indexer under test.
 * @param {object} results - Per-category tallies ({ passed, failed, tests }).
 * @returns {Promise<void>}
 */
async function runIndexerChecks(indexer, results) {
  console.log('\n1️⃣ Testing Cluster Configuration Indexing...');

  // Test comprehensive cluster data
  const clusterData = {
    clusterName: 'ml-analytics-cluster',
    projectId: 'data-science-project',
    region: 'us-central1',
    config: {
      softwareConfig: {
        properties: {
          'dataproc:pip.packages':
            'pandas==1.5.0,numpy==1.21.0,scikit-learn==1.1.0,tensorflow==2.9.0',
          'dataproc:conda.packages': 'matplotlib,seaborn',
        },
        optionalComponents: ['JUPYTER', 'ZEPPELIN', 'ANACONDA'],
      },
      masterConfig: {
        machineTypeUri:
          'projects/data-science-project/zones/us-central1-a/machineTypes/n1-highmem-4',
        diskConfig: { bootDiskSizeGb: 200, bootDiskType: 'pd-ssd' },
      },
      workerConfig: {
        machineTypeUri:
          'projects/data-science-project/zones/us-central1-a/machineTypes/n1-standard-4',
        numInstances: 4,
        diskConfig: { bootDiskSizeGb: 100, bootDiskType: 'pd-standard' },
      },
    },
    labels: { environment: 'production', team: 'data-science', purpose: 'ml-training' },
    status: { state: 'RUNNING', stateStartTime: new Date().toISOString() },
  };

  try {
    await indexer.indexClusterConfiguration(clusterData);
    results.indexing.passed++;
    results.indexing.tests.push('✅ Cluster configuration indexing');
    console.log(' ✅ Cluster configuration indexed successfully');
  } catch (error) {
    results.indexing.failed++;
    results.indexing.tests.push(`❌ Cluster configuration indexing: ${error.message}`);
    console.log(` ❌ Cluster indexing failed: ${error.message}`);
  }

  console.log('\n2️⃣ Testing Job Submission Indexing...');

  // Test successful job
  const successfulJob = {
    jobId: 'hive-query-' + Date.now(),
    jobType: 'hive',
    clusterName: 'ml-analytics-cluster',
    projectId: 'data-science-project',
    region: 'us-central1',
    query: 'SELECT COUNT(*) FROM sales_data WHERE date >= "2024-01-01"',
    status: 'DONE',
    submissionTime: new Date().toISOString(),
    duration: 15000,
    results: {
      schema: { fields: [{ name: 'count', type: 'BIGINT' }] },
      rows: [['1250000']],
      totalRows: 1,
    },
  };

  try {
    await indexer.indexJobSubmission(successfulJob);
    results.indexing.passed++;
    results.indexing.tests.push('✅ Successful job indexing');
    console.log(' ✅ Successful job indexed successfully');
  } catch (error) {
    results.indexing.failed++;
    results.indexing.tests.push(`❌ Successful job indexing: ${error.message}`);
    console.log(` ❌ Successful job indexing failed: ${error.message}`);
  }

  // Test failed job
  const failedJob = {
    jobId: 'spark-job-' + Date.now(),
    jobType: 'spark',
    clusterName: 'ml-analytics-cluster',
    projectId: 'data-science-project',
    region: 'us-central1',
    query: 'spark.sql("SELECT * FROM non_existent_table")',
    status: 'ERROR',
    submissionTime: new Date().toISOString(),
    duration: 2000,
    error: {
      message: 'Table or view not found: non_existent_table',
      type: 'AnalysisException',
    },
  };

  try {
    await indexer.indexJobSubmission(failedJob);
    results.indexing.passed++;
    results.indexing.tests.push('✅ Failed job indexing');
    console.log(' ✅ Failed job indexed successfully');
  } catch (error) {
    results.indexing.failed++;
    results.indexing.tests.push(`❌ Failed job indexing: ${error.message}`);
    console.log(` ❌ Failed job indexing failed: ${error.message}`);
  }

  console.log('\n3️⃣ Testing Natural Language Queries...');

  // Test semantic search queries. A query passes when it returns at least one result;
  // expectedResults documents the intended hit count but is not asserted exactly.
  const queries = [
    { query: 'machine learning clusters with pandas', type: 'clusters', expectedResults: 1 },
    { query: 'tensorflow scikit-learn', type: 'clusters', expectedResults: 1 },
    { query: 'hive queries on sales data', type: 'jobs', expectedResults: 1 },
    { query: 'failed spark jobs', type: 'jobs', expectedResults: 1 },
    { query: 'production environment clusters', type: 'all', expectedResults: 1 },
  ];

  for (const testQuery of queries) {
    try {
      const matches = await indexer.queryKnowledge(testQuery.query, {
        type: testQuery.type,
        limit: 10,
      });
      if (matches.length > 0) {
        results.querying.passed++;
        results.querying.tests.push(
          `✅ Query: "${testQuery.query}" (${matches.length} results)`,
        );
        console.log(` ✅ Query "${testQuery.query}" returned ${matches.length} results`);
      } else {
        results.querying.failed++;
        results.querying.tests.push(`❌ Query: "${testQuery.query}" (no results)`);
        console.log(` ❌ Query "${testQuery.query}" returned no results`);
      }
    } catch (error) {
      results.querying.failed++;
      results.querying.tests.push(`❌ Query: "${testQuery.query}" (error: ${error.message})`);
      console.log(` ❌ Query "${testQuery.query}" failed: ${error.message}`);
    }
  }

  console.log('\n4️⃣ Testing Tag-Based Search...');

  // Test tag-based queries. These pass as long as the query does not throw,
  // regardless of how many results come back.
  const tagQueries = [
    `jobId:${successfulJob.jobId}`,
    `clusterName:ml-analytics-cluster`,
    `projectId:data-science-project`,
    `clusterName:ml-analytics-cluster hive queries`,
  ];

  for (const tagQuery of tagQueries) {
    try {
      const tagMatches = await indexer.queryKnowledge(tagQuery, { type: 'all', limit: 5 });
      results.features.passed++;
      results.features.tests.push(`✅ Tag query: "${tagQuery}" (${tagMatches.length} results)`);
      console.log(` ✅ Tag query "${tagQuery}" returned ${tagMatches.length} results`);
    } catch (error) {
      results.features.failed++;
      results.features.tests.push(`❌ Tag query: "${tagQuery}" (error: ${error.message})`);
      console.log(` ❌ Tag query "${tagQuery}" failed: ${error.message}`);
    }
  }

  console.log('\n5️⃣ Testing Enhanced Features...');

  // Test raw document retrieval
  try {
    const rawResults = await indexer.queryKnowledgeWithRawDocuments('machine learning', {
      type: 'all',
      limit: 3,
      includeRawDocument: true,
    });
    if (rawResults.length > 0 && rawResults[0].rawDocument) {
      results.features.passed++;
      results.features.tests.push('✅ Raw document retrieval');
      console.log(
        ` ✅ Raw document retrieval successful (${rawResults.length} results with raw docs)`,
      );
    } else {
      results.features.failed++;
      results.features.tests.push('❌ Raw document retrieval (no raw documents)');
      console.log(' ❌ Raw document retrieval failed - no raw documents returned');
    }
  } catch (error) {
    results.features.failed++;
    results.features.tests.push(`❌ Raw document retrieval: ${error.message}`);
    console.log(` ❌ Raw document retrieval failed: ${error.message}`);
  }

  // Test analytics features
  try {
    const clusterInsights = indexer.getClusterInsights();
    const jobAnalytics = indexer.getJobTypeAnalytics();
    results.features.passed++;
    results.features.tests.push(
      `✅ Analytics (${clusterInsights.totalClusters} clusters, ${jobAnalytics.totalJobs} jobs)`,
    );
    console.log(
      ` ✅ Analytics: ${clusterInsights.totalClusters} clusters, ${jobAnalytics.totalJobs} jobs tracked`,
    );
  } catch (error) {
    results.features.failed++;
    results.features.tests.push(`❌ Analytics: ${error.message}`);
    console.log(` ❌ Analytics failed: ${error.message}`);
  }

  console.log('\n6️⃣ Testing Error Handling...');

  // Test invalid data handling: indexing null is expected to throw.
  try {
    await indexer.indexClusterConfiguration(null);
    results.integration.failed++;
    results.integration.tests.push('❌ Null data handling (should have failed)');
    console.log(' ❌ Null data was accepted (should have failed)');
  } catch {
    results.integration.passed++;
    results.integration.tests.push('✅ Null data properly rejected');
    console.log(' ✅ Null data properly rejected');
  }

  // Test invalid query handling: either a graceful result or a rejection counts as a pass.
  try {
    const invalidResults = await indexer.queryKnowledge('', { type: 'invalid_type' });
    results.integration.passed++;
    results.integration.tests.push(
      `✅ Invalid query handled gracefully (${invalidResults.length} results)`,
    );
    console.log(` ✅ Invalid query handled gracefully`);
  } catch {
    results.integration.passed++;
    results.integration.tests.push('✅ Invalid query properly rejected');
    console.log(' ✅ Invalid query properly rejected');
  }
}

/**
 * Prints the per-category breakdown, overall totals, and follow-up hints.
 *
 * @param {object} results - Per-category tallies ({ passed, failed, tests }).
 * @returns {number} Total number of failed checks across all categories.
 */
function printSummary(results) {
  console.log('\n📊 TEST SUMMARY');
  console.log('='.repeat(60));

  const categories = ['indexing', 'querying', 'features', 'integration'];
  let totalPassed = 0;
  let totalFailed = 0;

  for (const category of categories) {
    const result = results[category];
    totalPassed += result.passed;
    totalFailed += result.failed;
    console.log(`\n${category.toUpperCase()}:`);
    console.log(` Passed: ${result.passed}, Failed: ${result.failed}`);
    for (const test of result.tests) {
      console.log(` ${test}`);
    }
  }

  console.log(`\n🎯 OVERALL: ${totalPassed} passed, ${totalFailed} failed`);
  if (totalFailed === 0) {
    console.log('🎉 All tests passed! Knowledge indexer is working correctly.');
  } else {
    console.log(
      '⚠️ Some tests failed - this indicates the ERR_INVALID_ARG_TYPE fix may need refinement.',
    );
  }

  // Provide actionable insights
  console.log('\n💡 INSIGHTS:');
  if (results.indexing.failed > 0) {
    console.log(' - Indexing issues detected - check Qdrant storage layer');
  }
  if (results.querying.failed > 0) {
    console.log(' - Query issues detected - check semantic search functionality');
  }
  if (results.features.failed > 0) {
    console.log(' - Feature issues detected - check enhanced functionality');
  }
  if (totalFailed === 0) {
    console.log(' - All systems operational - ERR_INVALID_ARG_TYPE fix successful!');
  }

  return totalFailed;
}

/**
 * Entry point: initializes a KnowledgeIndexer against the local Qdrant instance,
 * runs every check, shuts the indexer down, and always prints the summary.
 * Sets process.exitCode to 1 when at least one check failed.
 *
 * @returns {Promise<void>}
 */
async function testKnowledgeIndexer() {
  const results = {
    indexing: { passed: 0, failed: 0, tests: [] },
    querying: { passed: 0, failed: 0, tests: [] },
    features: { passed: 0, failed: 0, tests: [] },
    integration: { passed: 0, failed: 0, tests: [] },
  };
  let indexer;

  try {
    console.log('\n📋 Initializing Knowledge Indexer...');
    indexer = new KnowledgeIndexer({
      url: 'http://localhost:6333',
      collectionName: 'test_knowledge_comprehensive',
      vectorSize: 384,
      distance: 'Cosine',
    });

    let initialized = false;
    try {
      await indexer.initialize();
      initialized = true;
      results.integration.passed++;
      results.integration.tests.push('✅ Knowledge Indexer initialization');
      console.log(' ✅ Knowledge Indexer initialized successfully');
    } catch (error) {
      results.integration.failed++;
      results.integration.tests.push(`❌ Knowledge Indexer initialization: ${error.message}`);
      console.log(` ❌ Initialization failed: ${error.message}`);
    }

    // Can't continue without initialization, but do NOT return here: the summary
    // below must still run so the recorded failure is actually reported.
    if (initialized) {
      await verifyManualStorage(results);
      await runIndexerChecks(indexer, results);
    }
  } catch (error) {
    console.error('❌ Test setup failed:', error);
    // Record the crash so the summary cannot claim that everything passed.
    results.integration.failed++;
    results.integration.tests.push(`❌ Test setup: ${error.message}`);
  } finally {
    // Cleanup
    console.log('\n🧹 Cleaning up...');
    try {
      if (indexer && typeof indexer.shutdown === 'function') {
        await indexer.shutdown();
      }
      console.log('✅ Cleanup completed');
    } catch (cleanupError) {
      console.error('❌ Cleanup failed:', cleanupError);
    }
  }

  // Print summary
  const totalFailed = printSummary(results);
  if (totalFailed > 0) {
    // Signal failure without cutting off pending output.
    process.exitCode = 1;
  }
}

testKnowledgeIndexer().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dipseth/dataproc-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

knowledge-comprehensive.test.js•15.4 KiB