test-embedding-comparison.sh•9.2 kB
test-embedding-comparison.sh#!/bin/bash
echo "🧠 CodeGraph Embedding Provider Comparison Test"
echo "ONNX (Speed) vs Ollama nomic-embed-code (Code-Specialized)"
echo "=" * 60
# Check if nomic-embed-code is available
if ! ollama list | grep -q "nomic-embed-code"; then
echo "📦 Installing nomic-embed-code for comparison..."
ollama pull hf.co/nomic-ai/nomic-embed-code-GGUF:Q4_K_M
if [ $? -ne 0 ]; then
echo "❌ Failed to install nomic-embed-code"
echo "Proceeding with ONNX-only test..."
OLLAMA_AVAILABLE=false
else
echo "✅ nomic-embed-code installed"
OLLAMA_AVAILABLE=true
fi
else
echo "✅ nomic-embed-code already available"
OLLAMA_AVAILABLE=true
fi
# Build with both embedding providers
echo ""
echo "🔧 Building CodeGraph with both embedding providers..."
MACOSX_DEPLOYMENT_TARGET=11.0 cargo build -p codegraph-mcp --features "qwen-integration,faiss,embeddings,embeddings-ollama,codegraph-vector/onnx"
if [ $? -ne 0 ]; then
echo "❌ Build failed"
exit 1
fi
echo "✅ Build successful with both providers"
# Test directory setup
TEST_DIR="embedding-test-$(date +%s)"
mkdir -p "$TEST_DIR"
cd "$TEST_DIR"
# Create test files with different code patterns
echo "📝 Creating test codebase..."
cat > AuthService.ts << 'EOF'
export interface User {
id: string;
email: string;
password: string;
}
export class AuthService {
private users: Map<string, User> = new Map();
async validateCredentials(email: string, password: string): Promise<User | null> {
const user = Array.from(this.users.values()).find(u => u.email === email);
if (!user) {
return null;
}
const isValid = await this.comparePassword(password, user.password);
return isValid ? user : null;
}
private async comparePassword(plaintext: string, hashed: string): Promise<boolean> {
// In real implementation, use bcrypt or similar
return plaintext === hashed;
}
async createUser(email: string, password: string): Promise<User> {
const user: User = {
id: crypto.randomUUID(),
email,
password: await this.hashPassword(password)
};
this.users.set(user.id, user);
return user;
}
private async hashPassword(password: string): Promise<string> {
// In real implementation, use bcrypt
return `hashed_${password}`;
}
}
EOF
cat > DatabaseService.ts << 'EOF'
export interface ConnectionConfig {
host: string;
port: number;
database: string;
username: string;
password: string;
}
export class DatabaseService {
private connection: any = null;
async connect(config: ConnectionConfig): Promise<void> {
try {
this.connection = await this.createConnection(config);
console.log(`Connected to database: ${config.database}`);
} catch (error) {
console.error('Database connection failed:', error);
throw error;
}
}
async query<T>(sql: string, params: any[] = []): Promise<T[]> {
if (!this.connection) {
throw new Error('Database not connected');
}
try {
const result = await this.connection.query(sql, params);
return result.rows;
} catch (error) {
console.error('Query failed:', error);
throw error;
}
}
private async createConnection(config: ConnectionConfig) {
// Mock connection creation
return {
query: async (sql: string, params: any[]) => ({
rows: []
})
};
}
}
EOF
cat > APIController.ts << 'EOF'
import { AuthService } from './AuthService';
import { DatabaseService } from './DatabaseService';
export interface APIResponse<T> {
success: boolean;
data?: T;
error?: string;
}
export class APIController {
constructor(
private authService: AuthService,
private dbService: DatabaseService
) {}
async handleLogin(email: string, password: string): Promise<APIResponse<{token: string}>> {
try {
const user = await this.authService.validateCredentials(email, password);
if (!user) {
return {
success: false,
error: 'Invalid credentials'
};
}
const token = this.generateJWT(user);
return {
success: true,
data: { token }
};
} catch (error) {
return {
success: false,
error: 'Internal server error'
};
}
}
async handleUserCreation(email: string, password: string): Promise<APIResponse<{user: any}>> {
try {
const existingUser = await this.dbService.query(
'SELECT id FROM users WHERE email = ?',
[email]
);
if (existingUser.length > 0) {
return {
success: false,
error: 'User already exists'
};
}
const user = await this.authService.createUser(email, password);
return {
success: true,
data: { user }
};
} catch (error) {
return {
success: false,
error: 'User creation failed'
};
}
}
private generateJWT(user: any): string {
// Mock JWT generation
return `jwt_token_for_${user.id}`;
}
}
EOF
echo "✅ Test codebase created (3 TypeScript files with authentication patterns)"
# Initialize CodeGraph
echo ""
echo "🚀 Initializing CodeGraph..."
../target/debug/codegraph init .
echo ""
echo "📊 Testing Embedding Provider Performance Comparison"
echo ""
# Test 1: ONNX Embeddings (Speed optimized)
echo "🔥 Test 1: ONNX Embeddings (Speed Optimized)"
echo "Provider: ONNX Runtime with optimized models"
echo "Expected: Fast indexing, good general embeddings"
export CODEGRAPH_EMBEDDING_PROVIDER=onnx
export CODEGRAPH_LOCAL_MODEL=/Users/username/.cache/huggingface/hub/models--Qdrant--all-MiniLM-L6-v2-onnx/snapshots/5f1b8cd78bc4fb444dd171e59b18f3a3af89a079
echo "Starting ONNX embedding test..."
time ../target/debug/codegraph index . --force --languages typescript --verbose 2>&1 | grep -E "(Found|embeddings|complete|ONNX)" || true
echo ""a
# Test 2: Ollama Embeddings (Code-specialized)
if [ "$OLLAMA_AVAILABLE" = true ]; then
echo "🧠 Test 2: Ollama nomic-embed-code (Code-Specialized)"
echo "Provider: Ollama with nomic-embed-code"
echo "Expected: Superior code understanding, better semantic search"
export CODEGRAPH_EMBEDDING_PROVIDER=ollama
export CODEGRAPH_EMBEDDING_MODEL=hf.co/nomic-ai/nomic-embed-code-GGUF:Q4_K_M
echo "Starting Ollama embedding test..."
time ../target/debug/codegraph index . --force --languages typescript --verbose 2>&1 | grep -E "(Found|embeddings|complete|Ollama|nomic)" || true
else
echo "🚫 Test 2: Skipped (nomic-embed-code not available)"
fi
echo ""
echo "🔍 Testing Semantic Search Quality"
# Test semantic search with both providers
TEST_QUERIES=("authentication pattern" "database connection" "error handling" "user validation" "API endpoint")
for query in "${TEST_QUERIES[@]}"; do
echo ""
echo "Query: '$query'"
echo "ONNX Results:"
export CODEGRAPH_EMBEDDING_PROVIDER=onnx
echo "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"vector.search\",\"params\":{\"query\":\"$query\",\"limit\":3}}" | \
../target/debug/codegraph start stdio 2>/dev/null | \
jq -r '.result.results[]?.name // "No results"' 2>/dev/null | head -3 || echo "No results"
if [ "$OLLAMA_AVAILABLE" = true ]; then
echo "Ollama Results:"
export CODEGRAPH_EMBEDDING_PROVIDER=ollama
echo "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"vector.search\",\"params\":{\"query\":\"$query\",\"limit\":3}}" | \
../target/debug/codegraph start stdio 2>/dev/null | \
jq -r '.result.results[]?.name // "No results"' 2>/dev/null | head -3 || echo "No results"
fi
done
# Cleanup
cd ..
rm -rf "$TEST_DIR"
echo ""
echo "🎉 Embedding Provider Comparison Complete!"
echo ""
echo "📊 Summary:"
echo "✅ ONNX: Fast, general-purpose embeddings"
if [ "$OLLAMA_AVAILABLE" = true ]; then
echo "✅ Ollama: Code-specialized embeddings with nomic-embed-code"
echo ""
echo "🚀 Revolutionary Architecture Complete:"
echo " • Code-specialized embeddings (nomic-embed-code)"
echo " • SOTA code analysis (Qwen2.5-Coder-14B-128K)"
echo " • 100% local AI development platform"
echo " • Zero external dependencies"
echo " • Best-in-class code understanding at every level"
else
echo "⚠️ Ollama: Not tested (model not available)"
fi
echo ""
echo "🎯 Next Steps:"
echo "1. Choose optimal embedding provider for your use case"
echo "2. Configure environment variables in Claude Desktop"
echo "3. Experience revolutionary local-first AI development"