/**
* Hybrid search combining semantic vector search with keyword matching
*/
import Fuse from 'fuse.js';
import path from 'path';
import { promises as fs } from 'fs';
import { CodeChunk, SearchResult, SearchFilters } from '../types/index.js';
import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
import { analyzerRegistry } from './analyzer-registry.js';
import { IndexCorruptedError } from '../errors/index.js';
export interface SearchOptions {
useSemanticSearch?: boolean;
useKeywordSearch?: boolean;
semanticWeight?: number;
keywordWeight?: number;
}
const DEFAULT_SEARCH_OPTIONS: SearchOptions = {
useSemanticSearch: true,
useKeywordSearch: true,
semanticWeight: 0.7,
keywordWeight: 0.3
};
export class CodebaseSearcher {
private rootPath: string;
private storagePath: string;
private fuseIndex: Fuse<CodeChunk> | null = null;
private chunks: CodeChunk[] = [];
private embeddingProvider: EmbeddingProvider | null = null;
private storageProvider: VectorStorageProvider | null = null;
private initialized = false;
// v1.2: Pattern intelligence for trend detection
private patternIntelligence: {
decliningPatterns: Set<string>;
risingPatterns: Set<string>;
patternWarnings: Map<string, string>;
} | null = null;
constructor(rootPath: string) {
this.rootPath = rootPath;
this.storagePath = path.join(rootPath, '.codebase-index');
}
async initialize(): Promise<void> {
if (this.initialized) return;
try {
await this.loadKeywordIndex();
await this.loadPatternIntelligence();
this.embeddingProvider = await getEmbeddingProvider();
this.storageProvider = await getStorageProvider({
path: this.storagePath
});
this.initialized = true;
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Partial initialization (keyword search only):', error);
this.initialized = true;
}
}
private async loadKeywordIndex(): Promise<void> {
try {
const indexPath = path.join(this.rootPath, '.codebase-index.json');
const content = await fs.readFile(indexPath, 'utf-8');
this.chunks = JSON.parse(content);
this.fuseIndex = new Fuse(this.chunks, {
keys: [
{ name: 'content', weight: 0.4 },
{ name: 'metadata.componentName', weight: 0.25 },
{ name: 'filePath', weight: 0.15 },
{ name: 'relativePath', weight: 0.15 },
{ name: 'componentType', weight: 0.15 },
{ name: 'layer', weight: 0.1 },
{ name: 'tags', weight: 0.15 }
],
includeScore: true,
threshold: 0.4,
useExtendedSearch: true,
ignoreLocation: true
});
} catch (error) {
console.warn('Keyword index load failed:', error);
this.chunks = [];
this.fuseIndex = null;
}
}
/**
* v1.2: Load pattern intelligence for trend detection and warnings
*/
private async loadPatternIntelligence(): Promise<void> {
try {
const intelligencePath = path.join(this.rootPath, '.codebase-intelligence.json');
const content = await fs.readFile(intelligencePath, 'utf-8');
const intelligence = JSON.parse(content);
const decliningPatterns = new Set<string>();
const risingPatterns = new Set<string>();
const patternWarnings = new Map<string, string>();
// Extract pattern indicators from intelligence data
if (intelligence.patterns) {
for (const [_category, data] of Object.entries(intelligence.patterns)) {
const patternData = data as any;
// Track primary pattern
if (patternData.primary?.trend === 'Rising') {
risingPatterns.add(patternData.primary.name.toLowerCase());
}
// Track declining alternatives
if (patternData.alsoDetected) {
for (const alt of patternData.alsoDetected) {
if (alt.trend === 'Declining') {
decliningPatterns.add(alt.name.toLowerCase());
patternWarnings.set(
alt.name.toLowerCase(),
`⚠️ Uses declining pattern: ${alt.name} (${alt.guidance || 'consider modern alternatives'})`
);
} else if (alt.trend === 'Rising') {
risingPatterns.add(alt.name.toLowerCase());
}
}
}
}
}
this.patternIntelligence = { decliningPatterns, risingPatterns, patternWarnings };
console.error(
`[search] Loaded pattern intelligence: ${decliningPatterns.size} declining, ${risingPatterns.size} rising patterns`
);
} catch (error) {
console.warn(
'Pattern intelligence load failed (will proceed without trend detection):',
error
);
this.patternIntelligence = null;
}
}
/**
* v1.2: Detect pattern trend from chunk content
*/
private detectChunkTrend(chunk: CodeChunk): {
trend: 'Rising' | 'Stable' | 'Declining' | undefined;
warning?: string;
} {
if (!this.patternIntelligence) {
return { trend: undefined };
}
const content = chunk.content.toLowerCase();
const { decliningPatterns, risingPatterns, patternWarnings } = this.patternIntelligence;
// Check for declining patterns
for (const pattern of decliningPatterns) {
if (content.includes(pattern)) {
return {
trend: 'Declining',
warning: patternWarnings.get(pattern)
};
}
}
// Check for rising patterns
for (const pattern of risingPatterns) {
if (content.includes(pattern)) {
return { trend: 'Rising' };
}
}
return { trend: 'Stable' };
}
async search(
query: string,
limit: number = 5,
filters?: SearchFilters,
options: SearchOptions = DEFAULT_SEARCH_OPTIONS
): Promise<SearchResult[]> {
if (!this.initialized) {
await this.initialize();
}
const { useSemanticSearch, useKeywordSearch, semanticWeight, keywordWeight } = {
...DEFAULT_SEARCH_OPTIONS,
...options
};
const results: Map<string, { chunk: CodeChunk; scores: number[] }> = new Map();
if (useSemanticSearch && this.embeddingProvider && this.storageProvider) {
try {
const vectorResults = await this.semanticSearch(query, limit * 2, filters);
vectorResults.forEach((result) => {
const id = result.chunk.id;
const existing = results.get(id);
if (existing) {
existing.scores.push(result.score * (semanticWeight || 0.7));
} else {
results.set(id, {
chunk: result.chunk,
scores: [result.score * (semanticWeight || 0.7)]
});
}
});
} catch (error) {
if (error instanceof IndexCorruptedError) {
throw error; // Propagate to handler for auto-heal
}
console.warn('Semantic search failed:', error);
}
}
if (useKeywordSearch && this.fuseIndex) {
try {
const keywordResults = await this.keywordSearch(query, limit * 2, filters);
keywordResults.forEach((result) => {
const id = result.chunk.id;
const existing = results.get(id);
if (existing) {
existing.scores.push(result.score * (keywordWeight || 0.3));
} else {
results.set(id, {
chunk: result.chunk,
scores: [result.score * (keywordWeight || 0.3)]
});
}
});
} catch (error) {
console.warn('Keyword search failed:', error);
}
}
const combinedResults: SearchResult[] = Array.from(results.entries())
.map(([_id, { chunk, scores }]) => {
// Calculate base combined score
let combinedScore = scores.reduce((sum, score) => sum + score, 0);
// Normalize to 0-1 range (scores are already weighted)
// If both semantic and keyword matched, max possible is ~1.0
combinedScore = Math.min(1.0, combinedScore);
// Boost scores for Angular components with proper detection
if (chunk.componentType && chunk.framework === 'angular') {
combinedScore = Math.min(1.0, combinedScore * 1.3);
}
// Boost if layer is detected
if (chunk.layer && chunk.layer !== 'unknown') {
combinedScore = Math.min(1.0, combinedScore * 1.1);
}
// v1.2: Detect pattern trend and apply momentum boost
const { trend, warning } = this.detectChunkTrend(chunk);
if (trend === 'Rising') {
combinedScore = Math.min(1.0, combinedScore * 1.15); // +15% for modern patterns
} else if (trend === 'Declining') {
combinedScore = combinedScore * 0.9; // -10% for legacy patterns
}
const summary = this.generateSummary(chunk);
const snippet = this.generateSnippet(chunk.content);
return {
summary,
snippet,
filePath: chunk.filePath,
startLine: chunk.startLine,
endLine: chunk.endLine,
score: combinedScore,
relevanceReason: this.generateRelevanceReason(chunk, query),
language: chunk.language,
framework: chunk.framework,
componentType: chunk.componentType,
layer: chunk.layer,
metadata: chunk.metadata,
// v1.2: Pattern momentum awareness
trend,
patternWarning: warning
} as SearchResult;
})
.sort((a, b) => b.score - a.score)
.slice(0, limit);
return combinedResults;
}
private generateSummary(chunk: CodeChunk): string {
const analyzer = chunk.framework ? analyzerRegistry.get(chunk.framework) : null;
if (analyzer && analyzer.summarize) {
try {
const summary = analyzer.summarize(chunk);
// Only use analyzer summary if it's meaningful (not the generic fallback)
if (summary && !summary.startsWith('Code in ') && !summary.includes(': lines ')) {
return summary;
}
} catch (error) {
console.warn('Analyzer summary failed:', error);
}
}
// Enhanced generic summary
const fileName = path.basename(chunk.filePath);
const componentName = chunk.metadata?.componentName;
const componentType = chunk.componentType;
// Try to extract a meaningful name from content
const classMatch = chunk.content.match(
/(?:export\s+)?(?:class|interface|type|enum|function)\s+(\w+)/
);
const name = componentName || (classMatch ? classMatch[1] : null);
if (name && componentType) {
return `${
componentType.charAt(0).toUpperCase() + componentType.slice(1)
} '${name}' in ${fileName}.`;
} else if (name) {
return `'${name}' defined in ${fileName}.`;
} else if (componentType) {
return `${componentType.charAt(0).toUpperCase() + componentType.slice(1)} in ${fileName}.`;
}
// Last resort: describe the file type
const ext = path.extname(fileName).slice(1);
const langMap: Record<string, string> = {
ts: 'TypeScript',
js: 'JavaScript',
html: 'HTML template',
scss: 'SCSS styles',
css: 'CSS styles',
json: 'JSON config'
};
return `${langMap[ext] || ext.toUpperCase()} in ${fileName}.`;
}
private generateSnippet(content: string, maxLines: number = 100): string {
const lines = content.split('\n');
if (lines.length <= maxLines) {
return content;
}
const snippet = lines.slice(0, maxLines).join('\n');
const remaining = lines.length - maxLines;
return `${snippet}\n\n... [${remaining} more lines]`;
}
private async semanticSearch(
query: string,
limit: number,
filters?: SearchFilters
): Promise<{ chunk: CodeChunk; score: number }[]> {
if (!this.embeddingProvider || !this.storageProvider) {
return [];
}
const queryVector = await this.embeddingProvider.embed(query);
const results = await this.storageProvider.search(queryVector, limit, filters);
return results.map((r) => ({
chunk: r.chunk,
score: r.score
}));
}
private async keywordSearch(
query: string,
limit: number,
filters?: SearchFilters
): Promise<{ chunk: CodeChunk; score: number }[]> {
if (!this.fuseIndex || this.chunks.length === 0) {
return [];
}
let fuseResults = this.fuseIndex.search(query);
if (filters) {
fuseResults = fuseResults.filter((r) => {
const chunk = r.item;
if (filters.componentType && chunk.componentType !== filters.componentType) {
return false;
}
if (filters.layer && chunk.layer !== filters.layer) {
return false;
}
if (filters.framework && chunk.framework !== filters.framework) {
return false;
}
if (filters.language && chunk.language !== filters.language) {
return false;
}
if (filters.tags && filters.tags.length > 0) {
const chunkTags = chunk.tags || [];
if (!filters.tags.some((tag) => chunkTags.includes(tag))) {
return false;
}
}
return true;
});
}
return fuseResults.slice(0, limit).map((r) => {
const chunk = r.item;
let score = 1 - (r.score || 0);
// Boost exact matches on class name or file path
const queryLower = query.toLowerCase();
const fileName = path.basename(chunk.filePath).toLowerCase();
const relativePathLower = chunk.relativePath.toLowerCase();
const componentName = chunk.metadata?.componentName?.toLowerCase() || '';
// Exact class name match
if (componentName && queryLower === componentName) {
score = Math.min(1.0, score + 0.3);
}
// Exact file name match
if (
fileName === queryLower ||
fileName.replace(/\.ts$/, '') === queryLower.replace(/\.ts$/, '')
) {
score = Math.min(1.0, score + 0.2);
}
// File path contains query
if (
chunk.filePath.toLowerCase().includes(queryLower) ||
relativePathLower.includes(queryLower)
) {
score = Math.min(1.0, score + 0.1);
}
return {
chunk,
score
};
});
}
private generateRelevanceReason(chunk: CodeChunk, query: string): string {
const reasons: string[] = [];
if (chunk.componentType) {
reasons.push(`${chunk.componentType}`);
}
if (chunk.layer) {
reasons.push(`${chunk.layer} layer`);
}
const queryLower = query.toLowerCase();
const matchingTags = (chunk.tags || []).filter((tag) => queryLower.includes(tag.toLowerCase()));
if (matchingTags.length > 0) {
reasons.push(`tags: ${matchingTags.join(', ')}`);
}
return reasons.length > 0 ? reasons.join('; ') : 'content match';
}
async getChunkCount(): Promise<number> {
if (this.storageProvider) {
return await this.storageProvider.count();
}
return this.chunks.length;
}
isReady(): boolean {
return this.initialized;
}
}