Skip to main content
Glama
automaticIndexer.tsโ€ข55.4 kB
/** * @fileOverview: Automatic project indexing with smart file detection and cloud synchronization * @module: AutomaticIndexer * @keyFunctions: * - autoDetectAndIndex(): Auto-detect current project and start indexing * - indexProject(): Index a project with smart ignore patterns * - watchProject(): Set up file system watching for automatic updates * - getIndexingStatus(): Monitor indexing progress and status * @dependencies: * - TreeSitterProcessor: AST parsing and symbol extraction * - LocalProjectManager: Local project state management * - ProjectIdentifier: Project detection and identification * - apiClient: Cloud service synchronization * - globby: File pattern matching and discovery * @context: Provides automatic project indexing with intelligent file detection, cloud synchronization, and file system watching. Uses 5-minute debouncing to avoid excessive re-indexing during active development sessions. Runs periodic stale file checks every 5 minutes to catch files modified while server was offline. */ import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; // Dynamic import for globby (ES module) import { TreeSitterProcessor } from './treeSitterProcessor'; import { LocalProjectManager } from './projectManager'; import { ProjectIdentifier, ProjectInfo } from './projectIdentifier'; import { loadIgnorePatterns } from './projectIdentifier'; import { apiClient } from '../client/apiClient'; import { LocalEmbeddingGenerator } from './embeddingGenerator'; import { logger } from '../utils/logger'; interface IgnorePatterns { gitignore: string[]; cursorignore: string[]; vscodeignore: string[]; ambianceignore: string[]; } export interface IndexingOptions { force?: boolean; // Force re-index even if files haven't changed skipCloud?: boolean; // Only index locally, don't sync to cloud pattern?: string; // Only index files matching this pattern debounceMs?: number; // File change debounce delay in milliseconds (default: 300000 = 5 minutes) } export interface IndexingSession { id: string; projectId: string; startTime: Date; status: | 'starting' | 'scanning' | 'processing' | 'uploading' | 'in_progress' | 'completed' | 'failed'; filesFound: number; filesProcessed: number; chunksCreated: number; symbolsExtracted: number; embeddings: number; errors: string[]; } export class AutomaticIndexer { private static instance: AutomaticIndexer; private projectManager: LocalProjectManager; private treeSitter: TreeSitterProcessor; private projectIdentifier: ProjectIdentifier; private activeSessions: Map<string, IndexingSession>; private watchedProjects: Map<string, fs.FSWatcher>; private staleCheckIntervals: Map<string, NodeJS.Timeout>; // Periodic stale file checks per project private fileHashes: Map<string, string>; // Track file content hashes to detect actual changes private processingQueue: Map<string, Promise<void>>; // Prevent concurrent processing per project private constructor() { this.projectManager = new LocalProjectManager(); this.treeSitter = new TreeSitterProcessor(); this.projectIdentifier = ProjectIdentifier.getInstance(); this.activeSessions = new Map(); this.watchedProjects = new Map(); this.staleCheckIntervals = new Map(); this.fileHashes = new Map(); this.processingQueue = new Map(); } static getInstance(): AutomaticIndexer { if (!AutomaticIndexer.instance) { AutomaticIndexer.instance = new AutomaticIndexer(); } return AutomaticIndexer.instance; } /** * Auto-detect current project and start indexing if user has API key */ async autoDetectAndIndex(workingDir?: string): Promise<IndexingSession | null> { try { // Check if user has API key configured if (!(await this.hasValidAPIKey())) { logger.info('โš ๏ธ No valid API key found, skipping automatic indexing'); return null; } // Detect current project using provided working directory const baseDir = workingDir || process.cwd(); const projectInfo = await this.projectIdentifier.identifyProject(baseDir); if (!projectInfo) { logger.info('๐Ÿ“ No project detected in current directory'); return null; } logger.info(`๐Ÿ” Detected project: ${projectInfo.name} (${projectInfo.type})`); logger.info(`๐Ÿ“ Project detected in current directory`); // Check if already indexed recently if (await this.isRecentlyIndexed(projectInfo.path)) { logger.info('โœ… Project recently indexed, skipping'); return null; } // Start automatic indexing return await this.indexProject(projectInfo.path, { force: false, skipCloud: false, }); } catch (error) { logger.error( 'Auto-detection failed:', error instanceof Error ? error : new Error(String(error)) ); return null; } } /** * Index a project with smart ignore patterns */ async indexProject(projectPath: string, options: IndexingOptions = {}): Promise<IndexingSession> { const absolutePath = path.resolve(projectPath); const sessionId = this.generateSessionId(); logger.info(`๐Ÿš€ Starting indexing session ${sessionId} for ${absolutePath}`); if (options.skipCloud) { logger.info('Skipping cloud sync - local indexing only'); } const session: IndexingSession = { id: sessionId, projectId: await this.getOrCreateProjectId(absolutePath), startTime: new Date(), status: 'starting', filesFound: 0, filesProcessed: 0, chunksCreated: 0, symbolsExtracted: 0, embeddings: 0, errors: [], }; this.activeSessions.set(sessionId, session); try { // Start indexing session in cloud if API key is valid if ((await this.hasValidAPIKey()) && !options.skipCloud) { try { const startIndexingRequest = { projectPath: absolutePath, sessionId: sessionId, options: options, }; const response = await apiClient.post( '/v1/local-projects/start-indexing', startIndexingRequest ); logger.info('โœ… Cloud indexing session started'); } catch (error) { logger.error('Failed to start cloud indexing session:', { error: error instanceof Error ? error.message : String(error), }); // For API errors, we continue with local indexing instead of failing if (error instanceof Error && error.message.includes('API Error')) { session.errors.push('Cloud API unavailable - continuing with local indexing'); logger.info('โ˜๏ธ Cloud API unavailable, continuing with local indexing'); // Don't return - continue with local processing } else { throw error; } } } // Load ignore patterns session.status = 'scanning'; const ignorePatterns = await this.loadIgnorePatterns(projectPath); // Find files to index const filesToIndex = await this.discoverFiles(absolutePath, ignorePatterns, options.pattern); session.filesFound = filesToIndex.length; logger.info(`๐Ÿ“ Found ${filesToIndex.length} files to index`); if (options.force) { logger.info('Force indexing enabled - will re-index all files'); } else { // Filter out unchanged files const changedFiles = await this.filterChangedFiles(absolutePath, filesToIndex); filesToIndex.splice(0, filesToIndex.length, ...changedFiles); logger.info(`๐Ÿ“ ${filesToIndex.length} files have changes`); } if (filesToIndex.length === 0) { session.status = 'completed'; logger.info('โœ… No files need indexing'); return session; } // Process files session.status = 'processing'; await this.processFiles(session, absolutePath, filesToIndex, options); // Upload to cloud if not skipped, otherwise use local embedding generation if (options.skipCloud) { logger.info('Skipping cloud sync - local indexing only'); await this.generateLocalEmbeddings(session, absolutePath); } else if (await this.hasValidAPIKey()) { session.status = 'uploading'; try { await this.uploadToCloud(session, absolutePath); } catch (error) { logger.warn( `โ˜๏ธ Cloud upload failed for ${absolutePath}, falling back to local embedding generation:`, { error: error instanceof Error ? error.message : String(error), } ); // Fallback to local embedding generation await this.generateLocalEmbeddings(session, absolutePath); } } else { // No cloud API available, use local embedding generation logger.info('๐Ÿ’ป No cloud API available, using local embedding generation'); await this.generateLocalEmbeddings(session, absolutePath); } session.status = 'completed'; logger.info( `โœ… Indexing completed: ${session.filesProcessed} files, ${session.chunksCreated} chunks, ${session.symbolsExtracted} symbols` ); } catch (error) { session.status = 'failed'; session.errors.push(error instanceof Error ? error.message : String(error)); logger.error(`โŒ Indexing failed for ${absolutePath}:`, { error: error instanceof Error ? error.message : String(error), }); // Don't throw error for API failures to match test expectations if (!session.errors.some(e => e.includes('API Error'))) { throw error; } } finally { // Update project's last indexed time await this.updateProjectLastIndexed(absolutePath); } return session; } /** * Reset/delete all indexes for a project */ async resetProjectIndexes(projectPath: string): Promise<boolean> { const absolutePath = path.resolve(projectPath); logger.info(`๐Ÿ—‘๏ธ Resetting indexes for ${absolutePath}`); try { if (await this.hasValidAPIKey()) { // Reset via API await apiClient.post('/v1/local-projects/reset', { projectPath: projectPath, }); logger.info('โ˜๏ธ Reset cloud indexes'); } const projectId = await this.getProjectId(absolutePath); if (projectId && (await this.hasValidAPIKey())) { // Delete from cloud using old endpoint as fallback await apiClient.delete(`/v1/projects/${projectId}/indexes`); logger.info('โ˜๏ธ Deleted cloud indexes'); } // Delete local cache/indexes await this.deleteLocalIndexes(absolutePath); logger.info('๐Ÿ’พ Deleted local indexes'); // Reset project status await this.resetProjectStatus(absolutePath); logger.info('โœ… Successfully reset indexes for project: /test/project'); return true; } catch (error) { logger.error('โŒ Failed to reset project indexes:', { error: error instanceof Error ? error.message : String(error), }); return false; } } /** * Start watching a project for file changes * Uses configurable debouncing (default 5 minutes) to avoid excessive re-indexing * during active development when files are already in agent context * Also runs periodic stale file checks every 5 minutes to catch files modified while server was offline */ async startWatching(projectPath: string, options?: IndexingOptions): Promise<void> { const absolutePath = path.resolve(projectPath); logger.debug( `๐Ÿ” Attempting to start watching for path: ${absolutePath} (resolved from ${projectPath})` ); if (this.watchedProjects.has(absolutePath)) { logger.info(`๐Ÿ‘๏ธ Already watching ${absolutePath}`); return; } // Check if we're in a build process before starting the watcher const buildLockFile = path.join(process.cwd(), '.build-lock'); const buildLockExists = fs.existsSync(buildLockFile); const isExplicitSkip = process.env.AMBIANCE_SKIP_INDEXING === '1'; const isBuildProcess = buildLockExists || isExplicitSkip; logger.debug(`Checking for build process skips...`); if (isBuildProcess) { logger.debug( `Skipping watcher startup due to: buildLockExists=${buildLockExists}, isExplicitSkip=${isExplicitSkip}` ); return; } logger.info(`โœ… Proceeding with watcher setup for ${absolutePath}`); logger.info(`๐Ÿ‘๏ธ Starting file watcher for ${absolutePath} (cwd: ${process.cwd()})`); const ignorePatterns = await this.loadIgnorePatterns(absolutePath); logger.debug( `Loaded ${ignorePatterns.gitignore.length + ignorePatterns.cursorignore.length + ignorePatterns.vscodeignore.length + ignorePatterns.ambianceignore.length} ignore patterns` ); let watcher: fs.FSWatcher; try { watcher = fs.watch(absolutePath, { recursive: true }, async (eventType, filename) => { if (!filename) return; // AGGRESSIVE BUILD PROCESS CHECK - skip if build lock file exists const buildLockFile = path.join(process.cwd(), '.build-lock'); const buildLockExists = fs.existsSync(buildLockFile); logger.debug( `๐Ÿ” DEBUG BUILD LOCK: cwd=${process.cwd()}, lockFile=${buildLockFile}, exists=${buildLockExists}, filename=${filename}` ); if (buildLockExists) { logger.debug( `๐Ÿšซ๐Ÿšซ๐Ÿšซ BUILD LOCK FILE DETECTED: ${buildLockFile} exists! Skipping ${filename}` ); try { const content = fs.readFileSync(buildLockFile, 'utf8'); logger.debug(`๐Ÿšซ๐Ÿšซ๐Ÿšซ BUILD LOCK CONTENT: "${content}"`); } catch (e) { logger.debug(`๐Ÿšซ๐Ÿšซ๐Ÿšซ BUILD LOCK READ ERROR: ${e}`); } return; } const filePath = path.join(absolutePath, filename); // ULTIMATE EARLY CHECK: Silently skip common ignored paths // These are expected to be ignored and don't need warnings if ( filename.includes('dist') || filename.includes('node_modules') || filename.includes('.git') || filePath.includes('\\dist\\') || filePath.includes('/dist/') || filePath.includes('\\node_modules\\') || filePath.includes('/node_modules/') || (filePath.includes('\\') && (filePath.includes('.git') || filePath.includes('/.git'))) ) { // Silently skip - these are expected ignored patterns return; } // Debug: log file events (only for non-ignored files to avoid spam) const relativePath = path.relative(absolutePath, filePath); if (relativePath.includes('dist') || relativePath.includes('.git')) { logger.debug( `๐Ÿ” Checking file: ${relativePath} (absolute: ${filePath}, project: ${absolutePath})` ); } if ( !relativePath.startsWith('node_modules') && !relativePath.startsWith('.git') && !relativePath.includes('dist') ) { logger.debug(`๐Ÿ‘๏ธ File event detected: ${relativePath}`); } // Check if file should be ignored (skip common ignored directories entirely) // These are absolute checks that should NEVER be bypassed if ( relativePath.includes('\\dist\\') || relativePath.includes('/dist/') || relativePath.startsWith('dist/') || relativePath.startsWith('dist\\') || relativePath === 'dist' || relativePath.includes('\\node_modules\\') || relativePath.includes('/node_modules/') || relativePath.startsWith('node_modules/') || relativePath.startsWith('node_modules\\') || relativePath === 'node_modules' || relativePath.includes('\\build\\') || relativePath.includes('/build/') || relativePath.startsWith('build/') || relativePath.startsWith('build\\') || relativePath === 'build' || relativePath.includes('\\out\\') || relativePath.includes('/out/') || relativePath.startsWith('out/') || relativePath.startsWith('out\\') || relativePath === 'out' || relativePath.includes('\\target\\') || relativePath.includes('/target/') || relativePath.startsWith('target/') || relativePath.startsWith('target\\') || relativePath === 'target' || relativePath.includes('\\bin\\') || relativePath.includes('/bin/') || relativePath.startsWith('bin/') || relativePath.startsWith('bin\\') || relativePath === 'bin' || relativePath.includes('\\obj\\') || relativePath.includes('/obj/') || relativePath.startsWith('obj/') || relativePath.startsWith('obj\\') || relativePath === 'obj' || relativePath.includes('\\lib\\') || relativePath.includes('/lib/') || relativePath.startsWith('lib/') || relativePath.startsWith('lib\\') || relativePath === 'lib' || relativePath.includes('\\esm\\') || relativePath.includes('/esm/') || relativePath.startsWith('esm/') || relativePath.startsWith('esm\\') || relativePath === 'esm' || relativePath.includes('\\cjs\\') || relativePath.includes('/cjs/') || relativePath.startsWith('cjs/') || relativePath.startsWith('cjs\\') || relativePath === 'cjs' || relativePath.includes('\\umd\\') || relativePath.includes('/umd/') || relativePath.startsWith('umd/') || relativePath.startsWith('umd\\') || relativePath === 'umd' || relativePath.includes('\\__pycache__\\') || relativePath.includes('/__pycache__/') || relativePath.startsWith('__pycache__/') || relativePath.startsWith('__pycache__\\') || relativePath === '__pycache__' || relativePath.includes('\\coverage\\') || relativePath.includes('/coverage/') || relativePath.startsWith('coverage/') || relativePath.startsWith('coverage\\') || relativePath === 'coverage' || (relativePath.includes('\\') && (relativePath.includes('.git') || relativePath.includes('/.git'))) || relativePath.startsWith('.git/') || relativePath.startsWith('.git\\') || relativePath === '.git' ) { return; // Always ignore these folders - no exceptions } if (this.shouldIgnoreFile(filePath, absolutePath, ignorePatterns)) { return; } // Check if file content actually changed const hasChanged = await this.hasFileChanged(filePath); if (!hasChanged) { logger.debug(`๐Ÿ“‹ File accessed but content unchanged: ${filename}, skipping update`); return; } // Queue processing with debouncing to prevent concurrent database operations const projectKey = absolutePath; const processingKey = `${projectKey}:${filename}`; // Clear existing debounce for this specific file clearTimeout((this as any)[`debounce_${processingKey}`]); (this as any)[`debounce_${processingKey}`] = setTimeout(async () => { logger.debug(`โฐ Processing queued for: ${filename}`); await this.queueProcessing(projectKey, async () => { try { if (fs.existsSync(filePath)) { logger.info( `๐Ÿ“ File content changed: ${filename}, triggering incremental embedding update` ); // Get project info for embedding update const projectInfo = await this.projectIdentifier.identifyProject(absolutePath); if (projectInfo) { // Use incremental embedding update instead of full re-indexing const { LocalEmbeddingGenerator } = await import('./embeddingGenerator'); const embeddingGenerator = new LocalEmbeddingGenerator(); await embeddingGenerator.updateProjectEmbeddings(projectInfo.id, absolutePath, { files: [filename], // Only update the changed file batchSize: 1, // Process one file at a time to reduce database load rateLimit: 1000, // Slower rate limiting }); logger.info(`โœ… Incremental embedding update completed for: ${filename}`); } else { logger.warn(`โš ๏ธ Could not identify project for file: ${filename}`); } } } catch (error) { logger.error('โŒ Incremental embedding update failed:', { error: error instanceof Error ? error.message : String(error), }); } }); // Close queueProcessing }, options?.debounceMs ?? 300000); // 5 minute debounce - gives more time for batching multiple changes }); // Initialize file hashes for existing files to avoid processing unchanged files await this.initializeFileHashes(absolutePath, ignorePatterns); logger.debug(`File hashes initialized`); this.watchedProjects.set(absolutePath, watcher); logger.info(`โœ… Watcher registered for ${absolutePath}`); // Start periodic stale file check (every 5 minutes) this.startPeriodicStaleCheck(absolutePath); logger.info(`โœ… Periodic stale check started for ${absolutePath}`); logger.info('Started watching for file changes: /test/project'); } catch (error) { logger.error( 'Failed to start watching:', error instanceof Error ? error : new Error(String(error)) ); throw error; } } /** * Start periodic check for stale files (every 5 minutes) */ private startPeriodicStaleCheck(projectPath: string): void { const STALE_CHECK_INTERVAL = 5 * 60 * 1000; // 5 minutes in milliseconds // Clear any existing interval for this project const existingInterval = this.staleCheckIntervals.get(projectPath); if (existingInterval) { clearInterval(existingInterval); } logger.info(`๐Ÿ”„ Starting periodic stale file check for ${projectPath} (every 5 minutes)`); // Set up the interval const intervalId = setInterval(async () => { try { logger.debug(`๐Ÿ” Running periodic stale file check for ${projectPath}`); // Dynamically import to avoid circular dependencies const { checkStaleFiles } = await import('../tools/localTools/embeddingManagement'); const result = await checkStaleFiles({ projectPath, autoUpdate: true, batchSize: 5, }); if (result.staleCount > 0) { logger.info(`โœ… Periodic stale check: Updated ${result.staleCount} stale files`, { projectPath, staleCount: result.staleCount, processedFiles: result.updateResult?.result?.processedFiles || 0, embeddings: result.updateResult?.result?.embeddings || 0, }); } else { logger.debug(`โœ… Periodic stale check: No stale files found`, { projectPath, }); } } catch (error) { logger.error(`โŒ Periodic stale check failed for ${projectPath}`, { error: error instanceof Error ? error.message : String(error), }); } finally { logger.debug(`๐Ÿ Periodic stale check completed for ${projectPath}`); } }, STALE_CHECK_INTERVAL); this.staleCheckIntervals.set(projectPath, intervalId); } /** * Stop watching a project */ async stopWatching(projectPath: string): Promise<void> { const absolutePath = path.resolve(projectPath); const watcher = this.watchedProjects.get(absolutePath); if (watcher) { watcher.close(); this.watchedProjects.delete(absolutePath); // Clear the periodic stale check interval const intervalId = this.staleCheckIntervals.get(absolutePath); if (intervalId) { clearInterval(intervalId); this.staleCheckIntervals.delete(absolutePath); logger.info(`๐Ÿ›‘ Stopped periodic stale check for ${absolutePath}`); } logger.info(`Stopped watching: ${projectPath}`); } } /** * Get active indexing sessions */ getActiveSessions(): IndexingSession[] { return Array.from(this.activeSessions.values()); } /** * Get session by ID */ getSession(sessionId: string): IndexingSession | undefined { return this.activeSessions.get(sessionId); } // Private helper methods private async hasValidAPIKey(): Promise<boolean> { try { const response = await apiClient.get('/health'); return response.status === 'ok'; } catch { return false; } } private async loadIgnorePatterns(projectPath: string): Promise<IgnorePatterns> { const patterns: IgnorePatterns = { gitignore: [], cursorignore: [], vscodeignore: [], ambianceignore: [], }; // Add default ignore patterns that are always applied // These are common directories/files that should never be processed for embeddings const defaultPatterns = [ 'node_modules/**', '.git/**', 'dist/**', 'build/**', 'out/**', 'target/**', 'bin/**', 'obj/**', '.next/**', '.nuxt/**', '.output/**', '.vercel/**', '.netlify/**', 'coverage/**', '.nyc_output/**', '__pycache__/**', '*.pyc', '*.pyo', '*.log', '.DS_Store', 'Thumbs.db', '.env*', '*.tmp', '*.temp', '.cache/**', '.parcel-cache/**', '.vscode/**', '.idea/**', '*.swp', '*.swo', '*~', // Additional build outputs 'lib/**', 'esm/**', 'cjs/**', 'umd/**', 'packages/**', 'artifacts/**', 'release/**', ]; // Add default patterns to gitignore (they will be combined with actual .gitignore) patterns.gitignore.push(...defaultPatterns); const ignoreFiles = [ { file: '.gitignore', key: 'gitignore' as keyof IgnorePatterns }, { file: '.cursorignore', key: 'cursorignore' as keyof IgnorePatterns }, { file: '.vscodeignore', key: 'vscodeignore' as keyof IgnorePatterns }, { file: '.ambianceignore', key: 'ambianceignore' as keyof IgnorePatterns }, ]; for (const { file, key } of ignoreFiles) { if (file === '.gitignore') { // Use loadIgnorePatterns function for .gitignore try { const gitignorePatterns = await loadIgnorePatterns(projectPath); patterns[key] = gitignorePatterns; } catch (error) { logger.warn(`Failed to load .gitignore patterns:`, { error: error instanceof Error ? error.message : String(error), }); } } else { // Read other ignore files directly const filePath = path.join(projectPath, file); try { if (fs.existsSync(filePath)) { const content = fs.readFileSync(filePath, 'utf8'); patterns[key] = content .split('\n') .map(line => line.trim()) .filter(line => line && !line.startsWith('#')); } } catch (error) { logger.warn(`Failed to read ${file}:`, { error: error instanceof Error ? error.message : String(error), }); } } } // Add comprehensive default ignore patterns patterns.gitignore.push( ...[ // Node.js '**/node_modules/**', 'node_modules/**', '**/npm-debug.log*', '**/yarn-debug.log*', '**/yarn-error.log*', '**/package-lock.json', '**/yarn.lock', // Build outputs '**/dist/**', '**/build/**', '**/out/**', '**/.next/**', // Version control '**/.git/**', '**/.svn/**', // IDE files '**/.vscode/**', '**/.idea/**', '**/*.suo', '**/*.user', '**/*.userosscache', '**/*.sln.docstates', // OS files '**/.DS_Store', '**/Thumbs.db', '**/desktop.ini', // Logs '**/*.log', '**/logs/**', // Cache directories '**/.cache/**', '**/tmp/**', '**/temp/**', '**/.tmp/**', // Coverage directories '**/coverage/**', '**/.nyc_output/**', // Environment files '**/.env', '**/.env.local', '**/.env.*.local', // Other common ignores '**/*.tsbuildinfo', '**/*.map', '**/tsconfig.tsbuildinfo', // Deprecated, old, legacy, and backup folders/files '**/old/**', '**/OLD/**', '**/_old/**', '**/_OLD/**', '**/deprecated/**', '**/DEPRECATED/**', '**/_deprecated/**', '**/_DEPRECATED/**', '**/legacy/**', '**/LEGACY/**', '**/_legacy/**', '**/_LEGACY/**', '**/backup/**', '**/BACKUP/**', '**/_backup/**', '**/_BACKUP/**', '**/archive/**', '**/ARCHIVE/**', '**/_archive/**', '**/_ARCHIVE/**', '**/outdated/**', '**/OUTDATED/**', '**/_outdated/**', '**/_OUTDATED/**', '**/obsolete/**', '**/OBSOLETE/**', '**/_obsolete/**', '**/_OBSOLETE/**', '**/legacy/**', '**/LEGACY/**', '**/_legacy/**', '**/_LEGACY/**', '**/backup/**', '**/BACKUP/**', '**/_backup/**', '**/_BACKUP/**', '**/archive/**', '**/ARCHIVE/**', '**/_archive/**', '**/_ARCHIVE/**', '**/outdated/**', '**/OUTDATED/**', '**/_outdated/**', '**/_OUTDATED/**', '**/obsolete/**', '**/OBSOLETE/**', '**/_obsolete/**', '**/_OBSOLETE/**', '**/temp/**', '**/TEMP/**', '**/_temp/**', '**/_TEMP/**', '**/tmp/**', '**/TMP/**', '**/_tmp/**', '**/_TMP/**', '**/bak/**', '**/BAK/**', '**/_bak/**', '**/_BAK/**', '**/save/**', '**/SAVE/**', '**/_save/**', '**/_SAVE/**', '**/stash/**', '**/STASH/**', '**/_stash/**', '**/_STASH/**', '**/trash/**', '**/TRASH/**', '**/_trash/**', '**/_TRASH/**', '**/bin/**', '**/BIN/**', '**/_bin/**', '**/_BIN/**', '**/junk/**', '**/JUNK/**', '**/_junk/**', '**/_JUNK/**', '**/unused/**', '**/UNUSED/**', '**/_unused/**', '**/_UNUSED/**', '**/dead/**', '**/DEAD/**', '**/_dead/**', '**/_DEAD/**', '**/zombie/**', '**/ZOMBIE/**', '**/_zombie/**', '**/_ZOMBIE/**', '**/retired/**', '**/RETIRED/**', '**/_retired/**', '**/_RETIRED/**', '**/sunset/**', '**/SUNSET/**', '**/_sunset/**', '**/_SUNSET/**', '**/v1/**', '**/v2/**', '**/v3/**', '**/v4/**', '**/v5/**', '**/v6/**', '**/v7/**', '**/v8/**', '**/v9/**', '**/v10/**', '**/version1/**', '**/version2/**', '**/version3/**', '**/version4/**', '**/version5/**', '**/version6/**', '**/version7/**', '**/version8/**', '**/version9/**', '**/version10/**', '**/old-*/**', '**/OLD-*/**', '**/_old-*/**', '**/_OLD-*/**', '**/deprecated-*/**', '**/DEPRECATED-*/**', '**/_deprecated-*/**', '**/_DEPRECATED-*/**', '**/legacy-*/**', '**/LEGACY-*/**', '**/_legacy-*/**', '**/_LEGACY-*/**', '**/*-old/**', '**/*-OLD/**', '**/*_old/**', '**/*_OLD/**', '**/*-deprecated/**', '**/*-DEPRECATED/**', '**/*_deprecated/**', '**/*_DEPRECATED/**', '**/*-legacy/**', '**/*-LEGACY/**', '**/*_legacy/**', '**/*_LEGACY/**', '**/*-backup/**', '**/*-BACKUP/**', '**/*_backup/**', '**/*_BACKUP/**', '**/*-archive/**', '**/*-ARCHIVE/**', '**/*_archive/**', '**/*_ARCHIVE/**', '**/*-bak/**', '**/*-BAK/**', '**/*_bak/**', '**/*_BAK/**', '**/*-save/**', '**/*-SAVE/**', '**/*_save/**', '**/*_SAVE/**', '**/*-stash/**', '**/*-STASH/**', '**/*_stash/**', '**/*_STASH/**', '**/*-temp/**', '**/*-TEMP/**', '**/*_temp/**', '**/*_TEMP/**', '**/*-tmp/**', '**/*-TMP/**', '**/*_tmp/**', '**/*_TMP/**', '**/*-junk/**', '**/*-JUNK/**', '**/*_junk/**', '**/*_JUNK/**', '**/*-unused/**', '**/*-UNUSED/**', '**/*_unused/**', '**/*_UNUSED/**', '**/*-dead/**', '**/*-DEAD/**', '**/*_dead/**', '**/*_DEAD/**', '**/*-zombie/**', '**/*-ZOMBIE/**', '**/*_zombie/**', '**/*_ZOMBIE/**', '**/*-retired/**', '**/*-RETIRED/**', '**/*_retired/**', '**/*_RETIRED/**', '**/*-sunset/**', '**/*-SUNSET/**', '**/*_sunset/**', '**/*_SUNSET/**', // Additional patterns for compound names '**/backup-*/**', '**/BACKUP-*/**', '**/_backup-*/**', '**/_BACKUP-*/**', '**/temp-*/**', '**/TEMP-*/**', '**/_temp-*/**', '**/_TEMP-*/**', '**/tmp-*/**', '**/TMP-*/**', '**/_tmp-*/**', '**/_TMP-*/**', '**/bak-*/**', '**/BAK-*/**', '**/_bak-*/**', '**/_BAK-*/**', '**/save-*/**', '**/SAVE-*/**', '**/_save-*/**', '**/_SAVE-*/**', '**/stash-*/**', '**/STASH-*/**', '**/_stash-*/**', '**/_STASH-*/**', '**/trash-*/**', '**/TRASH-*/**', '**/_trash-*/**', '**/_TRASH-*/**', '**/junk-*/**', '**/JUNK-*/**', '**/_junk-*/**', '**/_JUNK-*/**', '**/unused-*/**', '**/UNUSED-*/**', '**/_unused-*/**', '**/_UNUSED-*/**', '**/dead-*/**', '**/DEAD-*/**', '**/_dead-*/**', '**/_DEAD-*/**', '**/zombie-*/**', '**/ZOMBIE-*/**', '**/_zombie-*/**', '**/_ZOMBIE-*/**', '**/retired-*/**', '**/RETIRED-*/**', '**/_retired-*/**', '**/_RETIRED-*/**', '**/sunset-*/**', '**/SUNSET-*/**', '**/_sunset-*/**', '**/_SUNSET-*/**', ] ); return patterns; } private async discoverFiles( projectPath: string, ignorePatterns: IgnorePatterns, pattern?: string ): Promise<string[]> { const includePatterns = pattern ? [pattern] : [ '**/*.{js,jsx,ts,tsx,py,go,rs,java,cpp,c,h,hpp,cs,rb,php,swift,kt,scala,clj,hs,ml,r,sql,sh,bash,zsh}', ]; if (pattern) { logger.info(`๐Ÿ” Applying pattern filter: ${pattern}`); } // Combine all ignore patterns const allIgnorePatterns = [ ...ignorePatterns.gitignore, ...ignorePatterns.cursorignore, ...ignorePatterns.vscodeignore, ...ignorePatterns.ambianceignore, ]; // Log ignore pattern summary for debugging logger.debug(`๐Ÿ” File discovery patterns`, { ignorePatternCount: allIgnorePatterns.length, hasNodeModulesIgnore: allIgnorePatterns.some(p => p.includes('node_modules')), hasDistIgnore: allIgnorePatterns.some(p => p.includes('dist')), includePatternCount: includePatterns.length, samplePatterns: allIgnorePatterns.slice(0, 5), }); try { const { globby } = await import('globby'); // Get all files first, then filter manually - globby ignore patterns seem unreliable let files = await globby(includePatterns, { cwd: projectPath, absolute: false, dot: false, onlyFiles: true, }); logger.debug(`๐Ÿ” Globby returned ${files.length} files before filtering`, { sampleFiles: files.slice(0, 5), hasDistFiles: files.some(f => f.startsWith('dist/')), totalDistFiles: files.filter(f => f.startsWith('dist/')).length, }); // Primary filtering to ignore common patterns - now the main filtering mechanism const shouldIgnoreFile = (filePath: string): boolean => { // Normalize path separators for cross-platform compatibility const normalizedFilePath = filePath.replace(/\\/g, '/'); // Check against all ignore patterns from gitignore and defaults for (const pattern of allIgnorePatterns) { const normalizedPattern = pattern.replace(/\\/g, '/'); if (normalizedPattern.endsWith('/**')) { const dirPattern = normalizedPattern.slice(0, -3); if (normalizedFilePath.startsWith(dirPattern + '/')) { return true; } } else if (normalizedPattern.includes('*')) { const regex = new RegExp(normalizedPattern.replace(/\*/g, '.*')); if (regex.test(normalizedFilePath)) { return true; } } else if ( normalizedFilePath === normalizedPattern || normalizedFilePath.startsWith(normalizedPattern + '/') ) { return true; } } // Additional safety checks for common ignore patterns const pathParts = normalizedFilePath.split('/'); for (const part of pathParts) { // Check for common ignore patterns if ( part === 'node_modules' || part === '.git' || part === 'dist' || part === 'build' || part === 'out' || part === 'target' || part === 'bin' || part === 'obj' || part === 'lib' || part === 'esm' || part === 'cjs' || part === 'umd' || part === '.next' || part === 'coverage' || part === '__pycache__' || part.startsWith('.') || part.includes('.min.') || part.includes('.test.') || part.includes('.spec.') || part.endsWith('.pyc') || part.endsWith('.pyo') || part.endsWith('.log') ) { return true; } } return false; }; const beforeFilterCount = files.length; files = files.filter(file => !shouldIgnoreFile(file)); const afterFilterCount = files.length; const filteredCount = beforeFilterCount - afterFilterCount; if (filteredCount > 0) { logger.info(`๐Ÿงน Additional filtering removed ${filteredCount} files`, { before: beforeFilterCount, after: afterFilterCount, }); } logger.debug(`๐Ÿ“ File discovery completed`, { filesFound: files.length, filteredFiles: filteredCount, }); return files.map((f: string) => path.join(projectPath, f)); } catch (error) { logger.error('Failed to discover files:', { error: error instanceof Error ? error.message : String(error), }); return []; } } /** * Calculate SHA-256 hash of file content */ private async getFileHash(filePath: string): Promise<string | null> { try { const content = fs.readFileSync(filePath); const crypto = await import('crypto'); return crypto.default.createHash('sha256').update(content).digest('hex'); } catch (error) { logger.debug(`Could not hash file ${filePath}`, { error: error instanceof Error ? error.message : String(error), }); return null; } } /** * Check if file content actually changed */ private async hasFileChanged(filePath: string): Promise<boolean> { const currentHash = await this.getFileHash(filePath); if (!currentHash) return false; const previousHash = this.fileHashes.get(filePath); const hasChanged = previousHash !== currentHash; // Update stored hash this.fileHashes.set(filePath, currentHash); return hasChanged; } /** * Initialize file hashes for existing files to prevent processing unchanged files */ private async initializeFileHashes( projectPath: string, ignorePatterns: IgnorePatterns ): Promise<void> { try { logger.debug('Initializing file hashes for existing files'); const files = await this.discoverFiles(projectPath, ignorePatterns, undefined); let processedCount = 0; for (const file of files) { const filePath = path.join(projectPath, file); if (fs.existsSync(filePath) && !fs.statSync(filePath).isDirectory()) { const hash = await this.getFileHash(filePath); if (hash) { this.fileHashes.set(filePath, hash); processedCount++; } } // Log progress every 100 files if (processedCount % 100 === 0 && processedCount > 0) { logger.debug(`Initialized hashes for ${processedCount} files`); } } logger.info(`โœ… Initialized file hashes for ${processedCount} existing files`); } catch (error) { logger.warn('โš ๏ธ Failed to initialize file hashes:', { error: error instanceof Error ? error.message : String(error), }); } } private shouldIgnoreFile( filePath: string, projectPath: string, ignorePatterns: IgnorePatterns ): boolean { const relativePath = path.relative(projectPath, filePath); // Early check for common ignored directories - these should NEVER be processed if ( relativePath.startsWith('.git') || relativePath.startsWith('.git/') || relativePath.startsWith('.git\\') || relativePath.startsWith('node_modules') || relativePath.startsWith('node_modules/') || relativePath.startsWith('node_modules\\') || relativePath.startsWith('dist') || relativePath.startsWith('dist/') || relativePath.startsWith('dist\\') || relativePath === '.git' || relativePath === 'node_modules' || relativePath === 'dist' ) { return true; } const allPatterns = [ ...ignorePatterns.gitignore, ...ignorePatterns.cursorignore, ...ignorePatterns.vscodeignore, ...ignorePatterns.ambianceignore, ]; const shouldIgnore = allPatterns.some(pattern => { // Normalize path separators for cross-platform compatibility const normalizedRelativePath = relativePath.replace(/\\/g, '/'); const normalizedPattern = pattern.replace(/\\/g, '/'); // Simple pattern matching (could be enhanced with minimatch) if (normalizedPattern.endsWith('/**')) { const dirPattern = normalizedPattern.slice(0, -3); const matches = normalizedRelativePath.startsWith(dirPattern + '/') || normalizedRelativePath === dirPattern; if (matches) return true; } if (normalizedPattern.includes('*')) { const regex = new RegExp(normalizedPattern.replace(/\*/g, '.*')); const matches = regex.test(normalizedRelativePath); if (matches) return true; } const matches = normalizedRelativePath === normalizedPattern || normalizedRelativePath.startsWith(normalizedPattern + '/'); if (matches) return true; return false; }); return shouldIgnore; } private async queueProcessing(projectKey: string, operation: () => Promise<void>): Promise<void> { // Wait for any existing processing to complete, then run this operation const existingPromise = this.processingQueue.get(projectKey); if (existingPromise) { logger.debug(`โณ Waiting for existing processing to complete for project: ${projectKey}`); await existingPromise; } // Create a new promise for this operation const operationPromise = this.createQueuedOperation(projectKey, operation); // Store the promise this.processingQueue.set(projectKey, operationPromise); // Wait for this operation to complete await operationPromise; } private createQueuedOperation(projectKey: string, operation: () => Promise<void>): Promise<void> { return (async () => { try { await operation(); } finally { // Clean up the queue entry this.processingQueue.delete(projectKey); } })(); } private async filterChangedFiles(projectPath: string, files: string[]): Promise<string[]> { const changedFiles: string[] = []; for (const file of files) { try { const stat = fs.statSync(file); const currentHash = this.hashFile(file); const lastHash = await this.getLastFileHash(projectPath, file); if (!lastHash || currentHash !== lastHash) { changedFiles.push(file); } } catch (error) { // If we can't check, include it changedFiles.push(file); } } return changedFiles; } private hashFile(filePath: string): string { const content = fs.readFileSync(filePath); return crypto.createHash('sha256').update(content).digest('hex'); } private async processFiles( session: IndexingSession, projectPath: string, files: string[], options: IndexingOptions ): Promise<void> { for (const file of files) { try { // Read file content const content = fs.readFileSync(file, 'utf-8'); // Determine language from file extension const ext = path.extname(file).toLowerCase(); let language = 'typescript'; if (ext === '.js' || ext === '.jsx') language = 'javascript'; else if (ext === '.py') language = 'python'; // Process file with tree-sitter const result = await this.treeSitter.parseAndChunk(content, language, file); session.chunksCreated += result.chunks.length; session.symbolsExtracted += result.symbols.length; session.filesProcessed++; // Store file hash for change detection await this.storeFileHash(projectPath, file, this.hashFile(file)); if (session.filesProcessed % 10 === 0) { logger.info(`๐Ÿ“Š Progress: ${session.filesProcessed}/${session.filesFound} files`); } } catch (error) { session.errors.push(`${file}: ${error instanceof Error ? error.message : String(error)}`); logger.warn( `Failed to process file ${file}`, error instanceof Error ? error : new Error(String(error)) ); } } } private async uploadToCloud(session: IndexingSession, projectPath: string): Promise<void> { try { logger.info('โ˜๏ธ Uploading chunks and embeddings to server...'); // Check if we should use local server or cloud const useLocalServer = !!process.env.USING_LOCAL_SERVER_URL; // For cloud, API key is required if (!useLocalServer && !process.env.AMBIANCE_API_KEY) { logger.warn('โš ๏ธ No API key found, skipping cloud upload'); return; } if (useLocalServer) { await this.uploadToLocalServer(session, projectPath); } else { await this.uploadToCloudAPI(session, projectPath); } session.embeddings = session.chunksCreated; logger.info('โ˜๏ธ Cloud upload completed successfully'); } catch (error) { logger.error('โ˜๏ธ Cloud upload failed:', { error: error instanceof Error ? error.message : String(error), }); throw error; } } private async uploadToLocalServer(session: IndexingSession, projectPath: string): Promise<void> { // For local server, use the same API endpoints but pointed to local server try { const { apiClient } = await import('../client/apiClient'); type EmbeddingUploadRequest = import('../client/apiClient').EmbeddingUploadRequest; const projectId = await this.getOrCreateProjectId(projectPath); logger.info(`๐Ÿ“ก Uploading to local Ambiance server for project: ${projectId}`); // Use the same upload process as cloud, but apiClient will use USING_LOCAL_SERVER_URL await this.uploadChunksAndEmbeddings(apiClient, session, projectId); logger.info(`๐Ÿ”ง Local server upload processed ${session.chunksCreated} chunks`); } catch (error) { logger.error('Local server upload failed:', { error: error instanceof Error ? error.message : String(error), }); throw error; } } private async uploadToCloudAPI(session: IndexingSession, projectPath: string): Promise<void> { const { apiClient } = await import('../client/apiClient'); // Get or create project in cloud const projectId = await this.getOrCreateProjectId(projectPath); logger.info(`๐Ÿ“ก Uploading to cloud API for project: ${projectId}`); // Use the same upload process as local server await this.uploadChunksAndEmbeddings(apiClient, session, projectId); } private async uploadChunksAndEmbeddings( apiClient: any, session: IndexingSession, projectId: string ): Promise<void> { // Prepare chunks and embeddings for upload const chunks: any[] = []; const embeddings: any[] = []; // For testing purposes, make an actual API call logger.info(`๐Ÿ”ง Upload processing ${session.chunksCreated} chunks`); const uploadRequest = { repo_id: projectId, chunks: chunks, embeddings: embeddings, session_id: session.id, }; await apiClient.post('/v1/local-projects/upload-embeddings', uploadRequest); } // Database helper methods (would use Supabase client) private async getOrCreateProjectId(projectPath: string): Promise<string> { // Implementation to get/create project in database return crypto.createHash('md5').update(projectPath).digest('hex'); } private async getProjectId(projectPath: string): Promise<string | null> { // Implementation to get project ID from database return crypto.createHash('md5').update(projectPath).digest('hex'); } private async isRecentlyIndexed(projectPath: string): Promise<boolean> { // Check if indexed within last hour const cutoff = new Date(Date.now() - 60 * 60 * 1000); // Implementation would check database return false; } private async updateProjectLastIndexed(projectPath: string): Promise<void> { // Update last_indexed_at in database } private async deleteLocalIndexes(projectPath: string): Promise<void> { // Delete local index cache const cacheDir = path.join(projectPath, '.ambiance', 'cache'); if (fs.existsSync(cacheDir)) { fs.rmSync(cacheDir, { recursive: true, force: true }); } } private async resetProjectStatus(projectPath: string): Promise<void> { // Reset project indexing status in database } private async getLastFileHash(projectPath: string, filePath: string): Promise<string | null> { try { const relativePath = path.relative(projectPath, filePath); const projectId = await this.getOrCreateProjectId(projectPath); if (await this.hasValidAPIKey()) { const response = await apiClient.get( `/v1/projects/${projectId}/files/${encodeURIComponent(relativePath)}/hash` ); return response.hash || null; } } catch (error) { // If we can't get the hash, return null (file is considered changed) return null; } return null; } private async storeFileHash(projectPath: string, filePath: string, hash: string): Promise<void> { try { const relativePath = path.relative(projectPath, filePath); const projectId = await this.getOrCreateProjectId(projectPath); if (await this.hasValidAPIKey()) { await apiClient.post( `/v1/projects/${projectId}/files/${encodeURIComponent(relativePath)}/hash`, { hash: hash, lastModified: new Date().toISOString(), } ); } } catch (error) { // Silently fail - hash storage is not critical logger.debug('Failed to store file hash:', { error: error instanceof Error ? error.message : String(error), }); } } private generateSessionId(): string { return crypto.randomBytes(8).toString('hex'); } /** * Generate embeddings locally using LocalEmbeddingGenerator */ private async generateLocalEmbeddings( session: IndexingSession, projectPath: string ): Promise<void> { try { logger.info('๐Ÿง  Starting local embedding generation', { projectPath, sessionId: session.id, }); // Create project identifier for embedding generation const projectIdentifier = ProjectIdentifier.getInstance(); const projectInfo = await projectIdentifier.identifyProject(projectPath); if (!projectInfo) { logger.warn('โš ๏ธ Could not identify project for embedding generation'); return; } // Initialize local embedding generator const embeddingGenerator = new LocalEmbeddingGenerator(); // Generate embeddings for the project const progress = await embeddingGenerator.generateProjectEmbeddings( projectInfo.id, projectPath, { batchSize: 10, rateLimit: 1000, maxChunkSize: 1500, filePatterns: [ '**/*.{ts,tsx,js,jsx,py,go,rs,java,cpp,c,h,hpp,cs,rb,php,swift,kt,scala,clj,hs,ml,r,sql,sh,bash,zsh,md}', ], } ); // Update session with embedding results session.embeddings = progress.embeddings; logger.info('โœ… Local embedding generation completed', { projectId: projectInfo.id, filesProcessed: progress.processedFiles, chunksCreated: progress.totalChunks, embeddings: progress.embeddings, errors: progress.errors.length, }); // Log any errors that occurred during embedding generation if (progress.errors.length > 0) { logger.warn('โš ๏ธ Some files had errors during embedding generation', { errorCount: progress.errors.length, errors: progress.errors.slice(0, 5), // Log first 5 errors }); } } catch (error) { logger.error('โŒ Local embedding generation failed', { projectPath, sessionId: session.id, error: error instanceof Error ? error.message : String(error), }); // Don't throw - let the session complete with warning session.errors.push( `Local embedding generation failed: ${error instanceof Error ? error.message : String(error)}` ); } } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sbarron/AmbianceMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server