Skip to main content
Glama
extract.ts (8.04 kB)
import { resolve } from 'node:path';
import type { Config } from '@doc-agent/core';
import { extractDocument, type StreamChunk } from '@doc-agent/extract';
import { storage } from '@doc-agent/storage';
import kero from '@lytics/kero';
import chalk from 'chalk';
import ora from 'ora';
import prompts from 'prompts';
import {
  checkModelExists,
  checkOllamaInstalled,
  checkOllamaRunning,
  installOllama,
  type PullProgress,
  pullModel,
  startOllama,
  waitForOllama,
} from '../services/ollama';

/** Log levels this module passes to the kero logger. */
type LogLevel = 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal';

const LOG_LEVELS: readonly LogLevel[] = ['trace', 'debug', 'info', 'warn', 'error', 'fatal'];

/**
 * Value handed to `waitForOllama` after the server is started.
 * NOTE(review): presumably a timeout in milliseconds — confirm against `waitForOllama`.
 */
const OLLAMA_STARTUP_WAIT = 15000;

/**
 * Turn a raw `LOG_LEVEL` value into a known log level.
 *
 * @param raw - Raw environment value; may be unset or empty.
 * @returns The matching level (case-insensitive, surrounding whitespace ignored),
 *   or `'info'` when the value is missing or not one of the known levels.
 */
function resolveLogLevel(raw: string | undefined): LogLevel {
  const candidate = raw?.trim().toLowerCase();
  return LOG_LEVELS.find((level) => level === candidate) ?? 'info';
}

const logger = kero.createLogger({
  level: resolveLogLevel(process.env.LOG_LEVEL),
});

/** Options accepted by {@link runExtract}. */
export interface ExtractOptions {
  /** AI provider used for the extraction. */
  provider: 'gemini' | 'openai' | 'ollama';
  /** Model name; forwarded to the config for the `gemini` and `ollama` providers. */
  model: string;
  /** When true, the extracted document is printed but not saved to storage. */
  dryRun: boolean;
}

/**
 * Format bytes to human readable string
 *
 * @param bytes - Size in bytes.
 * @returns The size in B, or in KB / MB / GB (1024-based) with one decimal place.
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  if (bytes < 1024 * 1024 * 1024) return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
  return `${(bytes / 1024 / 1024 / 1024).toFixed(1)} GB`;
}

/**
 * Ensure Ollama is ready (installed, running, model pulled)
 *
 * Walks through three checks in order, prompting the user before installing
 * Ollama or starting its server, and pulling the model when it is missing.
 *
 * @param model - Name of the Ollama model that must be available.
 * @returns `true` when all three checks pass; `false` when the user declines a
 *   prompt or when installing, starting, or pulling fails.
 */
async function ensureOllamaReady(model: string): Promise<boolean> {
  const spinner = ora();

  // Check if Ollama is installed
  spinner.start('Checking Ollama installation...');
  const isInstalled = await checkOllamaInstalled();

  if (!isInstalled) {
    // Stop the spinner so it does not draw over the interactive prompt.
    spinner.stop();
    logger.info('Ollama not installed');

    const { install } = await prompts({
      type: 'confirm',
      name: 'install',
      message: 'Ollama is not installed. Install via Homebrew?',
      initial: true,
    });

    if (!install) {
      console.log(chalk.yellow('Please install Ollama manually: https://ollama.com/download'));
      return false;
    }

    spinner.start('Installing Ollama via Homebrew...');
    try {
      await installOllama((msg) => {
        spinner.text = msg;
      });
      spinner.succeed('Ollama installed');
      logger.info('Ollama installed successfully');
    } catch (error) {
      spinner.fail('Failed to install Ollama');
      logger.error({ error: String(error) }, 'Ollama installation failed');
      console.error(chalk.red(String(error)));
      return false;
    }
  } else {
    spinner.succeed('Ollama installed');
  }

  // Check if Ollama is running
  spinner.start('Checking Ollama server...');
  let isRunning = await checkOllamaRunning();

  if (!isRunning) {
    spinner.stop();
    logger.info('Ollama server not running');

    const { start } = await prompts({
      type: 'confirm',
      name: 'start',
      message: 'Ollama server is not running. Start it?',
      initial: true,
    });

    if (!start) {
      console.log(chalk.yellow('Please start Ollama: ollama serve'));
      return false;
    }

    spinner.start('Starting Ollama server...');
    startOllama();

    isRunning = await waitForOllama(OLLAMA_STARTUP_WAIT);
    if (!isRunning) {
      spinner.fail('Failed to start Ollama server');
      logger.error('Ollama server failed to start');
      return false;
    }

    spinner.succeed('Ollama server started');
    logger.info('Ollama server started');
  } else {
    spinner.succeed('Ollama server running');
  }

  // Check if model exists
  spinner.start(`Checking model: ${model}...`);
  const modelExists = await checkModelExists(model);

  if (!modelExists) {
    spinner.text = `Pulling model: ${model}...`;
    logger.info({ model }, 'Pulling model');

    try {
      await pullModel(model, (progress: PullProgress) => {
        if (progress.total && progress.completed) {
          // Byte counts are known: show a percentage plus human-readable sizes.
          const pct = Math.round((progress.completed / progress.total) * 100);
          spinner.text = `Pulling ${model}: ${pct}% (${formatBytes(progress.completed)}/${formatBytes(progress.total)})`;
        } else if (progress.status) {
          // No usable byte counts: fall back to the reported status text.
          spinner.text = `${model}: ${progress.status}`;
        }
      });
      spinner.succeed(`Model ready: ${model}`);
      logger.info({ model }, 'Model pulled successfully');
    } catch (error) {
      spinner.fail(`Failed to pull model: ${model}`);
      logger.error({ model, error: String(error) }, 'Model pull failed');
      return false;
    }
  } else {
    spinner.succeed(`Model ready: ${model}`);
  }

  return true;
}

/**
 * Run document extraction
 *
 * Resolves `file` to an absolute path, prepares Ollama when that provider is
 * selected, extracts the document while mirroring streamed progress to the
 * spinner and logger, saves the result unless `dryRun` is set, and prints the
 * result as JSON.
 *
 * Failures are reported on the console and through the logger and set
 * `process.exitCode` to 1; this function does not rethrow them.
 *
 * @param file - Path of the document to extract.
 * @param options - Provider, model, and dry-run settings.
 */
export async function runExtract(file: string, options: ExtractOptions): Promise<void> {
  const { provider, model, dryRun } = options;
  const absolutePath = resolve(file);

  logger.info({ file: absolutePath, provider, model, dryRun }, 'Starting extraction');

  // For Ollama, ensure everything is ready
  if (provider === 'ollama') {
    const ready = await ensureOllamaReady(model);
    if (!ready) {
      process.exitCode = 1;
      return;
    }
  }

  // Build config
  const config: Config = {
    aiProvider: provider,
    geminiApiKey: process.env.GEMINI_API_KEY,
    geminiModel: provider === 'gemini' ? model : undefined,
    openaiApiKey: process.env.OPENAI_API_KEY,
    ollamaModel: provider === 'ollama' ? model : undefined,
  };

  // Run extraction
  const spinner = ora('Extracting document data...').start();
  let lastPrompt = '';
  // Only the running length is displayed, so keep a counter instead of the full text.
  let receivedChars = 0;

  try {
    const result = await extractDocument(absolutePath, config, {
      onStream: (chunk: StreamChunk) => {
        if (!chunk) return;

        if (chunk.type === 'log') {
          // Log via kero - use simple string logging to avoid issues
          const msg = `${chunk.message}${chunk.data ? ` ${JSON.stringify(chunk.data)}` : ''}`;
          switch (chunk.level) {
            case 'error':
              logger.error(msg);
              break;
            case 'warn':
              logger.warn(msg);
              break;
            case 'debug':
              logger.debug(msg);
              break;
            default:
              logger.info(msg);
          }

          // Update spinner for info logs
          if (chunk.level === 'info') {
            spinner.text = chunk.message;
          }
        } else if (chunk.type === 'prompt') {
          lastPrompt = chunk.content;

          // Show OCR progress in spinner
          if (chunk.content.includes('OCR')) {
            // split() always yields at least one element; the fallback only satisfies
            // strict index checking.
            spinner.text = chunk.content.split('\n')[0] ?? chunk.content;
          }

          // Log full prompt at debug level (only for system/user prompts, not OCR progress)
          if (chunk.content.includes('System:') || chunk.content.includes('User:')) {
            logger.debug(`Prompt to model:\n${chunk.content}`);
          }
        } else if (chunk.type === 'response') {
          receivedChars += chunk.content.length;
          // Show that we're receiving response
          spinner.text = `Receiving response... (${receivedChars} chars)`;
        }
      },
    });

    spinner.succeed('Extraction complete');
    logger.info(
      { type: result.type, itemCount: result.items?.length ?? 0 },
      'Extraction successful'
    );

    // Save to database (unless dry run)
    if (!dryRun) {
      const saveSpinner = ora('Saving to database...').start();
      try {
        await storage.saveDocument(result, absolutePath);
        saveSpinner.succeed(`Saved: ${result.filename} (ID: ${result.id})`);
        logger.info({ id: result.id, filename: result.filename }, 'Document saved');
      } catch (error) {
        // Report the save failure here instead of rethrowing: the outer handler would
        // label it "Extraction failed" and dump the last prompt, although extraction
        // itself succeeded.
        saveSpinner.fail('Failed to save');
        logger.error({ error: String(error) }, 'Save failed');
        console.error(chalk.red(String(error)));
        process.exitCode = 1;
        return;
      }
    } else {
      console.log(chalk.gray('(dry run - not saved to database)'));
    }

    // Print result
    console.log(chalk.gray('─'.repeat(40)));
    console.log(JSON.stringify(result, null, 2));
  } catch (error) {
    spinner.fail('Extraction failed');
    logger.error({ error: String(error) }, 'Extraction failed');

    // Show the prompt for debugging if available
    if (lastPrompt) {
      console.log(chalk.gray('\n─── Last Prompt ───'));
      console.log(chalk.gray(lastPrompt.slice(-500))); // Last 500 chars
    }

    console.error(chalk.red(String(error)));
    process.exitCode = 1;
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/prosdevlab/doc-agent'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.