Skip to main content
Glama
summarization.ts (14.1 kB)
// =============================================================================
// kivv - Two-Stage AI Summarization Client
// =============================================================================
// Stage 1: Claude Haiku for relevance triage (0.0-1.0 score)
// Stage 2: Claude Sonnet for detailed summaries (only if score >= threshold)
// Cost optimization: ~96% savings on irrelevant papers
// Rate limiting: 5 req/s with jitter for Anthropic API
// Budget tracking: Circuit breaker at $1/day
// =============================================================================

import { hashContent } from './utils';
import {
  CLAUDE_HAIKU_MODEL,
  CLAUDE_SONNET_MODEL,
  MAX_SUMMARY_OUTPUT_TOKENS,
  MAX_TRIAGE_OUTPUT_TOKENS,
  DEFAULT_RELEVANCE_THRESHOLD,
  ANTHROPIC_RATE_LIMIT_MS,
  ANTHROPIC_JITTER_MIN_MS,
  ANTHROPIC_JITTER_MAX_MS,
  DAILY_BUDGET_CAP_USD,
  ANTHROPIC_API_BASE_URL,
} from './constants';

// =============================================================================
// Types & Interfaces
// =============================================================================

/** Why a paper ended up without a summary. */
type SkippedReason = 'irrelevant' | 'budget_exceeded' | 'error';

/**
 * Outcome of one run of the two-stage summarization pipeline.
 */
export interface SummarizationResult {
  /** Generated summary; null when the paper was irrelevant, skipped, or failed */
  summary: string | null;

  /** Relevance score from the Haiku triage stage (0.0-1.0) */
  relevance_score: number;

  /** Hash of title + abstract, used for deduplication */
  content_hash: string;

  /** USD spent on the Haiku triage call */
  haiku_cost: number;

  /** USD spent on the Sonnet summary call */
  sonnet_cost: number;

  /** Sum of haiku_cost and sonnet_cost, in USD */
  total_cost: number;

  /** Present only when no summary was produced; says why */
  skipped_reason?: SkippedReason;
}

/** One entry of the `content` array in an Anthropic Messages API response. */
interface AnthropicContentBlock {
  type: string;
  text: string;
}

/** Token accounting reported by the Anthropic Messages API. */
interface AnthropicUsage {
  input_tokens: number;
  output_tokens: number;
}

/**
 * Subset of the Anthropic Messages API response body that this module reads.
 */
interface AnthropicResponse {
  id: string;
  type: string;
  role: string;
  content: AnthropicContentBlock[];
  model: string;
  usage: AnthropicUsage;
}
// =============================================================================
// Summarization Client
// =============================================================================

/**
 * Two-stage AI summarization client using Claude Haiku + Sonnet
 *
 * Stage 1: Haiku triage for relevance scoring (~$0.00025/paper)
 * Stage 2: Sonnet summary for relevant papers (~$0.006/paper)
 *
 * Features:
 * - Rate limiting: minimum spacing between API calls, plus random jitter
 * - Budget tracking: circuit breaker once spend reaches DAILY_BUDGET_CAP_USD
 * - Content hashing: detect duplicate papers
 * - Error handling: failures are caught and reported through
 *   `skipped_reason: 'error'`; there is NO automatic retry
 *
 * NOTE(review): rate-limit state is a single timestamp that is read before
 * sleeping and written afterwards, so overlapping `summarize()` calls on one
 * instance can be released together. Call it sequentially.
 *
 * @example
 * const client = new SummarizationClient(env.CLAUDE_API_KEY);
 * const result = await client.summarize(
 *   "Attention Is All You Need",
 *   "We propose a new architecture...",
 *   ["transformers", "machine learning"]
 * );
 * console.log(result.summary);          // 3-sentence summary
 * console.log(result.relevance_score);  // 0.95
 * console.log(result.total_cost);       // 0.00625
 */
export class SummarizationClient {
  /**
   * USD price per single token, derived from per-1M-token list prices:
   * - Haiku:  $0.25 input / $1.25 output per 1M tokens
   * - Sonnet: $3.00 input / $15.00 output per 1M tokens
   */
  private static readonly USD_PER_TOKEN = {
    haiku: { input: 0.25 / 1_000_000, output: 1.25 / 1_000_000 },
    sonnet: { input: 3.0 / 1_000_000, output: 15.0 / 1_000_000 },
  } as const;

  private readonly apiKey: string;
  /** Epoch ms at which the most recent API call was released (0 = never). */
  private lastRequestTime = 0;
  /** Running USD spend since construction or the last `resetCost()`. */
  private totalCost = 0;

  /**
   * Create a new summarization client
   *
   * @param apiKey - Anthropic API key (from env.CLAUDE_API_KEY)
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  // ===========================================================================
  // Rate Limiting
  // ===========================================================================

  /**
   * Wait until at least ANTHROPIC_RATE_LIMIT_MS plus a random jitter in
   * [ANTHROPIC_JITTER_MIN_MS, ANTHROPIC_JITTER_MAX_MS) has elapsed since the
   * previous request, then record the current time as the new reference point.
   *
   * The jitter avoids synchronized request patterns.
   */
  private async enforceRateLimit(): Promise<void> {
    const elapsedMs = Date.now() - this.lastRequestTime;
    const jitterMs =
      Math.random() * (ANTHROPIC_JITTER_MAX_MS - ANTHROPIC_JITTER_MIN_MS) +
      ANTHROPIC_JITTER_MIN_MS;
    const requiredDelayMs = ANTHROPIC_RATE_LIMIT_MS + jitterMs;

    if (elapsedMs < requiredDelayMs) {
      const sleepMs = requiredDelayMs - elapsedMs;
      await new Promise<void>((resolve) => setTimeout(resolve, sleepMs));
    }

    this.lastRequestTime = Date.now();
  }

  // ===========================================================================
  // Response Helpers
  // ===========================================================================

  /**
   * Return the trimmed text of the first content block of an API response.
   *
   * The response body is cast, not validated, so the shape is checked here
   * instead of letting a missing block surface as an opaque TypeError.
   *
   * @param response - Parsed Anthropic Messages API response
   * @returns Trimmed text of `content[0]`
   * @throws Error when the response has no first block with string text
   */
  private extractText(response: AnthropicResponse): string {
    const firstBlock = Array.isArray(response.content)
      ? response.content[0]
      : undefined;

    if (firstBlock === undefined || typeof firstBlock.text !== 'string') {
      throw new Error('Anthropic API response contained no text content');
    }

    return firstBlock.text.trim();
  }

  // ===========================================================================
  // Stage 1: Haiku Triage
  // ===========================================================================

  /**
   * Stage 1: Use Claude Haiku to quickly assess paper relevance
   *
   * Sends a scoring prompt to CLAUDE_HAIKU_MODEL capped at
   * MAX_TRIAGE_OUTPUT_TOKENS and parses the reply with `parseFloat`.
   * A reply that is not a number in [0, 1] is logged and replaced by 0.5.
   *
   * @param title - Paper title
   * @param abstract - Paper abstract
   * @param userTopics - User's research topics
   * @returns Relevance score (0.0-1.0) and cost of the call in USD
   * @throws Error when the API call fails or returns no text content
   */
  private async triageRelevance(
    title: string,
    abstract: string,
    userTopics: string[]
  ): Promise<{ score: number; cost: number }> {
    await this.enforceRateLimit();

    const topicList = userTopics.join(', ');

    // Security-focused prompt for offensive security researcher
    const prompt = `You are evaluating research papers for an offensive security researcher and penetration tester.

USER INTERESTS: ${topicList}

SCORING CRITERIA (for offensive security relevance):
- 0.9-1.0: Novel attack/exploit technique, directly weaponizable, reveals new vulnerability class
- 0.7-0.9: Security-relevant technique, adversarial ML, practical offensive application
- 0.5-0.7: Indirectly applicable (ML/AI techniques usable for security, defensive paper with offensive insights)
- 0.3-0.5: Tangentially related (mentions security but not primary focus)
- 0.0-0.3: Irrelevant to security research

Consider:
1. Can techniques be weaponized or applied to offensive security?
2. Does it reveal new attack surfaces or vulnerability patterns?
3. Are there evasion/obfuscation techniques to learn from?
4. Could this improve red team operations or penetration testing?
5. Does it advance adversarial ML, malware analysis, or exploit development?

Paper Title: ${title}

Abstract: ${abstract}

Return ONLY a number between 0.0 and 1.0. No explanation.`;

    const response = await this.callClaude(
      CLAUDE_HAIKU_MODEL,
      prompt,
      MAX_TRIAGE_OUTPUT_TOKENS
    );
    const cost = this.calculateCost(response.usage, 'haiku');

    // Parse score from response
    const scoreText = this.extractText(response);
    const score = parseFloat(scoreText);

    if (isNaN(score) || score < 0 || score > 1) {
      console.warn(
        `Invalid relevance score: ${scoreText}, defaulting to 0.5`
      );
      return { score: 0.5, cost };
    }

    return { score, cost };
  }

  // ===========================================================================
  // Stage 2: Sonnet Summary
  // ===========================================================================

  /**
   * Stage 2: Use Claude Sonnet to generate detailed summary
   *
   * Asks CLAUDE_SONNET_MODEL for a 3-sentence summary (problem, approach,
   * results), capped at MAX_SUMMARY_OUTPUT_TOKENS output tokens.
   *
   * @param title - Paper title
   * @param abstract - Paper abstract
   * @returns Trimmed summary text and cost of the call in USD
   * @throws Error when the API call fails or returns no text content
   */
  private async generateSummary(
    title: string,
    abstract: string
  ): Promise<{ summary: string; cost: number }> {
    await this.enforceRateLimit();

    const prompt = `Summarize this research paper in exactly 3 sentences. Focus on:
1. The problem being addressed
2. The approach or method used
3. The key results or findings

Paper Title: ${title}

Abstract: ${abstract}

Provide ONLY the 3-sentence summary, nothing else.`;

    const response = await this.callClaude(
      CLAUDE_SONNET_MODEL,
      prompt,
      MAX_SUMMARY_OUTPUT_TOKENS
    );

    return {
      summary: this.extractText(response),
      cost: this.calculateCost(response.usage, 'sonnet'),
    };
  }

  // ===========================================================================
  // Two-Stage Pipeline
  // ===========================================================================

  /**
   * Execute two-stage summarization pipeline
   *
   * Flow:
   * 1. Generate content hash (for deduplication)
   * 2. Check budget circuit breaker (DAILY_BUDGET_CAP_USD)
   * 3. Stage 1: Haiku triage
   * 4. If score < threshold: skip Sonnet
   * 5. If score >= threshold: Stage 2 Sonnet summary
   *
   * API failures never reject: they are logged and returned as
   * `skipped_reason: 'error'` with `relevance_score: 0`. Any triage cost
   * already incurred before the failure is still reported in `haiku_cost` /
   * `total_cost`, so per-result costs always add up to `getTotalCost()`.
   *
   * @param title - Paper title
   * @param abstract - Paper abstract
   * @param userTopics - User's research topics
   * @param relevanceThreshold - Minimum score for Sonnet
   *   (default: DEFAULT_RELEVANCE_THRESHOLD)
   * @returns Summarization result with summary, score, costs
   */
  async summarize(
    title: string,
    abstract: string,
    userTopics: string[],
    relevanceThreshold = DEFAULT_RELEVANCE_THRESHOLD
  ): Promise<SummarizationResult> {
    const content_hash = await hashContent(title + abstract);

    // Check budget circuit breaker
    if (this.isBudgetExceeded()) {
      return {
        summary: null,
        relevance_score: 0,
        content_hash,
        haiku_cost: 0,
        sonnet_cost: 0,
        total_cost: 0,
        skipped_reason: 'budget_exceeded',
      };
    }

    // Declared outside the try so a Stage 2 failure can still report what
    // Stage 1 already spent.
    let haikuCost = 0;

    try {
      // Stage 1: Haiku triage
      const triage = await this.triageRelevance(title, abstract, userTopics);
      const score = triage.score;
      haikuCost = triage.cost;
      this.totalCost += haikuCost;

      // Check relevance threshold
      if (score < relevanceThreshold) {
        console.log(
          `Paper irrelevant (score: ${score.toFixed(2)}), skipping Sonnet`
        );
        return {
          summary: null,
          relevance_score: score,
          content_hash,
          haiku_cost: haikuCost,
          sonnet_cost: 0,
          total_cost: haikuCost,
          skipped_reason: 'irrelevant',
        };
      }

      // Stage 2: Sonnet summary (only for relevant papers)
      const { summary, cost: sonnetCost } = await this.generateSummary(
        title,
        abstract
      );
      this.totalCost += sonnetCost;

      console.log(
        `Paper relevant (score: ${score.toFixed(2)}), generated summary`
      );

      return {
        summary,
        relevance_score: score,
        content_hash,
        haiku_cost: haikuCost,
        sonnet_cost: sonnetCost,
        total_cost: haikuCost + sonnetCost,
      };
    } catch (error) {
      console.error('Summarization failed:', error);
      return {
        summary: null,
        relevance_score: 0,
        content_hash,
        haiku_cost: haikuCost,
        sonnet_cost: 0,
        total_cost: haikuCost,
        skipped_reason: 'error',
      };
    }
  }

  // ===========================================================================
  // Anthropic API Client
  // ===========================================================================

  /**
   * Call Anthropic Messages API
   *
   * Endpoint: POST `${ANTHROPIC_API_BASE_URL}/messages`
   * Headers: x-api-key, anthropic-version, content-type
   * Body: model, max_tokens, messages[] (a single user message)
   *
   * @param model - Model ID (haiku or sonnet)
   * @param prompt - User prompt
   * @param maxTokens - Maximum output tokens
   * @returns Parsed response body (cast, not schema-validated)
   * @throws Error on any non-2xx status; the message includes the status
   *   line and the response body text
   */
  private async callClaude(
    model: string,
    prompt: string,
    maxTokens: number
  ): Promise<AnthropicResponse> {
    const response = await fetch(`${ANTHROPIC_API_BASE_URL}/messages`, {
      method: 'POST',
      headers: {
        'x-api-key': this.apiKey,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      body: JSON.stringify({
        model,
        max_tokens: maxTokens,
        messages: [
          {
            role: 'user',
            content: prompt,
          },
        ],
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `Anthropic API error: ${response.status} ${response.statusText} - ${errorText}`
      );
    }

    return (await response.json()) as AnthropicResponse;
  }

  // ===========================================================================
  // Cost Calculation
  // ===========================================================================

  /**
   * Calculate cost based on token usage and model pricing
   *
   * Uses the hard-coded per-token rates in `USD_PER_TOKEN`.
   *
   * @param usage - Token usage from API response
   * @param model - Model type (haiku or sonnet)
   * @returns Total cost in USD
   */
  private calculateCost(
    usage: { input_tokens: number; output_tokens: number },
    model: 'haiku' | 'sonnet'
  ): number {
    const rates = SummarizationClient.USD_PER_TOKEN[model];
    const inputCost = usage.input_tokens * rates.input;
    const outputCost = usage.output_tokens * rates.output;
    return inputCost + outputCost;
  }

  // ===========================================================================
  // Budget Tracking
  // ===========================================================================

  /**
   * Get total cost accumulated since construction or the last `resetCost()`
   *
   * @returns Total cost in USD
   */
  getTotalCost(): number {
    return this.totalCost;
  }

  /**
   * Reset cost tracking (call at start of new day)
   */
  resetCost(): void {
    this.totalCost = 0;
  }

  /**
   * Check if budget is exceeded
   *
   * @returns True if total cost >= DAILY_BUDGET_CAP_USD
   */
  isBudgetExceeded(): boolean {
    return this.totalCost >= DAILY_BUDGET_CAP_USD;
  }

  /**
   * Get remaining budget
   *
   * @returns Remaining budget in USD, never negative
   */
  getRemainingBudget(): number {
    return Math.max(0, DAILY_BUDGET_CAP_USD - this.totalCost);
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jeffaf/kivv'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.