kivv

kivv
shared

summarization.ts•13.7 KiB

// =============================================================================
// kivv - Two-Stage AI Summarization Client
// =============================================================================
// Stage 1: Claude Haiku for relevance triage (0.0-1.0 score)
// Stage 2: Claude Sonnet for detailed summaries (only if score >= threshold)
// Cost optimization: ~96% savings on irrelevant papers
// Rate limiting: 5 req/s with jitter for Anthropic API
// Budget tracking: Circuit breaker at $1/day
// =============================================================================

import { hashContent } from './utils';
import {
  CLAUDE_HAIKU_MODEL,
  CLAUDE_SONNET_MODEL,
  MAX_SUMMARY_OUTPUT_TOKENS,
  MAX_TRIAGE_OUTPUT_TOKENS,
  DEFAULT_RELEVANCE_THRESHOLD,
  ANTHROPIC_RATE_LIMIT_MS,
  ANTHROPIC_JITTER_MIN_MS,
  ANTHROPIC_JITTER_MAX_MS,
  DAILY_BUDGET_CAP_USD,
  ANTHROPIC_API_BASE_URL,
} from './constants';

// =============================================================================
// Types & Interfaces
// =============================================================================

/**
 * Outcome of one pass through the two-stage summarization pipeline.
 */
export interface SummarizationResult {
  /** Generated summary text; null when the paper was skipped or the run failed. */
  summary: string | null;

  /** Relevance score assigned by the Haiku triage stage (0.0-1.0). */
  relevance_score: number;

  /**
   * Hash of `title + abstract` as returned by `hashContent`, used for
   * deduplication (presumably SHA-256 — confirm against ./utils).
   */
  content_hash: string;

  /** USD cost attributed to the Haiku triage call. */
  haiku_cost: number;

  /** USD cost attributed to the Sonnet summary call. */
  sonnet_cost: number;

  /** Sum of the Haiku and Sonnet costs, in USD. */
  total_cost: number;

  /** Why no summary was produced; absent when a summary was generated. */
  skipped_reason?: 'irrelevant' | 'budget_exceeded' | 'error';
}

/** One entry of the `content` array in an Anthropic Messages API response. */
interface AnthropicContentBlock {
  type: string;
  text: string;
}

/** Token accounting reported by the Anthropic Messages API. */
interface AnthropicUsage {
  input_tokens: number;
  output_tokens: number;
}

/**
 * Subset of the Anthropic Messages API response that this module reads.
 */
interface AnthropicResponse {
  id: string;
  type: string;
  role: string;
  content: AnthropicContentBlock[];
  model: string;
  usage: AnthropicUsage;
}
// =============================================================================
// Summarization Client
// =============================================================================

/**
 * Two-stage AI summarization client using Claude Haiku + Sonnet.
 *
 * Stage 1: Haiku triage scores a paper's relevance (0.0-1.0).
 * Stage 2: Sonnet writes a 3-sentence summary, only when the score meets the
 *          relevance threshold.
 *
 * Features:
 * - Rate limiting: waits at least ANTHROPIC_RATE_LIMIT_MS plus random jitter
 *   between consecutive API calls made through this instance.
 * - Budget tracking: once the accumulated cost reaches DAILY_BUDGET_CAP_USD,
 *   `summarize` short-circuits with `skipped_reason: 'budget_exceeded'`.
 *   The counter is in-memory and per instance; call `resetCost()` to clear it.
 * - Content hashing: every result carries `hashContent(title + abstract)`.
 * - Error handling: failures inside the pipeline are logged and reported as
 *   `skipped_reason: 'error'`. No retry is performed.
 *
 * @example
 * const client = new SummarizationClient(env.CLAUDE_API_KEY);
 * const result = await client.summarize(
 *   "Attention Is All You Need",
 *   "We propose a new architecture...",
 *   ["transformers", "machine learning"]
 * );
 * console.log(result.summary);         // 3-sentence summary
 * console.log(result.relevance_score); // e.g. 0.95
 * console.log(result.total_cost);      // e.g. 0.00625
 */
export class SummarizationClient {
  /** Model pricing in USD per one million tokens. */
  private static readonly PRICING_USD_PER_MTOK = {
    haiku: { input: 0.25, output: 1.25 },
    sonnet: { input: 3.0, output: 15.0 },
  } as const;

  private readonly apiKey: string;
  /** Epoch milliseconds at which the last API call was released. */
  private lastRequestTime = 0;
  /** USD spent through this instance since construction or `resetCost()`. */
  private totalCost = 0;

  /**
   * Create a new summarization client.
   *
   * @param apiKey - Anthropic API key (sent as the `x-api-key` header)
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  // ===========================================================================
  // Rate Limiting
  // ===========================================================================

  /**
   * Sleep, if needed, so that at least ANTHROPIC_RATE_LIMIT_MS plus a random
   * jitter in [ANTHROPIC_JITTER_MIN_MS, ANTHROPIC_JITTER_MAX_MS) has elapsed
   * since the previous call, then record the current time.
   *
   * NOTE(review): the bookkeeping is a single timestamp, so overlapping calls
   * on the same instance can be released together — confirm callers invoke
   * `summarize` sequentially.
   */
  private async enforceRateLimit(): Promise<void> {
    const elapsedMs = Date.now() - this.lastRequestTime;
    const jitterMs =
      Math.random() * (ANTHROPIC_JITTER_MAX_MS - ANTHROPIC_JITTER_MIN_MS) +
      ANTHROPIC_JITTER_MIN_MS;
    const requiredDelayMs = ANTHROPIC_RATE_LIMIT_MS + jitterMs;

    if (elapsedMs < requiredDelayMs) {
      const sleepMs = requiredDelayMs - elapsedMs;
      await new Promise<void>((resolve) => setTimeout(resolve, sleepMs));
    }

    this.lastRequestTime = Date.now();
  }

  // ===========================================================================
  // Stage 1: Haiku Triage
  // ===========================================================================

  /**
   * Stage 1: ask the Haiku model (CLAUDE_HAIKU_MODEL) to rate how relevant a
   * paper is to the user's topics.
   *
   * The reply is parsed with `parseFloat`. A reply that is not a number in
   * [0, 1] is logged and replaced with a neutral 0.5.
   *
   * @param title - Paper title
   * @param abstract - Paper abstract
   * @param userTopics - User's research topics (joined with ", " in the prompt)
   * @returns Relevance score (0.0-1.0) and the USD cost of the call
   * @throws Error if the API call fails or the response has no text content
   */
  private async triageRelevance(
    title: string,
    abstract: string,
    userTopics: string[]
  ): Promise<{ score: number; cost: number }> {
    await this.enforceRateLimit();

    const topicList = userTopics.join(', ');

    // Security-focused prompt for offensive security researcher
    const prompt = `You are evaluating research papers for an offensive security researcher and penetration tester.

USER INTERESTS: ${topicList}

SCORING CRITERIA (for offensive security relevance):
- 0.9-1.0: Novel attack/exploit technique, directly weaponizable, reveals new vulnerability class
- 0.7-0.9: Security-relevant technique, adversarial ML, practical offensive application
- 0.5-0.7: Indirectly applicable (ML/AI techniques usable for security, defensive paper with offensive insights)
- 0.3-0.5: Tangentially related (mentions security but not primary focus)
- 0.0-0.3: Irrelevant to security research

Consider:
1. Can techniques be weaponized or applied to offensive security?
2. Does it reveal new attack surfaces or vulnerability patterns?
3. Are there evasion/obfuscation techniques to learn from?
4. Could this improve red team operations or penetration testing?
5. Does it advance adversarial ML, malware analysis, or exploit development?

Paper Title: ${title}
Abstract: ${abstract}

Return ONLY a number between 0.0 and 1.0. No explanation.`;

    const response = await this.callClaude(
      CLAUDE_HAIKU_MODEL,
      prompt,
      MAX_TRIAGE_OUTPUT_TOKENS
    );
    const cost = this.calculateCost(response.usage, 'haiku');

    // Parse score from response
    const scoreText = SummarizationClient.firstText(response);
    const score = parseFloat(scoreText);

    if (!Number.isFinite(score) || score < 0 || score > 1) {
      console.warn(
        `Invalid relevance score: ${scoreText}, defaulting to 0.5`
      );
      return { score: 0.5, cost };
    }

    return { score, cost };
  }

  // ===========================================================================
  // Stage 2: Sonnet Summary
  // ===========================================================================

  /**
   * Stage 2: ask the Sonnet model (CLAUDE_SONNET_MODEL) for a 3-sentence
   * summary covering the problem, the approach and the results.
   *
   * @param title - Paper title
   * @param abstract - Paper abstract
   * @returns Trimmed summary text and the USD cost of the call
   * @throws Error if the API call fails or the response has no text content
   */
  private async generateSummary(
    title: string,
    abstract: string
  ): Promise<{ summary: string; cost: number }> {
    await this.enforceRateLimit();

    const prompt = `Summarize this research paper in exactly 3 sentences. Focus on:
1. The problem being addressed
2. The approach or method used
3. The key results or findings

Paper Title: ${title}
Abstract: ${abstract}

Provide ONLY the 3-sentence summary, nothing else.`;

    const response = await this.callClaude(
      CLAUDE_SONNET_MODEL,
      prompt,
      MAX_SUMMARY_OUTPUT_TOKENS
    );

    return {
      summary: SummarizationClient.firstText(response),
      cost: this.calculateCost(response.usage, 'sonnet'),
    };
  }

  // ===========================================================================
  // Two-Stage Pipeline
  // ===========================================================================

  /**
   * Execute the two-stage summarization pipeline.
   *
   * Flow:
   * 1. Hash `title + abstract` (for deduplication by the caller)
   * 2. If the budget cap is already reached, return `budget_exceeded`
   * 3. Stage 1: Haiku triage
   * 4. If score < threshold: skip Sonnet and return `irrelevant`
   * 5. Otherwise: Stage 2 Sonnet summary
   *
   * If a stage throws, the error is logged and an `error` result is returned.
   * That result still reports the triage score and Haiku cost when triage had
   * already completed, so per-paper costs stay consistent with
   * `getTotalCost()`.
   *
   * @param title - Paper title
   * @param abstract - Paper abstract
   * @param userTopics - User's research topics
   * @param relevanceThreshold - Minimum score required to run Sonnet
   *   (defaults to DEFAULT_RELEVANCE_THRESHOLD)
   * @returns Summarization result with summary, score and costs
   */
  async summarize(
    title: string,
    abstract: string,
    userTopics: string[],
    relevanceThreshold = DEFAULT_RELEVANCE_THRESHOLD
  ): Promise<SummarizationResult> {
    const content_hash = await hashContent(title + abstract);

    // Check budget circuit breaker
    if (this.isBudgetExceeded()) {
      return {
        summary: null,
        relevance_score: 0,
        content_hash,
        haiku_cost: 0,
        sonnet_cost: 0,
        total_cost: 0,
        skipped_reason: 'budget_exceeded',
      };
    }

    // Declared outside the try so a Stage 2 failure can still report what
    // Stage 1 produced and what it cost.
    let score = 0;
    let haikuCost = 0;

    try {
      // Stage 1: Haiku triage
      const triage = await this.triageRelevance(title, abstract, userTopics);
      score = triage.score;
      haikuCost = triage.cost;
      this.totalCost += haikuCost;

      // Check relevance threshold
      if (score < relevanceThreshold) {
        console.log(
          `Paper irrelevant (score: ${score.toFixed(2)}), skipping Sonnet`
        );
        return {
          summary: null,
          relevance_score: score,
          content_hash,
          haiku_cost: haikuCost,
          sonnet_cost: 0,
          total_cost: haikuCost,
          skipped_reason: 'irrelevant',
        };
      }

      // Stage 2: Sonnet summary (only for relevant papers)
      const { summary, cost: sonnetCost } = await this.generateSummary(
        title,
        abstract
      );
      this.totalCost += sonnetCost;

      console.log(
        `Paper relevant (score: ${score.toFixed(2)}), generated summary`
      );

      return {
        summary,
        relevance_score: score,
        content_hash,
        haiku_cost: haikuCost,
        sonnet_cost: sonnetCost,
        total_cost: haikuCost + sonnetCost,
      };
    } catch (error: unknown) {
      console.error('Summarization failed:', error);
      return {
        summary: null,
        relevance_score: score,
        content_hash,
        haiku_cost: haikuCost,
        sonnet_cost: 0,
        total_cost: haikuCost,
        skipped_reason: 'error',
      };
    }
  }

  // ===========================================================================
  // Anthropic API Client
  // ===========================================================================

  /**
   * Call the Anthropic Messages API.
   *
   * Endpoint: POST `${ANTHROPIC_API_BASE_URL}/messages`
   * Headers: x-api-key, anthropic-version, content-type
   * Body: model, max_tokens, messages[] (a single user message)
   *
   * @param model - Model ID (haiku or sonnet)
   * @param prompt - User prompt
   * @param maxTokens - Maximum output tokens
   * @returns API response with content and usage
   * @throws Error on a non-2xx status (message includes status and body) or
   *   when the JSON body lacks a `content` array or numeric `usage` counts
   */
  private async callClaude(
    model: string,
    prompt: string,
    maxTokens: number
  ): Promise<AnthropicResponse> {
    const response = await fetch(`${ANTHROPIC_API_BASE_URL}/messages`, {
      method: 'POST',
      headers: {
        'x-api-key': this.apiKey,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      body: JSON.stringify({
        model,
        max_tokens: maxTokens,
        messages: [
          {
            role: 'user',
            content: prompt,
          },
        ],
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `Anthropic API error: ${response.status} ${response.statusText} - ${errorText}`
      );
    }

    // Validate instead of asserting: the payload is external input.
    const payload: unknown = await response.json();
    if (!SummarizationClient.isAnthropicResponse(payload)) {
      throw new Error('Anthropic API error: unexpected response shape');
    }
    return payload;
  }

  /**
   * Runtime check for the fields this client actually reads: a `content`
   * array and numeric `usage.input_tokens` / `usage.output_tokens`.
   */
  private static isAnthropicResponse(
    value: unknown
  ): value is AnthropicResponse {
    if (typeof value !== 'object' || value === null) {
      return false;
    }
    const { content, usage } = value as { content?: unknown; usage?: unknown };
    if (!Array.isArray(content) || typeof usage !== 'object' || usage === null) {
      return false;
    }
    const { input_tokens, output_tokens } = usage as {
      input_tokens?: unknown;
      output_tokens?: unknown;
    };
    return typeof input_tokens === 'number' && typeof output_tokens === 'number';
  }

  /**
   * Return the trimmed text of the first content block.
   *
   * @throws Error if the response has no first block or it carries no text
   */
  private static firstText(response: AnthropicResponse): string {
    const first = response.content[0];
    if (first === undefined || typeof first.text !== 'string') {
      throw new Error('Anthropic API error: response contained no text content');
    }
    return first.text.trim();
  }

  // ===========================================================================
  // Cost Calculation
  // ===========================================================================

  /**
   * Calculate cost based on token usage and model pricing.
   *
   * Haiku pricing:  $0.25 input / $1.25 output per 1M tokens
   * Sonnet pricing: $3.00 input / $15.00 output per 1M tokens
   *
   * @param usage - Token usage from API response
   * @param model - Model type (haiku or sonnet)
   * @returns Total cost in USD
   */
  private calculateCost(
    usage: { input_tokens: number; output_tokens: number },
    model: 'haiku' | 'sonnet'
  ): number {
    const rates = SummarizationClient.PRICING_USD_PER_MTOK[model];
    const inputCost = usage.input_tokens * (rates.input / 1_000_000);
    const outputCost = usage.output_tokens * (rates.output / 1_000_000);
    return inputCost + outputCost;
  }

  // ===========================================================================
  // Budget Tracking
  // ===========================================================================

  /**
   * Get total cost accumulated by this instance.
   *
   * @returns Total cost in USD
   */
  getTotalCost(): number {
    return this.totalCost;
  }

  /**
   * Reset cost tracking (call at start of new day).
   */
  resetCost(): void {
    this.totalCost = 0;
  }

  /**
   * Check if budget is exceeded.
   *
   * @returns True if total cost >= DAILY_BUDGET_CAP_USD
   */
  isBudgetExceeded(): boolean {
    return this.totalCost >= DAILY_BUDGET_CAP_USD;
  }

  /**
   * Get remaining budget.
   *
   * @returns Remaining budget in USD, never negative
   */
  getRemainingBudget(): number {
    return Math.max(0, DAILY_BUDGET_CAP_USD - this.totalCost);
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jeffaf/kivv'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

summarization.ts•13.7 KiB