summarization.ts•14.1 kB
// =============================================================================
// kivv - Two-Stage AI Summarization Client
// =============================================================================
// Stage 1: Claude Haiku for relevance triage (0.0-1.0 score)
// Stage 2: Claude Sonnet for detailed summaries (only if score >= threshold)
// Cost optimization: ~96% savings on irrelevant papers
// Rate limiting: 5 req/s with jitter for Anthropic API
// Budget tracking: Circuit breaker at $1/day
// =============================================================================
import { hashContent } from './utils';
import {
CLAUDE_HAIKU_MODEL,
CLAUDE_SONNET_MODEL,
MAX_SUMMARY_OUTPUT_TOKENS,
MAX_TRIAGE_OUTPUT_TOKENS,
DEFAULT_RELEVANCE_THRESHOLD,
ANTHROPIC_RATE_LIMIT_MS,
ANTHROPIC_JITTER_MIN_MS,
ANTHROPIC_JITTER_MAX_MS,
DAILY_BUDGET_CAP_USD,
ANTHROPIC_API_BASE_URL,
} from './constants';
// =============================================================================
// Types & Interfaces
// =============================================================================
/**
* Two-stage summarization result
*/
export interface SummarizationResult {
/** Generated summary (null if irrelevant/skipped/error) */
summary: string | null;
/** Relevance score from Haiku triage (0.0-1.0) */
relevance_score: number;
/** SHA-256 hash of title + abstract for deduplication */
content_hash: string;
/** Cost of Haiku triage in USD */
haiku_cost: number;
/** Cost of Sonnet summary in USD */
sonnet_cost: number;
/** Total cost (haiku + sonnet) in USD */
total_cost: number;
/** Reason paper was skipped (if applicable) */
skipped_reason?: 'irrelevant' | 'budget_exceeded' | 'error';
}
/**
* Anthropic API response structure
*/
interface AnthropicResponse {
id: string;
type: string;
role: string;
content: Array<{
type: string;
text: string;
}>;
model: string;
usage: {
input_tokens: number;
output_tokens: number;
};
}
// =============================================================================
// Summarization Client
// =============================================================================
/**
* Two-stage AI summarization client using Claude Haiku + Sonnet
*
* Stage 1: Haiku triage for relevance scoring (~$0.00025/paper)
* Stage 2: Sonnet summary for relevant papers (~$0.006/paper)
*
* Features:
* - Rate limiting: 5 req/s with jitter
* - Budget tracking: Circuit breaker at $1/day
* - Content hashing: Detect duplicate papers
* - Error handling: Graceful failures with retry
*
* @example
* const client = new SummarizationClient(env.CLAUDE_API_KEY);
* const result = await client.summarize(
* "Attention Is All You Need",
* "We propose a new architecture...",
* ["transformers", "machine learning"]
* );
* console.log(result.summary); // 3-sentence summary
* console.log(result.relevance_score); // 0.95
* console.log(result.total_cost); // 0.00625
*/
export class SummarizationClient {
private apiKey: string;
private lastRequestTime = 0;
private totalCost = 0;
/**
* Create a new summarization client
*
* @param apiKey - Anthropic API key (from env.CLAUDE_API_KEY)
*/
constructor(apiKey: string) {
this.apiKey = apiKey;
}
// ===========================================================================
// Rate Limiting
// ===========================================================================
/**
* Enforce rate limit: 5 req/s = 200ms between requests + jitter (50-100ms)
*
* This prevents hitting Anthropic's rate limit of 5 requests per second.
* We add random jitter to avoid synchronized request patterns.
*/
private async enforceRateLimit(): Promise<void> {
const now = Date.now();
const timeSinceLastRequest = now - this.lastRequestTime;
const jitter =
Math.random() * (ANTHROPIC_JITTER_MAX_MS - ANTHROPIC_JITTER_MIN_MS) +
ANTHROPIC_JITTER_MIN_MS;
const requiredDelay = ANTHROPIC_RATE_LIMIT_MS + jitter;
if (timeSinceLastRequest < requiredDelay) {
const sleepMs = requiredDelay - timeSinceLastRequest;
await new Promise((resolve) => setTimeout(resolve, sleepMs));
}
this.lastRequestTime = Date.now();
}
// ===========================================================================
// Stage 1: Haiku Triage
// ===========================================================================
/**
* Stage 1: Use Claude Haiku to quickly assess paper relevance
*
* Prompt: Rate relevance of paper to user topics (0.0-1.0)
* Model: Claude 3.5 Haiku
* Cost: ~$0.00025 per paper
* Max tokens: 10 (just need the number)
*
* @param title - Paper title
* @param abstract - Paper abstract
* @param userTopics - User's research topics
* @returns Relevance score (0.0-1.0) and cost
*/
private async triageRelevance(
title: string,
abstract: string,
userTopics: string[]
): Promise<{ score: number; cost: number }> {
await this.enforceRateLimit();
const topicList = userTopics.join(', ');
// Security-focused prompt for offensive security researcher
const prompt = `You are evaluating research papers for an offensive security researcher and penetration tester.
USER INTERESTS: ${topicList}
SCORING CRITERIA (for offensive security relevance):
- 0.9-1.0: Novel attack/exploit technique, directly weaponizable, reveals new vulnerability class
- 0.7-0.9: Security-relevant technique, adversarial ML, practical offensive application
- 0.5-0.7: Indirectly applicable (ML/AI techniques usable for security, defensive paper with offensive insights)
- 0.3-0.5: Tangentially related (mentions security but not primary focus)
- 0.0-0.3: Irrelevant to security research
Consider:
1. Can techniques be weaponized or applied to offensive security?
2. Does it reveal new attack surfaces or vulnerability patterns?
3. Are there evasion/obfuscation techniques to learn from?
4. Could this improve red team operations or penetration testing?
5. Does it advance adversarial ML, malware analysis, or exploit development?
Paper Title: ${title}
Abstract: ${abstract}
Return ONLY a number between 0.0 and 1.0. No explanation.`;
const response = await this.callClaude(
CLAUDE_HAIKU_MODEL,
prompt,
MAX_TRIAGE_OUTPUT_TOKENS
);
// Parse score from response
const scoreText = response.content[0].text.trim();
const score = parseFloat(scoreText);
if (isNaN(score) || score < 0 || score > 1) {
console.warn(
`Invalid relevance score: ${scoreText}, defaulting to 0.5`
);
return { score: 0.5, cost: this.calculateCost(response.usage, 'haiku') };
}
return {
score,
cost: this.calculateCost(response.usage, 'haiku'),
};
}
// ===========================================================================
// Stage 2: Sonnet Summary
// ===========================================================================
/**
* Stage 2: Use Claude Sonnet to generate detailed summary
*
* Prompt: Summarize paper in 3 sentences (problem, approach, results)
* Model: Claude 3.5 Sonnet
* Cost: ~$0.006 per paper
* Max tokens: 120
*
* @param title - Paper title
* @param abstract - Paper abstract
* @returns Summary (3 sentences) and cost
*/
private async generateSummary(
title: string,
abstract: string
): Promise<{ summary: string; cost: number }> {
await this.enforceRateLimit();
const prompt = `Summarize this research paper in exactly 3 sentences. Focus on:
1. The problem being addressed
2. The approach or method used
3. The key results or findings
Paper Title: ${title}
Abstract: ${abstract}
Provide ONLY the 3-sentence summary, nothing else.`;
const response = await this.callClaude(
CLAUDE_SONNET_MODEL,
prompt,
MAX_SUMMARY_OUTPUT_TOKENS
);
return {
summary: response.content[0].text.trim(),
cost: this.calculateCost(response.usage, 'sonnet'),
};
}
// ===========================================================================
// Two-Stage Pipeline
// ===========================================================================
/**
* Execute two-stage summarization pipeline
*
* Flow:
* 1. Generate content hash (for deduplication)
* 2. Check budget ($1/day circuit breaker)
* 3. Stage 1: Haiku triage (~$0.00025)
* 4. If score < threshold: Skip Sonnet (save ~$0.006)
* 5. If score >= threshold: Stage 2 Sonnet summary (~$0.006)
*
* @param title - Paper title
* @param abstract - Paper abstract
* @param userTopics - User's research topics
* @param relevanceThreshold - Minimum score for Sonnet (default: 0.7)
* @returns Summarization result with summary, score, costs
*/
async summarize(
title: string,
abstract: string,
userTopics: string[],
relevanceThreshold = DEFAULT_RELEVANCE_THRESHOLD
): Promise<SummarizationResult> {
// Check budget circuit breaker
if (this.totalCost >= DAILY_BUDGET_CAP_USD) {
return {
summary: null,
relevance_score: 0,
content_hash: await hashContent(title + abstract),
haiku_cost: 0,
sonnet_cost: 0,
total_cost: 0,
skipped_reason: 'budget_exceeded',
};
}
const content_hash = await hashContent(title + abstract);
try {
// Stage 1: Haiku triage
const { score, cost: haikuCost } = await this.triageRelevance(
title,
abstract,
userTopics
);
this.totalCost += haikuCost;
// Check relevance threshold
if (score < relevanceThreshold) {
console.log(
`Paper irrelevant (score: ${score.toFixed(2)}), skipping Sonnet`
);
return {
summary: null,
relevance_score: score,
content_hash,
haiku_cost: haikuCost,
sonnet_cost: 0,
total_cost: haikuCost,
skipped_reason: 'irrelevant',
};
}
// Stage 2: Sonnet summary (only for relevant papers)
const { summary, cost: sonnetCost } = await this.generateSummary(
title,
abstract
);
this.totalCost += sonnetCost;
console.log(
`Paper relevant (score: ${score.toFixed(2)}), generated summary`
);
return {
summary,
relevance_score: score,
content_hash,
haiku_cost: haikuCost,
sonnet_cost: sonnetCost,
total_cost: haikuCost + sonnetCost,
};
} catch (error) {
console.error('Summarization failed:', error);
return {
summary: null,
relevance_score: 0,
content_hash,
haiku_cost: 0,
sonnet_cost: 0,
total_cost: 0,
skipped_reason: 'error',
};
}
}
// ===========================================================================
// Anthropic API Client
// ===========================================================================
/**
* Call Anthropic Messages API
*
* Endpoint: POST https://api.anthropic.com/v1/messages
* Headers: x-api-key, anthropic-version, content-type
* Body: model, max_tokens, messages[]
*
* @param model - Model ID (haiku or sonnet)
* @param prompt - User prompt
* @param maxTokens - Maximum output tokens
* @returns API response with content and usage
*/
private async callClaude(
model: string,
prompt: string,
maxTokens: number
): Promise<AnthropicResponse> {
const response = await fetch(`${ANTHROPIC_API_BASE_URL}/messages`, {
method: 'POST',
headers: {
'x-api-key': this.apiKey,
'anthropic-version': '2023-06-01',
'content-type': 'application/json',
},
body: JSON.stringify({
model,
max_tokens: maxTokens,
messages: [
{
role: 'user',
content: prompt,
},
],
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(
`Anthropic API error: ${response.status} ${response.statusText} - ${errorText}`
);
}
return (await response.json()) as AnthropicResponse;
}
// ===========================================================================
// Cost Calculation
// ===========================================================================
/**
* Calculate cost based on token usage and model pricing
*
* Haiku pricing:
* - Input: $0.25 per 1M tokens
* - Output: $1.25 per 1M tokens
*
* Sonnet pricing:
* - Input: $3.00 per 1M tokens
* - Output: $15.00 per 1M tokens
*
* @param usage - Token usage from API response
* @param model - Model type (haiku or sonnet)
* @returns Total cost in USD
*/
private calculateCost(
usage: { input_tokens: number; output_tokens: number },
model: 'haiku' | 'sonnet'
): number {
const inputCost =
usage.input_tokens *
(model === 'haiku' ? 0.25 / 1_000_000 : 3.0 / 1_000_000);
const outputCost =
usage.output_tokens *
(model === 'haiku' ? 1.25 / 1_000_000 : 15.0 / 1_000_000);
return inputCost + outputCost;
}
// ===========================================================================
// Budget Tracking
// ===========================================================================
/**
* Get total cost for this session
*
* @returns Total cost in USD
*/
getTotalCost(): number {
return this.totalCost;
}
/**
* Reset cost tracking (call at start of new day)
*/
resetCost(): void {
this.totalCost = 0;
}
/**
* Check if budget is exceeded
*
* @returns True if total cost >= daily cap
*/
isBudgetExceeded(): boolean {
return this.totalCost >= DAILY_BUDGET_CAP_USD;
}
/**
* Get remaining budget
*
* @returns Remaining budget in USD
*/
getRemainingBudget(): number {
return Math.max(0, DAILY_BUDGET_CAP_USD - this.totalCost);
}
}