import { z } from 'zod';
import { createLogger } from '../utils/logger.js';
import { ValidationError } from '../utils/errors.js';
import { rateLimiters } from '../utils/rate-limiter.js';
/**
 * Sonar API response schemas.
 *
 * A single citation in normalised form: every field is a required string.
 */
const sonarCitationShape = {
  url: z.string(),
  title: z.string(),
  snippet: z.string(),
};
const SonarCitationSchema = z.object(sonarCitationShape);
/** Token accounting block; all three counters are required when the block is present. */
const SonarUsageSchema = z.object({
  prompt_tokens: z.number(),
  completion_tokens: z.number(),
  total_tokens: z.number(),
});

/**
 * Normalised view of a Sonar answer: the text content is mandatory, while
 * citations, model name and usage figures may each be absent.
 */
const SonarResponseSchema = z.object({
  content: z.string(),
  citations: z.array(SonarCitationSchema).optional(),
  model: z.string().optional(),
  usage: SonarUsageSchema.optional(),
});
// Benchmark data schemas

/** Reference to a source backing a benchmark (URL plus display title). */
const BenchmarkCitationSchema = z.object({
  url: z.string(),
  title: z.string(),
});

/**
 * One benchmark data point: a numeric value with its unit, the industry and
 * metric it belongs to, where and when it was obtained, and a confidence
 * score constrained to the inclusive range 0..1.
 */
export const BenchmarkDataSchema = z.object({
  industry: z.string(),
  metric: z.string(),
  value: z.number(),
  unit: z.string(),
  percentile: z.number().optional(),
  source: z.string(),
  date: z.string(),
  confidence: z.number().min(0).max(1),
  citations: z.array(BenchmarkCitationSchema).optional(),
});

/** Static type inferred from {@link BenchmarkDataSchema}. */
export type BenchmarkData = z.infer<typeof BenchmarkDataSchema>;
// Industry benchmark request schema

/** Industries a benchmark request may target. */
const BENCHMARK_INDUSTRIES = [
  'financial_services',
  'healthcare',
  'retail',
  'manufacturing',
  'technology',
  'education',
  'government',
  'other',
] as const;

/** Accepted company-size buckets. */
const COMPANY_SIZES = ['small', 'medium', 'large', 'enterprise'] as const;

/**
 * Parameters of a benchmark lookup. Only `industry` is mandatory; the other
 * fields narrow the query when supplied.
 */
const BenchmarkRequestSchema = z.object({
  industry: z.enum(BENCHMARK_INDUSTRIES),
  metrics: z.array(z.string()).optional(),
  company_size: z.enum(COMPANY_SIZES).optional(),
  region: z.string().optional(),
  use_case_type: z.string().optional(),
});

/** Static type inferred from the benchmark request schema. */
export type BenchmarkRequest = z.infer<typeof BenchmarkRequestSchema>;
// Configuration for Sonar API

/** Model identifiers accepted by the configuration. */
type SonarModel = 'sonar' | 'sonar-pro';

/** Options for constructing the Sonar client; only the API key is required. */
interface SonarConfig {
  /** Credential for the API. */
  apiKey: string;
  /** Optional override of the API base URL. */
  baseUrl?: string;
  /** Optional model selection. */
  model?: SonarModel;
  /** Optional timeout value. */
  timeout?: number;
  /** Optional retry budget. */
  maxRetries?: number;
}
/**
 * Client for retrieving AI/automation industry benchmarks through the
 * Perplexity Sonar chat-completions endpoint.
 *
 * Sonar answers are free text, so numeric benchmarks are extracted with
 * regular expressions. Non-empty results of a completed lookup are kept in an
 * in-process cache for five minutes. When the API call fails, or nothing can
 * be extracted from the answer, a built-in table of static figures is
 * returned instead.
 */
export class SonarBenchmarkService {
  private readonly logger = createLogger({ service: 'SonarBenchmark' });
  private readonly config: Required<SonarConfig>;
  private readonly cache = new Map<string, { data: BenchmarkData[]; timestamp: number }>();
  private readonly cacheTTL = 5 * 60 * 1000; // 5 minutes in milliseconds

  /**
   * @param config - API key plus optional overrides; omitted options get defaults.
   * @throws ValidationError when no API key is supplied.
   */
  constructor(config: SonarConfig) {
    if (!config.apiKey) {
      throw new ValidationError('Sonar API key is required');
    }
    this.config = {
      apiKey: config.apiKey,
      // `||` is deliberate for the next three: an empty URL or a zero timeout
      // is not usable, so those values fall back to the defaults as well.
      baseUrl: config.baseUrl || 'https://api.perplexity.ai',
      model: config.model || 'sonar-pro',
      timeout: config.timeout || 30000,
      // `??` so an explicit 0 is preserved. The value is only stored here;
      // no method of this class reads it.
      maxRetries: config.maxRetries ?? 3
    };
  }

  /**
   * Fetch industry benchmarks from Sonar API.
   *
   * Serves from the cache when a matching entry is younger than the TTL.
   * Never rejects because of an API failure: the error is logged and the
   * static fallback table is returned.
   *
   * @param request - Industry and optional narrowing criteria.
   * @returns Extracted benchmarks, or static fallback figures.
   */
  async fetchBenchmarks(request: BenchmarkRequest): Promise<BenchmarkData[]> {
    const cacheKey = this.generateCacheKey(request);

    const cached = this.cache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < this.cacheTTL) {
      this.logger.debug('Returning cached benchmarks', { cacheKey });
      return cached.data;
    }

    try {
      const prompt = this.buildBenchmarkPrompt(request);
      const response = await this.callSonarAPI(prompt, request.industry);
      const benchmarks = this.parseSonarResponse(response, request);

      if (benchmarks.length > 0) {
        this.cache.set(cacheKey, { data: benchmarks, timestamp: Date.now() });
      }

      this.logger.info('Fetched benchmarks from Sonar', {
        industry: request.industry,
        count: benchmarks.length
      });
      return benchmarks;
    } catch (error) {
      this.logger.error('Failed to fetch benchmarks from Sonar', this.toError(error));
      // Fall back to static benchmarks if available
      return this.getStaticFallbackBenchmarks(request);
    }
  }

  /**
   * Fetch specific ROI benchmarks for a use case.
   *
   * @param industry - Industry name interpolated into the prompt.
   * @param useCase - Use case interpolated into the prompt.
   * @param companySize - Optional size label; "average-sized" is used when omitted.
   * @returns ROI in percent, payback period in months, success rate as a
   *   fraction, and the citations of the answer. On any failure the error is
   *   logged and conservative defaults with no citations are returned.
   */
  async fetchROIBenchmarks(
    industry: string,
    useCase: string,
    companySize?: string
  ): Promise<{
    expectedROI: number;
    paybackPeriod: number;
    successRate: number;
    citations: Array<{ url: string; title: string }>;
  }> {
    // Joined from lines so the prompt text does not depend on source indentation.
    const prompt = [
      '',
      `Provide current ROI benchmarks for ${useCase} implementations in the ${industry} industry`,
      `for ${companySize || 'average-sized'} companies. Include:`,
      '1. Expected ROI percentage (annual)',
      '2. Typical payback period in months',
      '3. Implementation success rate',
      '4. Recent industry reports or case studies as sources',
      'Format the response with specific numbers and cite your sources.',
      ''
    ].join('\n');

    try {
      const response = await this.callSonarAPI(prompt, `${industry} ROI`);
      const roiData = this.parseROIResponse(response);
      this.logger.info('Fetched ROI benchmarks', {
        industry,
        useCase,
        roi: roiData.expectedROI
      });
      return roiData;
    } catch (error) {
      this.logger.error('Failed to fetch ROI benchmarks', this.toError(error));
      // Return conservative defaults
      return {
        expectedROI: 15, // 15% conservative default
        paybackPeriod: 18, // 18 months default
        successRate: 0.7, // 70% success rate
        citations: []
      };
    }
  }

  /**
   * Validate benchmark data freshness.
   *
   * A benchmark is outdated when its `date` lies more than six calendar
   * months in the past, or when the date cannot be parsed at all (an
   * unreadable date cannot be shown to be fresh).
   *
   * @param benchmarks - Data points to check.
   * @returns `isValid` is true only when no metric is outdated;
   *   `outdatedMetrics` lists the metric name of every outdated entry.
   */
  async validateBenchmarkFreshness(
    benchmarks: BenchmarkData[]
  ): Promise<{ isValid: boolean; outdatedMetrics: string[] }> {
    const cutoff = new Date();
    cutoff.setMonth(cutoff.getMonth() - 6);
    const cutoffTime = cutoff.getTime();

    const outdatedMetrics = benchmarks
      .filter(b => {
        const published = new Date(b.date).getTime();
        return Number.isNaN(published) || published < cutoffTime;
      })
      .map(b => b.metric);

    return {
      isValid: outdatedMetrics.length === 0,
      outdatedMetrics
    };
  }

  /**
   * Build a structured prompt for benchmark queries.
   *
   * Uses the requested metrics when any are given, otherwise a default list,
   * and appends one line for each optional criterion that is present.
   */
  private buildBenchmarkPrompt(request: BenchmarkRequest): string {
    const metrics = request.metrics?.join(', ') ||
      'automation rates, cost reduction percentages, implementation timelines, ROI ranges';

    const sections: string[] = [
      [
        '',
        `Provide current industry benchmarks for ${request.industry} sector.`,
        `Focus on AI/automation implementation metrics including: ${metrics}.`,
        ''
      ].join('\n')
    ];

    if (request.company_size) {
      sections.push(
        `\nCompany size: ${request.company_size} (${this.getCompanySizeRange(request.company_size)}).`
      );
    }
    if (request.region) {
      sections.push(`\nGeographic region: ${request.region}.`);
    }
    if (request.use_case_type) {
      sections.push(`\nUse case type: ${request.use_case_type}.`);
    }

    sections.push(
      [
        '',
        'Please provide:',
        '1. Specific numerical benchmarks with units',
        '2. Data sources and publication dates',
        '3. Industry-specific considerations',
        '4. Confidence levels for each metric',
        'Format your response clearly with metric names, values, and sources.',
        ''
      ].join('\n')
    );

    return sections.join('');
  }

  /**
   * Call the Perplexity Sonar API.
   *
   * The request runs through the shared rate limiter and is aborted once the
   * configured timeout elapses. The timer stays armed until the body has been
   * read and validated, so a stalled body download is also cut off.
   *
   * @param prompt - User message sent to the model.
   * @param context - Caller-supplied label; not currently sent or logged.
   * @returns The answer normalised to the Sonar response schema.
   * @throws Error on a non-2xx status, on timeout, or when the normalised
   *   payload does not satisfy the schema.
   */
  private async callSonarAPI(
    prompt: string,
    context: string
  ): Promise<z.infer<typeof SonarResponseSchema>> {
    // Use rate limiter for API calls
    return rateLimiters.perplexity.executeWithRateLimit(
      async () => {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), this.config.timeout);
        try {
          const response = await fetch(`${this.config.baseUrl}/chat/completions`, {
            method: 'POST',
            headers: {
              'Authorization': `Bearer ${this.config.apiKey}`,
              'Content-Type': 'application/json'
            },
            body: JSON.stringify({
              model: this.config.model,
              messages: [
                {
                  role: 'system',
                  content: 'You are an expert in industry benchmarks and ROI analysis. Provide accurate, well-sourced data.'
                },
                {
                  role: 'user',
                  content: prompt
                }
              ],
              temperature: 0.1, // Low temperature for factual responses
              max_tokens: 1000
            }),
            signal: controller.signal
          });

          if (!response.ok) {
            throw new Error(`Sonar API error: ${response.status} ${response.statusText}`);
          }

          // Untyped external payload; it is validated by the schema below.
          const data: any = await response.json();

          // Log the response structure for debugging
          this.logger.debug('Sonar API response structure', {
            hasChoices: !!data?.choices,
            hasCitations: !!data?.citations,
            citationType: data?.citations ? typeof data.citations[0] : 'none'
          });

          return SonarResponseSchema.parse({
            content: data?.choices?.[0]?.message?.content || '',
            citations: this.extractCitations(data),
            model: data?.model,
            usage: data?.usage
          });
        } catch (error) {
          if (error instanceof Error && error.name === 'AbortError') {
            throw new Error('Sonar API request timed out');
          }
          throw error;
        } finally {
          // Single cleanup point for success and failure alike.
          clearTimeout(timer);
        }
      },
      { priority: 'normal', timeout: this.config.timeout }
    );
  }

  /**
   * Parse Sonar response into structured benchmark data.
   *
   * Every regex hit in the answer text becomes one data point, so a metric
   * mentioned several times yields several entries. When nothing matches, a
   * warning is logged and the static fallback table is returned.
   */
  private parseSonarResponse(
    response: z.infer<typeof SonarResponseSchema>,
    request: BenchmarkRequest
  ): BenchmarkData[] {
    const content: string = response.content;

    // Capture group 1 is the number; group 2 (where present) is the time unit.
    const patterns: Record<string, RegExp> = {
      automation: /automation.*?(\d+\.?\d*)\s*%/gi,
      cost_reduction: /cost reduction.*?(\d+\.?\d*)\s*%/gi,
      roi: /ROI.*?(\d+\.?\d*)\s*%/gi,
      payback: /payback.*?(\d+\.?\d*)\s*(months?|years?)/gi,
      implementation_time: /implementation.*?(\d+\.?\d*)\s*(months?|weeks?)/gi,
      success_rate: /success rate.*?(\d+\.?\d*)\s*%/gi
    };

    // Identical for every data point of one response, so computed once.
    const today = new Date().toISOString().split('T')[0] ?? '';
    const confidence = this.calculateConfidence(response.citations?.length || 0);

    const benchmarks: BenchmarkData[] = [];
    for (const [metric, pattern] of Object.entries(patterns)) {
      for (const match of content.matchAll(pattern)) {
        const rawValue = match[1];
        if (rawValue === undefined) continue;
        benchmarks.push({
          industry: request.industry,
          metric: metric.replace(/_/g, ' '),
          value: parseFloat(rawValue),
          unit: match[2] || '%', // percentage patterns have no unit group
          source: 'Perplexity Sonar API',
          date: today,
          confidence,
          citations: response.citations?.map((c: { url: string; title: string }) => ({
            url: c.url,
            title: c.title
          }))
        });
      }
    }

    if (benchmarks.length === 0) {
      this.logger.warn('No benchmarks extracted from Sonar response', {
        industry: request.industry,
        responseLength: content.length
      });
      return this.getStaticFallbackBenchmarks(request);
    }
    return benchmarks;
  }

  /**
   * Parse ROI-specific response.
   *
   * Takes the first match of each pattern in the answer text. Defaults when a
   * figure is missing: ROI 20 (%), payback 12 (months), success rate 0.75.
   * A payback period given in years, in any letter case, is converted to months.
   */
  private parseROIResponse(response: z.infer<typeof SonarResponseSchema>): {
    expectedROI: number;
    paybackPeriod: number;
    successRate: number;
    citations: Array<{ url: string; title: string }>;
  } {
    const content: string = response.content;

    const roiMatch = content.match(/(?:ROI|return).*?(\d+\.?\d*)\s*%/i);
    const expectedROI = roiMatch?.[1] !== undefined ? parseFloat(roiMatch[1]) : 20;

    const paybackMatch = content.match(/payback.*?(\d+\.?\d*)\s*(months?|years?)/i);
    let paybackPeriod = 12; // default
    if (paybackMatch?.[1] !== undefined) {
      paybackPeriod = parseFloat(paybackMatch[1]);
      // The pattern is case-insensitive, so the unit must be compared that way too.
      if ((paybackMatch[2] ?? '').toLowerCase().startsWith('year')) {
        paybackPeriod *= 12; // Convert to months
      }
    }

    const successMatch = content.match(/success.*?(\d+\.?\d*)\s*%/i);
    const successRate =
      successMatch?.[1] !== undefined ? parseFloat(successMatch[1]) / 100 : 0.75;

    return {
      expectedROI,
      paybackPeriod,
      successRate,
      citations: response.citations?.map((c: { url: string; title: string }) => ({
        url: c.url,
        title: c.title
      })) || []
    };
  }

  /**
   * Extract citations from Sonar API response.
   *
   * Looks at the top-level `citations` first, then at
   * `choices[0].message.citations`. Entries may be bare URL strings or
   * objects; both are normalised to `{ url, title, snippet }`. A missing
   * title becomes "Source N" (1-based position), a missing snippet becomes
   * the empty string, and entries without a string URL are dropped.
   */
  private extractCitations(data: any): Array<{ url: string; title: string; snippet: string }> {
    const raw: unknown = data?.citations || data?.choices?.[0]?.message?.citations;
    if (!Array.isArray(raw)) {
      return [];
    }

    const citations: Array<{ url: string; title: string; snippet: string }> = [];
    raw.forEach((entry: unknown, index: number) => {
      const fallbackTitle = `Source ${index + 1}`;
      if (typeof entry === 'string') {
        citations.push({ url: entry, title: fallbackTitle, snippet: '' });
        return;
      }
      if (entry !== null && typeof entry === 'object') {
        const candidate = entry as { url?: unknown; title?: unknown; snippet?: unknown };
        if (typeof candidate.url === 'string') {
          citations.push({
            url: candidate.url,
            title: typeof candidate.title === 'string' ? candidate.title : fallbackTitle,
            snippet: typeof candidate.snippet === 'string' ? candidate.snippet : ''
          });
        }
      }
    });
    return citations;
  }

  /**
   * Calculate confidence based on number of citations:
   * 3 or more gives 0.95, 2 gives 0.85, 1 gives 0.75, none gives 0.6.
   */
  private calculateConfidence(citationCount: number): number {
    if (citationCount >= 3) return 0.95;
    if (citationCount >= 2) return 0.85;
    if (citationCount >= 1) return 0.75;
    return 0.6; // Low confidence without citations
  }

  /**
   * Get company size range for context.
   *
   * @returns The head-count description for a known size label, otherwise the
   *   label itself unchanged.
   */
  private getCompanySizeRange(size: string): string {
    const ranges: Record<string, string> = {
      small: '1-50 employees',
      medium: '51-500 employees',
      large: '501-5000 employees',
      enterprise: '5000+ employees'
    };
    // Own-property check so inherited keys such as "toString" are not looked up.
    const label = Object.prototype.hasOwnProperty.call(ranges, size) ? ranges[size] : undefined;
    return label || size;
  }

  /**
   * Generate cache key for requests.
   *
   * The key is order-insensitive with respect to `metrics`. A copy is sorted
   * so the caller's array keeps its original order.
   */
  private generateCacheKey(request: BenchmarkRequest): string {
    return JSON.stringify({
      industry: request.industry,
      metrics: request.metrics ? [...request.metrics].sort() : undefined,
      company_size: request.company_size,
      region: request.region,
      use_case_type: request.use_case_type
    });
  }

  /**
   * Get static fallback benchmarks when API fails.
   *
   * Industries without their own entry in the table use the technology
   * figures. Every returned data point is dated today, carries confidence
   * 0.6 and has no citations.
   */
  private getStaticFallbackBenchmarks(request: BenchmarkRequest): BenchmarkData[] {
    type StaticBenchmark = Pick<BenchmarkData, 'metric' | 'value' | 'unit'>;

    const technologyDefaults: StaticBenchmark[] = [
      { metric: 'automation rate', value: 60, unit: '%' },
      { metric: 'cost reduction', value: 45, unit: '%' },
      { metric: 'roi', value: 40, unit: '%' },
      { metric: 'payback period', value: 10, unit: 'months' }
    ];

    // These are conservative industry averages
    const fallbackData: Record<string, StaticBenchmark[] | undefined> = {
      financial_services: [
        { metric: 'automation rate', value: 45, unit: '%' },
        { metric: 'cost reduction', value: 30, unit: '%' },
        { metric: 'roi', value: 25, unit: '%' },
        { metric: 'payback period', value: 14, unit: 'months' }
      ],
      healthcare: [
        { metric: 'automation rate', value: 35, unit: '%' },
        { metric: 'cost reduction', value: 25, unit: '%' },
        { metric: 'roi', value: 20, unit: '%' },
        { metric: 'payback period', value: 18, unit: 'months' }
      ],
      retail: [
        { metric: 'automation rate', value: 50, unit: '%' },
        { metric: 'cost reduction', value: 35, unit: '%' },
        { metric: 'roi', value: 30, unit: '%' },
        { metric: 'payback period', value: 12, unit: 'months' }
      ],
      manufacturing: [
        { metric: 'automation rate', value: 55, unit: '%' },
        { metric: 'cost reduction', value: 40, unit: '%' },
        { metric: 'roi', value: 35, unit: '%' },
        { metric: 'payback period', value: 16, unit: 'months' }
      ],
      technology: technologyDefaults
    };

    const ownEntry = Object.prototype.hasOwnProperty.call(fallbackData, request.industry)
      ? fallbackData[request.industry]
      : undefined;
    const industryData = ownEntry ?? technologyDefaults;
    const today = new Date().toISOString().split('T')[0] ?? '';

    return industryData.map(entry => ({
      industry: request.industry,
      metric: entry.metric,
      value: entry.value,
      unit: entry.unit,
      source: 'Static Fallback (API Unavailable)',
      date: today,
      confidence: 0.6,
      citations: []
    }));
  }

  /** Normalise a caught value to an Error for logging. */
  private toError(error: unknown): Error {
    return error instanceof Error ? error : new Error(String(error));
  }

  /**
   * Clear the cache.
   */
  clearCache(): void {
    this.cache.clear();
    this.logger.debug('Benchmark cache cleared');
  }

  /**
   * Get cache statistics.
   *
   * @returns Number of cached entries and their keys (expired entries that
   *   have not been overwritten are included).
   */
  getCacheStats(): { size: number; entries: string[] } {
    return {
      size: this.cache.size,
      entries: Array.from(this.cache.keys())
    };
  }
}