Opik MCP Server
Official MCP server by comet-ml
src/utils
/**
* Metrics information module for Comet Opik
* Provides detailed information about Opik's evaluation metrics
*/
export interface MetricInfo {
name: string;
description: string;
type: string;
use_cases: string[];
parameters: Record<string, string>;
example: string;
}
// Define the metrics information, keyed by the lowercase metric identifier used in the example evaluateMetric calls
const metricsInfo: Record<string, MetricInfo> = {
hallucination: {
name: 'Hallucination',
description: 'Detects unsupported or factually incorrect information generated by LLMs.',
type: 'AI-based',
use_cases: [
'Fact-checking LLM outputs',
'Ensuring responses are grounded in provided context',
'Identifying fabricated information',
'Quality control for knowledge-intensive applications',
],
parameters: {
answer: 'The LLM-generated text to evaluate',
context: 'Optional reference text to check the answer against',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "hallucination",
parameters: {
answer: "Einstein was born in 1879 in Germany and developed the theory of relativity.",
context: "Albert Einstein was born on March 14, 1879, in Ulm, Germany."
}
});
// Returns a score between 0 and 1, where 0 indicates high hallucination and 1 indicates no hallucination
`,
},
answerrelevance: {
name: 'AnswerRelevance',
description: 'Evaluates how relevant an answer is to a given question.',
type: 'AI-based',
use_cases: [
'Ensuring LLM responses address the user query',
'Detecting off-topic or tangential responses',
'Measuring answer quality for question-answering systems',
'Filtering irrelevant content from responses',
],
parameters: {
question: 'The question or prompt given to the LLM',
answer: 'The LLM-generated response to evaluate',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "answerrelevance",
parameters: {
question: "What are the main causes of climate change?",
answer: "Climate change is primarily caused by greenhouse gas emissions from human activities such as burning fossil fuels, deforestation, and industrial processes."
}
});
// Returns a score between 0 and 1, where higher values indicate greater relevance
`,
},
contextprecision: {
name: 'ContextPrecision',
description:
'Measures how precisely an answer uses the provided context without including irrelevant information.',
type: 'AI-based',
use_cases: [
'Evaluating RAG system outputs',
'Ensuring responses stay focused on relevant context',
'Detecting when models add unnecessary information',
'Measuring information quality in context-based responses',
],
parameters: {
answer: 'The LLM-generated response to evaluate',
context: 'The context provided to the LLM',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "contextprecision",
parameters: {
answer: "The company reported a 15% increase in revenue for Q2 2023, exceeding analyst expectations.",
context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
}
});
// Returns a score between 0 and 1, where higher values indicate better precision
`,
},
contextrecall: {
name: 'ContextRecall',
description:
'Assesses how completely an answer captures the relevant information from the provided context.',
type: 'AI-based',
use_cases: [
'Ensuring comprehensive use of relevant context',
'Detecting when important information is omitted',
'Evaluating information completeness in summaries',
'Measuring context utilization in RAG systems',
],
parameters: {
answer: 'The LLM-generated response to evaluate',
context: 'The context provided to the LLM',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "contextrecall",
parameters: {
answer: "The company reported increased revenue in Q2.",
context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
}
});
// Returns a score between 0 and 1, where higher values indicate better recall
`,
},
moderation: {
name: 'Moderation',
description:
'Detects harmful or inappropriate content in LLM outputs, including toxicity, profanity, and unsafe content.',
type: 'AI-based',
use_cases: [
'Content filtering for user-facing applications',
'Safety checks for generated content',
'Identifying policy violations in responses',
'Preventing harmful outputs in production systems',
],
parameters: {
text: 'The text to evaluate for harmful content',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "moderation",
parameters: {
text: "This is a sample text that will be evaluated for harmful content."
}
});
// Returns a score between 0 and 1, where 0 indicates harmful content and 1 indicates safe content
`,
},
equals: {
name: 'Equals',
description: 'Simple exact match comparison between the answer and an expected value.',
type: 'Rule-based',
use_cases: [
'Validating exact outputs for deterministic tasks',
'Testing for specific expected responses',
'Checking format compliance in structured outputs',
'Verifying exact matches in critical applications',
],
parameters: {
answer: 'The text to evaluate',
expected: 'The expected exact value',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "equals",
parameters: {
answer: "42",
expected: "42"
}
});
// Returns 1 for exact match, 0 otherwise
`,
},
regexmatch: {
name: 'RegexMatch',
description: 'Validates answers against regular expression patterns.',
type: 'Rule-based',
use_cases: [
'Validating formatted outputs (emails, phone numbers, etc.)',
'Checking for pattern compliance',
'Extracting structured data from responses',
'Verifying output format consistency',
],
parameters: {
answer: 'The text to evaluate',
pattern: 'The regex pattern to match against',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "regexmatch",
parameters: {
answer: "user@example.com",
pattern: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
}
});
// Returns 1 if the pattern matches, 0 otherwise
`,
},
contains: {
name: 'Contains',
description: 'Checks if the answer contains specific substrings.',
type: 'Rule-based',
use_cases: [
'Verifying key information is included in responses',
'Checking for required elements in outputs',
'Testing for inclusion of critical terms',
'Basic content validation',
],
parameters: {
answer: 'The text to evaluate',
substrings: 'Array of substrings to check for',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "contains",
parameters: {
answer: "The capital of France is Paris, which is known for the Eiffel Tower.",
substrings: ["Paris", "France", "capital"]
}
});
// Returns a score between 0 and 1 based on the proportion of substrings found
`,
},
levenshteinratio: {
name: 'LevenshteinRatio',
description: 'Measures string similarity using Levenshtein distance, normalized to a ratio.',
type: 'Rule-based',
use_cases: [
'Fuzzy matching for approximate answers',
'Evaluating text similarity with tolerance for minor differences',
'Spell-check validation',
'Measuring response closeness to expected outputs',
],
parameters: {
answer: 'The text to evaluate',
expected: 'The expected text to compare against',
},
example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "levenshteinratio",
parameters: {
answer: "The capital of Frence is Paris.",
expected: "The capital of France is Paris."
}
});
// Returns a score between 0 and 1, where higher values indicate greater similarity
`,
},
};
/**
* Get information about a specific evaluation metric
* @param metric The name of the metric to get information about
* @returns Information about the specified metric or null if not found
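* @example
* // Minimal usage sketch: lookup is case-insensitive and unknown metric
* // names return null (the metric name and logging shown are illustrative).
* const info = getMetricInfo('Hallucination');
* if (info) {
*   console.log(`${info.name} (${info.type}): ${info.description}`);
* }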
*/
export function getMetricInfo(metric: string): MetricInfo | null {
if (!metric) return null;
const normalizedMetric = metric.toLowerCase();
// Check if the metric exists in our info object
for (const [key, info] of Object.entries(metricsInfo)) {
if (key.toLowerCase() === normalizedMetric) {
return info;
}
}
return null;
}
/**
* Get information about all available evaluation metrics
* @returns Array of metric information objects
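* @example
* // Minimal usage sketch: print each metric's name and type (illustrative only).
* for (const info of getAllMetricsInfo()) {
*   console.log(`${info.name} [${info.type}]`);
* }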
*/
export function getAllMetricsInfo(): MetricInfo[] {
return Object.values(metricsInfo);
}
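// Illustrative sketch only: the accessors above can be combined to render a
// plain-text catalogue of the available metrics, e.g. for a tool response.
// The helper name `formatMetricsList` is hypothetical and not part of the module's documented API.
export function formatMetricsList(): string {
  return getAllMetricsInfo()
    .map(info => `- ${info.name} [${info.type}]: ${info.description}`)
    .join('\n');
}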