Opik MCP Server

Official
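
The metrics information module below describes each of Opik's evaluation metrics: its name, description, metric type (AI-based or rule-based), typical use cases, expected parameters, and an illustrative API call.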
/**
 * Metrics information module for Opik Comet
 * Provides detailed information about Opik's evaluation metrics
 */

export interface MetricInfo {
  name: string;
  description: string;
  type: string;
  use_cases: string[];
  parameters: Record<string, string>;
  example: string;
}

// Define the metrics information
const metricsInfo: Record<string, MetricInfo> = {
  hallucination: {
    name: 'Hallucination',
    description: 'Detects unsupported or factually incorrect information generated by LLMs.',
    type: 'AI-based',
    use_cases: [
      'Fact-checking LLM outputs',
      'Ensuring responses are grounded in provided context',
      'Identifying fabricated information',
      'Quality control for knowledge-intensive applications',
    ],
    parameters: {
      answer: 'The LLM-generated text to evaluate',
      context: 'Optional reference text to check against (if provided)',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "hallucination",
  parameters: {
    answer: "Einstein was born in 1879 in Germany and developed the theory of relativity.",
    context: "Albert Einstein was born on March 14, 1879, in Ulm, Germany."
  }
});
// Returns a score between 0-1, where 0 indicates high hallucination and 1 indicates no hallucination
`,
  },
  answerrelevance: {
    name: 'AnswerRelevance',
    description: 'Evaluates how relevant an answer is to a given question.',
    type: 'AI-based',
    use_cases: [
      'Ensuring LLM responses address the user query',
      'Detecting off-topic or tangential responses',
      'Measuring answer quality for question-answering systems',
      'Filtering irrelevant content from responses',
    ],
    parameters: {
      question: 'The question or prompt given to the LLM',
      answer: 'The LLM-generated response to evaluate',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "answerrelevance",
  parameters: {
    question: "What are the main causes of climate change?",
    answer: "Climate change is primarily caused by greenhouse gas emissions from human activities such as burning fossil fuels, deforestation, and industrial processes."
  }
});
// Returns a score between 0-1, where higher values indicate greater relevance
`,
  },
  contextprecision: {
    name: 'ContextPrecision',
    description: 'Measures how precisely an answer uses the provided context without including irrelevant information.',
    type: 'AI-based',
    use_cases: [
      'Evaluating RAG system outputs',
      'Ensuring responses stay focused on relevant context',
      'Detecting when models add unnecessary information',
      'Measuring information quality in context-based responses',
    ],
    parameters: {
      answer: 'The LLM-generated response to evaluate',
      context: 'The context provided to the LLM',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "contextprecision",
  parameters: {
    answer: "The company reported a 15% increase in revenue for Q2 2023, exceeding analyst expectations.",
    context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
  }
});
// Returns a score between 0-1, where higher values indicate better precision
`,
  },
  contextrecall: {
    name: 'ContextRecall',
    description: 'Assesses how completely an answer captures the relevant information from the provided context.',
    type: 'AI-based',
    use_cases: [
      'Ensuring comprehensive use of relevant context',
      'Detecting when important information is omitted',
      'Evaluating information completeness in summaries',
      'Measuring context utilization in RAG systems',
    ],
    parameters: {
      answer: 'The LLM-generated response to evaluate',
      context: 'The context provided to the LLM',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "contextrecall",
  parameters: {
    answer: "The company reported increased revenue in Q2.",
    context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
  }
});
// Returns a score between 0-1, where higher values indicate better recall
`,
  },
  moderation: {
    name: 'Moderation',
    description: 'Detects harmful or inappropriate content in LLM outputs, including toxicity, profanity, and unsafe content.',
    type: 'AI-based',
    use_cases: [
      'Content filtering for user-facing applications',
      'Safety checks for generated content',
      'Identifying policy violations in responses',
      'Preventing harmful outputs in production systems',
    ],
    parameters: {
      text: 'The text to evaluate for harmful content',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "moderation",
  parameters: {
    text: "This is a sample text that will be evaluated for harmful content."
  }
});
// Returns a score between 0-1, where 0 indicates harmful content and 1 indicates safe content
`,
  },
  equals: {
    name: 'Equals',
    description: 'Simple exact match comparison between the answer and an expected value.',
    type: 'Rule-based',
    use_cases: [
      'Validating exact outputs for deterministic tasks',
      'Testing for specific expected responses',
      'Checking format compliance in structured outputs',
      'Verifying exact matches in critical applications',
    ],
    parameters: {
      answer: 'The text to evaluate',
      expected: 'The expected exact value',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "equals",
  parameters: {
    answer: "42",
    expected: "42"
  }
});
// Returns 1 for exact match, 0 otherwise
`,
  },
  regexmatch: {
    name: 'RegexMatch',
    description: 'Validates answers against regular expression patterns.',
    type: 'Rule-based',
    use_cases: [
      'Validating formatted outputs (emails, phone numbers, etc.)',
      'Checking for pattern compliance',
      'Extracting structured data from responses',
      'Verifying output format consistency',
    ],
    parameters: {
      answer: 'The text to evaluate',
      pattern: 'The regex pattern to match against',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "regexmatch",
  parameters: {
    answer: "user@example.com",
    pattern: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
  }
});
// Returns 1 if the pattern matches, 0 otherwise
`,
  },
  contains: {
    name: 'Contains',
    description: 'Checks if the answer contains specific substrings.',
    type: 'Rule-based',
    use_cases: [
      'Verifying key information is included in responses',
      'Checking for required elements in outputs',
      'Testing for inclusion of critical terms',
      'Basic content validation',
    ],
    parameters: {
      answer: 'The text to evaluate',
      substrings: 'Array of substrings to check for',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "contains",
  parameters: {
    answer: "The capital of France is Paris, which is known for the Eiffel Tower.",
    substrings: ["Paris", "France", "capital"]
  }
});
// Returns a score between 0-1 based on the proportion of substrings found
`,
  },
  levenshteinratio: {
    name: 'LevenshteinRatio',
    description: 'Measures string similarity using Levenshtein distance, normalized to a ratio.',
    type: 'Rule-based',
    use_cases: [
      'Fuzzy matching for approximate answers',
      'Evaluating text similarity with tolerance for minor differences',
      'Spell-check validation',
      'Measuring response closeness to expected outputs',
    ],
    parameters: {
      answer: 'The text to evaluate',
      expected: 'The expected text to compare against',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "levenshteinratio",
  parameters: {
    answer: "The capital of Frence is Paris.",
    expected: "The capital of France is Paris."
  }
});
// Returns a score between 0-1, where higher values indicate greater similarity
`,
  },
};

/**
 * Get information about a specific evaluation metric
 * @param metric The name of the metric to get information about
 * @returns Information about the specified metric or null if not found
 */
export function getMetricInfo(metric: string): MetricInfo | null {
  if (!metric) return null;

  const normalizedMetric = metric.toLowerCase();

  // Check if the metric exists in our info object
  for (const [key, info] of Object.entries(metricsInfo)) {
    if (key.toLowerCase() === normalizedMetric) {
      return info;
    }
  }

  return null;
}

/**
 * Get information about all available evaluation metrics
 * @returns Array of metric information objects
 */
export function getAllMetricsInfo(): MetricInfo[] {
  return Object.values(metricsInfo);
}