Opik MCP Server

Official
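
The metrics information module below describes each of Opik's evaluation metrics: its name, description, metric type (AI-based or rule-based), typical use cases, expected parameters, and an illustrative API call.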
/**
 * Metrics information module for Opik Comet
 * Provides detailed information about Opik's evaluation metrics
 */

export interface MetricInfo {
  name: string;
  description: string;
  type: string;
  use_cases: string[];
  parameters: Record<string, string>;
  example: string;
}

// Define the metrics information
const metricsInfo: Record<string, MetricInfo> = {
  hallucination: {
    name: 'Hallucination',
    description: 'Detects unsupported or factually incorrect information generated by LLMs.',
    type: 'AI-based',
    use_cases: [
      'Fact-checking LLM outputs',
      'Ensuring responses are grounded in provided context',
      'Identifying fabricated information',
      'Quality control for knowledge-intensive applications',
    ],
    parameters: {
      answer: 'The LLM-generated text to evaluate',
      context: 'Optional reference text to check against (if provided)',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "hallucination",
  parameters: {
    answer: "Einstein was born in 1879 in Germany and developed the theory of relativity.",
    context: "Albert Einstein was born on March 14, 1879, in Ulm, Germany."
  }
});
// Returns a score between 0-1, where 0 indicates high hallucination and 1 indicates no hallucination
`,
  },
  answerrelevance: {
    name: 'AnswerRelevance',
    description: 'Evaluates how relevant an answer is to a given question.',
    type: 'AI-based',
    use_cases: [
      'Ensuring LLM responses address the user query',
      'Detecting off-topic or tangential responses',
      'Measuring answer quality for question-answering systems',
      'Filtering irrelevant content from responses',
    ],
    parameters: {
      question: 'The question or prompt given to the LLM',
      answer: 'The LLM-generated response to evaluate',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "answerrelevance",
  parameters: {
    question: "What are the main causes of climate change?",
    answer: "Climate change is primarily caused by greenhouse gas emissions from human activities such as burning fossil fuels, deforestation, and industrial processes."
  }
});
// Returns a score between 0-1, where higher values indicate greater relevance
`,
  },
  contextprecision: {
    name: 'ContextPrecision',
    description: 'Measures how precisely an answer uses the provided context without including irrelevant information.',
    type: 'AI-based',
    use_cases: [
      'Evaluating RAG system outputs',
      'Ensuring responses stay focused on relevant context',
      'Detecting when models add unnecessary information',
      'Measuring information quality in context-based responses',
    ],
    parameters: {
      answer: 'The LLM-generated response to evaluate',
      context: 'The context provided to the LLM',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "contextprecision",
  parameters: {
    answer: "The company reported a 15% increase in revenue for Q2 2023, exceeding analyst expectations.",
    context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
  }
});
// Returns a score between 0-1, where higher values indicate better precision
`,
  },
  contextrecall: {
    name: 'ContextRecall',
    description: 'Assesses how completely an answer captures the relevant information from the provided context.',
    type: 'AI-based',
    use_cases: [
      'Ensuring comprehensive use of relevant context',
      'Detecting when important information is omitted',
      'Evaluating information completeness in summaries',
      'Measuring context utilization in RAG systems',
    ],
    parameters: {
      answer: 'The LLM-generated response to evaluate',
      context: 'The context provided to the LLM',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "contextrecall",
  parameters: {
    answer: "The company reported increased revenue in Q2.",
    context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
  }
});
// Returns a score between 0-1, where higher values indicate better recall
`,
  },
  moderation: {
    name: 'Moderation',
    description: 'Detects harmful or inappropriate content in LLM outputs, including toxicity, profanity, and unsafe content.',
    type: 'AI-based',
    use_cases: [
      'Content filtering for user-facing applications',
      'Safety checks for generated content',
      'Identifying policy violations in responses',
      'Preventing harmful outputs in production systems',
    ],
    parameters: {
      text: 'The text to evaluate for harmful content',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "moderation",
  parameters: {
    text: "This is a sample text that will be evaluated for harmful content."
  }
});
// Returns a score between 0-1, where 0 indicates harmful content and 1 indicates safe content
`,
  },
  equals: {
    name: 'Equals',
    description: 'Simple exact match comparison between the answer and an expected value.',
    type: 'Rule-based',
    use_cases: [
      'Validating exact outputs for deterministic tasks',
      'Testing for specific expected responses',
      'Checking format compliance in structured outputs',
      'Verifying exact matches in critical applications',
    ],
    parameters: {
      answer: 'The text to evaluate',
      expected: 'The expected exact value',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "equals",
  parameters: {
    answer: "42",
    expected: "42"
  }
});
// Returns 1 for exact match, 0 otherwise
`,
  },
  regexmatch: {
    name: 'RegexMatch',
    description: 'Validates answers against regular expression patterns.',
    type: 'Rule-based',
    use_cases: [
      'Validating formatted outputs (emails, phone numbers, etc.)',
      'Checking for pattern compliance',
      'Extracting structured data from responses',
      'Verifying output format consistency',
    ],
    parameters: {
      answer: 'The text to evaluate',
      pattern: 'The regex pattern to match against',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "regexmatch",
  parameters: {
    answer: "user@example.com",
    pattern: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
  }
});
// Returns 1 if the pattern matches, 0 otherwise
`,
  },
  contains: {
    name: 'Contains',
    description: 'Checks if the answer contains specific substrings.',
    type: 'Rule-based',
    use_cases: [
      'Verifying key information is included in responses',
      'Checking for required elements in outputs',
      'Testing for inclusion of critical terms',
      'Basic content validation',
    ],
    parameters: {
      answer: 'The text to evaluate',
      substrings: 'Array of substrings to check for',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "contains",
  parameters: {
    answer: "The capital of France is Paris, which is known for the Eiffel Tower.",
    substrings: ["Paris", "France", "capital"]
  }
});
// Returns a score between 0-1 based on the proportion of substrings found
`,
  },
  levenshteinratio: {
    name: 'LevenshteinRatio',
    description: 'Measures string similarity using Levenshtein distance, normalized to a ratio.',
    type: 'Rule-based',
    use_cases: [
      'Fuzzy matching for approximate answers',
      'Evaluating text similarity with tolerance for minor differences',
      'Spell-check validation',
      'Measuring response closeness to expected outputs',
    ],
    parameters: {
      answer: 'The text to evaluate',
      expected: 'The expected text to compare against',
    },
    example: `
// Example API call
const result = await opik.evaluateMetric({
  metric: "levenshteinratio",
  parameters: {
    answer: "The capital of Frence is Paris.",
    expected: "The capital of France is Paris."
  }
});
// Returns a score between 0-1, where higher values indicate greater similarity
`,
  },
};

/**
 * Get information about a specific evaluation metric
 * @param metric The name of the metric to get information about
 * @returns Information about the specified metric or null if not found
 */
export function getMetricInfo(metric: string): MetricInfo | null {
  if (!metric) return null;

  const normalizedMetric = metric.toLowerCase();

  // Check if the metric exists in our info object
  for (const [key, info] of Object.entries(metricsInfo)) {
    if (key.toLowerCase() === normalizedMetric) {
      return info;
    }
  }

  return null;
}

/**
 * Get information about all available evaluation metrics
 * @returns Array of metric information objects
 */
export function getAllMetricsInfo(): MetricInfo[] {
  return Object.values(metricsInfo);
}