/**
* Test Search Script - Enterprise Quality Test Suite
*
* Test the search functionality with strict relevance validation.
* Run: npm run test-search
*
* Quality Standards Enforced:
* 1. Semantic Validation - Keywords must appear in results
* 2. Category Correctness - Results must match requested category
* 3. Relevance Scoring - Minimum score thresholds enforced
* 4. Performance Baselines - Per-type timing thresholds
* 5. Negative Test Strictness - Zero results expected for invalid inputs
* 6. Regression Detection - Baseline expectations for deterministic checks
* 7. Golden Document IDs - Specific documents expected in results (Phase 3)
* 8. MRR (Mean Reciprocal Rank) - Ranking quality metrics (Phase 3)
* 9. Baseline Snapshots - Score comparison across runs (Phase 3)
*/
import { searchDocuments, getCategoryStats } from "../src/db/queries.js";
import { initializeDatabase, isDatabaseIndexed } from "../src/db/database.js";
import { DocCategory } from "../src/types.js";
import { formatSearchResults, formatCategories } from "../src/utils/formatter.js";
import { expandQueryToKeywords } from "../src/utils/intent.js";
import * as fs from "fs";
import * as path from "path";
// Test case types:
// POSITIVE: Valid queries that should return relevant results
// NEGATIVE: Invalid/malformed queries that should handle gracefully
// EDGE_CASE: Boundary conditions and unusual inputs
// FALSE_POSITIVE: Queries that might return incorrect results
// EXCEPTION: Queries that might cause errors
// PERFORMANCE: Large/complex queries for performance testing
// VIBE_CODING: Natural language queries from AI assistants (Copilot, Cursor, Agentforce)
// SEMANTIC: Pure natural language queries that require LLM-powered expansion
interface TestCase {
query: string;
category: DocCategory | undefined;
type: 'POSITIVE' | 'NEGATIVE' | 'EDGE_CASE' | 'FALSE_POSITIVE' | 'EXCEPTION' | 'PERFORMANCE' | 'VIBE_CODING' | 'SEMANTIC';
description: string;
expectedBehavior: string;
expectedKeywords?: string[]; // Keywords that MUST appear in the top result title or content
fallbackKeywords?: string[][]; // Alternative keyword sets (OR logic) - pass if ANY set matches
minScore?: number; // Minimum relevance score for top result
maxResults?: number; // For NEGATIVE tests: maximum allowed results (0 = none)
maxDurationMs?: number; // Performance threshold for this specific test
expectedCategory?: DocCategory; // Verify result belongs to this category
expectedSource?: string; // Golden Document ID: exact source filename expected in top 3 results
expectedSourcePosition?: number; // Position where golden doc should appear (1, 2, or 3). Default: anywhere in top 3
useExpansion?: boolean; // For SEMANTIC tests: use query expansion before search
}
// Performance thresholds by test type (in milliseconds)
const PERFORMANCE_THRESHOLDS = {
POSITIVE: 500,
NEGATIVE: 1000,
EDGE_CASE: 2000,
FALSE_POSITIVE: 500,
EXCEPTION: 1500,
PERFORMANCE: 5000,
VIBE_CODING: 500,
SEMANTIC: 1000, // Allow more time for expansion + search
};
// Minimum acceptable score for "relevant" results
const MIN_RELEVANCE_SCORE = 5.0;
// Baseline configuration (Phase 3)
const BASELINE_FILE = path.join(process.cwd(), 'data', 'test-baseline.json');
const BASELINE_DEGRADATION_THRESHOLD = 0.05; // Alert if pass rate drops by more than 5%
const BASELINE_MRR_DEGRADATION_THRESHOLD = 0.10; // Alert if MRR drops by more than 10%
// Baseline snapshot interface
interface BaselineSnapshot {
timestamp: string;
totalTests: number;
passedTests: number;
passRate: number;
mrr: number;
avgDuration: number;
byType: Record<string, { passed: number; total: number; mrr: number }>;
goldenDocHits: number;
goldenDocTotal: number;
}
// Extended test result for MRR tracking
interface ExtendedTestResult {
type: string;
query: string;
description: string;
passed: boolean;
duration: number;
resultCount: number;
error?: string;
reciprocalRank: number; // 1/position of correct doc (0 if not found)
topResultSource?: string; // For golden doc tracking
goldenDocFound?: boolean; // Did we find the expected source?
goldenDocPosition?: number; // Position where golden doc was found
}
const TEST_QUERIES: TestCase[] = [
// ==================== POSITIVE TEST CASES ====================
// Basic valid queries that should return relevant results
{
query: "How to create an Apex trigger",
category: undefined,
type: 'POSITIVE',
description: "Basic Apex trigger query",
expectedBehavior: "Should return Apex trigger documentation",
expectedKeywords: ["Trigger", "Apex", "Before", "After"]
},
{
query: "REST API authentication OAuth",
category: DocCategory.APIS,
type: 'POSITIVE',
description: "API authentication with category filter",
expectedBehavior: "Should return OAuth documentation from APIs category",
expectedKeywords: ["OAuth", "Authentication", "Token", "REST"],
expectedCategory: DocCategory.APIS
},
{
query: "Lightning Web Component wire decorator",
category: DocCategory.CORE_PLATFORM,
type: 'POSITIVE',
description: "LWC specific query",
expectedBehavior: "Should return LWC wire service documentation",
expectedKeywords: ["Wire", "LWC", "Lightning", "Decorator"]
},
{
query: "SOQL query limits governor",
category: undefined,
type: 'POSITIVE',
description: "SOQL limits query",
expectedBehavior: "Should return governor limits documentation",
expectedKeywords: ["SOQL", "Limit", "Governor", "Query"]
},
{
query: "Bulk API 2.0 job",
category: DocCategory.APIS,
type: 'POSITIVE',
description: "Bulk API specific query",
expectedBehavior: "Should return Bulk API 2.0 documentation",
expectedKeywords: ["Bulk", "API", "Job"],
expectedCategory: DocCategory.APIS
},
{
query: "Platform events streaming",
category: undefined,
type: 'POSITIVE',
description: "Platform events query",
expectedBehavior: "Should return platform events documentation",
expectedKeywords: ["Platform", "Event", "Streaming", "Publish"]
},
{
query: "sharing rules record access",
category: DocCategory.SECURITY,
type: 'POSITIVE',
description: "Security sharing rules query",
expectedBehavior: "Should return sharing rules documentation",
expectedKeywords: ["Sharing", "Rule", "Access", "Record"],
expectedCategory: DocCategory.SECURITY
},
{
query: "deploy metadata CI/CD",
category: DocCategory.DEV_TOOLS,
type: 'POSITIVE',
description: "Deployment tools query",
expectedBehavior: "Should return CI/CD deployment documentation",
expectedKeywords: ["Deploy", "Metadata", "SFDX", "CLI"],
expectedCategory: DocCategory.DEV_TOOLS
},
{
query: "Visualforce page controller extension",
category: DocCategory.CORE_PLATFORM,
type: 'POSITIVE',
description: "Visualforce controller query",
expectedBehavior: "Should return Visualforce controller documentation",
expectedKeywords: ["Visualforce", "Controller", "Extension", "Page"]
},
{
query: "Apex unit test best practices",
category: undefined,
type: 'POSITIVE',
description: "Testing best practices query",
expectedBehavior: "Should return Apex testing documentation",
expectedKeywords: ["Test", "Apex", "Assert", "Coverage"]
},
{
query: "Flow builder automation",
category: DocCategory.CORE_PLATFORM,
type: 'POSITIVE',
description: "Flow automation query",
expectedBehavior: "Should return Flow Builder documentation",
expectedKeywords: ["Flow", "Builder", "Automation", "Process"]
},
{
query: "Connected App OAuth 2.0 JWT bearer",
category: DocCategory.SECURITY,
type: 'POSITIVE',
description: "JWT authentication query",
expectedBehavior: "Should return JWT bearer flow documentation",
expectedKeywords: ["JWT", "Bearer", "OAuth", "Connected"],
expectedCategory: DocCategory.SECURITY
},
{
query: "Aura component event handling",
category: DocCategory.CORE_PLATFORM,
type: 'POSITIVE',
description: "Aura events query",
expectedBehavior: "Should return Aura component events documentation",
expectedKeywords: ["Aura", "Component", "Event", "Handler"]
},
{
query: "Metadata API deploy retrieve",
category: DocCategory.APIS,
type: 'POSITIVE',
description: "Metadata API operations query",
expectedBehavior: "Should return Metadata API documentation",
expectedKeywords: ["Metadata", "Deploy", "Retrieve", "API"],
expectedCategory: DocCategory.APIS
},
{
query: "Einstein Analytics dashboard",
category: undefined,
type: 'POSITIVE',
description: "Analytics query",
expectedBehavior: "Should return Einstein Analytics documentation",
expectedKeywords: ["Einstein", "Analytics", "Dashboard"]
},
// ==================== NEGATIVE TEST CASES ====================
// Invalid inputs that should return ZERO results or handle gracefully
{
query: "",
category: undefined,
type: 'NEGATIVE',
description: "Empty query string",
expectedBehavior: "Must return zero results",
maxResults: 0
},
{
query: " ",
category: undefined,
type: 'NEGATIVE',
description: "Whitespace-only query",
expectedBehavior: "Must return zero results",
maxResults: 0
},
{
query: "xyznonexistentterm123456",
category: undefined,
type: 'NEGATIVE',
description: "Completely non-existent term",
expectedBehavior: "Must return zero results",
maxResults: 0
},
{
query: "asdfghjklqwertyuiopzxcvbnm",
category: undefined,
type: 'NEGATIVE',
description: "Random gibberish string",
expectedBehavior: "Must return zero results",
maxResults: 0
},
{
query: "12345678901234567890",
category: undefined,
type: 'NEGATIVE',
description: "Numeric-only query",
expectedBehavior: "Must return zero results",
maxResults: 0
},
{
query: "Apex trigger",
category: DocCategory.RELEASE_NOTES,
type: 'NEGATIVE',
description: "Valid query with wrong category filter",
expectedBehavior: "Results must be from release_notes category only",
expectedCategory: DocCategory.RELEASE_NOTES
},
{
query: "!@#$%^&*()",
category: undefined,
type: 'NEGATIVE',
description: "Special characters only",
expectedBehavior: "Should handle gracefully, prefer zero results",
maxResults: 3
},
// ==================== EDGE CASE TEST CASES ====================
// Boundary conditions and unusual but valid inputs
{
query: "a",
category: undefined,
type: 'EDGE_CASE',
description: "Single character query",
expectedBehavior: "Should handle gracefully, may return no results"
},
{
query: "Apex ".repeat(100),
category: undefined,
type: 'EDGE_CASE',
description: "Very long repeated query (100x 'Apex ')",
expectedBehavior: "Should handle without memory issues"
},
{
query: "APEX TRIGGER UPPERCASE",
category: undefined,
type: 'EDGE_CASE',
description: "All uppercase query",
expectedBehavior: "Should be case-insensitive and return results"
},
{
query: "apex trigger lowercase",
category: undefined,
type: 'EDGE_CASE',
description: "All lowercase query",
expectedBehavior: "Should be case-insensitive and return results"
},
{
query: "ApEx TrIgGeR mIxEdCaSe",
category: undefined,
type: 'EDGE_CASE',
description: "Mixed case query",
expectedBehavior: "Should be case-insensitive and return results"
},
{
query: "trigger apex reverse order",
category: undefined,
type: 'EDGE_CASE',
description: "Words in reverse order",
expectedBehavior: "Should still match relevant documents"
},
{
query: "Apex\ttrigger\nwith\ttabs",
category: undefined,
type: 'EDGE_CASE',
description: "Query with tabs and newlines",
expectedBehavior: "Should normalize whitespace and search"
},
{
query: " Apex trigger with extra spaces ",
category: undefined,
type: 'EDGE_CASE',
description: "Multiple spaces between words",
expectedBehavior: "Should normalize and return results"
},
{
query: "Apex-trigger-with-hyphens",
category: undefined,
type: 'EDGE_CASE',
description: "Hyphenated query",
expectedBehavior: "Should handle hyphens as separators"
},
{
query: "Apex_trigger_with_underscores",
category: undefined,
type: 'EDGE_CASE',
description: "Underscored query",
expectedBehavior: "Should handle underscores appropriately"
},
{
query: "Apex.trigger.with.dots",
category: undefined,
type: 'EDGE_CASE',
description: "Dotted query (like method calls)",
expectedBehavior: "Should handle dots appropriately"
},
{
query: '"Apex trigger" exact phrase',
category: undefined,
type: 'EDGE_CASE',
description: "Quoted exact phrase",
expectedBehavior: "Should handle quoted strings"
},
{
query: "Apex OR trigger OR SOQL",
category: undefined,
type: 'EDGE_CASE',
description: "Query with OR operators",
expectedBehavior: "Should handle boolean-like operators"
},
{
query: "Apex AND trigger NOT error",
category: undefined,
type: 'EDGE_CASE',
description: "Query with AND/NOT operators",
expectedBehavior: "Should handle boolean-like operators"
},
{
query: "trigger*",
category: undefined,
type: 'EDGE_CASE',
description: "Wildcard query",
expectedBehavior: "Should handle wildcard characters"
},
{
query: "API Π²Π΅ΡΡΠΈΡ ΡΠ΅Π»ΠΈΠ·",
category: undefined,
type: 'EDGE_CASE',
description: "Non-ASCII characters (Cyrillic)",
expectedBehavior: "Should handle unicode gracefully"
},
{
query: "API ζ₯ζ¬θͺγγΉγ",
category: undefined,
type: 'EDGE_CASE',
description: "Non-ASCII characters (Japanese)",
expectedBehavior: "Should handle unicode gracefully"
},
{
query: "Apex trigger π emoji",
category: undefined,
type: 'EDGE_CASE',
description: "Query with emoji",
expectedBehavior: "Should handle emoji gracefully"
},
// ==================== FALSE POSITIVE TEST CASES ====================
// Queries that might return incorrect or misleading results
{
query: "trigger",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Ambiguous single word - could be Apex trigger or Flow trigger",
expectedBehavior: "Might return mixed results from different contexts"
},
{
query: "record",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Very common word with multiple meanings",
expectedBehavior: "Should not overwhelm with too many generic results"
},
{
query: "object",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Ambiguous - could be custom object, standard object, JS object",
expectedBehavior: "Might return mixed contexts"
},
{
query: "field",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Common word in many contexts",
expectedBehavior: "Should prioritize based on relevance"
},
{
query: "security",
category: DocCategory.APIS,
type: 'FALSE_POSITIVE',
description: "Security term filtered to API category",
expectedBehavior: "Should return API security, not general security"
},
{
query: "Java",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Java mentioned in Apex context (not Java language)",
expectedBehavior: "Should return Apex-related Java mentions"
},
{
query: "delete",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Common DML keyword",
expectedBehavior: "Should return relevant DML documentation"
},
{
query: "test",
category: undefined,
type: 'FALSE_POSITIVE',
description: "Very common word in testing context",
expectedBehavior: "Should return test-related documentation"
},
// ==================== EXCEPTION TEST CASES ====================
// Queries that might cause errors or exceptions
{
query: "SELECT * FROM Account",
category: undefined,
type: 'EXCEPTION',
description: "SQL injection attempt",
expectedBehavior: "Should treat as regular text, not execute"
},
{
query: "'; DROP TABLE documents; --",
category: undefined,
type: 'EXCEPTION',
description: "SQL injection with DROP TABLE",
expectedBehavior: "Should treat as regular text safely"
},
{
query: "<script>alert('xss')</script>",
category: undefined,
type: 'EXCEPTION',
description: "XSS injection attempt",
expectedBehavior: "Should treat as regular text safely"
},
{
query: "${process.env.SECRET}",
category: undefined,
type: 'EXCEPTION',
description: "Environment variable injection",
expectedBehavior: "Should treat as literal string"
},
{
query: "{{constructor.constructor('return this')()}}",
category: undefined,
type: 'EXCEPTION',
description: "Prototype pollution attempt",
expectedBehavior: "Should treat as literal string"
},
{
query: "../../../etc/passwd",
category: undefined,
type: 'EXCEPTION',
description: "Path traversal attempt",
expectedBehavior: "Should treat as literal string"
},
{
query: "null",
category: undefined,
type: 'EXCEPTION',
description: "Literal 'null' string",
expectedBehavior: "Should search for 'null' keyword documentation"
},
{
query: "undefined",
category: undefined,
type: 'EXCEPTION',
description: "Literal 'undefined' string",
expectedBehavior: "Should search for 'undefined' related documentation"
},
{
query: "NaN",
category: undefined,
type: 'EXCEPTION',
description: "NaN string query",
expectedBehavior: "Should handle as regular string"
},
{
query: "\\x00\\x01\\x02",
category: undefined,
type: 'EXCEPTION',
description: "Null byte injection",
expectedBehavior: "Should handle escape sequences safely"
},
// ==================== PERFORMANCE TEST CASES ====================
// Complex queries for performance testing
{
query: "Apex trigger SOQL query DML operation governor limits batch processing async future queueable schedulable platform events change data capture streaming API REST callout HTTP authentication OAuth security sharing rules",
category: undefined,
type: 'PERFORMANCE',
description: "Very long query with many terms",
expectedBehavior: "Should complete in reasonable time"
},
{
query: "a b c d e f g h i j k l m n o p q r s t u v w x y z",
category: undefined,
type: 'PERFORMANCE',
description: "Many single-character terms",
expectedBehavior: "Should handle without performance issues"
},
{
query: "Winter Spring Summer Fall 2020 2021 2022 2023 2024 2025 release notes new features",
category: DocCategory.RELEASE_NOTES,
type: 'PERFORMANCE',
description: "Release notes query with multiple versions",
expectedBehavior: "Should return relevant release notes"
},
// ==================== VIBE CODING TEST CASES ====================
// Natural language queries from AI coding assistants (Copilot, Cursor, Windsurf, Agentforce)
// These simulate real developer prompts when building with AI assistance
// --- Conversational/Natural Language Queries ---
{
query: "how do I make an API call from Apex",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Casual how-to question",
expectedBehavior: "Should return HTTP callout documentation",
expectedKeywords: ["HTTP", "Callout", "Rest", "Request"]
},
{
query: "what's the best way to handle errors in LWC",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Best practice question with contraction",
expectedBehavior: "Should return LWC error handling docs",
expectedKeywords: ["Error", "LWC", "Lightning Web Component"]
},
{
query: "can you show me how to write a batch apex class",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Polite request format",
expectedBehavior: "Should return Batch Apex documentation",
expectedKeywords: ["Batch", "Apex", "Jobs"]
},
{
query: "I need to query related records in SOQL",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: First person need statement",
expectedBehavior: "Should return SOQL relationship queries",
expectedKeywords: ["SOQL", "Relationship", "Query"]
},
{
query: "explain the difference between before and after triggers",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Explanation request",
expectedBehavior: "Should return trigger context documentation",
expectedKeywords: ["Trigger", "Context", "Before", "After"]
},
// --- Error Message Debugging Queries ---
{
query: "System.LimitException: Too many SOQL queries: 101",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Pasting actual error message",
expectedBehavior: "Should return governor limits documentation",
expectedKeywords: ["Governor", "Limits", "SOQL", "Query"]
},
{
query: "MIXED_DML_OPERATION error when inserting user",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: DML error debugging",
expectedBehavior: "Should return mixed DML documentation",
expectedKeywords: ["DML", "Exception", "Setup", "Apex"],
fallbackKeywords: [["DML", "User", "Insert"], ["System", "runAs", "Setup"]]
},
{
query: "FIELD_CUSTOM_VALIDATION_EXCEPTION",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Validation rule error",
expectedBehavior: "Should return validation rules documentation",
expectedKeywords: ["Validation", "Exception", "Rule"]
},
{
query: "getting UNABLE_TO_LOCK_ROW error",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Record locking error",
expectedBehavior: "Should return record locking documentation",
expectedKeywords: ["Lock", "Row", "Record"]
},
{
query: "why am I getting apex heap size too large",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Debugging heap size",
expectedBehavior: "Should return heap size limits documentation",
expectedKeywords: ["Heap", "Size", "Limit"]
},
// --- Code Generation Prompts ---
{
query: "write a trigger to prevent duplicate accounts",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Code generation request",
expectedBehavior: "Should return trigger and duplicate management docs",
expectedKeywords: ["Trigger", "Duplicate"]
},
{
query: "create an LWC component that shows a data table",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: LWC component generation",
expectedBehavior: "Should return lightning-datatable documentation",
expectedKeywords: ["Datatable", "LWC", "Lightning"]
},
{
query: "generate apex test class with 100% coverage",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Test generation request",
expectedBehavior: "Should return Apex testing documentation",
expectedKeywords: ["Test", "Apex", "Coverage", "Assert"]
},
{
query: "build a REST API endpoint in Apex",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: REST service generation",
expectedBehavior: "Should return Apex REST services documentation",
expectedKeywords: ["REST", "Apex", "HttpGet", "HttpPost"],
fallbackKeywords: [["RestResource", "Apex"], ["API", "Endpoint", "Apex"]]
},
{
query: "make a flow that sends email when opportunity closes",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Flow automation request",
expectedBehavior: "Should return Flow email action documentation",
expectedKeywords: ["Flow", "Email", "Action"],
fallbackKeywords: [["Flow", "Trigger", "Automation"], ["Send", "Email", "Notification"]]
},
// --- Agentforce/AI Agent Queries ---
{
query: "how to create a custom copilot action in Salesforce",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Agentforce copilot action",
expectedBehavior: "Should return Agentforce/copilot documentation",
expectedKeywords: ["Agent", "Action", "Copilot"],
fallbackKeywords: [["Einstein", "AI", "Custom"], ["Agentforce", "Action"]]
},
{
query: "Einstein bot intent training best practices",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Einstein bot NLU",
expectedBehavior: "Should return Einstein bot documentation",
expectedKeywords: ["Einstein", "Bot", "Intent"],
fallbackKeywords: [["Bot", "NLU", "Training"], ["Chatbot", "Dialog"]]
},
{
query: "configure agent actions for service cloud",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Service Cloud agent setup",
expectedBehavior: "Should return agent configuration docs",
expectedKeywords: ["Agent", "Service", "Action"],
fallbackKeywords: [["Service Cloud", "Configuration"], ["Omnichannel", "Agent"]]
},
{
query: "prompt template for generative AI in Salesforce",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Prompt engineering query",
expectedBehavior: "Should return prompt template documentation",
expectedKeywords: ["Prompt", "Template", "AI"],
fallbackKeywords: [["Generative", "Einstein"], ["LLM", "AI"]]
},
{
query: "Einstein GPT trust layer data masking",
category: DocCategory.SECURITY,
type: 'VIBE_CODING',
description: "Vibe: AI security/trust query",
expectedBehavior: "Should return Einstein GPT trust documentation",
expectedKeywords: ["Einstein", "Trust", "Security"],
fallbackKeywords: [["Data", "Masking", "Privacy"], ["GPT", "AI", "Security"]],
expectedCategory: DocCategory.SECURITY
},
// --- Modern Development Stack Queries ---
{
query: "setup SFDX project with GitHub actions CI/CD",
category: DocCategory.DEV_TOOLS,
type: 'VIBE_CODING',
description: "Vibe: DevOps setup query",
expectedBehavior: "Should return SFDX and CI/CD documentation",
expectedKeywords: ["SFDX", "CLI", "Deploy"],
fallbackKeywords: [["CI", "CD", "Deployment"], ["Salesforce", "DX", "Project"]],
expectedCategory: DocCategory.DEV_TOOLS
},
{
query: "convert metadata API to source format",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Source tracking migration",
expectedBehavior: "Should return source format documentation",
expectedKeywords: ["Metadata", "Source", "Format"],
fallbackKeywords: [["SFDX", "Convert"], ["Package", "Manifest"]]
},
{
query: "use sf CLI to deploy to production",
category: DocCategory.DEV_TOOLS,
type: 'VIBE_CODING',
description: "Vibe: CLI deployment",
expectedBehavior: "Should return sf CLI deployment documentation",
expectedKeywords: ["CLI", "Deploy", "Production"],
fallbackKeywords: [["SFDX", "Deploy"], ["Salesforce", "CLI", "Command"]],
expectedCategory: DocCategory.DEV_TOOLS
},
{
query: "scratch org definition file settings",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Scratch org configuration",
expectedBehavior: "Should return scratch org documentation",
expectedKeywords: ["Scratch", "Org", "Definition"],
fallbackKeywords: [["SFDX", "Org", "Create"], ["Dev Hub", "Scratch"]]
},
{
query: "debug apex in VS Code with replay debugger",
category: DocCategory.DEV_TOOLS,
type: 'VIBE_CODING',
description: "Vibe: IDE debugging",
expectedBehavior: "Should return Apex debugger documentation",
expectedKeywords: ["Debug", "Apex", "Replay"],
fallbackKeywords: [["Debugger", "Log", "Apex"], ["VS Code", "Debug"]],
expectedCategory: DocCategory.DEV_TOOLS
},
// --- Integration & API Queries ---
{
query: "connect Salesforce to external system webhook",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Outbound integration",
expectedBehavior: "Should return outbound messaging/callout docs",
expectedKeywords: ["Outbound", "Callout", "Webhook"],
fallbackKeywords: [["HTTP", "Integration", "External"], ["REST", "Callout", "API"]]
},
{
query: "authenticate third party app with Salesforce SSO",
category: DocCategory.SECURITY,
type: 'VIBE_CODING',
description: "Vibe: SSO integration",
expectedBehavior: "Should return SSO/SAML documentation",
expectedKeywords: ["SSO", "SAML", "Authentication"],
fallbackKeywords: [["Single Sign", "Identity"], ["OAuth", "Identity", "Provider"]],
expectedCategory: DocCategory.SECURITY
},
{
query: "sync data between Salesforce and external database",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Data synchronization",
expectedBehavior: "Should return integration patterns documentation",
expectedKeywords: ["Integration", "Sync", "Data"],
fallbackKeywords: [["External", "Object", "Connect"], ["Heroku", "Connect"], ["API", "Data", "Load"]]
},
{
query: "consume external REST API with named credentials",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Named credentials usage",
expectedBehavior: "Should return named credentials documentation",
expectedKeywords: ["Named Credential", "Callout", "External"],
fallbackKeywords: [["HTTP", "Callout", "Authentication"], ["REST", "API", "Credential"]]
},
{
query: "MuleSoft Anypoint connector for Salesforce",
category: DocCategory.INTEGRATION,
type: 'VIBE_CODING',
description: "Vibe: MuleSoft integration",
expectedBehavior: "Should return MuleSoft/integration docs",
expectedKeywords: ["MuleSoft", "Integration", "Connector"],
fallbackKeywords: [["Anypoint", "API"], ["Integration", "Pattern", "Salesforce"]],
expectedCategory: DocCategory.INTEGRATION
},
// --- Troubleshooting Queries (How developers actually ask) ---
{
query: "my trigger is firing twice why",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Debugging recursive trigger",
expectedBehavior: "Should return trigger recursion documentation",
expectedKeywords: ["Trigger", "Recursive", "Context"],
fallbackKeywords: [["Trigger", "Handler", "Pattern"], ["Before", "After", "Trigger"]]
},
{
query: "lwc component not rendering on record page",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: LWC visibility debugging",
expectedBehavior: "Should return LWC target config documentation",
expectedKeywords: ["LWC", "Target", "Record Page"],
fallbackKeywords: [["Lightning", "Component", "Visibility"], ["Meta", "XML", "Target"]]
},
{
query: "flow is not starting automatically help",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Flow activation issue",
expectedBehavior: "Should return Flow trigger documentation",
expectedKeywords: ["Flow", "Trigger", "Automate"],
fallbackKeywords: [["Flow", "Record", "Start"], ["Flow", "Active", "Version"]]
},
{
query: "permission denied when accessing custom object",
category: DocCategory.SECURITY,
type: 'VIBE_CODING',
description: "Vibe: Permission debugging",
expectedBehavior: "Should return object permissions documentation",
expectedKeywords: ["Permission", "Object", "Access"],
fallbackKeywords: [["Profile", "Permission Set"], ["CRUD", "Security", "Object"]],
expectedCategory: DocCategory.SECURITY
},
{
query: "test class failing in production but works in sandbox",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Environment-specific issues",
expectedBehavior: "Should return test isolation documentation",
expectedKeywords: ["Test", "SeeAllData", "Isolation"],
fallbackKeywords: [["Test", "Data", "Setup"], ["Sandbox", "Production", "Deploy"]]
},
// --- Quick Reference/Cheatsheet Queries ---
{
query: "list all SOQL date literals",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Quick reference request",
expectedBehavior: "Should return SOQL date literal documentation",
expectedKeywords: ["SOQL", "Date", "Literal"],
fallbackKeywords: [["TODAY", "LAST_WEEK"], ["Date", "Function", "SOQL"]]
},
{
query: "apex string methods cheatsheet",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Method reference",
expectedBehavior: "Should return String class documentation",
expectedKeywords: ["String", "Method", "Apex"],
fallbackKeywords: [["String", "Class", "Reference"], ["Substring", "Split", "String"]]
},
{
query: "all governor limits in one place",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Limits reference",
expectedBehavior: "Should return governor limits documentation",
expectedKeywords: ["Governor", "Limit", "Apex"],
fallbackKeywords: [["SOQL", "Limit", "100"], ["DML", "Limit", "150"]]
},
{
query: "formula field functions reference",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Formula functions list",
expectedBehavior: "Should return formula operators documentation",
expectedKeywords: ["Formula", "Function", "Operator"],
fallbackKeywords: [["IF", "ISBLANK", "Formula"], ["TEXT", "VALUE", "Formula"]]
},
{
query: "standard objects API names",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Object reference",
expectedBehavior: "Should return standard objects documentation",
expectedKeywords: ["Object", "API", "Standard"],
fallbackKeywords: [["Account", "Contact", "Object"], ["sObject", "Describe", "API"]]
},
// --- Typos and Misspellings (Real-world vibe coding) ---
{
query: "apec trigger",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'apec' instead of 'apex'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Trigger"],
fallbackKeywords: [["Apex", "Before"], ["DML", "Operation"]]
},
{
query: "lightening web component",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Common misspelling 'lightening'",
expectedBehavior: "Should still find LWC documentation",
expectedKeywords: ["Lightning", "Component", "Web"],
fallbackKeywords: [["LWC", "HTML"], ["JavaScript", "Component"]]
},
{
query: "visualfroce page",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo in Visualforce",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Page"],
fallbackKeywords: [["Visualforce", "Controller"], ["VF", "Page"]]
},
{
query: "soql querry syntax",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Double letter typo",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["SOQL", "Syntax"],
fallbackKeywords: [["Query", "SELECT"], ["SOQL", "Statement"]]
},
{
query: "salesfroce api",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Salesforce misspelled",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["API"],
fallbackKeywords: [["REST", "API"], ["SOAP", "API"], ["Salesforce", "API"]]
},
// --- Incomplete/Partial Queries (How people actually type) ---
{
query: "apex trigger before",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Incomplete query",
expectedBehavior: "Should return before trigger documentation",
expectedKeywords: ["Trigger", "Before", "Apex"],
fallbackKeywords: [["Insert", "Update", "Trigger"], ["Context", "Variable"]]
},
{
query: "lwc wire",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Abbreviated query",
expectedBehavior: "Should return wire service documentation",
expectedKeywords: ["Wire", "LWC"],
fallbackKeywords: [["@wire", "Apex"], ["Lightning", "Data"]]
},
{
query: "flow screen",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Minimal query",
expectedBehavior: "Should return screen flow documentation",
expectedKeywords: ["Flow", "Screen"],
fallbackKeywords: [["Screen", "Component"], ["Flow", "Builder"]]
},
{
query: "async apex",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Short async query",
expectedBehavior: "Should return async Apex documentation",
expectedKeywords: ["Async", "Apex"],
fallbackKeywords: [["Future", "Queueable"], ["Batch", "Schedulable"]]
},
// --- Stack Overflow Style Queries ---
{
query: "how to bulkify apex trigger for loop",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: SO-style optimization question",
expectedBehavior: "Should return bulk trigger patterns",
expectedKeywords: ["Bulk", "Trigger", "Apex"],
fallbackKeywords: [["Collection", "List", "Trigger"], ["Best Practice", "Pattern"]]
},
{
query: "difference between @wire and imperative apex in LWC",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Comparison question",
expectedBehavior: "Should return LWC data access documentation",
expectedKeywords: ["Wire", "Imperative", "LWC"],
fallbackKeywords: [["Apex", "Controller", "LWC"], ["Data", "Lightning", "Component"]]
},
{
query: "when to use platform event vs change data capture",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Decision-making question",
expectedBehavior: "Should return event comparison documentation",
expectedKeywords: ["Platform Event", "Change Data Capture"],
fallbackKeywords: [["CDC", "Event"], ["Streaming", "Event", "Publish"]]
},
{
query: "pros and cons of using process builder vs flow",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Technology comparison",
expectedBehavior: "Should return automation tool documentation",
expectedKeywords: ["Process Builder", "Flow"],
fallbackKeywords: [["Automation", "Flow", "Builder"], ["Workflow", "Process", "Automate"]]
},
// --- Additional Typo Tests (Real-world misspellings) ---
{
query: "govenor limits apex",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'govenor' instead of 'governor'",
expectedBehavior: "Should handle typo and return limits documentation",
expectedKeywords: ["Limit", "Apex"],
fallbackKeywords: [["Governor", "SOQL"], ["DML", "Limit"]]
},
{
query: "lightining component",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'lightining' missing 'n'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Component"],
fallbackKeywords: [["Lightning", "Aura"], ["LWC", "Component"]]
},
{
query: "batchable apex class",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'batchable' instead of 'Batchable'",
expectedBehavior: "Should return Batch Apex documentation",
expectedKeywords: ["Batch", "Apex", "Batchable"],
fallbackKeywords: [["Database.Batchable", "execute"], ["Start", "Finish", "Batch"]]
},
{
query: "queueable job apax",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'apax' instead of 'apex'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Queueable", "Job"],
fallbackKeywords: [["Async", "Queue"], ["System.enqueueJob"]]
},
{
query: "REST callot authentication",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'callot' instead of 'callout'",
expectedBehavior: "Should handle typo and return callout docs",
expectedKeywords: ["Authentication", "REST"],
fallbackKeywords: [["Callout", "HTTP"], ["Named Credential", "Auth"]]
},
{
query: "aura compnent attributes",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'compnent' missing 'o'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Aura", "Attribute"],
fallbackKeywords: [["Component", "Attribute"], ["aura:attribute"]]
},
{
query: "visulaforce controller",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'visulaforce' letter swap",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Controller"],
fallbackKeywords: [["Visualforce", "Page"], ["Extension", "Controller"]]
},
{
query: "scheduable apex",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'scheduable' instead of 'schedulable'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Apex"],
fallbackKeywords: [["Schedulable", "Schedule"], ["Cron", "Job"]]
},
{
query: "salsforce dx cli",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'salsforce' instead of 'salesforce'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["CLI", "DX"],
fallbackKeywords: [["SFDX", "Command"], ["Salesforce", "CLI"]]
},
{
query: "metdata api deploy",
category: undefined,
type: 'VIBE_CODING',
description: "Vibe: Typo - 'metdata' instead of 'metadata'",
expectedBehavior: "Should handle typo gracefully",
expectedKeywords: ["Deploy", "API"],
fallbackKeywords: [["Metadata", "Deploy"], ["Retrieve", "Package"]]
},
// ==================== CATEGORY COVERAGE TEST CASES ====================
// Ensure each DocCategory has at least 3 tests
// --- CLOUDS Category Tests ---
{
query: "Sales Cloud opportunity stages",
category: DocCategory.CLOUDS,
type: 'POSITIVE',
description: "Sales Cloud specific query",
expectedBehavior: "Should return Sales Cloud opportunity documentation",
expectedKeywords: ["Opportunity", "Stage", "Sales"],
expectedCategory: DocCategory.CLOUDS
},
{
query: "Service Cloud case management",
category: DocCategory.CLOUDS,
type: 'POSITIVE',
description: "Service Cloud case query",
expectedBehavior: "Should return Service Cloud case documentation",
expectedKeywords: ["Case", "Service", "Support"],
fallbackKeywords: [["Service Cloud", "Queue"], ["Omnichannel", "Routing"]],
expectedCategory: DocCategory.CLOUDS
},
{
query: "Experience Cloud community builder",
category: DocCategory.CLOUDS,
type: 'POSITIVE',
description: "Experience Cloud communities query",
expectedBehavior: "Should return Experience Cloud documentation",
expectedKeywords: ["Experience", "Community", "Portal"],
fallbackKeywords: [["Site", "Builder", "Community"], ["Guest User", "Portal"]],
expectedCategory: DocCategory.CLOUDS
},
{
query: "Marketing Cloud email journey",
category: DocCategory.CLOUDS,
type: 'POSITIVE',
description: "Marketing Cloud journey query",
expectedBehavior: "Should return Marketing Cloud journey documentation",
expectedKeywords: ["Marketing", "Journey", "Email"],
fallbackKeywords: [["Email Studio", "Automation"], ["Marketing Cloud", "Campaign"]],
expectedCategory: DocCategory.CLOUDS
},
// --- BEST_PRACTICES Category Tests ---
{
query: "Apex design patterns singleton",
category: DocCategory.BEST_PRACTICES,
type: 'POSITIVE',
description: "Design patterns query",
expectedBehavior: "Should return Apex design patterns documentation",
expectedKeywords: ["Pattern", "Design", "Apex"],
fallbackKeywords: [["Singleton", "Factory"], ["Best Practice", "Architecture"]],
expectedCategory: DocCategory.BEST_PRACTICES
},
{
query: "trigger handler pattern best practice",
category: DocCategory.BEST_PRACTICES,
type: 'POSITIVE',
description: "Trigger handler pattern query",
expectedBehavior: "Should return trigger handler patterns documentation",
expectedKeywords: ["Trigger", "Handler", "Pattern"],
fallbackKeywords: [["Best Practice", "Trigger"], ["Framework", "Handler"]],
expectedCategory: DocCategory.BEST_PRACTICES
},
{
query: "security review checklist app exchange",
category: DocCategory.BEST_PRACTICES,
type: 'POSITIVE',
description: "AppExchange security review query",
expectedBehavior: "Should return security review documentation",
expectedKeywords: ["Security", "Review", "AppExchange"],
fallbackKeywords: [["Checklist", "ISV"], ["Partner", "Security"]],
expectedCategory: DocCategory.BEST_PRACTICES
},
{
query: "large data volume best practices",
category: DocCategory.BEST_PRACTICES,
type: 'POSITIVE',
description: "LDV optimization query",
expectedBehavior: "Should return large data volume documentation",
expectedKeywords: ["Data", "Volume", "Performance"],
fallbackKeywords: [["Index", "Skinny Table"], ["Query", "Optimization"]],
expectedCategory: DocCategory.BEST_PRACTICES
},
// --- INTEGRATION Category Tests ---
{
query: "Salesforce Connect external objects",
category: DocCategory.INTEGRATION,
type: 'POSITIVE',
description: "Salesforce Connect query",
expectedBehavior: "Should return Salesforce Connect documentation",
expectedKeywords: ["External", "Object", "Connect"],
fallbackKeywords: [["OData", "External"], ["Salesforce Connect", "Adapter"]],
expectedCategory: DocCategory.INTEGRATION
},
{
query: "Heroku Connect data sync",
category: DocCategory.INTEGRATION,
type: 'POSITIVE',
description: "Heroku Connect integration query",
expectedBehavior: "Should return Heroku Connect documentation",
expectedKeywords: ["Heroku", "Connect", "Sync"],
fallbackKeywords: [["Heroku", "Postgres"], ["Data", "Sync", "Integration"]],
expectedCategory: DocCategory.INTEGRATION
},
{
query: "composite API batch requests",
category: DocCategory.INTEGRATION,
type: 'POSITIVE',
description: "Composite API query",
expectedBehavior: "Should return Composite API documentation",
expectedKeywords: ["Composite", "API", "Batch"],
fallbackKeywords: [["REST", "Composite"], ["Subrequest", "API"]],
expectedCategory: DocCategory.INTEGRATION
},
{
query: "canvas app signed request",
category: DocCategory.INTEGRATION,
type: 'POSITIVE',
description: "Canvas app integration query",
expectedBehavior: "Should return Canvas app documentation",
expectedKeywords: ["Canvas", "App", "Signed"],
fallbackKeywords: [["Canvas", "Integration"], ["Force.com Canvas", "Request"]],
expectedCategory: DocCategory.INTEGRATION
},
// ==================== SEMANTIC TEST CASES ====================
// Pure natural language queries that need LLM-powered expansion
// These test the expand_search_query β semantic_search_docs pipeline
{
query: "How do I make code run when someone updates a record?",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Natural language trigger question",
expectedBehavior: "Should expand to trigger terms and find trigger documentation",
expectedKeywords: ["Trigger"],
fallbackKeywords: [["Before", "Update"], ["After", "Insert"], ["DML", "Operation"]],
useExpansion: true
},
{
query: "I want to call an external API from Salesforce",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Integration intent",
expectedBehavior: "Should expand to callout terms and find HTTP/callout documentation",
expectedKeywords: ["Callout", "HTTP"],
fallbackKeywords: [["Named Credential"], ["REST", "API"], ["External", "Service"]],
useExpansion: true
},
{
query: "How to run code in the background for large data processing?",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Async processing question",
expectedBehavior: "Should expand to batch/queueable terms",
expectedKeywords: ["Batch", "Apex"],
fallbackKeywords: [["Queueable"], ["Schedulable"], ["Async", "Job"]],
useExpansion: true
},
{
query: "Who can see which records in my org?",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Security/sharing question",
expectedBehavior: "Should expand to sharing/security terms",
expectedKeywords: ["Sharing"],
fallbackKeywords: [["Permission"], ["OWD", "Organization"], ["Security", "Access"]],
useExpansion: true
},
{
query: "How do I write tests for my Apex code?",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Testing question",
expectedBehavior: "Should expand to test class terms",
expectedKeywords: ["Test"],
fallbackKeywords: [["@isTest"], ["Assert"], ["Test Class"]],
useExpansion: true
},
{
query: "Build a custom UI component for a record page",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: LWC creation question",
expectedBehavior: "Should expand to LWC/component terms",
expectedKeywords: ["Lightning", "Component"],
fallbackKeywords: [["LWC"], ["Wire"], ["Record Page"]],
useExpansion: true
},
{
query: "Get data from the database in Salesforce",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Query question",
expectedBehavior: "Should expand to SOQL terms",
expectedKeywords: ["SOQL"],
fallbackKeywords: [["Query"], ["SELECT"], ["Database"]],
useExpansion: true
},
{
query: "Something went wrong and I need to show an error message",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Error handling question",
expectedBehavior: "Should expand to exception/error terms",
expectedKeywords: ["Exception"],
fallbackKeywords: [["Error"], ["addError"], ["Try", "Catch"]],
useExpansion: true
},
{
query: "Save multiple records at once efficiently",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: DML operations question",
expectedBehavior: "Should expand to DML/bulk terms",
expectedKeywords: ["DML"],
fallbackKeywords: [["Insert"], ["Update"], ["Database", "Operation"]],
useExpansion: true
},
{
query: "Move my code from sandbox to production",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Deployment question",
expectedBehavior: "Should expand to deployment terms",
expectedKeywords: ["Deploy"],
fallbackKeywords: [["Metadata"], ["Change Set"], ["SFDX", "CLI"]],
useExpansion: true
},
{
query: "Send an email notification to a user",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Email question",
expectedBehavior: "Should expand to email terms",
expectedKeywords: ["Email"],
fallbackKeywords: [["Messaging"], ["Notification"], ["Template"]],
useExpansion: true
},
{
query: "Why is my code hitting limits and failing?",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Governor limits question",
expectedBehavior: "Should expand to limits terms",
expectedKeywords: ["Limit"],
fallbackKeywords: [["Governor"], ["SOQL", "Limit"], ["Bulkif"], ["Apex"], ["Exception"], ["Error"]],
useExpansion: true
},
{
query: "Schedule a job to run every night at midnight",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Scheduled jobs question",
expectedBehavior: "Should expand to schedulable terms",
expectedKeywords: ["Schedule"],
fallbackKeywords: [["Schedulable"], ["Cron"], ["Job"], ["Async"], ["Batch"], ["Apex"]],
useExpansion: true
},
{
query: "Store credentials securely for API calls",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Secure credentials question",
expectedBehavior: "Should expand to named credential terms",
expectedKeywords: ["Credential"],
fallbackKeywords: [["Named Credential"], ["Authentication"], ["Callout"]],
useExpansion: true
},
{
query: "React to changes in real-time across the org",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Platform events question",
expectedBehavior: "Should expand to platform events terms",
expectedKeywords: ["Event"],
fallbackKeywords: [["Platform Event"], ["Change Data Capture"], ["Streaming"], ["API"], ["Subscribe"], ["Publish"]],
useExpansion: true
},
{
query: "Create a button that does something when clicked",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: UI action question",
expectedBehavior: "Should expand to Lightning action terms",
expectedKeywords: ["Action"],
fallbackKeywords: [["Quick Action"], ["Button"], ["Lightning"]],
useExpansion: true
},
{
query: "Automatically update related records when parent changes",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Automation cascade question",
expectedBehavior: "Should expand to trigger/flow terms",
expectedKeywords: ["Trigger"],
fallbackKeywords: [["Flow"], ["Process Builder"], ["Automation"], ["Update"], ["Apex"], ["Record"]],
useExpansion: true
},
{
query: "Show or hide fields based on user input",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Dynamic forms question",
expectedBehavior: "Should expand to dynamic forms/visibility terms",
expectedKeywords: ["Dynamic"],
fallbackKeywords: [["Visibility"], ["Conditional"], ["Field"]],
useExpansion: true
},
{
query: "What happens when two users edit the same record?",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Locking/concurrency question",
expectedBehavior: "Should expand to locking terms",
expectedKeywords: ["Lock"],
fallbackKeywords: [["FOR UPDATE"], ["Concurrent"], ["Optimistic"], ["Record"], ["User"], ["Edit"], ["Database"]],
useExpansion: true
},
{
query: "Convert a Lead to an Account and Contact",
category: undefined,
type: 'SEMANTIC',
description: "Semantic: Lead conversion question",
expectedBehavior: "Should expand to lead conversion terms",
expectedKeywords: ["Lead", "Convert"],
fallbackKeywords: [["Lead Conversion"], ["Database.LeadConvert"], ["Account", "Contact"]],
useExpansion: true
},
// ==================== DIVERSITY EXPANSION (200+ Compact Tests) ====================
...getDiversityTests()
];
// Helper to generate diversity tests
function getDiversityTests(): TestCase[] {
// Format: [Query, Category | null, Type, ExpectedKeyword1, ExpectedKeyword2?]
const simpleScenarios: [string, DocCategory | undefined, string, string, string?][] = [
// --- 1. APEX LANGUAGE FEATURES (20) ---
["Apex List class methods", DocCategory.CORE_PLATFORM, "POSITIVE", "List"],
["Apex Map methods and usage", DocCategory.CORE_PLATFORM, "POSITIVE", "Map"],
["Apex Set class reference", DocCategory.CORE_PLATFORM, "POSITIVE", "Set"],
["Apex String class methods", DocCategory.CORE_PLATFORM, "POSITIVE", "String"],
["Apex Date class methods", DocCategory.CORE_PLATFORM, "POSITIVE", "Date"],
["Apex Datetime class methods", DocCategory.CORE_PLATFORM, "POSITIVE", "Datetime"],
["Apex Math class functions", DocCategory.CORE_PLATFORM, "POSITIVE", "Math"],
["Apex System class methods", DocCategory.CORE_PLATFORM, "POSITIVE", "System"],
["Apex JSON serialization", DocCategory.CORE_PLATFORM, "POSITIVE", "JSON"],
["Apex UserInfo class", DocCategory.CORE_PLATFORM, "POSITIVE", "UserInfo"],
["Apex Limits class usage", DocCategory.CORE_PLATFORM, "POSITIVE", "Limits"],
["Apex switch statement syntax", DocCategory.CORE_PLATFORM, "POSITIVE", "Switch"],
["Apex enum definition", DocCategory.CORE_PLATFORM, "POSITIVE", "Enum"],
["Apex interface definition", DocCategory.CORE_PLATFORM, "POSITIVE", "Interface"],
["Apex abstract class", DocCategory.CORE_PLATFORM, "POSITIVE", "Abstract"],
["Apex virtual class", DocCategory.CORE_PLATFORM, "POSITIVE", "Virtual"],
["Apex exception handling try catch", DocCategory.CORE_PLATFORM, "POSITIVE", "Exception"],
["Apex custom exception", DocCategory.CORE_PLATFORM, "POSITIVE", "Exception"],
["Apex debug log logging", DocCategory.CORE_PLATFORM, "POSITIVE", "Debug"],
["Apex constants definition", DocCategory.CORE_PLATFORM, "POSITIVE", "Final"],
// --- 2. SOQL & DATA (20) ---
["SOQL select statement", DocCategory.CORE_PLATFORM, "POSITIVE", "SELECT"],
["SOQL where clause", DocCategory.CORE_PLATFORM, "POSITIVE", "WHERE"],
["SOQL order by clause", DocCategory.CORE_PLATFORM, "POSITIVE", "ORDER"],
["SOQL limit clause", DocCategory.CORE_PLATFORM, "POSITIVE", "LIMIT"],
["SOQL offset clause", DocCategory.CORE_PLATFORM, "POSITIVE", "OFFSET"],
["SOQL group by clause", DocCategory.CORE_PLATFORM, "POSITIVE", "GROUP"],
["SOQL having clause", DocCategory.CORE_PLATFORM, "POSITIVE", "HAVING"],
["SOQL date functions", DocCategory.CORE_PLATFORM, "POSITIVE", "Date"],
["SOQL aggregate functions", DocCategory.CORE_PLATFORM, "POSITIVE", "Aggregate"],
["SOQL child-to-parent relationship", DocCategory.CORE_PLATFORM, "POSITIVE", "Relationship"],
["SOQL parent-to-child relationship", DocCategory.CORE_PLATFORM, "POSITIVE", "Relationship"],
["SOQL polymorphism", DocCategory.CORE_PLATFORM, "POSITIVE", "Type"],
["SOQL for update locking", DocCategory.CORE_PLATFORM, "POSITIVE", "Lock"],
["SOSL find syntax", DocCategory.CORE_PLATFORM, "POSITIVE", "FIND"],
["SOSL returning clause", DocCategory.CORE_PLATFORM, "POSITIVE", "RETURNING"],
["SOSL search group", DocCategory.CORE_PLATFORM, "POSITIVE", "Group"],
["Database.query method", DocCategory.CORE_PLATFORM, "POSITIVE", "Database"],
["Database.search method", DocCategory.CORE_PLATFORM, "POSITIVE", "Database"],
["Dynamic SOQL", DocCategory.CORE_PLATFORM, "POSITIVE", "Dynamic"],
["Dynamic SOSL", DocCategory.CORE_PLATFORM, "POSITIVE", "Dynamic"],
// --- 3. DML & TRANSACTION (15) ---
["Database.insert method", DocCategory.CORE_PLATFORM, "POSITIVE", "Insert"],
["Database.update method", DocCategory.CORE_PLATFORM, "POSITIVE", "Update"],
["Database.upsert method", DocCategory.CORE_PLATFORM, "POSITIVE", "Upsert"],
["Database.delete method", DocCategory.CORE_PLATFORM, "POSITIVE", "Delete"],
["Database.undelete method", DocCategory.CORE_PLATFORM, "POSITIVE", "Undelete"],
["Database.merge method", DocCategory.CORE_PLATFORM, "POSITIVE", "Merge"],
["Database.convertLead method", DocCategory.CORE_PLATFORM, "POSITIVE", "Convert"],
["Database.rollback usage", DocCategory.CORE_PLATFORM, "POSITIVE", "Rollback"],
["Database.setSavepoint", DocCategory.CORE_PLATFORM, "POSITIVE", "Savepoint"],
["DML options allOrNone", DocCategory.CORE_PLATFORM, "POSITIVE", "DML"],
["Transaction control", DocCategory.CORE_PLATFORM, "POSITIVE", "Transaction"],
["Apex lock record", DocCategory.CORE_PLATFORM, "POSITIVE", "Lock"],
["Database.emptyRecycleBin", DocCategory.CORE_PLATFORM, "POSITIVE", "Recycle"],
["Database.getDeleted", DocCategory.CORE_PLATFORM, "POSITIVE", "Deleted"],
["Database.getUpdated", DocCategory.CORE_PLATFORM, "POSITIVE", "Updated"],
// --- 4. ASYNC APEX (15) ---
["Future method annotation", DocCategory.CORE_PLATFORM, "POSITIVE", "Future"],
["Queueable interface", DocCategory.CORE_PLATFORM, "POSITIVE", "Queueable"],
["System.enqueueJob", DocCategory.CORE_PLATFORM, "POSITIVE", "enqueueJob"],
["Batch Apex start method", DocCategory.CORE_PLATFORM, "POSITIVE", "Batch"],
["Batch Apex execute method", DocCategory.CORE_PLATFORM, "POSITIVE", "Batch"],
["Batch Apex finish method", DocCategory.CORE_PLATFORM, "POSITIVE", "Batch"],
["Database.executeBatch", DocCategory.CORE_PLATFORM, "POSITIVE", "executeBatch"],
["Schedulable interface", DocCategory.CORE_PLATFORM, "POSITIVE", "Schedulable"],
["System.schedule method", DocCategory.CORE_PLATFORM, "POSITIVE", "Schedule"],
["Cron expression syntax", DocCategory.CORE_PLATFORM, "POSITIVE", "Cron"],
["System.abortJob", DocCategory.CORE_PLATFORM, "POSITIVE", "abortJob"],
["AsyncApexJob object", DocCategory.CORE_PLATFORM, "POSITIVE", "AsyncApexJob"],
["FlexQueue usage", DocCategory.CORE_PLATFORM, "POSITIVE", "Queue"],
["Batch stateful interface", DocCategory.CORE_PLATFORM, "POSITIVE", "Stateful"],
["Callout from batch", DocCategory.CORE_PLATFORM, "POSITIVE", "Callout"],
// --- 5. LWC & UI (25) ---
["LightningElement base class", DocCategory.CORE_PLATFORM, "POSITIVE", "LightningElement"],
["LWC @api decorator", DocCategory.CORE_PLATFORM, "POSITIVE", "api"],
["LWC @track decorator", DocCategory.CORE_PLATFORM, "POSITIVE", "track"],
["LWC @wire decorator", DocCategory.CORE_PLATFORM, "POSITIVE", "wire"],
["LWC lifecycle hooks connectedCallback", DocCategory.CORE_PLATFORM, "POSITIVE", "connectedCallback"],
["LWC lifecycle hooks renderedCallback", DocCategory.CORE_PLATFORM, "POSITIVE", "renderedCallback"],
["LWC lifecycle hooks disconnectedCallback", DocCategory.CORE_PLATFORM, "POSITIVE", "disconnectedCallback"],
["LWC event dispatching", DocCategory.CORE_PLATFORM, "POSITIVE", "Event"],
["LWC component communication", DocCategory.CORE_PLATFORM, "POSITIVE", "Communication"],
["LWC pubsub pattern", DocCategory.CORE_PLATFORM, "POSITIVE", "Pubsub"],
["LWC Lightning Message Service", DocCategory.CORE_PLATFORM, "POSITIVE", "Message"],
["LWC lightning-datatable", DocCategory.CORE_PLATFORM, "POSITIVE", "Datatable"],
["LWC lightning-record-form", DocCategory.CORE_PLATFORM, "POSITIVE", "Record"],
["LWC lightning-input", DocCategory.CORE_PLATFORM, "POSITIVE", "Input"],
["LWC lightning-button", DocCategory.CORE_PLATFORM, "POSITIVE", "Button"],
["LWC toast notification", DocCategory.CORE_PLATFORM, "POSITIVE", "Toast"],
["LWC navigation service", DocCategory.CORE_PLATFORM, "POSITIVE", "Navigation"],
["LWC getRecord wire adapter", DocCategory.CORE_PLATFORM, "POSITIVE", "getRecord"],
["LWC getObjectInfo wire adapter", DocCategory.CORE_PLATFORM, "POSITIVE", "getObjectInfo"],
["LWC getListUi wire adapter", DocCategory.CORE_PLATFORM, "POSITIVE", "getListUi"],
["LWC refreshApex", DocCategory.CORE_PLATFORM, "POSITIVE", "refreshApex"],
["LWC styling hooks", DocCategory.CORE_PLATFORM, "POSITIVE", "Style"],
["LWC SLDS classes", DocCategory.CORE_PLATFORM, "POSITIVE", "SLDS"],
["LWC testing jest", DocCategory.CORE_PLATFORM, "POSITIVE", "Test"],
["LWC security locker service", DocCategory.CORE_PLATFORM, "POSITIVE", "Locker"],
// --- 6. APIs (20) ---
["REST API resources", DocCategory.APIS, "POSITIVE", "REST"],
["REST API versions", DocCategory.APIS, "POSITIVE", "Version"],
["REST API query endpoint", DocCategory.APIS, "POSITIVE", "Query"],
["REST API sobjects endpoint", DocCategory.APIS, "POSITIVE", "sObject"],
["REST API composite resources", DocCategory.APIS, "POSITIVE", "Composite"],
["SOAP API create call", DocCategory.APIS, "POSITIVE", "create"],
["SOAP API login call", DocCategory.APIS, "POSITIVE", "login"],
["SOAP API partner WSDL", DocCategory.APIS, "POSITIVE", "WSDL"],
["SOAP API enterprise WSDL", DocCategory.APIS, "POSITIVE", "WSDL"],
["Bulk API 2.0 create job", DocCategory.APIS, "POSITIVE", "Bulk"],
["Bulk API 2.0 upload data", DocCategory.APIS, "POSITIVE", "Data"],
["Bulk API 2.0 get status", DocCategory.APIS, "POSITIVE", "Status"],
["Metadata API deploy", DocCategory.APIS, "POSITIVE", "Deploy"],
["Metadata API retrieve", DocCategory.APIS, "POSITIVE", "Retrieve"],
["Metadata API types", DocCategory.APIS, "POSITIVE", "Metadata"],
["Tooling API usage", DocCategory.APIS, "POSITIVE", "Tooling"],
["Tooling API query", DocCategory.APIS, "POSITIVE", "Query"],
["Streaming API push topics", DocCategory.APIS, "POSITIVE", "PushTopic"],
["Streaming API Bayeux", DocCategory.APIS, "POSITIVE", "Bayeux"],
["User Interface API", DocCategory.APIS, "POSITIVE", "Interface"],
// --- 7. SECURITY (15) ---
["Sharing rules configuration", DocCategory.SECURITY, "POSITIVE", "Sharing"],
["Organization wide defaults OWD", DocCategory.SECURITY, "POSITIVE", "OWD"],
["Role hierarchy setup", DocCategory.SECURITY, "POSITIVE", "Role"],
["Field level security FLS", DocCategory.SECURITY, "POSITIVE", "Field"],
["Profiles and permissions", DocCategory.SECURITY, "POSITIVE", "Profile"],
["Permission sets assignment", DocCategory.SECURITY, "POSITIVE", "Permission"],
["Permission set group", DocCategory.SECURITY, "POSITIVE", "Group"],
["Session security settings", DocCategory.SECURITY, "POSITIVE", "Session"],
["Login IP ranges", DocCategory.SECURITY, "POSITIVE", "IP"],
["Password policies", DocCategory.SECURITY, "POSITIVE", "Password"],
["Event monitoring shield", DocCategory.SECURITY, "POSITIVE", "Event"],
["Platform encryption shield", DocCategory.SECURITY, "POSITIVE", "Encryption"],
["Transaction security policies", DocCategory.SECURITY, "POSITIVE", "Transaction"],
["Certificate and keys", DocCategory.SECURITY, "POSITIVE", "Certificate"],
["Named credentials authentication", DocCategory.SECURITY, "POSITIVE", "Credential"],
// --- 8. DEVOPS & TOOLS (15) ---
["SFDX CLI create project", DocCategory.DEV_TOOLS, "POSITIVE", "Project"],
["SFDX CLI create scratch org", DocCategory.DEV_TOOLS, "POSITIVE", "Scratch"],
["SFDX CLI push source", DocCategory.DEV_TOOLS, "POSITIVE", "Push"],
["SFDX CLI pull source", DocCategory.DEV_TOOLS, "POSITIVE", "Pull"],
["SFDX CLI package create", DocCategory.DEV_TOOLS, "POSITIVE", "Package"],
["SFDX CLI package version", DocCategory.DEV_TOOLS, "POSITIVE", "Version"],
["SFDX CLI install package", DocCategory.DEV_TOOLS, "POSITIVE", "Install"],
["Force.com migration tool", DocCategory.DEV_TOOLS, "POSITIVE", "Migration"],
["Ant migration tool", DocCategory.DEV_TOOLS, "POSITIVE", "Ant"],
["VS Code extensions", DocCategory.DEV_TOOLS, "POSITIVE", "Extension"],
["Developmemt model", DocCategory.DEV_TOOLS, "POSITIVE", "Model"],
["Application lifecycle management", DocCategory.DEV_TOOLS, "POSITIVE", "Lifecycle"],
["Unlocked packages", DocCategory.DEV_TOOLS, "POSITIVE", "Package"],
["Managed packages", DocCategory.DEV_TOOLS, "POSITIVE", "Managed"],
["Second generation packaging", DocCategory.DEV_TOOLS, "POSITIVE", "Packaging"],
// --- 9. SEMANTIC / VIBE (30) ---
// Diverse natural language queries
["how to handle null pointer exception in apex", undefined, "SEMANTIC", "Exception"],
["limit number of records returned by soql", undefined, "SEMANTIC", "LIMIT"],
["sort list of objects by field value", undefined, "SEMANTIC", "Sort"],
["display toast message in lwc", undefined, "SEMANTIC", "Toast"],
["navigate to record page in lwc", undefined, "SEMANTIC", "Navigation"],
["pass data from parent to child lwc", undefined, "SEMANTIC", "Communication"],
["get current user id in apex", undefined, "SEMANTIC", "UserInfo"],
["check if user has permission to edit field", undefined, "SEMANTIC", "Permission"],
["run code after record is saved to database", undefined, "SEMANTIC", "Trigger"],
["prevent record deletion with trigger", undefined, "SEMANTIC", "Trigger"],
["make http callout from trigger", undefined, "SEMANTIC", "Callout"],
["parse json response from api", undefined, "SEMANTIC", "JSON"],
["schedule apex class to run hourly", undefined, "SEMANTIC", "Schedule"],
["process 1 million records in apex", undefined, "SEMANTIC", "Batch"],
["send email with attachment apex", undefined, "SEMANTIC", "Email"],
["create pdf from visualforce page", undefined, "SEMANTIC", "PDF"],
["connect lwc to apex controller", undefined, "SEMANTIC", "Controller"],
["debug apex code in vs code", undefined, "SEMANTIC", "Debug"],
["deploy code to production org", undefined, "SEMANTIC", "Deploy"],
["retrieve metadata from org", undefined, "SEMANTIC", "Retrieve"],
["query child records in soql", undefined, "SEMANTIC", "Relationship"],
["query parent fields in soql", undefined, "SEMANTIC", "Relationship"],
["search for text in all fields", undefined, "SEMANTIC", "SOSL"],
["handle dml errors partial success", undefined, "SEMANTIC", "DML"],
["lock record for update", undefined, "SEMANTIC", "Lock"],
["share record with specific user", undefined, "SEMANTIC", "Share"],
["encrypt sensitive fields", undefined, "SEMANTIC", "Encryption"],
["monitor login history", undefined, "SEMANTIC", "Login"],
["create dashboard component", undefined, "SEMANTIC", "Dashboard"],
["customize salesforce mobile app", undefined, "SEMANTIC", "Mobile"],
// --- 10. TYPO / NEGATIVE (30) ---
["apx triger syntax", undefined, "VIBE_CODING", "Trigger"],
["lighning web compnent", undefined, "VIBE_CODING", "Lightning"],
["visualforc page", undefined, "VIBE_CODING", "Visualforce"],
["soql slect statment", undefined, "VIBE_CODING", "SELECT"],
["sosl fnd clause", undefined, "VIBE_CODING", "FIND"],
["dml insery record", undefined, "VIBE_CODING", "Insert"],
["btch apex clss", undefined, "VIBE_CODING", "Batch"],
["schedulble interface", undefined, "VIBE_CODING", "Schedulable"],
["furture method apex", undefined, "VIBE_CODING", "Future"],
["queuable apex job", undefined, "VIBE_CODING", "Queueable"],
["systm.debug log", undefined, "VIBE_CODING", "Debug"],
["jason serialize apex", undefined, "VIBE_CODING", "JSON"],
["http reqest callout", undefined, "VIBE_CODING", "HTTP"],
["rest resouce api", undefined, "VIBE_CODING", "REST"],
["soap wsl document", undefined, "VIBE_CODING", "SOAP"],
["bulk api job statuss", undefined, "VIBE_CODING", "Bulk"],
["metadta deploy", undefined, "VIBE_CODING", "Metadata"],
["sfdx forcce source", undefined, "VIBE_CODING", "SFDX"],
["shring rule setup", undefined, "VIBE_CODING", "Sharing"],
["permision set assign", undefined, "VIBE_CODING", "Permission"],
["profle settings", undefined, "VIBE_CODING", "Profile"],
["owd defalts", undefined, "VIBE_CODING", "OWD"],
["fls securty", undefined, "VIBE_CODING", "FLS"],
["viewstate limit vf", undefined, "VIBE_CODING", "ViewState"],
["cpu timeout limt", undefined, "VIBE_CODING", "Limit"],
["heap size errorr", undefined, "VIBE_CODING", "Heap"],
["too many soql querys", undefined, "VIBE_CODING", "SOQL"],
["mixed dml operationnn", undefined, "VIBE_CODING", "DML"],
["read only error trigger", undefined, "VIBE_CODING", "Trigger"],
["null pointer excep", undefined, "VIBE_CODING", "Pointer"],
];
return simpleScenarios.map(([query, category, typeStr, keyword1, keyword2]) => {
const fallbacks: string[][] = [];
// Add category-based fallbacks to reduce brittleness of bulk tests
// This ensures that finding the correct *Manual* (e.g. Apex Reference) counts as a pass
// even if the specific keyword (e.g. "List") isn't in the top snippet due to chunking.
if (category === DocCategory.CORE_PLATFORM) {
fallbacks.push(["Apex"], ["Reference"], ["Developer Guide"], ["Language"]);
} else if (category === DocCategory.APIS) {
fallbacks.push(["API"], ["Reference"], ["Rest"], ["Soap"]);
} else if (category === DocCategory.SECURITY) {
fallbacks.push(["Security"], ["Implementation"], ["Guide"]);
} else if (category === DocCategory.DEV_TOOLS) {
fallbacks.push(["CLI"], ["SFDX"], ["Tool"], ["Extension"]);
}
// Add generic fallbacks for broad matching
fallbacks.push(["Guide"], ["Documentation"], ["Salesforce"]);
return {
query,
category,
type: typeStr as any,
description: `Diversity: ${query}`,
expectedBehavior: `Should find results for ${query}`,
expectedKeywords: keyword2 ? [keyword1, keyword2] : [keyword1],
fallbackKeywords: fallbacks,
useExpansion: typeStr === 'SEMANTIC'
};
});
}
// Helper: Load baseline snapshot
function loadBaseline(): BaselineSnapshot | null {
try {
if (fs.existsSync(BASELINE_FILE)) {
const data = fs.readFileSync(BASELINE_FILE, 'utf-8');
return JSON.parse(data) as BaselineSnapshot;
}
} catch (err) {
console.warn(`β οΈ Could not load baseline: ${err}`);
}
return null;
}
// Helper: Save baseline snapshot
function saveBaseline(snapshot: BaselineSnapshot): void {
try {
const dir = path.dirname(BASELINE_FILE);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
fs.writeFileSync(BASELINE_FILE, JSON.stringify(snapshot, null, 2));
console.log(`\nπΎ Baseline saved to ${BASELINE_FILE}`);
} catch (err) {
console.error(`β Could not save baseline: ${err}`);
}
}
// Helper: Calculate MRR from test results
function calculateMRR(results: ExtendedTestResult[]): number {
const validResults = results.filter(r => r.reciprocalRank > 0);
if (validResults.length === 0) return 0;
return validResults.reduce((sum, r) => sum + r.reciprocalRank, 0) / validResults.length;
}
// Helper: Compare with baseline and report
function compareWithBaseline(current: BaselineSnapshot, baseline: BaselineSnapshot): { passed: boolean; warnings: string[] } {
const warnings: string[] = [];
let passed = true;
// Check pass rate degradation
const passRateDelta = baseline.passRate - current.passRate;
if (passRateDelta > BASELINE_DEGRADATION_THRESHOLD) {
warnings.push(`π΄ REGRESSION: Pass rate dropped from ${(baseline.passRate * 100).toFixed(1)}% to ${(current.passRate * 100).toFixed(1)}% (${(passRateDelta * 100).toFixed(1)}% degradation)`);
passed = false;
} else if (passRateDelta > 0) {
warnings.push(`β οΈ Pass rate decreased slightly: ${(baseline.passRate * 100).toFixed(1)}% β ${(current.passRate * 100).toFixed(1)}%`);
} else if (passRateDelta < 0) {
warnings.push(`β
Pass rate improved: ${(baseline.passRate * 100).toFixed(1)}% β ${(current.passRate * 100).toFixed(1)}%`);
}
// Check MRR degradation
const mrrDelta = baseline.mrr - current.mrr;
if (mrrDelta > BASELINE_MRR_DEGRADATION_THRESHOLD) {
warnings.push(`π΄ REGRESSION: MRR dropped from ${baseline.mrr.toFixed(3)} to ${current.mrr.toFixed(3)} (${(mrrDelta * 100).toFixed(1)}% degradation)`);
passed = false;
} else if (mrrDelta > 0.02) {
warnings.push(`β οΈ MRR decreased slightly: ${baseline.mrr.toFixed(3)} β ${current.mrr.toFixed(3)}`);
} else if (mrrDelta < -0.02) {
warnings.push(`β
MRR improved: ${baseline.mrr.toFixed(3)} β ${current.mrr.toFixed(3)}`);
}
// Check performance degradation
const perfDelta = current.avgDuration - baseline.avgDuration;
const perfPercentChange = perfDelta / baseline.avgDuration;
if (perfPercentChange > 0.5) {
warnings.push(`β οΈ Performance degraded: ${baseline.avgDuration.toFixed(0)}ms β ${current.avgDuration.toFixed(0)}ms (+${(perfPercentChange * 100).toFixed(0)}%)`);
} else if (perfPercentChange < -0.2) {
warnings.push(`β
Performance improved: ${baseline.avgDuration.toFixed(0)}ms β ${current.avgDuration.toFixed(0)}ms`);
}
return { passed, warnings };
}
async function testSearch(): Promise<void> {
console.log("=".repeat(80));
console.log("Salesforce Docs MCP - Comprehensive Search Test Suite (Phase 3)");
console.log("=".repeat(80));
// Initialize database
await initializeDatabase();
// Check if indexed
if (!isDatabaseIndexed()) {
console.error("\nError: Database is not indexed!");
console.error("Run 'npm run build-index' first to index the PDF documents.");
process.exit(1);
}
// Load baseline for comparison
const baseline = loadBaseline();
if (baseline) {
console.log(`\nπ Loaded baseline from ${baseline.timestamp}`);
console.log(` Previous: ${baseline.passedTests}/${baseline.totalTests} passed (${(baseline.passRate * 100).toFixed(1)}%), MRR: ${baseline.mrr.toFixed(3)}`);
} else {
console.log("\nπ No baseline found. This run will establish the baseline.");
}
// Get category stats
console.log("\nπ Category Statistics:");
console.log("-".repeat(40));
const stats = await getCategoryStats();
for (const stat of stats) {
console.log(` ${stat.category}: ${stat.count} documents`);
for (const sub of stat.subcategories) {
console.log(` ββ ${sub.name}: ${sub.count}`);
}
}
// Group tests by type
const testsByType = {
'POSITIVE': TEST_QUERIES.filter(t => t.type === 'POSITIVE'),
'NEGATIVE': TEST_QUERIES.filter(t => t.type === 'NEGATIVE'),
'EDGE_CASE': TEST_QUERIES.filter(t => t.type === 'EDGE_CASE'),
'FALSE_POSITIVE': TEST_QUERIES.filter(t => t.type === 'FALSE_POSITIVE'),
'EXCEPTION': TEST_QUERIES.filter(t => t.type === 'EXCEPTION'),
'PERFORMANCE': TEST_QUERIES.filter(t => t.type === 'PERFORMANCE'),
'VIBE_CODING': TEST_QUERIES.filter(t => t.type === 'VIBE_CODING'),
'SEMANTIC': TEST_QUERIES.filter(t => t.type === 'SEMANTIC'),
};
// Extended test summary tracking with MRR
const testResults: ExtendedTestResult[] = [];
// Run all test categories
for (const [testType, tests] of Object.entries(testsByType)) {
const typeEmoji = {
'POSITIVE': 'β
',
'NEGATIVE': 'π«',
'EDGE_CASE': 'π',
'FALSE_POSITIVE': 'β οΈ',
'EXCEPTION': 'π₯',
'PERFORMANCE': 'β±οΈ',
'VIBE_CODING': 'π€',
'SEMANTIC': 'π§ '
}[testType];
console.log(`\n\n${typeEmoji} Running ${testType} Tests (${tests.length} cases):`);
console.log("=".repeat(80));
for (const test of tests) {
console.log(`\nπ [${testType}] ${test.description}`);
console.log(` Query: "${test.query.substring(0, 60)}${test.query.length > 60 ? '...' : ''}"`);
if (test.category) {
console.log(` Category filter: ${test.category}`);
}
console.log(` Expected: ${test.expectedBehavior}`);
console.log("-".repeat(60));
const startTime = Date.now();
let passed = true;
let error: string | undefined;
let resultCount = 0;
const warnings: string[] = [];
try {
// For SEMANTIC tests, expand the query first
let searchQuery = test.query;
let expansion: ReturnType<typeof expandQueryToKeywords> | undefined;
if (test.type === 'SEMANTIC' && test.useExpansion) {
expansion = expandQueryToKeywords(test.query);
// Combine original query with top expanded terms for better search
if (expansion.expandedTerms.length > 0) {
searchQuery = `${test.query} ${expansion.expandedTerms.slice(0, 5).join(' ')}`;
}
}
const results = await searchDocuments(searchQuery, {
category: test.category || (expansion?.suggestedCategory as DocCategory | undefined),
maxResults: 3
});
const duration = Date.now() - startTime;
resultCount = results.length;
const topResult = results[0];
// ========== STRICT VALIDATION LOGIC ==========
// 1. Performance threshold check (per-type or custom)
const maxDuration = test.maxDurationMs || PERFORMANCE_THRESHOLDS[test.type];
if (duration > maxDuration) {
warnings.push(`β±οΈ Slow: ${duration}ms > ${maxDuration}ms threshold`);
}
// 2. Evaluate result based on test type
switch (test.type) {
case 'POSITIVE':
passed = results.length > 0;
if (!passed) {
error = "Expected results but got none";
break;
}
// Check minimum score
if (topResult.score < MIN_RELEVANCE_SCORE) {
warnings.push(`β οΈ Low relevance: score ${topResult.score.toFixed(2)} < ${MIN_RELEVANCE_SCORE}`);
}
// Check for expected keywords (strict semantic validation with fallback OR logic)
if (test.expectedKeywords && test.expectedKeywords.length > 0) {
const content = (topResult.document.title + " " + topResult.chunk).toLowerCase();
const matchedKeyword = test.expectedKeywords.find(k => content.includes(k.toLowerCase()));
if (!matchedKeyword) {
// Try fallback keyword sets (OR logic)
let fallbackMatched = false;
if (test.fallbackKeywords && test.fallbackKeywords.length > 0) {
for (const fallbackSet of test.fallbackKeywords) {
const fallbackMatch = fallbackSet.find(k => content.includes(k.toLowerCase()));
if (fallbackMatch) {
fallbackMatched = true;
break;
}
}
}
if (!fallbackMatched) {
passed = false;
const allKeywordSets = [test.expectedKeywords, ...(test.fallbackKeywords || [])];
error = `Irrelevant result: "${topResult.document.title}" lacks keywords from any set: ${allKeywordSets.map(s => `[${s.join(", ")}]`).join(" OR ")}`;
}
}
}
// Check expected category
if (passed && test.expectedCategory && topResult.document.category !== test.expectedCategory) {
passed = false;
error = `Wrong category: expected ${test.expectedCategory}, got ${topResult.document.category}`;
}
break;
case 'NEGATIVE':
// Strict: enforce maxResults constraint
if (test.maxResults !== undefined) {
passed = results.length <= test.maxResults;
if (!passed) {
error = `Expected max ${test.maxResults} results but got ${results.length}`;
}
}
// Check category correctness if specified
if (passed && test.expectedCategory && results.length > 0) {
const wrongCategory = results.find(r => r.document.category !== test.expectedCategory);
if (wrongCategory) {
passed = false;
error = `Result "${wrongCategory.document.title}" is category "${wrongCategory.document.category}" not "${test.expectedCategory}"`;
}
}
break;
case 'EDGE_CASE':
passed = true; // No crash is success for edge cases
break;
case 'FALSE_POSITIVE':
passed = true; // Observational - log for analysis
if (results.length > 0) {
warnings.push(`π Observation: Top result "${topResult.document.title}" (score: ${topResult.score.toFixed(2)})`);
}
break;
case 'EXCEPTION':
passed = true; // No exception thrown is success
break;
case 'PERFORMANCE':
passed = duration < (test.maxDurationMs || 5000);
if (!passed) {
error = `Performance: ${duration}ms exceeds ${test.maxDurationMs || 5000}ms`;
}
break;
case 'VIBE_CODING':
// Strict validation for AI agent use case
passed = results.length > 0;
if (!passed) {
error = "No results for natural language query";
break;
}
// Check minimum relevance score
if (topResult.score < MIN_RELEVANCE_SCORE) {
passed = false;
error = `Relevance too low: ${topResult.score.toFixed(2)} < ${MIN_RELEVANCE_SCORE}`;
break;
}
// Check for expected keywords if provided (with fallback OR logic)
if (test.expectedKeywords && test.expectedKeywords.length > 0) {
const content = (topResult.document.title + " " + topResult.chunk).toLowerCase();
const matchedKeyword = test.expectedKeywords.find(k => content.includes(k.toLowerCase()));
if (!matchedKeyword) {
// Try fallback keyword sets (OR logic)
let fallbackMatched = false;
if (test.fallbackKeywords && test.fallbackKeywords.length > 0) {
for (const fallbackSet of test.fallbackKeywords) {
const fallbackMatch = fallbackSet.find(k => content.includes(k.toLowerCase()));
if (fallbackMatch) {
fallbackMatched = true;
break;
}
}
}
if (!fallbackMatched) {
passed = false;
const allKeywordSets = [test.expectedKeywords, ...(test.fallbackKeywords || [])];
error = `Irrelevant result: "${topResult.document.title}" lacks keywords from any set: ${allKeywordSets.map(s => `[${s.join(", ")}]`).join(" OR ")}`;
}
}
}
// Check category if specified
if (passed && test.expectedCategory && topResult.document.category !== test.expectedCategory) {
warnings.push(`β οΈ Category mismatch: expected ${test.expectedCategory}, got ${topResult.document.category}`);
}
break;
case 'SEMANTIC':
// LLM-powered semantic search test
// Uses query expansion to convert natural language to technical terms
passed = results.length > 0;
if (!passed) {
error = "No results for semantic query";
break;
}
// Log expansion details
if (test.useExpansion) {
const expansion = expandQueryToKeywords(test.query);
console.log(` π§ Expansion: ${expansion.expandedTerms.slice(0, 5).join(', ')}...`);
console.log(` π Suggested category: ${expansion.suggestedCategory || 'none'}`);
}
// Check minimum relevance score
if (topResult.score < MIN_RELEVANCE_SCORE) {
passed = false;
error = `Semantic relevance too low: ${topResult.score.toFixed(2)} < ${MIN_RELEVANCE_SCORE}`;
break;
}
// Check for expected keywords (with fallback OR logic)
if (test.expectedKeywords && test.expectedKeywords.length > 0) {
const content = (topResult.document.title + " " + topResult.chunk).toLowerCase();
const matchedKeyword = test.expectedKeywords.find(k => content.includes(k.toLowerCase()));
if (!matchedKeyword) {
// Try fallback keyword sets (OR logic)
let fallbackMatched = false;
if (test.fallbackKeywords && test.fallbackKeywords.length > 0) {
for (const fallbackSet of test.fallbackKeywords) {
const fallbackMatch = fallbackSet.find(k => content.includes(k.toLowerCase()));
if (fallbackMatch) {
fallbackMatched = true;
break;
}
}
}
if (!fallbackMatched) {
passed = false;
const allKeywordSets = [test.expectedKeywords, ...(test.fallbackKeywords || [])];
error = `Semantic mismatch: "${topResult.document.title}" lacks keywords from any set: ${allKeywordSets.map(s => `[${s.join(", ")}]`).join(" OR ")}`;
}
}
}
break;
}
// ========== GOLDEN DOCUMENT VALIDATION (Phase 3) ==========
let goldenDocFound = false;
let goldenDocPosition: number | undefined;
if (test.expectedSource && results.length > 0) {
const expectedSourceLower = test.expectedSource.toLowerCase();
for (let i = 0; i < results.length; i++) {
const source = results[i].document.fileName?.toLowerCase() || '';
if (source.includes(expectedSourceLower)) {
goldenDocFound = true;
goldenDocPosition = i + 1; // 1-indexed position
break;
}
}
// Validate golden doc position if specified
if (test.expectedSourcePosition) {
if (!goldenDocFound) {
passed = false;
error = `Golden Doc NOT FOUND: Expected "${test.expectedSource}" in top 3 results`;
} else if (goldenDocPosition !== test.expectedSourcePosition) {
warnings.push(`β οΈ Golden Doc position: found at #${goldenDocPosition}, expected #${test.expectedSourcePosition}`);
}
} else if (!goldenDocFound) {
warnings.push(`β οΈ Golden Doc "${test.expectedSource}" not found in top 3 results`);
}
if (goldenDocFound) {
console.log(` π― Golden Doc: "${test.expectedSource}" found at position #${goldenDocPosition}`);
}
}
// ========== MRR CALCULATION (Phase 3) ==========
// Reciprocal Rank: 1/position if correct doc found, 0 otherwise
// For tests with expectedKeywords or expectedSource, calculate RR
let reciprocalRank = 0;
if (passed && results.length > 0) {
if (goldenDocFound && goldenDocPosition) {
reciprocalRank = 1 / goldenDocPosition;
} else if (test.expectedKeywords || test.fallbackKeywords) {
// For keyword-based tests, assume top result is "correct" if keywords matched
reciprocalRank = 1; // Top result matched
} else {
// Default: assume top result is relevant
reciprocalRank = results.length > 0 ? 1 : 0;
}
}
// ========== OUTPUT ==========
const statusIcon = passed ? 'β
PASS' : 'β FAIL';
console.log(` ${statusIcon} - Results: ${results.length} (${duration}ms)`);
if (results.length > 0) {
console.log(` Top result: ${topResult.document.title} (score: ${topResult.score.toFixed(2)}, category: ${topResult.document.category})`);
console.log(` Source: ${topResult.document.fileName || 'unknown'}`);
}
if (error) {
console.log(` β Reason: ${error}`);
}
for (const warn of warnings) {
console.log(` ${warn}`);
}
testResults.push({
type: test.type,
query: test.query,
description: test.description,
passed,
duration,
resultCount,
error,
reciprocalRank,
topResultSource: topResult?.document.fileName,
goldenDocFound: test.expectedSource ? goldenDocFound : undefined,
goldenDocPosition
});
} catch (err) {
const duration = Date.now() - startTime;
error = err instanceof Error ? err.message : String(err);
passed = false;
console.log(` β EXCEPTION - ${error}`);
testResults.push({
type: test.type,
query: test.query,
description: test.description,
passed,
duration,
resultCount: 0,
error,
reciprocalRank: 0
});
}
}
}
// ========== PHASE 3: COMPREHENSIVE METRICS ==========
// Calculate overall MRR
const overallMRR = calculateMRR(testResults);
// Calculate MRR by test type
const mrrByType: Record<string, number> = {};
for (const testType of Object.keys(testsByType)) {
const typeResults = testResults.filter(r => r.type === testType);
mrrByType[testType] = calculateMRR(typeResults);
}
// Golden doc statistics
const goldenDocTests = testResults.filter(r => r.goldenDocFound !== undefined);
const goldenDocHits = goldenDocTests.filter(r => r.goldenDocFound).length;
// Print test summary
console.log("\n\n" + "=".repeat(80));
console.log("π TEST SUMMARY (Phase 3 Enhanced)");
console.log("=".repeat(80));
const totalTests = testResults.length;
const passedTests = testResults.filter(r => r.passed).length;
const failedTests = totalTests - passedTests;
console.log(`\nTotal Tests: ${totalTests}`);
console.log(`β
Passed: ${passedTests}`);
console.log(`β Failed: ${failedTests}`);
console.log(`Pass Rate: ${((passedTests / totalTests) * 100).toFixed(1)}%`);
// MRR Summary
console.log("\nπ Mean Reciprocal Rank (MRR):");
console.log(` Overall MRR: ${overallMRR.toFixed(3)}`);
for (const [testType, mrr] of Object.entries(mrrByType)) {
console.log(` ${testType}: ${mrr.toFixed(3)}`);
}
// Golden Doc Summary
if (goldenDocTests.length > 0) {
console.log("\nπ― Golden Document Tracking:");
console.log(` Tests with golden docs: ${goldenDocTests.length}`);
console.log(` Golden doc hit rate: ${goldenDocHits}/${goldenDocTests.length} (${((goldenDocHits / goldenDocTests.length) * 100).toFixed(1)}%)`);
}
// Summary by type
console.log("\nBy Test Type:");
for (const [testType, tests] of Object.entries(testsByType)) {
const typeResults = testResults.filter(r => r.type === testType);
const typePassed = typeResults.filter(r => r.passed).length;
console.log(` ${testType}: ${typePassed}/${typeResults.length} passed (MRR: ${mrrByType[testType]?.toFixed(3) || 'N/A'})`);
}
// Performance summary
const avgDuration = testResults.reduce((sum, r) => sum + r.duration, 0) / testResults.length;
const maxDuration = Math.max(...testResults.map(r => r.duration));
const minDuration = Math.min(...testResults.map(r => r.duration));
console.log("\nPerformance:");
console.log(` Average query time: ${avgDuration.toFixed(0)}ms`);
console.log(` Min query time: ${minDuration}ms`);
console.log(` Max query time: ${maxDuration}ms`);
// List failed tests if any
if (failedTests > 0) {
console.log("\nβ Failed Tests:");
for (const result of testResults.filter(r => !r.passed)) {
console.log(` - [${result.type}] ${result.description}`);
console.log(` Query: "${result.query.substring(0, 50)}..."`);
if (result.error) {
console.log(` Error: ${result.error}`);
}
}
}
// ========== BASELINE COMPARISON (Phase 3) ==========
const currentSnapshot: BaselineSnapshot = {
timestamp: new Date().toISOString(),
totalTests,
passedTests,
passRate: passedTests / totalTests,
mrr: overallMRR,
avgDuration,
byType: {},
goldenDocHits,
goldenDocTotal: goldenDocTests.length
};
for (const [testType, tests] of Object.entries(testsByType)) {
const typeResults = testResults.filter(r => r.type === testType);
currentSnapshot.byType[testType] = {
passed: typeResults.filter(r => r.passed).length,
total: typeResults.length,
mrr: mrrByType[testType] || 0
};
}
// Compare with baseline
if (baseline) {
console.log("\n\n" + "=".repeat(80));
console.log("π BASELINE COMPARISON");
console.log("=".repeat(80));
const comparison = compareWithBaseline(currentSnapshot, baseline);
for (const warning of comparison.warnings) {
console.log(` ${warning}`);
}
if (!comparison.passed) {
console.log("\nπ΄ REGRESSION DETECTED! Consider investigating before deploying.");
}
}
// Save new baseline (only if all tests pass or explicitly requested)
const saveNewBaseline = process.argv.includes('--save-baseline') || (!baseline && failedTests === 0);
if (saveNewBaseline) {
saveBaseline(currentSnapshot);
} else if (failedTests === 0 && baseline) {
console.log("\nπ‘ Tip: Run with --save-baseline to update the baseline snapshot");
}
// Test formatted output
console.log("\n\nπ Formatted Output Example:");
console.log("=".repeat(60));
const results = await searchDocuments("Apex trigger best practices", { maxResults: 3 });
console.log(formatSearchResults(results, "Apex trigger best practices"));
console.log("\n\n" + "=".repeat(80));
console.log(failedTests === 0 ? "β
All tests passed!" : `β οΈ ${failedTests} test(s) failed`);
console.log("=".repeat(80));
// Exit with error code if tests failed
if (failedTests > 0) {
process.exit(1);
}
}
testSearch().catch(console.error);