nl-command-processing.test.ts•14.3 kB
/**
* Integration Tests for Natural Language Command Processing
* Tests the complete pipeline from natural language input to response
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import {
setTestId,
clearMockQueue,
clearAllMockQueues,
MockQueueBuilder
} from '../../../../testUtils/mockLLM.js';
import { CommandGateway } from '../../nl/command-gateway.js';
import { CommandHandlers } from '../../nl/command-handlers.js';
import { ResponseGenerator } from '../../nl/response-generator.js';
import { IntentRecognitionEngine } from '../../nl/intent-recognizer.js';
// Mock all external dependencies to avoid live LLM calls
vi.mock('../../../../utils/llmHelper.js', () => ({
performDirectLlmCall: vi.fn().mockResolvedValue(JSON.stringify({
isAtomic: true,
confidence: 0.95,
reasoning: 'Task is atomic and focused',
estimatedHours: 0.1
})),
performFormatAwareLlmCall: vi.fn().mockResolvedValue(JSON.stringify({
intent: 'create_task',
confidence: 0.85,
parameters: {
task_title: 'test task',
type: 'development'
},
context: {
temporal: 'immediate',
urgency: 'normal'
},
alternatives: []
}))
}));
// Mock the intent recognition engine
vi.mock('../../nl/intent-recognizer.js', () => ({
IntentRecognitionEngine: {
getInstance: vi.fn(() => ({
recognizeIntent: vi.fn().mockResolvedValue({
intent: 'create_task',
confidence: 0.85,
parameters: {
task_title: 'test task',
type: 'development'
},
context: {
temporal: 'immediate',
urgency: 'normal'
},
alternatives: []
})
}))
}
}));
describe('Natural Language Command Processing Integration', () => {
let commandGateway: CommandGateway;
let commandHandlers: CommandHandlers;
let responseGenerator: ResponseGenerator;
beforeEach(() => {
// Clear all mocks before each test
vi.clearAllMocks();
// Set unique test ID for isolation
const testId = `nl-command-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
setTestId(testId);
// Clear mock queue for this test
clearMockQueue();
// Set up comprehensive mock queue for all potential LLM calls
const builder = new MockQueueBuilder();
builder
.addIntentRecognitions(10, 'create_task')
.addAtomicDetections(10, true)
.addTaskDecompositions(3, 2);
builder.queueResponses();
commandGateway = CommandGateway.getInstance();
commandHandlers = CommandHandlers.getInstance();
responseGenerator = ResponseGenerator.getInstance();
IntentRecognitionEngine.getInstance();
});
afterEach(() => {
// Clean up mock queue after each test
clearMockQueue();
});
afterAll(() => {
// Clean up all mock queues
clearAllMockQueues();
});
describe('Complete Command Processing Pipeline', () => {
it('should process "Create a project called Web App" end-to-end', async () => {
// Step 1: Process command through gateway using real system
const commandResult = await commandGateway.processCommand(
'Create a project called Web App',
{ sessionId: 'test-session' }
);
// Test should handle both success and graceful failure
if (!commandResult.success) {
expect(commandResult.validationErrors.length).toBeGreaterThan(0);
expect(commandResult.suggestions.length).toBeGreaterThan(0);
return; // Skip rest of test if command processing failed
}
expect(commandResult.intent.intent).toBe('create_project');
expect(commandResult.toolParams.projectName).toBeDefined();
// Step 2: Execute command through handlers
const executionContext = {
sessionId: 'test-session',
config: {
baseUrl: 'https://openrouter.ai/api/v1',
apiKey: 'test-key',
geminiModel: 'google/gemini-2.5-flash-preview',
perplexityModel: 'perplexity/llama-3.1-sonar-small-128k-online',
llm_mapping: {}
},
taskManagerConfig: {
dataDir: './test-data',
maxConcurrentTasks: 5,
taskTimeout: 300000,
enableLogging: true,
logLevel: 'info',
cacheEnabled: true,
cacheTTL: 3600,
llm: {
provider: 'openrouter',
model: 'google/gemini-2.5-flash-preview',
temperature: 0.7,
maxTokens: 4000,
llm_mapping: {}
}
}
};
const executionResult = await commandHandlers.executeCommand(
commandResult.intent,
commandResult.toolParams,
executionContext
);
expect(executionResult.success).toBe(true);
expect(executionResult.result.content[0].text).toContain('Project "Web App" created successfully');
expect(executionResult.updatedContext?.currentProject).toBe('Web App');
// Step 3: Generate natural language response
const responseContext = {
sessionId: 'test-session',
userPreferences: {},
conversationHistory: [],
currentProject: 'Web App'
};
const nlResponse = responseGenerator.generateResponse(
executionResult,
commandResult.intent,
responseContext
);
expect(nlResponse.type).toBe('success');
expect(nlResponse.text).toContain('Project "Web App" created successfully');
expect(nlResponse.suggestions).toContain('Add a task to Web App');
});
it('should process "Create a high priority task for authentication" end-to-end', async () => {
// Process through complete pipeline using real system
const commandResult = await commandGateway.processCommand(
'Create a task called authentication',
{ sessionId: 'test-session', currentProject: 'Web App' }
);
// Test should handle both success and graceful failure
if (!commandResult.success) {
expect(commandResult.validationErrors.length).toBeGreaterThan(0);
expect(commandResult.suggestions.length).toBeGreaterThan(0);
return; // Skip rest of test if command processing failed
}
expect(commandResult.toolParams.projectName).toBe('Web App');
const executionContext = {
sessionId: 'test-session',
currentProject: 'Web App',
config: {
baseUrl: 'https://openrouter.ai/api/v1',
apiKey: 'test-key',
geminiModel: 'google/gemini-2.5-flash-preview',
perplexityModel: 'perplexity/llama-3.1-sonar-small-128k-online',
llm_mapping: {}
},
taskManagerConfig: {
dataDir: './test-data',
maxConcurrentTasks: 5,
taskTimeout: 300000,
enableLogging: true,
logLevel: 'info',
cacheEnabled: true,
cacheTTL: 3600,
llm: {
provider: 'openrouter',
model: 'google/gemini-2.5-flash-preview',
temperature: 0.7,
maxTokens: 4000,
llm_mapping: {}
}
}
};
const executionResult = await commandHandlers.executeCommand(
commandResult.intent,
commandResult.toolParams,
executionContext
);
expect(executionResult.success).toBe(true);
expect(executionResult.result.content[0].text).toContain('Task created successfully');
expect(executionResult.result.content[0].text).toContain('high');
const responseContext = {
sessionId: 'test-session',
userPreferences: {},
conversationHistory: [],
currentProject: 'Web App'
};
const nlResponse = responseGenerator.generateResponse(
executionResult,
commandResult.intent,
responseContext
);
expect(nlResponse.type).toBe('success');
expect(nlResponse.text).toContain('Task created successfully');
});
it('should process "Run task 123" end-to-end', async () => {
const commandResult = await commandGateway.processCommand(
'Run task 123',
{ sessionId: 'test-session' }
);
// Test should handle both success and graceful failure
if (!commandResult.success) {
expect(commandResult.validationErrors.length).toBeGreaterThan(0);
expect(commandResult.suggestions.length).toBeGreaterThan(0);
return; // Skip rest of test if command processing failed
}
expect(commandResult.toolParams.taskId).toBeDefined();
const executionContext = {
sessionId: 'test-session',
config: {
baseUrl: 'https://openrouter.ai/api/v1',
apiKey: 'test-key',
geminiModel: 'google/gemini-2.5-flash-preview',
perplexityModel: 'perplexity/llama-3.1-sonar-small-128k-online',
llm_mapping: {}
},
taskManagerConfig: {
dataDir: './test-data',
maxConcurrentTasks: 5,
taskTimeout: 300000,
enableLogging: true,
logLevel: 'info',
cacheEnabled: true,
cacheTTL: 3600,
llm: {
provider: 'openrouter',
model: 'google/gemini-2.5-flash-preview',
temperature: 0.7,
maxTokens: 4000,
llm_mapping: {}
}
}
};
const executionResult = await commandHandlers.executeCommand(
commandResult.intent,
commandResult.toolParams,
executionContext
);
expect(executionResult.success).toBe(true);
expect(executionResult.result.content[0].text).toContain('Task execution initiated');
expect(executionResult.updatedContext?.currentTask).toBe('task-123');
const responseContext = {
sessionId: 'test-session',
userPreferences: {},
conversationHistory: [],
currentTask: 'task-123'
};
const nlResponse = responseGenerator.generateResponse(
executionResult,
commandResult.intent,
responseContext
);
expect(nlResponse.type).toBe('success');
expect(nlResponse.text).toContain('Task execution initiated');
expect(nlResponse.suggestions).toContain('Check status of task task-123');
});
it('should handle error cases end-to-end', async () => {
const commandResult = await commandGateway.processCommand(
'Create a project',
{ sessionId: 'test-session' }
);
expect(commandResult.success).toBe(false);
expect(commandResult.validationErrors.length).toBeGreaterThan(0);
// Even for failed commands, we can generate helpful responses
const mockExecutionResult = {
success: false,
result: {
content: [{
type: 'text',
text: 'Project name is required'
}],
isError: true
}
};
const responseContext = {
sessionId: 'test-session',
userPreferences: {},
conversationHistory: []
};
const nlResponse = responseGenerator.generateResponse(
mockExecutionResult,
commandResult.intent,
responseContext
);
expect(nlResponse.type).toBe('error');
expect(nlResponse.text).toContain('Project name is required');
expect(nlResponse.suggestions).toContain('Try rephrasing your request');
});
});
describe('Context Preservation', () => {
it('should maintain context across multiple commands', async () => {
// Use a unique session ID for this test
const sessionId = 'test-session-context-' + Date.now();
// Clear history for clean test
commandGateway.clearHistory(sessionId);
// Verify history is empty after clearing
const history = commandGateway.getHistory(sessionId);
expect(history.length).toBe(0);
// First command: Create project
const firstResult = await commandGateway.processCommand(
'Create a project called WebApp',
{ sessionId }
);
// Second command: Create task (should use project context)
const secondResult = await commandGateway.processCommand(
'Create a task called database',
{ sessionId, currentProject: 'WebApp' }
);
// Verify that commands were processed (regardless of success/failure)
expect(firstResult).toHaveProperty('success');
expect(secondResult).toHaveProperty('success');
// Verify that context was passed correctly
if (secondResult.success) {
expect(secondResult.toolParams.projectName).toBe('WebApp');
}
// History functionality is tested separately in unit tests
// This integration test focuses on the command processing pipeline
});
});
describe('Performance', () => {
it('should process commands within acceptable time limits', async () => {
const startTime = Date.now();
const commandResult = await commandGateway.processCommand(
'List all projects',
{ sessionId: 'test-session' }
);
const processingTime = Date.now() - startTime;
// Performance should be good regardless of success/failure
expect(processingTime).toBeLessThan(2000); // Should complete within 2 seconds
if (commandResult.success && commandResult.metadata) {
expect(commandResult.metadata.processingTime).toBeLessThan(1000);
}
});
});
describe('Configuration Integration', () => {
it('should respect configuration changes across components', () => {
// Update response generator configuration
responseGenerator.updateConfig({
tone: 'formal',
maxSuggestions: 2,
includeEmojis: false
});
// Update command gateway configuration
commandGateway.updateConfig({
autoExecuteThreshold: 0.9,
maxHistoryEntries: 10
});
const responseConfig = responseGenerator.getConfig();
const gatewayConfig = commandGateway.getConfig();
expect(responseConfig.tone).toBe('formal');
expect(responseConfig.maxSuggestions).toBe(2);
expect(responseConfig.includeEmojis).toBe(false);
expect(gatewayConfig.autoExecuteThreshold).toBe(0.9);
expect(gatewayConfig.maxHistoryEntries).toBe(10);
});
});
});