import { describe, it, expect, mock } from 'bun:test';
import { HtmlParser } from '@/spider/parser.js';
import { ContentAnalyzer } from '@/llm/analyzer.js';
import { LLMClient } from '@/llm/client.js';
import { AnalysisType } from '@/llm/types.js';
import { readFileSync } from 'fs';
import { join } from 'path';
/**
 * Stand-in for the LLM client that simulates Claude's code extraction.
 *
 * - `analyze` starts without any stubbed behaviour; tests supply its resolved value.
 * - `isAvailable` always reports `true`.
 * - `getProviderName` always reports `'anthropic'`.
 */
const mockLLMClient = {
  analyze: mock(),
  isAvailable: mock(() => true),
  getProviderName: mock(() => 'anthropic'),
};
/**
 * Integration-style tests that run parsed page content through ContentAnalyzer
 * while the LLM client is replaced by `mockLLMClient`. Each test stubs the JSON
 * string the client resolves with and then checks the analyzer's result.
 *
 * NOTE: the bodies of multi-line template literals below are intentionally
 * flush-left; their whitespace is part of the string value handed to the mock.
 */
describe('code example extraction integration', () => {
  const DOCS_URL = 'https://example.com/api-docs';
  const FIXTURE_PATH = 'test/fixtures/sample-pages/with-code.html';

  /**
   * Reads the HTML fixture (resolved against the current working directory)
   * and parses it with an HtmlParser created for DOCS_URL.
   *
   * @returns the value returned by `HtmlParser#parse`; the tests read its
   *          `content`, `title` and `links` properties.
   */
  const parseFixturePage = () => {
    const html = readFileSync(join(process.cwd(), FIXTURE_PATH), 'utf-8');
    return new HtmlParser(DOCS_URL).parse(html);
  };

  /**
   * Creates a ContentAnalyzer wired to the shared mock client, whose `analyze`
   * resolves with `payload` serialised as JSON.
   *
   * The `analyze` mock is reset first so that neither call history nor a
   * previously stubbed response from another test can leak into this one.
   *
   * @param payload - value to serialise as the simulated LLM response
   * @returns a ContentAnalyzer using `mockLLMClient`
   */
  const analyzerReturning = (payload: unknown) => {
    mockLLMClient.analyze.mockReset();
    mockLLMClient.analyze.mockResolvedValue(JSON.stringify(payload));
    return new ContentAnalyzer(mockLLMClient as any);
  };

  it('should extract code examples from HTML with multiple languages', async () => {
    const page = parseFixturePage();

    // Simulated Claude response for code example extraction: six examples
    // spanning four languages and three categories.
    const analyzer = analyzerReturning([
      {
        language: 'bash',
        code: 'curl -H "Authorization: Bearer YOUR_API_KEY" https://api.example.com/users',
        description: 'cURL command to authenticate and fetch users',
        category: 'api_call',
      },
      {
        language: 'javascript',
        code: `const response = await fetch('/api/users', {
method: 'GET',
headers: {
'Authorization': 'Bearer ' + apiKey,
'Content-Type': 'application/json'
}
});
const users = await response.json();
console.log(users);`,
        description: 'JavaScript fetch API call to get users',
        category: 'api_call',
      },
      {
        language: 'python',
        code: `import requests
headers = {
'Authorization': f'Bearer {api_key}',
'Content-Type': 'application/json'
}
response = requests.get('https://api.example.com/users', headers=headers)
users = response.json()
print(users)`,
        description: 'Python requests example to fetch users',
        category: 'api_call',
      },
      {
        language: 'javascript',
        code: `const newUser = {
name: 'John Doe',
email: 'john@example.com'
};
const response = await fetch('/api/users', {
method: 'POST',
headers: {
'Authorization': 'Bearer ' + apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify(newUser)
});
const createdUser = await response.json();`,
        description: 'JavaScript example for creating a new user',
        category: 'api_call',
      },
      {
        language: 'bash',
        code: `# Environment configuration
API_KEY=your_secret_api_key_here
DATABASE_URL=postgresql://user:pass@localhost/db
REDIS_URL=redis://localhost:6379`,
        description: 'Environment variables configuration',
        category: 'configuration',
      },
      {
        language: 'json',
        code: `{
"users": [
{
"id": 1,
"name": "John Doe",
"email": "john@example.com",
"created_at": "2023-01-01T00:00:00Z"
},
{
"id": 2,
"name": "Jane Smith",
"email": "jane@example.com",
"created_at": "2023-01-02T00:00:00Z"
}
],
"total": 2
}`,
        description: 'Example API response format',
        category: 'usage_example',
      },
    ]);

    // Perform code-examples-only analysis on the parsed fixture.
    const result = await analyzer.analyzeContent({
      content: page.content,
      url: DOCS_URL,
      title: page.title,
      links: page.links,
      analysisType: AnalysisType.CODE_EXAMPLES_ONLY,
    });

    expect(result).not.toBeNull();
    const examples = result!.codeExamples;
    expect(examples).toHaveLength(6);

    // Every language present in the stubbed response must survive analysis.
    const languages = examples.map(({ language }) => language);
    for (const expectedLanguage of ['bash', 'javascript', 'python', 'json']) {
      expect(languages).toContain(expectedLanguage);
    }

    // Likewise for every category.
    const categories = examples.map(({ category }) => category);
    for (const expectedCategory of ['api_call', 'configuration', 'usage_example']) {
      expect(categories).toContain(expectedCategory);
    }

    // Spot-check individual examples.
    const curlExample = examples.find(
      (ex) => ex.language === 'bash' && ex.code.includes('curl'),
    );
    expect(curlExample).toBeDefined();
    expect(curlExample!.description).toContain('authenticate');

    const pythonExample = examples.find((ex) => ex.language === 'python');
    expect(pythonExample).toBeDefined();
    expect(pythonExample!.code).toContain('import requests');
    expect(pythonExample!.category).toBe('api_call');

    const configExample = examples.find((ex) => ex.category === 'configuration');
    expect(configExample).toBeDefined();
    expect(configExample!.code).toContain('API_KEY=');

    const jsonExample = examples.find((ex) => ex.language === 'json');
    expect(jsonExample).toBeDefined();
    expect(jsonExample!.code).toContain('"users"');
  });

  it('should handle full analysis with code examples included', async () => {
    const page = parseFixturePage();

    // Simulated full-analysis response that embeds code examples alongside
    // the summary fields.
    const analyzer = analyzerReturning({
      summary: 'Comprehensive API documentation with authentication, user management endpoints, and code examples in multiple programming languages.',
      keyPoints: [
        'RESTful API for user management',
        'Bearer token authentication required',
        'Support for JavaScript, Python, and cURL examples',
        'JSON response format with user objects',
      ],
      contentType: 'api_docs',
      relevantLinks: [],
      codeExamples: [
        {
          language: 'javascript',
          code: "const response = await fetch('/api/users');",
          description: 'Basic API call example',
          category: 'api_call',
        },
        {
          language: 'python',
          code: "response = requests.get('https://api.example.com/users')",
          description: 'Python API request',
          category: 'api_call',
        },
      ],
      confidence: 0.95,
    });

    const result = await analyzer.analyzeContent({
      content: page.content,
      url: DOCS_URL,
      title: page.title,
      links: page.links,
      analysisType: AnalysisType.FULL,
    });

    expect(result).not.toBeNull();
    expect(result!.summary).toContain('API documentation');
    expect(result!.keyPoints).toHaveLength(4);
    expect(result!.contentType).toBe('api_docs');
    expect(result!.codeExamples).toHaveLength(2);
    expect(result!.confidence).toBe(0.95);

    // Code examples must be carried through in their original order.
    const [firstExample, secondExample] = result!.codeExamples;
    expect(firstExample.language).toBe('javascript');
    expect(secondExample.language).toBe('python');
  });

  it('should preserve code formatting and detect complex examples', async () => {
    // Multi-line class containing nested template literals; the analyzer is
    // expected to hand it back unchanged.
    const complexCode = `class UserService {
constructor(apiKey) {
this.apiKey = apiKey;
this.baseUrl = 'https://api.example.com';
}
async getUsers() {
try {
const response = await fetch(\`\${this.baseUrl}/users\`, {
headers: {
'Authorization': \`Bearer \${this.apiKey}\`,
'Content-Type': 'application/json'
}
});
if (!response.ok) {
throw new Error(\`HTTP error! status: \${response.status}\`);
}
return await response.json();
} catch (error) {
console.error('Failed to fetch users:', error);
throw error;
}
}
}`;

    const analyzer = analyzerReturning([
      {
        language: 'javascript',
        code: complexCode,
        description: 'Complete UserService class with error handling',
        category: 'implementation',
      },
    ]);

    const result = await analyzer.analyzeContent({
      content: 'Documentation with complex JavaScript class example',
      url: 'https://example.com/advanced',
      title: 'Advanced Examples',
      links: [],
      analysisType: AnalysisType.CODE_EXAMPLES_ONLY,
    });

    expect(result).not.toBeNull();
    expect(result!.codeExamples).toHaveLength(1);

    const [example] = result!.codeExamples;
    expect(example.language).toBe('javascript');
    // Exact equality: the code must round-trip without any reformatting.
    expect(example.code).toBe(complexCode);
    expect(example.category).toBe('implementation');
    expect(example.description).toContain('class');
    expect(example.code).toContain('class UserService');
    expect(example.code).toContain('async getUsers()');
    expect(example.code).toContain('try {');
  });
});