import { describe, it, expect, beforeEach, afterEach } from 'bun:test';
import { SpiderService } from '@/spider/index.js';
import { promises as fs } from 'fs';
import { join } from 'path';
describe('crawling integration', () => {
  const testCacheDir = './test-cache';

  // Shared cleanup for both hooks. `force: true` makes fs.rm a no-op
  // (no error) when the path does not exist, so no try/catch is needed;
  // any error that *does* surface (e.g. EPERM) should fail the test run
  // instead of being silently swallowed.
  const removeTestCache = () =>
    fs.rm(testCacheDir, { recursive: true, force: true });

  // Ensure each test starts from an empty cache and leaves none behind.
  beforeEach(removeTestCache);
  afterEach(removeTestCache);

  describe('spider service', () => {
    it('should create service instance', () => {
      const service = new SpiderService(testCacheDir);
      expect(service).toBeInstanceOf(SpiderService);
    });

    it('should handle empty cache gracefully', async () => {
      const service = new SpiderService(testCacheDir);
      const page = await service.getPage('https://example.com/nonexistent');
      expect(page).toBeNull();
    });

    it('should search in empty cache', async () => {
      const service = new SpiderService(testCacheDir);
      const results = await service.searchDocs('test query');
      expect(results).toEqual([]);
    });

    it('should list pages in empty cache', async () => {
      const service = new SpiderService(testCacheDir);
      const pages = await service.listPages();
      expect(pages).toEqual([]);
    });

    it('should clear empty cache', async () => {
      const service = new SpiderService(testCacheDir);
      const cleared = await service.clearCache();
      expect(cleared).toBe(0);
    });
  });

  // note: testing actual web crawling would require either:
  // 1. a mock http server with test content
  // 2. integration with real websites (unreliable for tests)
  // 3. pre-recorded http responses
  // for this demo, we focus on the service interface
});