Skip to main content
Glama
crawler.service.error.test.ts11 kB
import { CrawlerService } from '../../services/crawler.service'; import { getTestPrismaClient } from '../utils/testDb'; import { ErrorConditionMocks, ErrorType } from '../../services/crawler/test-utils/mocks/ErrorConditionMocks'; import { JobStatus } from '../../generated/prisma'; describe('CrawlerService Error Handling Tests', () => { let crawlerService: CrawlerService; const prisma = getTestPrismaClient(); let errorMocks: ErrorConditionMocks; const baseUrl = 'https://error-test.example.com'; beforeAll(async () => { crawlerService = new CrawlerService(); }); beforeEach(async () => { // Clean up the database before each test await prisma.chunk.deleteMany(); await prisma.document.deleteMany(); await prisma.job.deleteMany(); // Reset mock state if (errorMocks) { errorMocks.cleanup(); } }); afterEach(() => { if (errorMocks) { errorMocks.cleanup(); } }); describe('Error Handling', () => { it('should continue crawling after handling 404 errors', async () => { const jobId = 'test-404-error'; // Create the job record await prisma.job.create({ data: { id: jobId, url: baseUrl, startDate: new Date(), }, }); // Set up error condition mocks errorMocks = new ErrorConditionMocks({ baseUrl, customErrorMessage: 'Test 404 Error', }); // Add a custom home page that links to both good and error pages const scope = errorMocks['scope']; // Add a good home page scope.get('/') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Error Test Home</title> </head> <body> <h1>Error Test Home</h1> <p>This page links to working pages and error pages.</p> <ul> <li><a href="/good-page">Good Page</a></li> <li><a href="/error-${ErrorType.NOT_FOUND}">404 Not Found Page</a></li> </ul> </body> </html> `); // Add a good page scope.get('/good-page') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Good Page</title> </head> <body> <h1>Good Page</h1> <p>This is a normal page that should be crawled successfully.</p> </body> </html> `); // Set up the mocks errorMocks.setup(); // Start crawling await crawlerService.crawl(jobId, baseUrl, { maxDepth: 1 }); // Verify documents were created (home page + good page) const documents = await prisma.document.findMany({ where: { jobId }, }); // Should have home page and good page, not the 404 page expect(documents.length).toBe(2); // Check documents const urls = documents.map(d => d.url).sort(); expect(urls).toEqual([ baseUrl, `${baseUrl}/good-page` ]); // Verify job has an error but completed const finalJob = await prisma.job.findUnique({ where: { id: jobId } }); expect(finalJob).toBeDefined(); expect(finalJob?.status).toBe('completed' as JobStatus); expect(finalJob?.error).toContain('errors during crawling'); }); it('should handle server errors gracefully', async () => { const jobId = 'test-500-error'; // Create the job record await prisma.job.create({ data: { id: jobId, url: baseUrl, startDate: new Date(), }, }); // Set up error condition mocks errorMocks = new ErrorConditionMocks({ baseUrl, customErrorMessage: 'Test 500 Error', }); // Add a custom home page that links to both good and error pages const scope = errorMocks['scope']; // Add a good home page scope.get('/') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Error Test Home</title> </head> <body> <h1>Error Test Home</h1> <p>This page links to working pages and error pages.</p> <ul> <li><a href="/good-page-1">Good Page 1</a></li> <li><a href="/good-page-2">Good Page 2</a></li> <li><a href="/error-${ErrorType.SERVER_ERROR}">500 Server Error</a></li> </ul> </body> </html> `); // Add good pages scope.get('/good-page-1') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Good Page 1</title> </head> <body> <h1>Good Page 1</h1> <p>This is a normal page that should be crawled successfully.</p> </body> </html> `); scope.get('/good-page-2') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Good Page 2</title> </head> <body> <h1>Good Page 2</h1> <p>This is another normal page that should be crawled successfully.</p> </body> </html> `); // Set up the mocks errorMocks.setup(); // Start crawling await crawlerService.crawl(jobId, baseUrl, { maxDepth: 1 }); // Verify only good pages were crawled const documents = await prisma.document.findMany({ where: { jobId }, }); // Should have home page and good pages, not the 500 error page expect(documents.length).toBe(3); // Check documents const urls = documents.map(d => d.url).sort(); expect(urls).toEqual([ baseUrl, `${baseUrl}/good-page-1`, `${baseUrl}/good-page-2` ]); // Verify job has an error but completed const finalJob = await prisma.job.findUnique({ where: { id: jobId } }); expect(finalJob).toBeDefined(); expect(finalJob?.status).toBe('completed' as JobStatus); expect(finalJob?.error).toContain('errors during crawling'); }); it('should handle network errors and continue crawling', async () => { const jobId = 'test-network-error'; // Create the job record await prisma.job.create({ data: { id: jobId, url: baseUrl, startDate: new Date(), }, }); // Set up error condition mocks errorMocks = new ErrorConditionMocks({ baseUrl, }); // Add a custom home page that links to both good and error pages const scope = errorMocks['scope']; // Add a good home page scope.get('/') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Error Test Home</title> </head> <body> <h1>Error Test Home</h1> <p>This page links to working pages and error pages.</p> <ul> <li><a href="/good-page">Good Page</a></li> <li><a href="/error-${ErrorType.NETWORK_ERROR}">Network Error</a></li> </ul> </body> </html> `); // Add a good page scope.get('/good-page') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Good Page</title> </head> <body> <h1>Good Page</h1> <p>This is a normal page that should be crawled successfully.</p> </body> </html> `); // Set up the mocks errorMocks.setup(); // Start crawling await crawlerService.crawl(jobId, baseUrl, { maxDepth: 1 }); // Verify only good pages were crawled const documents = await prisma.document.findMany({ where: { jobId }, }); // Should have home page and good page, not the network error page expect(documents.length).toBe(2); // Verify job has network error info but completed const finalJob = await prisma.job.findUnique({ where: { id: jobId } }); expect(finalJob).toBeDefined(); expect(finalJob?.status).toBe('completed' as JobStatus); expect(finalJob?.error).toContain('Network error'); }); it('should handle multiple types of errors in a single crawl', async () => { const jobId = 'test-mixed-errors'; // Create the job record await prisma.job.create({ data: { id: jobId, url: baseUrl, startDate: new Date(), }, }); // Set up error condition mocks errorMocks = new ErrorConditionMocks({ baseUrl, }); // Add a custom home page that links to both good and various error pages const scope = errorMocks['scope']; // Add a good home page scope.get('/') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Error Test Home</title> </head> <body> <h1>Error Test Home</h1> <p>This page links to working pages and error pages.</p> <ul> <li><a href="/good-page">Good Page</a></li> <li><a href="/error-${ErrorType.NOT_FOUND}">404 Not Found</a></li> <li><a href="/error-${ErrorType.SERVER_ERROR}">500 Server Error</a></li> <li><a href="/error-${ErrorType.NETWORK_ERROR}">Network Error</a></li> <li><a href="/error-${ErrorType.MALFORMED_CONTENT}">Malformed HTML</a></li> </ul> </body> </html> `); // Add a good page scope.get('/good-page') .reply(200, ` <!DOCTYPE html> <html> <head> <title>Good Page</title> </head> <body> <h1>Good Page</h1> <p>This is a normal page that should be crawled successfully.</p> </body> </html> `); // Set up the mocks errorMocks.setup(); // Start crawling await crawlerService.crawl(jobId, baseUrl, { maxDepth: 1 }); // Verify only good pages were crawled const documents = await prisma.document.findMany({ where: { jobId }, }); // Should have home page and good page only expect(documents.length).toBe(2); // Check documents const urls = documents.map(d => d.url).sort(); expect(urls).toEqual([ baseUrl, `${baseUrl}/good-page` ]); // Verify job has errors but completed const finalJob = await prisma.job.findUnique({ where: { id: jobId } }); expect(finalJob).toBeDefined(); expect(finalJob?.status).toBe('completed' as JobStatus); expect(finalJob?.error).toBeDefined(); expect(finalJob?.errorCount).toBeGreaterThan(1); }); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/visheshd/docmcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server