import { describe, it, expect, jest, beforeEach } from '@jest/globals';
import { WebFetchTool } from '../../src/tools/webfetch.js';
import axios from 'axios';
// Replace the axios module with Jest's automatic mock so each test can stub
// the HTTP layer instead of issuing real requests.
jest.mock('axios');
// Typed view of the mocked module: gives the tests access to mock helpers such
// as `mockedAxios.create.mockReturnValue(...)` and `mockedAxios.create.mock`.
const mockedAxios = axios as jest.Mocked<typeof axios>;
describe('WebFetchTool', () => {
// Tool instance under test; a fresh one is created before every test case.
let tool: WebFetchTool;
beforeEach(() => {
tool = new WebFetchTool();
// Clears the recorded calls/results of every mock. NOTE: per the Jest docs,
// clearAllMocks() does NOT remove implementations installed with
// mockReturnValue()/mockResolvedValue(), so a stub configured by one test
// stays active in later tests until it is overridden.
jest.clearAllMocks();
});
describe('Tool Definition', () => {
  // Static metadata exposed by the tool instance: name, description, schema.

  it('should have correct name', () => {
    const { name } = tool;

    expect(name).toBe('webfetch');
  });

  it('should have description', () => {
    const { description } = tool;

    // Must be present and non-empty.
    expect(description).toBeTruthy();
    expect(description.length).toBeGreaterThan(0);
  });

  it('should have schema', () => {
    const { schema } = tool;

    expect(schema).toBeDefined();
  });
});
describe('HTML Fetching', () => {
  // Fixture page served by the stubbed HTTP client. It contains a title and
  // meta tags, two headings, a paragraph, one relative and one absolute link,
  // and one image.
  const mockHtml = `
<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
<meta name="description" content="Test description">
<meta property="og:title" content="OG Title">
</head>
<body>
<h1>Main Heading</h1>
<h2>Subheading</h2>
<p>This is a test paragraph.</p>
<a href="/relative">Relative Link</a>
<a href="https://example.com/absolute">Absolute Link</a>
<img src="/image.jpg" alt="Test Image">
</body>
</html>
`;

  // Every axios instance created during a test answers GET with the fixture.
  beforeEach(() => {
    mockedAxios.create.mockReturnValue({
      get: jest.fn<any>().mockResolvedValue({ data: mockHtml }),
    } as any);
  });

  it('should fetch and extract text content', async () => {
    const result = await tool.run({
      url: 'https://example.com',
      extract: ['text'],
    });

    expect(result.isError).toBeFalsy();
    const content = JSON.parse(result.content[0].text);
    expect(content.text).toContain('Main Heading');
    expect(content.text).toContain('test paragraph');
  });

  it('should extract headings', async () => {
    const result = await tool.run({
      url: 'https://example.com',
      extract: ['headings'],
    });

    const content = JSON.parse(result.content[0].text);
    expect(content.headings).toBeDefined();
    expect(content.headings.length).toBeGreaterThan(0);
    expect(content.headings.some((h: any) => h.text === 'Main Heading')).toBe(true);
  });

  it('should extract links with absolute URLs', async () => {
    const result = await tool.run({
      url: 'https://example.com',
      extract: ['links'],
    });

    const content = JSON.parse(result.content[0].text);
    expect(content.links).toBeDefined();

    const hrefs: string[] = content.links.map((l: any) => l.href);
    // The fixture already contains one absolute link, so checking that *some*
    // href is absolute would pass even if relative hrefs were left untouched.
    // Require every href to be absolute instead...
    expect(hrefs.every((href) => /^https?:\/\//.test(href))).toBe(true);
    // ...and the relative href to be resolved against the requested page URL.
    expect(hrefs).toContain('https://example.com/relative');
    expect(hrefs).toContain('https://example.com/absolute');
  });

  it('should extract metadata', async () => {
    const result = await tool.run({
      url: 'https://example.com',
      extract: ['metadata'],
    });

    const content = JSON.parse(result.content[0].text);
    expect(content.metadata).toBeDefined();
    expect(content.metadata.title).toBe('Test Page');
    expect(content.metadata.description).toBe('Test description');
  });

  it('should extract images', async () => {
    const result = await tool.run({
      url: 'https://example.com',
      extract: ['images'],
    });

    const content = JSON.parse(result.content[0].text);
    expect(content.images).toBeDefined();
    expect(content.images.length).toBeGreaterThan(0);
  });

  it('should extract all content types by default', async () => {
    // No `extract` key: every section is expected in the output.
    const result = await tool.run({
      url: 'https://example.com',
    });

    const content = JSON.parse(result.content[0].text);
    expect(content.text).toBeDefined();
    expect(content.headings).toBeDefined();
    expect(content.links).toBeDefined();
    expect(content.metadata).toBeDefined();
    expect(content.images).toBeDefined();
  });
});
describe('Custom Selectors', () => {
  // Fixture with one <article>, one <aside> and two elements sharing a class.
  const mockHtml = `
<html>
<body>
<article class="main">Main Article</article>
<aside class="sidebar">Sidebar Content</aside>
<div class="item">Item 1</div>
<div class="item">Item 2</div>
</body>
</html>
`;

  /** Runs the tool against the fixture with the given selectors and returns the parsed JSON payload. */
  const runWithSelectors = async (selectors: Record<string, string>) => {
    const response = await tool.run({
      url: 'https://example.com',
      selectors,
    });
    return JSON.parse(response.content[0].text);
  };

  beforeEach(() => {
    const stubbedGet = jest.fn<any>().mockResolvedValue({ data: mockHtml });
    mockedAxios.create.mockReturnValue({ get: stubbedGet } as any);
  });

  it('should extract content using custom selectors', async () => {
    const payload = await runWithSelectors({
      mainContent: 'article.main',
      sidebar: 'aside.sidebar',
    });

    expect(payload.custom).toBeDefined();
    expect(payload.custom.mainContent).toBe('Main Article');
    expect(payload.custom.sidebar).toBe('Sidebar Content');
  });

  it('should handle multiple elements with same selector', async () => {
    const payload = await runWithSelectors({ items: 'div.item' });
    const matched = payload.custom.items;

    // Several matches are reported as an array of their texts.
    expect(Array.isArray(matched)).toBe(true);
    expect(matched).toContain('Item 1');
    expect(matched).toContain('Item 2');
  });

  it('should return empty string for non-existent selector', async () => {
    const payload = await runWithSelectors({ nonExistent: 'div.does-not-exist' });

    expect(payload.custom.nonExistent).toBe('');
  });
});
describe('Configuration Options', () => {
  // Every axios instance created during a test answers GET with a trivial page.
  beforeEach(() => {
    mockedAxios.create.mockReturnValue({
      get: jest.fn<any>().mockResolvedValue({ data: '<html><body>Test</body></html>' }),
    } as any);
  });

  /**
   * Collects every object argument handed to `axios.create()` and to the
   * created instance's `get()` during the current test.
   *
   * Both call sites are inspected so the assertions do not depend on whether
   * the tool configures the instance or the individual request.
   * Also asserts that a request was actually issued.
   */
  const sentConfigs = (): any[] => {
    const axiosInstance: any = mockedAxios.create.mock.results[0].value;
    expect(axiosInstance.get).toHaveBeenCalled();

    const args: any[] = [];
    for (const call of mockedAxios.create.mock.calls) {
      args.push(...call);
    }
    for (const call of axiosInstance.get.mock.calls) {
      args.push(...call);
    }
    // Drop non-config arguments such as the URL string.
    return args.filter((arg) => typeof arg === 'object' && arg !== null);
  };

  it('should respect timeout option', async () => {
    await tool.run({
      url: 'https://example.com',
      options: { timeout: 5000 },
    });

    // Previously only "get was called" was asserted, which passes even when
    // the option is ignored. Check that the timeout really reaches axios.
    expect(sentConfigs().some((cfg) => cfg.timeout === 5000)).toBe(true);
  });

  it('should use custom user agent', async () => {
    await tool.run({
      url: 'https://example.com',
      options: { userAgent: 'CustomBot/1.0' },
    });

    // Header names are case-insensitive, so match the key loosely.
    const sendsCustomAgent = sentConfigs().some((cfg) =>
      Object.entries(cfg.headers ?? {}).some(
        ([header, value]) => header.toLowerCase() === 'user-agent' && value === 'CustomBot/1.0',
      ),
    );
    expect(sendsCustomAgent).toBe(true);
  });

  it('should handle redirect configuration', async () => {
    await tool.run({
      url: 'https://example.com',
      options: { followRedirects: false },
    });

    // axios disables redirect following via `maxRedirects: 0`.
    expect(sentConfigs().some((cfg) => cfg.maxRedirects === 0)).toBe(true);
  });
});
describe('Error Handling', () => {
  /** Makes every axios instance created in the current test reject GET with `error`. */
  const stubRejectedGet = (error: Record<string, unknown>): void => {
    mockedAxios.create.mockReturnValue({
      get: jest.fn<any>().mockRejectedValue(error),
    } as any);
  };

  it('should handle 404 errors', async () => {
    stubRejectedGet({
      isAxiosError: true,
      response: { status: 404, statusText: 'Not Found' },
    });

    const result = await tool.run({
      url: 'https://example.com/notfound',
    });

    expect(result.isError).toBe(true);
    expect(result.content[0].text).toContain('404');
  });

  it('should handle timeout errors', async () => {
    // ECONNABORTED is the code axios reports for an aborted/timed-out request.
    stubRejectedGet({
      isAxiosError: true,
      code: 'ECONNABORTED',
    });

    const result = await tool.run({
      url: 'https://example.com',
    });

    expect(result.isError).toBe(true);
    expect(result.content[0].text).toContain('timeout');
  });

  it('should handle network errors', async () => {
    // A request object without a response: the request was sent, nothing came back.
    stubRejectedGet({
      isAxiosError: true,
      request: {},
    });

    const result = await tool.run({
      url: 'https://example.com',
    });

    expect(result.isError).toBe(true);
    expect(result.content[0].text).toContain('Network error');
  });

  it('should validate URL format', async () => {
    // Stub a *successful* response. jest.clearAllMocks() keeps stub
    // implementations, so without this the rejecting stub left behind by an
    // earlier test would still be active and `isError` would be true even if
    // the URL were never validated. With a succeeding HTTP layer, only input
    // validation can produce the error.
    mockedAxios.create.mockReturnValue({
      get: jest.fn<any>().mockResolvedValue({ data: '<html><body>Test</body></html>' }),
    } as any);

    const result = await tool.run({
      url: 'not-a-valid-url',
    });

    expect(result.isError).toBe(true);
  });
});
describe('Content Cleaning', () => {
  // Fixture with a <script> and <style> in <head> plus a second <script>
  // placed inside <body>, next to the only legitimate content.
  const dirtyHtml = `
<html>
<head>
<script>console.log('test');</script>
<style>.test { color: red; }</style>
</head>
<body>
<p>Clean content</p>
<script>alert('remove me');</script>
</body>
</html>
`;

  beforeEach(() => {
    mockedAxios.create.mockReturnValue({
      get: jest.fn<any>().mockResolvedValue({ data: dirtyHtml }),
    } as any);
  });

  it('should remove script and style tags from text', async () => {
    const result = await tool.run({
      url: 'https://example.com',
      extract: ['text'],
    });

    const content = JSON.parse(result.content[0].text);
    // <head> script and style must not leak into the text.
    expect(content.text).not.toContain('console.log');
    expect(content.text).not.toContain('color: red');
    // The <body> script was present in the fixture but never asserted on; it
    // is the case most likely to leak when only <head> is stripped.
    expect(content.text).not.toContain('remove me');
    expect(content.text).toContain('Clean content');
  });
});
});