import { z } from 'zod';
import axios from 'axios';
import * as cheerio from 'cheerio';
import axiosRetry from 'axios-retry';
import { BaseTool } from './base.js';
import { logger } from '../utils/logger.js';
// Shared HTTP client used by every search backend in this module.
// Requests time out after 10 seconds and present a desktop-Chrome User-Agent.
const REQUEST_TIMEOUT_MS = 10000;
const BROWSER_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

const httpClient = axios.create({
  headers: { 'User-Agent': BROWSER_USER_AGENT },
  timeout: REQUEST_TIMEOUT_MS,
});

// Retry up to three times with exponential delay when the failure is a
// network / idempotent-request error or the server answered HTTP 429.
axiosRetry(httpClient, {
  retryDelay: axiosRetry.exponentialDelay,
  retries: 3,
  retryCondition: (error) =>
    error.response?.status === 429 || axiosRetry.isNetworkOrIdempotentRequestError(error),
});
/**
 * A single normalized search hit, in the common shape every search backend
 * in this module maps its engine-specific response into.
 */
export interface SearchResult {
/** Title of the result as reported by the search engine. */
title: string;
/** Target URL of the result. */
url: string;
/** Short text excerpt for the result; the API backends fall back to an empty string when none is provided. */
snippet: string;
/** Identifier of the backend that produced the result (e.g. 'duckduckgo', 'brave', 'google', 'bing', 'serpapi'). */
source: string;
}
// Relative time windows accepted by the `dateRange` filter.
const DATE_RANGE_OPTIONS = ['day', 'week', 'month', 'year'] as const;

// Builds an optional free-text filter field carrying the given description.
const optionalTextFilter = (hint: string) => z.string().optional().describe(hint);

/**
 * Schema for the optional search filters.
 *
 * The filter object itself may be omitted, and each field inside it is
 * optional as well.
 */
const searchFiltersSchema = z
  .object({
    fileType: optionalTextFilter('Filter by file type (e.g., pdf, doc, xls)'),
    language: optionalTextFilter('Filter by language code (e.g., en, pl, de)'),
    region: optionalTextFilter('Filter by region/country code (e.g., us, pl, uk)'),
    dateRange: z.enum(DATE_RANGE_OPTIONS).optional().describe('Filter by date range'),
    domain: optionalTextFilter('Filter by specific domain (e.g., github.com)'),
    exactPhrase: z.boolean().optional().describe('Search for exact phrase match'),
  })
  .optional();
// Search backends selectable through the `engine` parameter.
const SUPPORTED_ENGINES = ['duckduckgo', 'brave', 'google', 'bing', 'serpapi'] as const;

// Engine selector; falls back to DuckDuckGo when the caller does not pick one.
const engineSchema = z
  .enum(SUPPORTED_ENGINES)
  .optional()
  .default('duckduckgo')
  .describe('Search engine to use');

// Result cap: an integer between 1 and 50, defaulting to 10.
const limitSchema = z
  .number()
  .int()
  .min(1)
  .max(50)
  .optional()
  .default(10)
  .describe('Maximum number of results');

/**
 * Input schema of the web search tool: a non-empty query plus an optional
 * engine choice, result limit, and filter set.
 */
const webSearchSchema = z.object({
  query: z.string().min(1).describe('Search query string'),
  engine: engineSchema,
  limit: limitSchema,
  filters: searchFiltersSchema,
});

/** Parsed (post-default) parameter object derived from `webSearchSchema`. */
type WebSearchParams = z.infer<typeof webSearchSchema>;
/**
 * WebSearchTool - Search the web using multiple search engines.
 *
 * Supported backends: DuckDuckGo (HTML scraping, no API key), Brave Search API,
 * Google Custom Search API, Bing Web Search API and SerpAPI. Each backend maps
 * its engine-specific response into the common `SearchResult` shape.
 *
 * API keys are read from environment variables at call time:
 * `BRAVE_API_KEY`, `GOOGLE_API_KEY` + `GOOGLE_SEARCH_ENGINE_ID`, `BING_API_KEY`,
 * `SERP_API_KEY`.
 */
export class WebSearchTool extends BaseTool<typeof webSearchSchema> {
  readonly name = 'websearch';
  readonly description =
    'Search the web using various search engines (DuckDuckGo, Brave, Google, Bing, SerpAPI) with advanced filtering options. Returns a list of search results with titles, URLs, and snippets.';
  readonly schema = webSearchSchema;

  /**
   * Dispatch the search to the backend selected by `params.engine`.
   *
   * @param params - Validated search parameters.
   * @returns Normalized search results from the chosen backend.
   * @throws Error with a message prefixed by "Search failed: " when the
   *   backend call fails or the engine is not supported.
   */
  protected async execute(params: WebSearchParams): Promise<SearchResult[]> {
    logger.info(`Searching with ${params.engine}`, { query: params.query, limit: params.limit });
    try {
      switch (params.engine) {
        case 'duckduckgo':
          return await this.searchDuckDuckGo(params);
        case 'brave':
          return await this.searchBrave(params);
        case 'google':
          return await this.searchGoogle(params);
        case 'bing':
          return await this.searchBing(params);
        case 'serpapi':
          return await this.searchSerpApi(params);
        default:
          throw new Error(`Unsupported search engine: ${params.engine}`);
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      // Log only the message, not the error object: an Axios error carries the
      // full request config (query params and headers), which here contains
      // the API keys / subscription tokens and must not end up in log output.
      logger.error(`Search failed for ${params.engine}`, { error: message });
      throw new Error(`Search failed: ${message}`);
    }
  }

  /**
   * Build the engine query string from the raw query plus the query-level
   * filters shared by all backends: exact phrase quoting, `filetype:` and
   * `site:` operators.
   */
  private buildFilteredQuery(params: WebSearchParams): string {
    const filters = params.filters;
    let query = filters?.exactPhrase ? `"${params.query}"` : params.query;
    if (filters?.fileType) {
      query += ` filetype:${filters.fileType}`;
    }
    if (filters?.domain) {
      query += ` site:${filters.domain}`;
    }
    return query;
  }

  /**
   * Search using DuckDuckGo (free, no API key required).
   *
   * Posts the query to the HTML endpoint and scrapes `.result` entries from
   * the returned page, stopping once `params.limit` results are collected.
   */
  private async searchDuckDuckGo(params: WebSearchParams): Promise<SearchResult[]> {
    const form = new URLSearchParams({
      q: this.buildFilteredQuery(params),
      kl: params.filters?.region || '',
    });
    // Forward the date-range filter instead of silently dropping it.
    const dateFilter = this.getDuckDuckGoDateFilter(params.filters?.dateRange);
    if (dateFilter) {
      form.set('df', dateFilter);
    }

    const response = await httpClient.post('https://html.duckduckgo.com/html/', form, {
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
      },
    });

    const $ = cheerio.load(response.data);
    const results: SearchResult[] = [];
    $('.result').each((_, element): false | void => {
      // Returning false stops the cheerio iteration early.
      if (results.length >= params.limit) return false;
      const $result = $(element);
      const $title = $result.find('.result__title');
      const title = $title.text().trim();
      const snippet = $result.find('.result__snippet').text().trim();
      const href = $title.find('a').attr('href') || '';
      // Entries without a title or link are skipped.
      if (title && href) {
        results.push({
          title,
          url: this.cleanDuckDuckGoUrl(href),
          snippet,
          source: 'duckduckgo',
        });
      }
    });
    logger.info(`DuckDuckGo search completed`, { resultsFound: results.length });
    return results;
  }

  /**
   * Search using Brave Search API.
   *
   * @throws Error when `BRAVE_API_KEY` is not set.
   */
  private async searchBrave(params: WebSearchParams): Promise<SearchResult[]> {
    const apiKey = process.env.BRAVE_API_KEY;
    if (!apiKey) {
      throw new Error('BRAVE_API_KEY environment variable is required for Brave Search');
    }

    const response = await httpClient.get('https://api.search.brave.com/res/v1/web/search', {
      params: {
        q: this.buildFilteredQuery(params),
        // Brave rejects count values above 20, while the schema allows up to 50.
        count: Math.min(params.limit, 20),
        country: params.filters?.region?.toUpperCase(),
        freshness: this.getBraveFreshness(params.filters?.dateRange),
      },
      headers: {
        'X-Subscription-Token': apiKey,
        Accept: 'application/json',
      },
    });

    type BraveItem = { title: string; url: string; description?: string };
    const items: BraveItem[] = response.data.web?.results || [];
    const results: SearchResult[] = items.slice(0, params.limit).map((item) => ({
      title: item.title,
      url: item.url,
      snippet: item.description || '',
      source: 'brave',
    }));
    logger.info(`Brave search completed`, { resultsFound: results.length });
    return results;
  }

  /**
   * Search using Google Custom Search API.
   *
   * A single request is issued, so at most 10 results are returned even when
   * `params.limit` is larger.
   *
   * @throws Error when `GOOGLE_API_KEY` or `GOOGLE_SEARCH_ENGINE_ID` is not set.
   */
  private async searchGoogle(params: WebSearchParams): Promise<SearchResult[]> {
    const apiKey = process.env.GOOGLE_API_KEY;
    const searchEngineId = process.env.GOOGLE_SEARCH_ENGINE_ID;
    if (!apiKey || !searchEngineId) {
      throw new Error(
        'GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables are required for Google Search'
      );
    }

    const response = await httpClient.get('https://www.googleapis.com/customsearch/v1', {
      params: {
        key: apiKey,
        cx: searchEngineId,
        q: this.buildFilteredQuery(params),
        num: Math.min(params.limit, 10), // Google API max is 10 per request
        lr: params.filters?.language ? `lang_${params.filters.language}` : undefined,
        gl: params.filters?.region,
        dateRestrict: this.getGoogleDateRestrict(params.filters?.dateRange),
      },
    });

    type GoogleItem = { title: string; link: string; snippet?: string };
    const items: GoogleItem[] = response.data.items || [];
    const results: SearchResult[] = items.slice(0, params.limit).map((item) => ({
      title: item.title,
      url: item.link,
      snippet: item.snippet || '',
      source: 'google',
    }));
    logger.info(`Google search completed`, { resultsFound: results.length });
    return results;
  }

  /**
   * Search using Bing Search API.
   *
   * @throws Error when `BING_API_KEY` is not set.
   */
  private async searchBing(params: WebSearchParams): Promise<SearchResult[]> {
    const apiKey = process.env.BING_API_KEY;
    if (!apiKey) {
      throw new Error('BING_API_KEY environment variable is required for Bing Search');
    }

    const response = await httpClient.get('https://api.bing.microsoft.com/v7.0/search', {
      params: {
        q: this.buildFilteredQuery(params),
        count: params.limit,
        mkt: params.filters?.region || 'en-US',
        freshness: this.getBingFreshness(params.filters?.dateRange),
      },
      headers: {
        'Ocp-Apim-Subscription-Key': apiKey,
      },
    });

    type BingItem = { name: string; url: string; snippet?: string };
    const items: BingItem[] = response.data.webPages?.value || [];
    const results: SearchResult[] = items.slice(0, params.limit).map((item) => ({
      title: item.name,
      url: item.url,
      snippet: item.snippet || '',
      source: 'bing',
    }));
    logger.info(`Bing search completed`, { resultsFound: results.length });
    return results;
  }

  /**
   * Clean DuckDuckGo redirect URLs.
   *
   * Result links may be redirect URLs carrying the real target in the `uddg`
   * query parameter; when present, that target is returned. Otherwise (or when
   * the link cannot be parsed) the input is returned unchanged.
   */
  private cleanDuckDuckGoUrl(url: string): string {
    try {
      // The base URL lets relative and protocol-relative links parse.
      const parsed = new URL(url, 'https://duckduckgo.com');
      return parsed.searchParams.get('uddg') || url;
    } catch {
      return url;
    }
  }

  /**
   * Convert date range to the DuckDuckGo `df` form value.
   */
  private getDuckDuckGoDateFilter(dateRange?: string): string | undefined {
    if (!dateRange) return undefined;
    const map: Record<string, string> = {
      day: 'd',
      week: 'w',
      month: 'm',
      year: 'y',
    };
    return map[dateRange];
  }

  /**
   * Convert date range to Brave freshness format.
   */
  private getBraveFreshness(dateRange?: string): string | undefined {
    if (!dateRange) return undefined;
    const map: Record<string, string> = {
      day: 'pd',
      week: 'pw',
      month: 'pm',
      year: 'py',
    };
    return map[dateRange];
  }

  /**
   * Convert date range to Google dateRestrict format.
   */
  private getGoogleDateRestrict(dateRange?: string): string | undefined {
    if (!dateRange) return undefined;
    const map: Record<string, string> = {
      day: 'd1',
      week: 'w1',
      month: 'm1',
      year: 'y1',
    };
    return map[dateRange];
  }

  /**
   * Convert date range to Bing freshness format.
   *
   * Bing only has named values for day, week and month. A one-year window is
   * therefore expressed as an explicit `YYYY-MM-DD..YYYY-MM-DD` range ending
   * today (UTC).
   */
  private getBingFreshness(dateRange?: string): string | undefined {
    if (!dateRange) return undefined;
    if (dateRange === 'year') {
      const end = new Date();
      const start = new Date(end);
      start.setUTCFullYear(end.getUTCFullYear() - 1);
      const toIsoDay = (date: Date): string => date.toISOString().slice(0, 10);
      return `${toIsoDay(start)}..${toIsoDay(end)}`;
    }
    const map: Record<string, string> = {
      day: 'Day',
      week: 'Week',
      month: 'Month',
    };
    return map[dateRange];
  }

  /**
   * Search using SerpAPI (supports multiple search engines).
   *
   * This implementation always queries SerpAPI's `google` engine and defaults
   * to language `en` and region `us` when no filter is given.
   *
   * @throws Error when `SERP_API_KEY` is not set.
   */
  private async searchSerpApi(params: WebSearchParams): Promise<SearchResult[]> {
    const apiKey = process.env.SERP_API_KEY;
    if (!apiKey) {
      throw new Error('SERP_API_KEY environment variable is required for SerpAPI');
    }

    const response = await httpClient.get('https://serpapi.com/search', {
      params: {
        api_key: apiKey,
        q: this.buildFilteredQuery(params),
        num: params.limit,
        engine: 'google', // SerpAPI supports multiple engines, defaulting to google
        hl: params.filters?.language || 'en',
        gl: params.filters?.region || 'us',
        tbs: this.getSerpApiDateFilter(params.filters?.dateRange),
      },
    });

    type SerpApiItem = { title: string; link: string; snippet?: string };
    const items: SerpApiItem[] = response.data.organic_results || [];
    const results: SearchResult[] = items.map((item) => ({
      title: item.title,
      url: item.link,
      snippet: item.snippet || '',
      source: 'serpapi',
    }));
    logger.info(`SerpAPI search completed`, { resultsFound: results.length });
    return results.slice(0, params.limit);
  }

  /**
   * Convert date range to SerpAPI tbs format.
   */
  private getSerpApiDateFilter(dateRange?: string): string | undefined {
    if (!dateRange) return undefined;
    const map: Record<string, string> = {
      day: 'qdr:d',
      week: 'qdr:w',
      month: 'qdr:m',
      year: 'qdr:y',
    };
    return map[dateRange];
  }
}