search-engine-manager.ts•4.79 kB
/**
* Search Engine Manager
* 提供统一的搜索引擎接口,支持多种搜索策略
*/
import { createLogger } from '../utils/logger.js';
const logger = createLogger('SearchEngineManager');
export interface SearchOptions {
maxResults?: number;
timeout?: number;
userAgent?: string;
}
export interface SearchResult {
title: string;
url: string;
snippet: string;
source: string;
}
export interface SearchResponse {
results: SearchResult[];
html?: string;
totalResults?: number;
searchTime?: number;
}
export class SearchEngineManager {
private static instance: SearchEngineManager;
private userAgents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
];
private constructor() {}
public static getInstance(): SearchEngineManager {
if (!SearchEngineManager.instance) {
SearchEngineManager.instance = new SearchEngineManager();
}
return SearchEngineManager.instance;
}
/**
* 执行搜索查询
*/
async search(query: string, options: SearchOptions = {}): Promise<SearchResponse> {
const {
maxResults = 10,
timeout = 10000,
userAgent = this.getRandomUserAgent()
} = options;
const startTime = Date.now();
try {
logger.info(`Searching for: ${query}`);
// 使用DuckDuckGo作为主要搜索引擎(无需API密钥)
const searchUrl = `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
const response = await fetch(searchUrl, {
headers: {
'User-Agent': userAgent,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive'
},
signal: AbortSignal.timeout(timeout)
});
if (!response.ok) {
throw new Error(`Search request failed: ${response.status} ${response.statusText}`);
}
const html = await response.text();
const results = this.parseSearchResults(html, query);
const searchTime = Date.now() - startTime;
logger.info(`Found ${results.length} search results in ${searchTime}ms`);
return {
results: results.slice(0, maxResults),
html,
totalResults: results.length,
searchTime
};
} catch (error) {
logger.error('Search failed:', error);
// 返回空结果而不是抛出错误
return {
results: [],
html: '',
totalResults: 0,
searchTime: Date.now() - startTime
};
}
}
/**
* 解析搜索结果HTML
*/
private parseSearchResults(html: string, query: string): SearchResult[] {
const results: SearchResult[] = [];
try {
// 简单的HTML解析,提取链接和标题
const linkPattern = /<a[^>]*href=["']([^"']+)["'][^>]*>([^<]+)<\/a>/gi;
let match;
while ((match = linkPattern.exec(html)) !== null) {
const url = match[1];
const title = match[2];
// 过滤掉内部链接和无效URL
if (url.startsWith('http') && !url.includes('duckduckgo.com') && title.length > 5) {
results.push({
title: this.cleanText(title),
url: url,
snippet: `Search result for "${query}"`,
source: 'DuckDuckGo'
});
}
}
// 如果没有找到结果,生成一些基本的搜索结果
if (results.length === 0) {
results.push({
title: `Search results for ${query}`,
url: `https://example.com/search?q=${encodeURIComponent(query)}`,
snippet: `Information about ${query}`,
source: 'Fallback'
});
}
} catch (error) {
logger.warn('Failed to parse search results:', error);
}
return results;
}
/**
* 清理文本内容
*/
private cleanText(text: string): string {
return text
.replace(/&/g, '&')
.replace(/</g, '<')
.replace(/>/g, '>')
.replace(/"/g, '"')
.replace(/'/g, "'")
.replace(/\s+/g, ' ')
.trim();
}
/**
* 获取随机User-Agent
*/
private getRandomUserAgent(): string {
return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
}
/**
* 延迟函数
*/
private delay(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
}
export default SearchEngineManager;