// search.ts (3.24 kB)
/**
* Web Search using DuckDuckGo (no API key required)
*/
/**
 * A single web search hit, as produced by `searchWeb` and consumed by
 * `formatSearchResults`.
 *
 * Exported because it appears in the signatures of exported functions;
 * callers need to be able to name the type.
 */
export interface SearchResult {
  /** Human-readable title of the result link. */
  title: string;
  /** Destination URL of the result. */
  url: string;
  /** Short text excerpt for the result; empty string when none was found. */
  snippet: string;
}
/**
 * Search the web through DuckDuckGo's HTML endpoint (no API key required).
 *
 * The returned page is scraped with regular expressions: first the
 * `result__a` / `result__snippet` anchors are read, and if none are found a
 * generic "any external link" fallback is tried.
 *
 * @param query - Free-text search query. A blank query returns `[]` without
 *   issuing a request.
 * @param maxResults - Upper bound on the number of results (default 5). A
 *   non-positive or NaN value returns `[]` without issuing a request.
 * @returns At most `maxResults` results. Never rejects: any network, HTTP or
 *   parsing failure is logged with `console.error` and yields `[]`.
 */
export async function searchWeb(
  query: string,
  maxResults: number = 5
): Promise<SearchResult[]> {
  // Nothing useful can come back for these inputs, so skip the round trip.
  if (query.trim() === "" || !(maxResults > 0)) {
    return [];
  }

  try {
    // Use DuckDuckGo's HTML interface
    const encodedQuery = encodeURIComponent(query);
    const response = await fetch(
      `https://html.duckduckgo.com/html/?q=${encodedQuery}`,
      {
        headers: {
          "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        },
      }
    );
    if (!response.ok) {
      throw new Error(`Search failed: ${response.status}`);
    }

    const html = await response.text();
    const results = parseResultAnchors(html, maxResults);
    // Fallback: try alternative parsing if the primary pattern found nothing
    return results.length > 0 ? results : parseFallbackLinks(html, maxResults);
  } catch (error: unknown) {
    // Deliberate best-effort: callers get an empty list instead of a rejection.
    console.error("Search error:", error);
    return [];
  }
}

/** Extract results from the `result__a` (title link) and `result__snippet` anchors. */
function parseResultAnchors(html: string, maxResults: number): SearchResult[] {
  // Title and snippet bodies are captured lazily up to their closing </a> so
  // that inline markup (e.g. <b> highlighting) does not truncate them. The
  // snippet part is optional and may not cross into the next `result__a`
  // anchor, so a result without a snippet is never paired with the snippet of
  // the following result.
  const resultPattern =
    /<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>(?:(?:(?!class="result__a")[\s\S])*?<a[^>]*class="result__snippet"[^>]*>([\s\S]*?)<\/a>)?/g;

  const results: SearchResult[] = [];
  let match: RegExpExecArray | null;
  while (
    results.length < maxResults &&
    (match = resultPattern.exec(html)) !== null
  ) {
    // The snippet group is undefined when the optional part did not match.
    const [, href = "", rawTitle = "", rawSnippet = ""] = match;
    const title = htmlFragmentToText(rawTitle);
    if (href === "" || title === "") {
      continue;
    }
    results.push({
      title,
      url: resolveResultUrl(href),
      snippet: htmlFragmentToText(rawSnippet),
    });
  }
  return results;
}

/** Last-resort extraction: any absolute link that does not point at DuckDuckGo itself. */
function parseFallbackLinks(html: string, maxResults: number): SearchResult[] {
  const linkPattern = /<a[^>]*href="(https?:\/\/[^"]+)"[^>]*>([^<]+)<\/a>/g;

  const results: SearchResult[] = [];
  let match: RegExpExecArray | null;
  while (
    results.length < maxResults &&
    (match = linkPattern.exec(html)) !== null
  ) {
    const [, href = "", rawTitle = ""] = match;
    const title = htmlFragmentToText(rawTitle);
    // Skip DuckDuckGo internal links; very short link texts are treated as
    // navigation chrome rather than result titles (measured after trimming).
    if (!href.includes("duckduckgo.com") && title.length > 10) {
      results.push({
        title,
        url: decodeHTMLEntities(href),
        snippet: "",
      });
    }
  }
  return results;
}

/** Turn a raw `href` attribute value into the destination URL of the result. */
function resolveResultUrl(href: string): string {
  // DuckDuckGo wraps targets in a redirect whose `uddg` parameter holds the
  // percent-encoded destination.
  const redirect = /uddg=([^&]*)/.exec(href);
  if (redirect !== null && redirect[1]) {
    try {
      return decodeURIComponent(redirect[1]);
    } catch {
      // Malformed percent-encoding: fall through to the raw href rather than
      // letting one bad link discard every result already collected.
    }
  }
  // Attribute values are HTML-escaped (`&amp;`), and hrefs may be
  // protocol-relative (`//host/...`); the page itself is served over https.
  const plainHref = decodeHTMLEntities(href);
  return plainHref.startsWith("//") ? `https:${plainHref}` : plainHref;
}

/** Strip tags from an HTML fragment, decode its entities and normalize whitespace. */
function htmlFragmentToText(fragment: string): string {
  // Tags are removed before entity decoding so an escaped "&lt;b&gt;" in the
  // text survives as a literal "<b>".
  return decodeHTMLEntities(fragment.replace(/<[^>]*>/g, ""))
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Decode HTML character references in `text`.
 *
 * Handles the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
 * `&nbsp;` (mapped to a regular space), plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) numeric references. Unknown names and numeric
 * references outside the valid Unicode scalar range are left untouched.
 * Decoding is single-pass, so `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param text - Text possibly containing character references.
 * @returns The text with recognized references replaced by their characters.
 */
function decodeHTMLEntities(text: string): string {
  // A Map (rather than an object literal) so names such as "constructor"
  // cannot resolve to inherited Object.prototype members.
  const namedEntities = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "],
  ]);

  // The reference body is restricted to a name or a number so that a stray
  // "&" followed later by ";" does not swallow a real entity in between.
  return text.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (entity: string, reference: string): string => {
      if (!reference.startsWith("#")) {
        return namedEntities.get(reference) ?? entity;
      }
      const isHex = /^#[xX]/.test(reference);
      const codePoint = isHex
        ? parseInt(reference.slice(2), 16)
        : parseInt(reference.slice(1), 10);
      // String.fromCodePoint throws above U+10FFFF; NUL and lone surrogates
      // are not meaningful text either, so leave such references as written.
      const isScalarValue =
        codePoint > 0 &&
        codePoint <= 0x10ffff &&
        !(codePoint >= 0xd800 && codePoint <= 0xdfff);
      return isScalarValue ? String.fromCodePoint(codePoint) : entity;
    }
  );
}
/**
 * Format search results as plain text for LLM consumption.
 *
 * Each result becomes a numbered entry with a title line, a `URL:` line and,
 * only when a snippet is present, a `Summary:` line. Entries are separated by
 * exactly one blank line; a result without a snippet does not leave a
 * dangling newline behind.
 *
 * @param results - Results to render, in display order.
 * @returns The formatted text, or `"No search results found."` for an empty list.
 */
export function formatSearchResults(results: SearchResult[]): string {
  if (results.length === 0) {
    return "No search results found.";
  }

  return results
    .map((result, index) => {
      const lines = [`[${index + 1}] ${result.title}`, `URL: ${result.url}`];
      if (result.snippet) {
        lines.push(`Summary: ${result.snippet}`);
      }
      return lines.join("\n");
    })
    .join("\n\n");
}