Google Search MCP Server

index.ts•15.5 KiB

#!/usr/bin/env node
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import * as cheerio from "cheerio";
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";

const GOOGLE_API_KEY = process.env.GOOGLE_API_KEY;
const GOOGLE_CX = process.env.GOOGLE_CX;

/** Per-request timeout applied to every page fetch, in milliseconds. */
const FETCH_TIMEOUT_MS = 15000;

/** Headers sent with every page fetch. */
const PAGE_FETCH_HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.5",
};

/** Elements stripped before the Cheerio fallback extracts text. */
const NON_CONTENT_SELECTOR =
  "script, style, nav, footer, header, aside, noscript, svg, iframe, form, .ads, .advertisement, .sidebar, .comments, .social-share";

/** Candidate main-content containers, tried in order by the Cheerio fallback. */
const CONTENT_SELECTORS = [
  "article",
  "main",
  '[role="main"]',
  ".post-content",
  ".article-content",
  ".entry-content",
  ".content",
  "#content",
  ".post",
  ".blog-post",
];

/** One normalized search hit returned by {@link googleSearch}. */
interface SearchResult {
  title: string;
  link: string;
  snippet: string;
  /** 1-based rank of the hit within the response. */
  position: number;
  /** Value of the page's `article:published_time` meta tag, when present. */
  date?: string;
}

/** The subset of the Custom Search JSON response that this module reads. */
interface GoogleSearchResponse {
  items?: Array<{
    title: string;
    link: string;
    snippet: string;
    pagemap?: {
      metatags?: Array<{ "article:published_time"?: string }>;
    };
  }>;
  searchInformation?: {
    totalResults: string;
  };
}

/** Outcome of fetching and extracting one page. */
interface FetchedContent {
  url: string;
  title: string;
  content: string;
  success: boolean;
  error?: string;
  /** Word count of the extracted text, measured before truncation. */
  wordCount?: number;
}

// Search types supported
type SearchType = "web" | "news" | "images";

/** Converts an unknown thrown value into a printable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : "Unknown error";
}

/** Builds a successful single-text tool result. */
function textResult(text: string) {
  return { content: [{ type: "text" as const, text }] };
}

/** Builds a failed single-text tool result (`isError: true`). */
function errorResult(text: string) {
  return { content: [{ type: "text" as const, text }], isError: true };
}

/** Collapses every run of whitespace (newlines included) to one space and trims. */
function normalizeWhitespace(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/** Splits a comma-separated domain list, dropping blank entries. */
function parseDomainList(list: string): string[] {
  return list
    .split(",")
    .map((domain) => domain.trim())
    .filter((domain) => domain.length > 0);
}

/**
 * Queries the Google Custom Search JSON API.
 *
 * @param query - Search query, passed as the `q` parameter.
 * @param numResults - Desired number of results; coerced to an integer in 1-10.
 * @param searchType - `"news"` adds `sort=date`, `"images"` adds `searchType=image`.
 * @returns The hits in response order; an empty array when the API returns no items.
 * @throws Error when `GOOGLE_API_KEY` or `GOOGLE_CX` is unset, or on a non-2xx response.
 */
async function googleSearch(
  query: string,
  numResults: number = 10,
  searchType: SearchType = "web"
): Promise<SearchResult[]> {
  if (!GOOGLE_API_KEY) {
    throw new Error("GOOGLE_API_KEY environment variable is required");
  }
  if (!GOOGLE_CX) {
    throw new Error("GOOGLE_CX environment variable is required");
  }

  // Google API max is 10 per request; also guard against 0, negatives and fractions.
  const num = Math.min(Math.max(Math.trunc(numResults), 1), 10);

  const baseUrl = "https://customsearch.googleapis.com/customsearch/v1";
  const params = new URLSearchParams({
    key: GOOGLE_API_KEY,
    cx: GOOGLE_CX,
    q: query,
    num: num.toString(),
  });

  // Add search type specific parameters
  if (searchType === "news") {
    // News searches are approximated by sorting results by date.
    params.append("sort", "date");
  } else if (searchType === "images") {
    params.append("searchType", "image");
  }

  const response = await fetch(`${baseUrl}?${params.toString()}`);
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(
      `Google Custom Search API error: ${response.status} ${response.statusText} - ${errorText}`
    );
  }

  const data = (await response.json()) as GoogleSearchResponse;
  if (!data.items || data.items.length === 0) {
    return [];
  }

  return data.items.map((item, idx) => ({
    title: item.title,
    link: item.link,
    snippet: item.snippet || "",
    position: idx + 1,
    date: item.pagemap?.metatags?.[0]?.["article:published_time"],
  }));
}

/**
 * Extracts the readable text of a page with Readability, falling back to
 * {@link extractContentWithCheerio} when parsing throws or yields no text.
 *
 * @param html - Raw HTML of the page.
 * @param url - Page URL, handed to JSDOM as the document URL.
 * @returns Whitespace-normalized text.
 */
function extractContentWithReadability(html: string, url: string): string {
  let dom: JSDOM | undefined;
  try {
    dom = new JSDOM(html, { url });
    const article = new Readability(dom.window.document).parse();
    if (article && article.textContent) {
      return normalizeWhitespace(article.textContent);
    }
  } catch {
    // Deliberate best effort: any failure here falls back to Cheerio below.
  } finally {
    // Release the window's resources; one JSDOM is created per fetched page.
    dom?.window.close();
  }
  return extractContentWithCheerio(html);
}

/**
 * Fallback text extraction: strips non-content elements, then takes the text
 * of the first selector in CONTENT_SELECTORS that matches, or of `<body>`
 * when no selector yields text.
 *
 * @param html - Raw HTML of the page.
 * @returns Whitespace-normalized text.
 */
function extractContentWithCheerio(html: string): string {
  const $ = cheerio.load(html);

  // Remove non-content elements
  $(NON_CONTENT_SELECTOR).remove();

  // Try to find main content area
  let content = "";
  for (const selector of CONTENT_SELECTORS) {
    const element = $(selector);
    if (element.length > 0) {
      content = element.text();
      break;
    }
  }

  // Fallback to body if no content area found
  if (!content) {
    content = $("body").text();
  }

  return normalizeWhitespace(content);
}

/**
 * Fetches a URL, retrying with exponential backoff.
 *
 * Retried: network errors, 429, and any other non-OK status outside 4xx.
 * Not retried: timeouts, and 4xx statuses other than 429.
 *
 * @param url - URL to fetch.
 * @param maxRetries - Maximum number of attempts.
 * @param baseDelay - Delay before the second attempt in ms; doubles each attempt.
 * @returns The first response whose `ok` flag is true.
 * @throws The last error seen, or "Max retries exceeded" when no attempt ran.
 */
async function fetchWithRetry(
  url: string,
  maxRetries: number = 3,
  baseDelay: number = 1000
): Promise<Response> {
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < maxRetries; attempt++) {
    let retryable = true;

    try {
      const response = await fetch(url, {
        headers: PAGE_FETCH_HEADERS,
        signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
      });
      if (response.ok) {
        return response;
      }

      // The body of a failed response is never read, so release it.
      void response.body?.cancel().catch(() => undefined);

      lastError = new Error(`HTTP ${response.status}`);
      // Don't retry on client errors (4xx) except 429 (rate limit)
      const isClientError = response.status >= 400 && response.status < 500;
      retryable = !isClientError || response.status === 429;
    } catch (error) {
      lastError = error instanceof Error ? error : new Error("Unknown error");
      // Don't retry on timeout
      retryable = !(lastError.name === "TimeoutError" || lastError.message.includes("timeout"));
    }

    // Decided outside the try/catch so the throw cannot be swallowed and retried.
    if (!retryable) {
      throw lastError;
    }

    // Exponential backoff
    if (attempt < maxRetries - 1) {
      const delay = baseDelay * Math.pow(2, attempt);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  throw lastError || new Error("Max retries exceeded");
}

/**
 * Downloads a page and extracts its title and readable text.
 *
 * Never rejects: failures are reported through `success: false` and `error`.
 *
 * @param url - Page to fetch.
 * @param maxLength - Maximum characters of content kept before a truncation marker is appended.
 */
async function fetchUrl(url: string, maxLength: number = 100000): Promise<FetchedContent> {
  try {
    const response = await fetchWithRetry(url);
    const html = await response.text();

    // Use only the first <title>, so titles of inline SVGs are not appended to it.
    const $ = cheerio.load(html);
    const title =
      $("title").first().text().trim() ||
      $('meta[property="og:title"]').attr("content") ||
      $('meta[name="title"]').attr("content") ||
      "";

    let text = extractContentWithReadability(html, url);

    // Calculate word count before truncation; "".split() would report 1, so guard it.
    const wordCount = text.length > 0 ? text.split(/\s+/).length : 0;

    // Truncate to max length
    if (text.length > maxLength) {
      text = text.substring(0, maxLength) + "\n\n[Content truncated...]";
    }

    return { url, title, content: text, success: true, wordCount };
  } catch (error) {
    return { url, title: "", content: "", success: false, error: describeError(error) };
  }
}

/**
 * Fetches many URLs in sequential batches of `concurrency` parallel requests.
 *
 * @param urls - Pages to fetch.
 * @param concurrency - Batch size; values below 1 are treated as 1.
 * @returns One entry per input URL, in input order.
 */
async function fetchAllUrls(
  urls: string[],
  concurrency: number = 5
): Promise<FetchedContent[]> {
  const results: FetchedContent[] = new Array(urls.length);
  // A step of 0 (or less) would never advance the loop below.
  const batchSize = Math.max(1, Math.floor(concurrency) || 1);

  // Process in batches for controlled concurrency
  for (let start = 0; start < urls.length; start += batchSize) {
    const batch = urls.slice(start, start + batchSize);
    const settled = await Promise.allSettled(batch.map((url) => fetchUrl(url)));

    settled.forEach((result, offset) => {
      const globalIdx = start + offset;
      if (result.status === "fulfilled") {
        results[globalIdx] = result.value;
      } else {
        results[globalIdx] = {
          url: urls[globalIdx],
          title: "",
          content: "",
          success: false,
          error: result.reason instanceof Error ? result.reason.message : "Promise rejected",
        };
      }
    });
  }

  return results;
}

/**
 * Renders search hits and their fetched page content as one Markdown document.
 *
 * `fetchedContents[i]` is paired with `searchResults[i]`. A hit without usable
 * content shows its search snippet instead, preceded by the fetch error if any.
 *
 * @param query - Query shown in the heading.
 * @param searchResults - Hits to list, in display order.
 * @param fetchedContents - Fetch outcomes, index-aligned with `searchResults`.
 * @param maxContentPerPage - Maximum characters of content shown per page.
 * @param searchType - Search type shown in the summary.
 */
function formatResponse(
  query: string,
  searchResults: SearchResult[],
  fetchedContents: FetchedContent[],
  maxContentPerPage: number,
  searchType: SearchType
): string {
  const successCount = fetchedContents.filter((c) => c.success).length;
  const totalWords = fetchedContents.reduce((sum, c) => sum + (c.wordCount || 0), 0);

  let response = `# Deep Search Results for: "${query}"\n\n`;
  response += `**Search Type:** ${searchType}\n`;
  response += `**Results:** ${searchResults.length} found, ${successCount} pages fetched successfully\n`;
  response += `**Total Content:** ~${totalWords.toLocaleString()} words\n\n`;
  response += "---\n\n";

  for (let i = 0; i < searchResults.length; i++) {
    const search = searchResults[i];
    const fetched = fetchedContents[i];

    response += `## ${i + 1}. ${search.title}\n`;
    response += `**URL:** ${search.link}\n`;
    if (search.date) {
      response += `**Date:** ${search.date}\n`;
    }
    response += "\n";

    if (fetched?.success && fetched.content) {
      const content =
        fetched.content.length > maxContentPerPage
          ? fetched.content.substring(0, maxContentPerPage) + "\n\n[Content truncated...]"
          : fetched.content;
      response += `### Full Page Content:\n\n${content}\n\n`;
    } else {
      // Covers failed fetches and pages whose extraction produced no text.
      if (fetched?.error) {
        response += `*Could not fetch content: ${fetched.error}*\n\n`;
      }
      response += `**Search Snippet:** ${search.snippet}\n\n`;
    }

    response += "---\n\n";
  }

  return response;
}

// Create MCP Server
const server = new McpServer({
  name: "google-search-mcp",
  version: "2.1.0",
});

// Register the google_search tool (simple search with snippets only)
server.tool(
  "google_search",
  "Simple Google search for quick lookups. Returns snippets only without fetching full page content. For deep research with full page content, use deep_search instead.",
  {
    query: z.string().describe("Search query"),
    num_results: z
      .number()
      .min(1)
      .max(10)
      .default(10)
      .describe("Number of results to return (1-10, default: 10)"),
  },
  async ({ query, num_results = 10 }: { query: string; num_results?: number }) => {
    try {
      const searchResults = await googleSearch(query, num_results, "web");

      if (searchResults.length === 0) {
        return textResult(`No search results found for: "${query}"`);
      }

      let response = `# Search Results for: "${query}"\n\n`;
      response += `**Results:** ${searchResults.length} found\n\n`;
      response += "---\n\n";

      for (const result of searchResults) {
        response += `## ${result.position}. ${result.title}\n`;
        response += `**URL:** ${result.link}\n`;
        response += `**Snippet:** ${result.snippet}\n\n`;
        response += "---\n\n";
      }

      return textResult(response);
    } catch (error) {
      return errorResult(`Error performing search: ${describeError(error)}`);
    }
  }
);

// Register the deep_search tool
server.tool(
  "deep_search",
  "Performs a comprehensive web search using Google Custom Search API, fetching the FULL content from top results using advanced content extraction (Readability algorithm), and returning consolidated content. Supports web, news, and image search types. Includes retry logic for reliability.",
  {
    query: z.string().describe("The search query to look up"),
    num_results: z
      .number()
      .min(1)
      .max(10)
      .default(10)
      .describe("Number of results to fetch (1-10, default: 10)"),
    max_content_per_page: z
      .number()
      .min(5000)
      .max(100000)
      .default(50000)
      .describe("Maximum characters of content to return per page (5000-100000, default: 50000)"),
    search_type: z
      .enum(["web", "news", "images"])
      .default("web")
      .describe("Type of search: 'web' for general search, 'news' for news articles, 'images' for image search"),
    include_domains: z
      .string()
      .optional()
      .describe("Comma-separated list of domains to include (e.g., 'reddit.com,github.com')"),
    exclude_domains: z
      .string()
      .optional()
      .describe("Comma-separated list of domains to exclude (e.g., 'pinterest.com,facebook.com')"),
  },
  async ({
    query,
    num_results = 10,
    max_content_per_page = 50000,
    search_type = "web",
    include_domains,
    exclude_domains,
  }: {
    query: string;
    num_results?: number;
    max_content_per_page?: number;
    search_type?: SearchType;
    include_domains?: string;
    exclude_domains?: string;
  }) => {
    try {
      // Build query with domain filters; blank entries would yield a bare "site:" operator.
      let searchQuery = query;
      const included = include_domains ? parseDomainList(include_domains) : [];
      if (included.length > 0) {
        searchQuery += " " + included.map((d) => `site:${d}`).join(" OR ");
      }
      const excluded = exclude_domains ? parseDomainList(exclude_domains) : [];
      if (excluded.length > 0) {
        searchQuery += " " + excluded.map((d) => `-site:${d}`).join(" ");
      }

      // Step 1: Google search
      const searchResults = await googleSearch(searchQuery, num_results, search_type);

      if (searchResults.length === 0) {
        return textResult(`No search results found for: "${query}"`);
      }

      // Step 2: Fetch all URLs with controlled concurrency
      const urls = searchResults.map((r) => r.link);
      const fetchedContents = await fetchAllUrls(urls, 5);

      // Step 3: Format and return response (headed by the user's original query)
      const response = formatResponse(
        query,
        searchResults,
        fetchedContents,
        max_content_per_page,
        search_type
      );

      return textResult(response);
    } catch (error) {
      return errorResult(`Error performing deep search: ${describeError(error)}`);
    }
  }
);

// Register deep_search_news tool for convenience
server.tool(
  "deep_search_news",
  "Searches for recent news articles on a topic using Google Custom Search API, fetches full article content, and returns consolidated results. Optimized for news and current events.",
  {
    query: z.string().describe("The news topic to search for"),
    num_results: z
      .number()
      .min(1)
      .max(10)
      .default(10)
      .describe("Number of news articles to fetch (1-10, default: 10)"),
    max_content_per_page: z
      .number()
      .min(5000)
      .max(100000)
      .default(30000)
      .describe("Maximum characters per article (default: 30000)"),
  },
  async ({
    query,
    num_results = 10,
    max_content_per_page = 30000,
  }: {
    query: string;
    num_results?: number;
    max_content_per_page?: number;
  }) => {
    try {
      const searchResults = await googleSearch(query, num_results, "news");

      if (searchResults.length === 0) {
        return textResult(`No news results found for: "${query}"`);
      }

      const urls = searchResults.map((r) => r.link);
      const fetchedContents = await fetchAllUrls(urls, 5);

      const response = formatResponse(
        query,
        searchResults,
        fetchedContents,
        max_content_per_page,
        "news"
      );

      return textResult(response);
    } catch (error) {
      return errorResult(`Error performing news search: ${describeError(error)}`);
    }
  }
);

/**
 * Connects the server to a stdio transport.
 *
 * On failure the error is written to stderr and, best effort, to
 * /tmp/deep-search-mcp-error.log; the process then exits with status 1.
 */
async function main(): Promise<void> {
  try {
    const transport = new StdioServerTransport();
    await server.connect(transport);
  } catch (error) {
    // stdout carries the protocol stream, so diagnostics go to stderr instead.
    console.error(`google-search-mcp failed to start: ${String(error)}`);
    try {
      const fs = await import("fs");
      fs.writeFileSync("/tmp/deep-search-mcp-error.log", String(error));
    } catch {
      // Best effort: the log directory may be missing (e.g. on Windows).
    }
    process.exit(1);
  }
}

void main();

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/thejusdutt/google-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

index.ts•15.5 KiB