Pomera AI Commander

web_search.py•20.7 KiB

#!/usr/bin/env python3 """ Web Search Tool =============== Search the web using multiple search engine APIs. API keys are loaded from encrypted database settings (Pomera settings.db). Configure keys in Pomera UI: Select "Web Search" tool and enter API keys. ## Usage # Basic search (DuckDuckGo - no API key required) python web_search.py "your search query" # Use specific engine python web_search.py "query" --engine tavily # Save to JSON file python web_search.py "query" --output searches/ python web_search.py "query" -o searches/ --task seo-research ## Engine Selection Priority Default order: 1. duckduckgo - Free, no API key required (default) 2. tavily - AI-optimized snippets, 1000 free/month 3. google - Complex queries, 100 free/day 4. brave - General search, 2000 free/month 5. serpapi - Multi-engine, 100 free TOTAL 6. serper - Fast Google SERP, 2500 free TOTAL """ import argparse import json import os import sys import re from pathlib import Path from typing import Dict, List, Optional import urllib.request import urllib.parse import urllib.error from datetime import datetime, timedelta # Optional async imports for DuckDuckGo try: import httpx from bs4 import BeautifulSoup DUCKDUCKGO_AVAILABLE = True except ImportError: DUCKDUCKGO_AVAILABLE = False def get_encrypted_api_key(engine_key: str) -> str: """ Load encrypted API key for a search engine from database settings. Uses the same database path as the Pomera UI to ensure keys are loaded from the correct location. Args: engine_key: Engine name (e.g., 'tavily', 'google', 'brave') Returns: Decrypted API key or empty string if not configured """ try: from tools.ai_tools import decrypt_api_key from core.database_settings_manager import DatabaseSettingsManager # Get the correct database path (same as UI uses) try: from core.data_directory import get_database_path db_path = get_database_path("settings.db") except ImportError: db_path = "settings.db" settings_manager = DatabaseSettingsManager(db_path=db_path) web_search_settings = settings_manager.get_tool_settings("Web Search") # web_search_settings is a dict with keys like 'tavily_api_key', 'google_cse_id', etc. encrypted = web_search_settings.get(f"{engine_key}_api_key", "") if encrypted: return decrypt_api_key(encrypted) except Exception as e: print(f"[DEBUG] Failed to load API key for {engine_key}: {e}") return "" def get_web_search_setting(engine_key: str, setting: str, default: str = "") -> str: """Get a web search setting from database. Uses the same database path as the Pomera UI. """ try: from core.database_settings_manager import DatabaseSettingsManager # Get the correct database path (same as UI uses) try: from core.data_directory import get_database_path db_path = get_database_path("settings.db") except ImportError: db_path = "settings.db" settings_manager = DatabaseSettingsManager(db_path=db_path) web_search_settings = settings_manager.get_tool_settings("Web Search") return web_search_settings.get(f"{engine_key}_{setting}", default) except Exception: return default def search_google(query: str, count: int = 5) -> List[Dict]: """Search using Google Custom Search API.""" api_key = get_encrypted_api_key("google") cse_id = get_web_search_setting("google", "cse_id", "") if not api_key: print("[ERROR] Google API key not configured. Add in Pomera Web Search settings.") return [] if not cse_id: print("[ERROR] Google CSE ID not configured. Add in Pomera Web Search settings.") return [] url = f"https://www.googleapis.com/customsearch/v1?key={api_key}&cx={cse_id}&q={urllib.parse.quote(query)}&num={min(count, 10)}" try: req = urllib.request.Request(url) with urllib.request.urlopen(req, timeout=30) as response: data = json.loads(response.read().decode()) if "error" in data: print(f"[ERROR] Google API: {data['error']['message']}") return [] results = [] for item in data.get("items", []): results.append({ "title": item.get("title", ""), "snippet": item.get("snippet", ""), "url": item.get("link", ""), "source": "google" }) return results except urllib.error.HTTPError as e: print(f"[ERROR] HTTP {e.code}: {e.reason}") return [] except Exception as e: print(f"[ERROR] {e}") return [] def search_brave(query: str, count: int = 5) -> List[Dict]: """Search using Brave Search API.""" api_key = get_encrypted_api_key("brave") if not api_key: print("[ERROR] Brave API key not configured. Add in Pomera Web Search settings.") return [] url = f"https://api.search.brave.com/res/v1/web/search?q={urllib.parse.quote(query)}&count={min(count, 20)}" try: req = urllib.request.Request(url) req.add_header("Accept", "application/json") req.add_header("X-Subscription-Token", api_key) with urllib.request.urlopen(req, timeout=30) as response: data = json.loads(response.read().decode()) results = [] for item in data.get("web", {}).get("results", []): results.append({ "title": item.get("title", ""), "snippet": item.get("description", ""), "url": item.get("url", ""), "source": "brave" }) return results except urllib.error.HTTPError as e: print(f"[ERROR] HTTP {e.code}: {e.reason}") return [] except Exception as e: print(f"[ERROR] {e}") return [] def search_duckduckgo_sync(query: str, count: int = 10) -> List[Dict]: """ Search DuckDuckGo using the ddgs package. No API key required - free and reliable. """ try: from ddgs import DDGS except ImportError: print("[ERROR] DuckDuckGo requires: pip install ddgs") return [] try: with DDGS() as ddgs: results_gen = ddgs.text(query, max_results=count) results = [] for r in results_gen: results.append({ "title": r.get("title", ""), "snippet": r.get("body", ""), "url": r.get("href", ""), "source": "duckduckgo", "position": len(results) + 1 }) return results except Exception as e: print(f"[ERROR] DuckDuckGo search failed: {e}") return [] def search_serper(query: str, count: int = 5) -> List[Dict]: """ Search using Serper.dev Google SERP API. Fast, reliable Google results. 2500 free queries (no CC required). """ api_key = get_encrypted_api_key("serper") if not api_key: print("[ERROR] Serper API key not configured. Add in Pomera Web Search settings.") return [] try: data = json.dumps({"q": query, "num": min(count, 100)}).encode('utf-8') req = urllib.request.Request( "https://google.serper.dev/search", data=data, headers={ "X-API-KEY": api_key, "Content-Type": "application/json" } ) with urllib.request.urlopen(req, timeout=30) as response: result = json.loads(response.read().decode()) results = [] for item in result.get("organic", [])[:count]: results.append({ "title": item.get("title", ""), "snippet": item.get("snippet", ""), "url": item.get("link", ""), "source": "serper", "position": item.get("position", len(results) + 1) }) return results except urllib.error.HTTPError as e: print(f"[ERROR] Serper HTTP {e.code}: {e.reason}") return [] except Exception as e: print(f"[ERROR] Serper: {e}") return [] def search_tavily(query: str, count: int = 5, search_depth: str = "basic") -> List[Dict]: """ Search using Tavily AI-optimized search API. Designed for AI agents. 1000 free calls/month. Args: query: Search query string count: Number of results (max 20) search_depth: "basic" (1 credit, balanced) or "advanced" (2 credits, highest relevance) Advanced mode returns multiple semantic snippets per source. """ api_key = get_encrypted_api_key("tavily") if not api_key: print("[ERROR] Tavily API key not configured. Add in Pomera Web Search settings.") return [] # Validate search_depth if search_depth not in ("basic", "advanced"): search_depth = "basic" try: data = json.dumps({ "api_key": api_key, "query": query, "max_results": min(count, 20), "search_depth": search_depth }).encode('utf-8') req = urllib.request.Request( "https://api.tavily.com/search", data=data, headers={"Content-Type": "application/json"} ) with urllib.request.urlopen(req, timeout=30) as response: result = json.loads(response.read().decode()) results = [] for i, item in enumerate(result.get("results", [])[:count], 1): results.append({ "title": item.get("title", ""), "snippet": item.get("content", ""), "url": item.get("url", ""), "source": "tavily", "position": i, "score": item.get("score", None) }) return results except urllib.error.HTTPError as e: print(f"[ERROR] Tavily HTTP {e.code}: {e.reason}") return [] except Exception as e: print(f"[ERROR] Tavily: {e}") return [] def search_serpapi(query: str, count: int = 5) -> List[Dict]: """ Search using SerpApi (supports Google, Bing, Yahoo, etc). 100 free searches total (one-time credit). """ api_key = get_encrypted_api_key("serpapi") if not api_key: print("[ERROR] SerpApi key not configured. Add in Pomera Web Search settings.") return [] params = urllib.parse.urlencode({ "q": query, "api_key": api_key, "num": min(count, 100), "engine": "google" }) try: url = f"https://serpapi.com/search.json?{params}" req = urllib.request.Request(url) with urllib.request.urlopen(req, timeout=30) as response: result = json.loads(response.read().decode()) results = [] for item in result.get("organic_results", [])[:count]: results.append({ "title": item.get("title", ""), "snippet": item.get("snippet", ""), "url": item.get("link", ""), "source": "serpapi", "position": item.get("position", len(results) + 1) }) return results except urllib.error.HTTPError as e: print(f"[ERROR] SerpApi HTTP {e.code}: {e.reason}") return [] except Exception as e: print(f"[ERROR] SerpApi: {e}") return [] def search_exa( query: str, count: int = 10, search_type: str = "auto", content_type: str = "highlights", max_characters: int = 2000, category: str = "", max_age_hours: int = -1, include_text: str = "" ) -> List[Dict]: """ Search using Exa AI-powered neural search API. Args: query: Search query string count: Number of results (default 10, max 20) search_type: "auto" (balanced), "fast" (speed), or "neural" (deep relevance) content_type: "highlights" (token efficient, default) or "text" (full webpage) max_characters: Max characters for content (default 2000, range 100-20000) category: Specialized index - "news", "research paper", "company", "tweet", or "" for general max_age_hours: Content freshness - 0=livecrawl, 24=daily, -1=cache only (default) include_text: Only return results containing this phrase (optional) Returns: List of result dicts with title, snippet, url, source, score """ api_key = get_encrypted_api_key("exa") if not api_key: print("[ERROR] Exa API key not configured. Add in Pomera Web Search settings.") return [] # Validate search_type if search_type not in ("auto", "fast", "neural"): search_type = "auto" # Build request payload payload = { "query": query, "type": search_type, "numResults": min(count, 20), } # Add category if specified if category and category in ("news", "research paper", "company", "tweet"): payload["category"] = category # Add content freshness if specified if max_age_hours >= 0: payload["maxAgeHours"] = max_age_hours # Add text filter if specified if include_text: payload["includeText"] = [include_text] # Configure content retrieval max_chars = max(100, min(max_characters, 20000)) if content_type == "text": payload["contents"] = { "text": { "maxCharacters": max_chars } } else: # Default to highlights (token efficient) payload["contents"] = { "highlights": { "numSentences": 5, "highlightsPerUrl": 3 } } try: import requests # Use requests library (urllib has issues with Exa API headers) response = requests.post( "https://api.exa.ai/search", json=payload, headers={"x-api-key": api_key}, timeout=30 ) response.raise_for_status() result = response.json() results = [] for i, item in enumerate(result.get("results", [])[:count], 1): # Extract snippet based on content type if content_type == "text": snippet = item.get("text", "")[:max_chars] else: highlights = item.get("highlights", []) snippet = " ".join(highlights) if highlights else item.get("text", "")[:500] results.append({ "title": item.get("title", ""), "snippet": snippet, "url": item.get("url", ""), "source": "exa", "position": i, "score": item.get("score", None), "published_date": item.get("publishedDate", None) }) return results except requests.exceptions.HTTPError as e: print(f"[ERROR] Exa HTTP {e.response.status_code}: {e.response.reason}") return [] except Exception as e: print(f"[ERROR] Exa: {e}") return [] def search(query: str, engine: str = "duckduckgo", count: int = 5, search_depth: str = "basic") -> List[Dict]: """ Search the web using the specified engine. Args: query: Search query string engine: Engine name (default: duckduckgo) count: Number of results (default: 5) search_depth: For Tavily only: "basic" or "advanced" (default: basic) Returns: List of result dicts with title, snippet, url, source """ engine = engine.lower() if engine == "google": return search_google(query, count) elif engine in ("duckduckgo", "ddg"): return search_duckduckgo_sync(query, count) elif engine == "serper": return search_serper(query, count) elif engine == "tavily": return search_tavily(query, count, search_depth) elif engine == "serpapi": return search_serpapi(query, count) elif engine == "brave": return search_brave(query, count) elif engine == "exa": return search_exa(query, count) else: print(f"[ERROR] Unknown engine: {engine}") return [] # ============================================================================= # PERSISTENT STORAGE # ============================================================================= def slugify(text: str, max_length: int = 40) -> str: """Convert text to URL-safe slug for filenames.""" slug = text.lower().strip() slug = re.sub(r'[\s_]+', '-', slug) slug = re.sub(r'[^a-z0-9-]', '', slug) slug = re.sub(r'-+', '-', slug).strip('-') if len(slug) > max_length: slug = slug[:max_length].rsplit('-', 1)[0] return slug or 'search' def save_results( results: List[Dict], query: str, engine: str, output_dir: str, task: Optional[str] = None, count: int = 5 ) -> Path: """Save search results to organized JSON file.""" now = datetime.now() date_dir = now.strftime('%Y-%m-%d') time_prefix = now.strftime('%H-%M-%S') query_slug = slugify(query) if task: filename = f"{time_prefix}-{engine}-task-{slugify(task, 20)}-{query_slug}.json" else: filename = f"{time_prefix}-{engine}-{query_slug}.json" save_dir = Path(output_dir) / date_dir save_dir.mkdir(parents=True, exist_ok=True) filepath = save_dir / filename output = { "meta": { "query": query, "engine": engine, "timestamp": now.isoformat(), "count_requested": count, "count_returned": len(results), }, "results": results } if task: output["meta"]["task"] = task with open(filepath, 'w', encoding='utf-8') as f: json.dump(output, f, indent=2, ensure_ascii=False) return filepath def format_results(results: List[Dict], query: str) -> str: """Format search results for display.""" if not results: return f"No results found for '{query}'" source = results[0]['source'] output = [f"\n[SEARCH] Results for: \"{query}\" ({source})\n"] output.append("=" * 60) for i, r in enumerate(results, 1): output.append(f"\n{i}. {r['title']}") snippet = r['snippet'] if len(snippet) > 200: snippet = snippet[:200] + "..." output.append(f" {snippet}") output.append(f" URL: {r['url']}") output.append("\n" + "=" * 60) return "\n".join(output) def main(): parser = argparse.ArgumentParser( description="Search the web using multiple search engine APIs", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: python web_search.py "how to respond to blog comments" python web_search.py "python asyncio tutorial" --engine tavily --count 10 python web_search.py "best cycling routes NYC" --output searches/ API keys are loaded from encrypted database settings (settings.db). Configure keys in Pomera UI: Select "Web Search" tool and enter API keys. """ ) parser.add_argument("query", help="Search query") parser.add_argument("--engine", "-e", choices=["duckduckgo", "ddg", "tavily", "google", "brave", "serpapi", "serper"], default="duckduckgo", help="Search engine to use (default: duckduckgo)") parser.add_argument("--count", "-c", type=int, default=5, help="Number of results (default: 5)") parser.add_argument("--depth", "-d", choices=["basic", "advanced"], default="basic", help="Tavily only: search depth (basic=1 credit, advanced=2 credits with higher relevance)") parser.add_argument("--json", "-j", action="store_true", help="Output raw JSON to console") parser.add_argument("--output", "-o", type=str, metavar="DIR", help="Save results as JSON to DIR (organized by date)") parser.add_argument("--task", "-t", type=str, metavar="NAME", help="Tag search with task/plan name (used in filename)") args = parser.parse_args() results = search(args.query, args.engine, args.count, args.depth) if args.output: filepath = save_results( results=results, query=args.query, engine=args.engine, output_dir=args.output, task=args.task, count=args.count ) print(f"[OK] Saved {len(results)} results to: {filepath}") if not args.json: print(format_results(results, args.query)) elif args.json: print(json.dumps(results, indent=2)) else: print(format_results(results, args.query)) if __name__ == "__main__": main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/matbanik/Pomera-AI-Commander'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

web_search.py•20.7 KiB