Skip to main content
Glama
msp_search.py (4.23 kB)
"""Run one query against Google, Yandex and DuckDuckGo via Selenium and print the hits."""
import logging
import urllib.parse
from typing import List, Dict

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

import driver

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
log = logging.getLogger(__name__)

# Google has used several containers for the result snippet; they are tried in order.
_GOOGLE_SNIPPET_SELECTORS = ("div.IsZvec", "div.VwiC3b", "span.aCOpRe")
_YANDEX_SNIPPET_SELECTORS = ("div.text, .organic__snippet",)
_DUCKDUCKGO_SNIPPET_SELECTORS = ("div.result__snippet, a.result__snippet",)


def get_driver() -> webdriver.Chrome:
    """Return a Chrome WebDriver built by the project-local ``driver`` module.

    NOTE(review): the original note says this starts a headless Chrome via
    webdriver-manager; that configuration lives in ``driver.get_driver`` and
    is not visible here -- confirm against that module.
    """
    return driver.get_driver(log)


def _first_text(scope, selectors) -> str:
    """Return the text of the first selector in *selectors* that yields non-empty text.

    Args:
        scope: Element (or driver) exposing ``find_element``.
        selectors: CSS selectors to try, in priority order.

    Returns:
        The first non-empty text found, or ``""`` when nothing matches.
    """
    for selector in selectors:
        try:
            text = scope.find_element(By.CSS_SELECTOR, selector).text
        except WebDriverException:
            # A missing snippet is expected; fall through to the next selector.
            continue
        if text:
            return text
    return ""


def search_google(driver: webdriver.Chrome, query: str, num: int = 5) -> List[Dict[str, str]]:
    """Search Google and scrape the first *num* ``div.g`` result containers.

    The title comes from the ``<h3>``, the link from its ancestor ``<a>``, and
    the snippet from the first matching snippet container.

    Args:
        driver: WebDriver used to load and query the results page.
        query: Search text; it is URL-encoded before use.
        num: Maximum number of result containers to inspect.

    Returns:
        A list of ``{"title", "url", "snippet"}`` dicts. Containers that cannot
        be parsed are skipped, so the list may be shorter than *num*.
    """
    url = "https://www.google.com/search?q=" + urllib.parse.quote_plus(query) + "&hl=en"
    driver.get(url)

    results = []
    for elem in driver.find_elements(By.CSS_SELECTOR, "div.g")[:num]:
        try:
            h3 = elem.find_element(By.TAG_NAME, "h3")
            a = h3.find_element(By.XPATH, "./ancestor::a")
            title = h3.text
            link = a.get_attribute("href")
        except WebDriverException as exc:
            # Not every div.g is an organic result; skip it but leave a trace.
            log.debug("Skipping unparsable Google result: %s", exc)
            continue
        snippet = _first_text(elem, _GOOGLE_SNIPPET_SELECTORS)
        results.append({"title": title, "url": link, "snippet": snippet})
    return results


def search_yandex(driver: webdriver.Chrome, query: str, num: int = 5) -> List[Dict[str, str]]:
    """Search Yandex and scrape the first *num* ``li.serp-item`` entries.

    The title and link come from ``h2 a`` (or ``a.link``), and the snippet from
    ``div.text`` / ``.organic__snippet``.

    Args:
        driver: WebDriver used to load and query the results page.
        query: Search text; it is URL-encoded before use.
        num: Maximum number of result items to inspect.

    Returns:
        A list of ``{"title", "url", "snippet"}`` dicts. Items that cannot be
        parsed are skipped, so the list may be shorter than *num*.
    """
    url = "https://yandex.ru/search/?text=" + urllib.parse.quote_plus(query)
    driver.get(url)

    results = []
    for item in driver.find_elements(By.CSS_SELECTOR, "li.serp-item")[:num]:
        try:
            a = item.find_element(By.CSS_SELECTOR, "h2 a, a.link")
            title = a.text
            link = a.get_attribute("href")
        except WebDriverException as exc:
            log.debug("Skipping unparsable Yandex result: %s", exc)
            continue
        snippet = _first_text(item, _YANDEX_SNIPPET_SELECTORS)
        results.append({"title": title, "url": link, "snippet": snippet})
    return results


def search_duckduckgo(driver: webdriver.Chrome, query: str, num: int = 5) -> List[Dict[str, str]]:
    """Search DuckDuckGo (HTML version) and scrape the first *num* ``div.result`` entries.

    Args:
        driver: WebDriver used to load and query the results page.
        query: Search text; it is URL-encoded before use.
        num: Maximum number of result items to inspect.

    Returns:
        A list of ``{"title", "url", "snippet"}`` dicts. Items that cannot be
        parsed are skipped, so the list may be shorter than *num*.
    """
    url = "https://duckduckgo.com/html/?q=" + urllib.parse.quote_plus(query)
    driver.get(url)

    results = []
    for item in driver.find_elements(By.CSS_SELECTOR, "div.result")[:num]:
        try:
            a = item.find_element(By.CSS_SELECTOR, "a.result__a")
            title = a.text
            link = a.get_attribute("href")
        except WebDriverException as exc:
            log.debug("Skipping unparsable DuckDuckGo result: %s", exc)
            continue
        snippet = _first_text(item, _DUCKDUCKGO_SNIPPET_SELECTORS)
        results.append({"title": title, "url": link, "snippet": snippet})
    return results


def main(query: str = "Python programming", num_results: int = 5) -> None:
    """Run *query* against every supported engine and print the hits.

    Args:
        query: Search text sent to each engine.
        num_results: Maximum number of results requested per engine.
    """
    engines = [
        ("Google", search_google),
        ("Yandex", search_yandex),
        ("DuckDuckGo", search_duckduckgo),
    ]
    # Named ``browser`` so the imported ``driver`` module is not shadowed.
    browser = get_driver()
    try:
        for name, func in engines:
            log.info("Searching with %s…", name)
            res = func(browser, query, num_results)
            print(f"\n=== {name.upper()} — {len(res)} результатов ===")
            for idx, hit in enumerate(res, 1):
                print(f"{idx}. {hit['title']}\n {hit['url']}\n {hit['snippet']}\n")
    finally:
        # Always shut the browser down, even if one engine raised.
        browser.quit()


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/NeoXider/web-search-neo'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.