RivalSearchMCP

aggregator.py•8.01 KiB

"""
News aggregation from multiple free sources.

No authentication required - uses the Google News RSS feed plus HTML
scraping of the DuckDuckGo Lite and Yahoo News search pages.
"""

import asyncio
from datetime import datetime  # not used in this module; kept so existing imports of it keep working
from typing import Any, Dict, List
from urllib.parse import quote_plus

import feedparser
import httpx

from src.logging.logger import logger

# Browser-like User-Agent sent to the HTML search endpoints (DuckDuckGo, Yahoo).
_BROWSER_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
}

# Per-request timeout, in seconds, shared by every source.
_REQUEST_TIMEOUT = 30.0

# Number of leading title characters compared when detecting duplicate titles.
_TITLE_KEY_LENGTH = 50


class NewsAggregator:
    """Aggregate news from multiple free sources without authentication."""

    def __init__(self):
        # Informational table of source endpoints. The search helpers build
        # their own request URLs (DuckDuckGo is actually queried through
        # lite.duckduckgo.com/lite/), so this mapping is not read by them.
        self.sources = {
            'google_news': 'https://news.google.com/rss/search',
            'duckduckgo_news': 'https://duckduckgo.com/',
            'yahoo_news': 'https://news.search.yahoo.com/search'
        }

    async def search_news(
        self,
        query: str,
        max_results: int = 10,
        language: str = "en",
        country: str = "US"
    ) -> List[Dict[str, Any]]:
        """
        Search news from multiple sources.

        All sources are queried concurrently; their results are concatenated
        (Google News, then DuckDuckGo, then Yahoo), de-duplicated and
        truncated to ``max_results``.

        Args:
            query: Search query
            max_results: Maximum results to return; values <= 0 yield an
                empty list without contacting any source
            language: Language code (default: en)
            country: Country code (default: US)

        Returns:
            List of news article dictionaries with the keys ``title``,
            ``url``, ``description``, ``published``, ``source`` and
            ``platform``.
        """
        # A non-positive limit can never produce results, and a negative one
        # would turn the slices below into "all but the last N" - bail out.
        if max_results <= 0:
            return []

        # Search all news sources concurrently
        results = await asyncio.gather(
            self._search_google_news(query, max_results, language, country),
            self._search_duckduckgo_news(query, max_results),
            self._search_yahoo_news(query, max_results),
            return_exceptions=True
        )

        # Collect results. Each helper already converts its own failures into
        # an empty list; anything that still escapes is captured by gather()
        # and logged here rather than silently dropped.
        all_articles: List[Dict[str, Any]] = []
        for result in results:
            if isinstance(result, list):
                all_articles.extend(result)
            elif isinstance(result, BaseException):
                logger.error(f"News source raised unexpectedly: {result!r}")

        unique_articles = self._deduplicate(all_articles)

        logger.info(f"Aggregated {len(unique_articles)} unique articles from {len(results)} sources")

        # NOTE(review): truncation happens after concatenation, so when Google
        # News alone fills max_results the other sources never surface.
        return unique_articles[:max_results]

    @staticmethod
    def _deduplicate(articles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop articles whose URL or leading title text was already seen.

        Empty URLs and empty titles are never used as duplicate keys, so a
        single record with a missing field cannot cause every later record
        with the same missing field to be discarded.
        """
        seen_urls = set()
        seen_titles = set()
        unique: List[Dict[str, Any]] = []

        for article in articles:
            url = article.get('url') or ''
            # First characters of the lowercased title act as a cheap fuzzy key.
            title_key = (article.get('title') or '').strip().lower()[:_TITLE_KEY_LENGTH]

            duplicate_url = bool(url) and url in seen_urls
            duplicate_title = bool(title_key) and title_key in seen_titles
            if duplicate_url or duplicate_title:
                continue

            if url:
                seen_urls.add(url)
            if title_key:
                seen_titles.add(title_key)
            unique.append(article)

        return unique

    async def _search_google_news(
        self,
        query: str,
        max_results: int,
        language: str,
        country: str
    ) -> List[Dict[str, Any]]:
        """Search Google News RSS feed.

        Returns a list of article dictionaries, or an empty list when the
        request or parsing fails (the error is logged, not raised).
        """
        try:
            # Every user-supplied component is escaped so that stray '&' or
            # '#' characters cannot alter the query string.
            url = (
                "https://news.google.com/rss/search"
                f"?q={quote_plus(query)}"
                f"&hl={quote_plus(language)}"
                f"&gl={quote_plus(country)}"
                f"&ceid={quote_plus(country)}:{quote_plus(language)}"
            )

            async with httpx.AsyncClient(timeout=_REQUEST_TIMEOUT) as client:
                response = await client.get(url)
                response.raise_for_status()

            # Parse RSS feed
            feed = feedparser.parse(response.text)

            articles = []
            for entry in feed.entries[:max_results]:
                title = entry.get('title', '')
                link = entry.get('link', '')
                # Same rule as the other sources: skip entries that lack a
                # title or a link.
                if not (title and link):
                    continue
                articles.append({
                    'title': title,
                    'url': link,
                    'description': entry.get('summary', ''),
                    'published': entry.get('published', ''),
                    'source': (entry.get('source') or {}).get('title', 'Google News'),
                    'platform': 'google_news'
                })

            logger.info(f"Found {len(articles)} Google News articles for: {query}")
            return articles

        except Exception as e:
            # Best-effort source: a failure here must not break the aggregate.
            logger.error(f"Google News search failed: {e}")
            return []

    async def _search_duckduckgo_news(
        self,
        query: str,
        max_results: int
    ) -> List[Dict[str, Any]]:
        """Search DuckDuckGo News.

        Scrapes the DuckDuckGo Lite HTML page. Returns a list of article
        dictionaries, or an empty list on any failure (logged, not raised).
        """
        try:
            # Imported inside the try block so that an import failure is
            # handled like any other failure of this source.
            from bs4 import BeautifulSoup

            endpoint = "https://lite.duckduckgo.com/lite/"
            params = {
                'q': query,
                # Labelled "News filter" by the original author.
                # NOTE(review): confirm against DuckDuckGo's URL parameter docs.
                'kn': '1'
            }

            async with httpx.AsyncClient(headers=_BROWSER_HEADERS, timeout=_REQUEST_TIMEOUT) as client:
                response = await client.get(endpoint, params=params)
                response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            articles = []
            # Find news result links
            for link in soup.find_all('a', class_='result-link')[:max_results]:
                title = link.get_text(strip=True)
                href = link.get('href', '')
                if not (title and href):
                    continue
                articles.append({
                    'title': title,
                    'url': href,
                    'description': '',
                    'published': '',
                    'source': 'DuckDuckGo News',
                    'platform': 'duckduckgo_news'
                })

            logger.info(f"Found {len(articles)} DuckDuckGo News articles for: {query}")
            return articles

        except Exception as e:
            # Best-effort source: a failure here must not break the aggregate.
            logger.error(f"DuckDuckGo News search failed: {e}")
            return []

    async def _search_yahoo_news(
        self,
        query: str,
        max_results: int
    ) -> List[Dict[str, Any]]:
        """Search Yahoo News.

        Scrapes the Yahoo News search HTML page. Returns a list of article
        dictionaries, or an empty list on any failure (logged, not raised).
        """
        try:
            # Imported inside the try block so that an import failure is
            # handled like any other failure of this source.
            from bs4 import BeautifulSoup, Tag

            endpoint = "https://news.search.yahoo.com/search"
            params = {
                'p': query,
                'n': max_results
            }

            async with httpx.AsyncClient(headers=_BROWSER_HEADERS, timeout=_REQUEST_TIMEOUT) as client:
                response = await client.get(endpoint, params=params)
                response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Find news result containers: try the 'dd' class first and fall
            # back to 'NewsArticle' when nothing matches.
            result_containers = soup.find_all('div', class_='dd')
            if not result_containers:
                result_containers = soup.find_all('div', class_='NewsArticle')

            articles = []
            for container in result_containers[:max_results]:
                if not isinstance(container, Tag):
                    continue

                # The first anchor inside the container supplies title and URL.
                title_elem = container.find('a')
                if not title_elem:
                    continue

                title = title_elem.get_text(strip=True)
                href = title_elem.get('href', '')
                if not (title and href):
                    continue

                # Try to find description
                desc_elem = container.find('p')
                description = desc_elem.get_text(strip=True) if desc_elem else ''

                articles.append({
                    'title': title,
                    'url': href,
                    'description': description,
                    'published': '',
                    'source': 'Yahoo News',
                    'platform': 'yahoo_news'
                })

            logger.info(f"Found {len(articles)} Yahoo News articles for: {query}")
            return articles

        except Exception as e:
            # Best-effort source: a failure here must not break the aggregate.
            logger.error(f"Yahoo News search failed: {e}")
            return []

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DamionR/RivalSearchMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

aggregator.py•8.01 KiB