TrendRadar

Overview Schema Related Servers Score Discussions

TrendRadar
trendradar

__main__.py•26.2 KiB

# coding=utf-8 """ TrendRadar 主程序热点新闻聚合与分析工具支持: python -m trendradar """ import os import webbrowser from pathlib import Path from typing import Dict, List, Tuple, Optional import requests from trendradar.context import AppContext # 版本号直接定义，避免循环导入 VERSION = "4.0.0" from trendradar.core import load_config from trendradar.crawler import DataFetcher from trendradar.storage import convert_crawl_results_to_news_data def check_version_update( current_version: str, version_url: str, proxy_url: Optional[str] = None ) -> Tuple[bool, Optional[str]]: """检查版本更新""" try: proxies = None if proxy_url: proxies = {"http": proxy_url, "https": proxy_url} headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Accept": "text/plain, */*", "Cache-Control": "no-cache", } response = requests.get( version_url, proxies=proxies, headers=headers, timeout=10 ) response.raise_for_status() remote_version = response.text.strip() print(f"当前版本: {current_version}, 远程版本: {remote_version}") # 比较版本 def parse_version(version_str): try: parts = version_str.strip().split(".") if len(parts) != 3: raise ValueError("版本号格式不正确") return int(parts[0]), int(parts[1]), int(parts[2]) except: return 0, 0, 0 current_tuple = parse_version(current_version) remote_tuple = parse_version(remote_version) need_update = current_tuple < remote_tuple return need_update, remote_version if need_update else None except Exception as e: print(f"版本检查失败: {e}") return False, None # === 主分析器 === class NewsAnalyzer: """新闻分析器""" # 模式策略定义 MODE_STRATEGIES = { "incremental": { "mode_name": "增量模式", "description": "增量模式（只关注新增新闻，无新增时不推送）", "realtime_report_type": "实时增量", "summary_report_type": "当日汇总", "should_send_realtime": True, "should_generate_summary": True, "summary_mode": "daily", }, "current": { "mode_name": "当前榜单模式", "description": "当前榜单模式（当前榜单匹配新闻 + 新增新闻区域 + 按时推送）", "realtime_report_type": "实时当前榜单", "summary_report_type": "当前榜单汇总", "should_send_realtime": True, "should_generate_summary": True, "summary_mode": "current", }, "daily": { "mode_name": "当日汇总模式", "description": "当日汇总模式（所有匹配新闻 + 新增新闻区域 + 按时推送）", "realtime_report_type": "", "summary_report_type": "当日汇总", "should_send_realtime": False, "should_generate_summary": True, "summary_mode": "daily", }, } def __init__(self): # 加载配置 print("正在加载配置...") config = load_config() print(f"TrendRadar v{VERSION} 配置加载完成") print(f"监控平台数量: {len(config['PLATFORMS'])}") print(f"时区: {config.get('TIMEZONE', 'Asia/Shanghai')}") # 创建应用上下文 self.ctx = AppContext(config) self.request_interval = self.ctx.config["REQUEST_INTERVAL"] self.report_mode = self.ctx.config["REPORT_MODE"] self.rank_threshold = self.ctx.rank_threshold self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true" self.is_docker_container = self._detect_docker_environment() self.update_info = None self.proxy_url = None self._setup_proxy() self.data_fetcher = DataFetcher(self.proxy_url) # 初始化存储管理器（使用 AppContext） self._init_storage_manager() if self.is_github_actions: self._check_version_update() def _init_storage_manager(self) -> None: """初始化存储管理器（使用 AppContext）""" # 获取数据保留天数（支持环境变量覆盖） env_retention = os.environ.get("STORAGE_RETENTION_DAYS", "").strip() if env_retention: # 环境变量覆盖配置 self.ctx.config["STORAGE"]["RETENTION_DAYS"] = int(env_retention) self.storage_manager = self.ctx.get_storage_manager() print(f"存储后端: {self.storage_manager.backend_name}") retention_days = self.ctx.config.get("STORAGE", {}).get("RETENTION_DAYS", 0) if retention_days > 0: print(f"数据保留天数: {retention_days} 天") def _detect_docker_environment(self) -> bool: """检测是否运行在 Docker 容器中""" try: if os.environ.get("DOCKER_CONTAINER") == "true": return True if os.path.exists("/.dockerenv"): return True return False except Exception: return False def _should_open_browser(self) -> bool: """判断是否应该打开浏览器""" return not self.is_github_actions and not self.is_docker_container def _setup_proxy(self) -> None: """设置代理配置""" if not self.is_github_actions and self.ctx.config["USE_PROXY"]: self.proxy_url = self.ctx.config["DEFAULT_PROXY"] print("本地环境，使用代理") elif not self.is_github_actions and not self.ctx.config["USE_PROXY"]: print("本地环境，未启用代理") else: print("GitHub Actions环境，不使用代理") def _check_version_update(self) -> None: """检查版本更新""" try: need_update, remote_version = check_version_update( VERSION, self.ctx.config["VERSION_CHECK_URL"], self.proxy_url ) if need_update and remote_version: self.update_info = { "current_version": VERSION, "remote_version": remote_version, } print(f"发现新版本: {remote_version} (当前: {VERSION})") else: print("版本检查完成，当前为最新版本") except Exception as e: print(f"版本检查出错: {e}") def _get_mode_strategy(self) -> Dict: """获取当前模式的策略配置""" return self.MODE_STRATEGIES.get(self.report_mode, self.MODE_STRATEGIES["daily"]) def _has_notification_configured(self) -> bool: """检查是否配置了任何通知渠道""" cfg = self.ctx.config return any( [ cfg["FEISHU_WEBHOOK_URL"], cfg["DINGTALK_WEBHOOK_URL"], cfg["WEWORK_WEBHOOK_URL"], (cfg["TELEGRAM_BOT_TOKEN"] and cfg["TELEGRAM_CHAT_ID"]), ( cfg["EMAIL_FROM"] and cfg["EMAIL_PASSWORD"] and cfg["EMAIL_TO"] ), (cfg["NTFY_SERVER_URL"] and cfg["NTFY_TOPIC"]), cfg["BARK_URL"], cfg["SLACK_WEBHOOK_URL"], ] ) def _has_valid_content( self, stats: List[Dict], new_titles: Optional[Dict] = None ) -> bool: """检查是否有有效的新闻内容""" if self.report_mode in ["incremental", "current"]: # 增量模式和current模式下，只要stats有内容就说明有匹配的新闻 return any(stat["count"] > 0 for stat in stats) else: # 当日汇总模式下，检查是否有匹配的频率词新闻或新增新闻 has_matched_news = any(stat["count"] > 0 for stat in stats) has_new_news = bool( new_titles and any(len(titles) > 0 for titles in new_titles.values()) ) return has_matched_news or has_new_news def _load_analysis_data( self, ) -> Optional[Tuple[Dict, Dict, Dict, Dict, List, List]]: """统一的数据加载和预处理，使用当前监控平台列表过滤历史数据""" try: # 获取当前配置的监控平台ID列表 current_platform_ids = self.ctx.platform_ids print(f"当前监控平台: {current_platform_ids}") all_results, id_to_name, title_info = self.ctx.read_today_titles( current_platform_ids ) if not all_results: print("没有找到当天的数据") return None total_titles = sum(len(titles) for titles in all_results.values()) print(f"读取到 {total_titles} 个标题（已按当前监控平台过滤）") new_titles = self.ctx.detect_new_titles(current_platform_ids) word_groups, filter_words, global_filters = self.ctx.load_frequency_words() return ( all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters, ) except Exception as e: print(f"数据加载失败: {e}") return None def _prepare_current_title_info(self, results: Dict, time_info: str) -> Dict: """从当前抓取结果构建标题信息""" title_info = {} for source_id, titles_data in results.items(): title_info[source_id] = {} for title, title_data in titles_data.items(): ranks = title_data.get("ranks", []) url = title_data.get("url", "") mobile_url = title_data.get("mobileUrl", "") title_info[source_id][title] = { "first_time": time_info, "last_time": time_info, "count": 1, "ranks": ranks, "url": url, "mobileUrl": mobile_url, } return title_info def _run_analysis_pipeline( self, data_source: Dict, mode: str, title_info: Dict, new_titles: Dict, word_groups: List[Dict], filter_words: List[str], id_to_name: Dict, failed_ids: Optional[List] = None, is_daily_summary: bool = False, global_filters: Optional[List[str]] = None, ) -> Tuple[List[Dict], Optional[str]]: """统一的分析流水线：数据处理 → 统计计算 → HTML生成""" # 统计计算（使用 AppContext） stats, total_titles = self.ctx.count_frequency( data_source, word_groups, filter_words, id_to_name, title_info, new_titles, mode=mode, global_filters=global_filters, ) # HTML生成（如果启用） html_file = None if self.ctx.config["STORAGE"]["FORMATS"]["HTML"]: html_file = self.ctx.generate_html( stats, total_titles, failed_ids=failed_ids, new_titles=new_titles, id_to_name=id_to_name, mode=mode, is_daily_summary=is_daily_summary, update_info=self.update_info if self.ctx.config["SHOW_VERSION_UPDATE"] else None, ) return stats, html_file def _send_notification_if_needed( self, stats: List[Dict], report_type: str, mode: str, failed_ids: Optional[List] = None, new_titles: Optional[Dict] = None, id_to_name: Optional[Dict] = None, html_file_path: Optional[str] = None, ) -> bool: """统一的通知发送逻辑，包含所有判断条件""" has_notification = self._has_notification_configured() cfg = self.ctx.config if ( cfg["ENABLE_NOTIFICATION"] and has_notification and self._has_valid_content(stats, new_titles) ): # 推送窗口控制 if cfg["PUSH_WINDOW"]["ENABLED"]: push_manager = self.ctx.create_push_manager() time_range_start = cfg["PUSH_WINDOW"]["TIME_RANGE"]["START"] time_range_end = cfg["PUSH_WINDOW"]["TIME_RANGE"]["END"] if not push_manager.is_in_time_range(time_range_start, time_range_end): now = self.ctx.get_time() print( f"推送窗口控制：当前时间 {now.strftime('%H:%M')} 不在推送时间窗口 {time_range_start}-{time_range_end} 内，跳过推送" ) return False if cfg["PUSH_WINDOW"]["ONCE_PER_DAY"]: if push_manager.has_pushed_today(): print(f"推送窗口控制：今天已推送过，跳过本次推送") return False else: print(f"推送窗口控制：今天首次推送") # 准备报告数据 report_data = self.ctx.prepare_report(stats, failed_ids, new_titles, id_to_name, mode) # 是否发送版本更新信息 update_info_to_send = self.update_info if cfg["SHOW_VERSION_UPDATE"] else None # 使用 NotificationDispatcher 发送到所有渠道 dispatcher = self.ctx.create_notification_dispatcher() results = dispatcher.dispatch_all( report_data=report_data, report_type=report_type, update_info=update_info_to_send, proxy_url=self.proxy_url, mode=mode, html_file_path=html_file_path, ) if not results: print("未配置任何通知渠道，跳过通知发送") return False # 如果成功发送了任何通知，且启用了每天只推一次，则记录推送 if ( cfg["PUSH_WINDOW"]["ENABLED"] and cfg["PUSH_WINDOW"]["ONCE_PER_DAY"] and any(results.values()) ): push_manager = self.ctx.create_push_manager() push_manager.record_push(report_type) return True elif cfg["ENABLE_NOTIFICATION"] and not has_notification: print("⚠️ 警告：通知功能已启用但未配置任何通知渠道，将跳过通知发送") elif not cfg["ENABLE_NOTIFICATION"]: print(f"跳过{report_type}通知：通知功能已禁用") elif ( cfg["ENABLE_NOTIFICATION"] and has_notification and not self._has_valid_content(stats, new_titles) ): mode_strategy = self._get_mode_strategy() if "实时" in report_type: print( f"跳过实时推送通知：{mode_strategy['mode_name']}下未检测到匹配的新闻" ) else: print( f"跳过{mode_strategy['summary_report_type']}通知：未匹配到有效的新闻内容" ) return False def _generate_summary_report(self, mode_strategy: Dict) -> Optional[str]: """生成汇总报告（带通知）""" summary_type = ( "当前榜单汇总" if mode_strategy["summary_mode"] == "current" else "当日汇总" ) print(f"生成{summary_type}报告...") # 加载分析数据 analysis_data = self._load_analysis_data() if not analysis_data: return None all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = ( analysis_data ) # 运行分析流水线 stats, html_file = self._run_analysis_pipeline( all_results, mode_strategy["summary_mode"], title_info, new_titles, word_groups, filter_words, id_to_name, is_daily_summary=True, global_filters=global_filters, ) if html_file: print(f"{summary_type}报告已生成: {html_file}") # 发送通知 self._send_notification_if_needed( stats, mode_strategy["summary_report_type"], mode_strategy["summary_mode"], failed_ids=[], new_titles=new_titles, id_to_name=id_to_name, html_file_path=html_file, ) return html_file def _generate_summary_html(self, mode: str = "daily") -> Optional[str]: """生成汇总HTML""" summary_type = "当前榜单汇总" if mode == "current" else "当日汇总" print(f"生成{summary_type}HTML...") # 加载分析数据 analysis_data = self._load_analysis_data() if not analysis_data: return None all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = ( analysis_data ) # 运行分析流水线 _, html_file = self._run_analysis_pipeline( all_results, mode, title_info, new_titles, word_groups, filter_words, id_to_name, is_daily_summary=True, global_filters=global_filters, ) if html_file: print(f"{summary_type}HTML已生成: {html_file}") return html_file def _initialize_and_check_config(self) -> None: """通用初始化和配置检查""" now = self.ctx.get_time() print(f"当前北京时间: {now.strftime('%Y-%m-%d %H:%M:%S')}") if not self.ctx.config["ENABLE_CRAWLER"]: print("爬虫功能已禁用（ENABLE_CRAWLER=False），程序退出") return has_notification = self._has_notification_configured() if not self.ctx.config["ENABLE_NOTIFICATION"]: print("通知功能已禁用（ENABLE_NOTIFICATION=False），将只进行数据抓取") elif not has_notification: print("未配置任何通知渠道，将只进行数据抓取，不发送通知") else: print("通知功能已启用，将发送通知") mode_strategy = self._get_mode_strategy() print(f"报告模式: {self.report_mode}") print(f"运行模式: {mode_strategy['description']}") def _crawl_data(self) -> Tuple[Dict, Dict, List]: """执行数据爬取""" ids = [] for platform in self.ctx.platforms: if "name" in platform: ids.append((platform["id"], platform["name"])) else: ids.append(platform["id"]) print( f"配置的监控平台: {[p.get('name', p['id']) for p in self.ctx.platforms]}" ) print(f"开始爬取数据，请求间隔 {self.request_interval} 毫秒") Path("output").mkdir(parents=True, exist_ok=True) results, id_to_name, failed_ids = self.data_fetcher.crawl_websites( ids, self.request_interval ) # 转换为 NewsData 格式并保存到存储后端 crawl_time = self.ctx.format_time() crawl_date = self.ctx.format_date() news_data = convert_crawl_results_to_news_data( results, id_to_name, failed_ids, crawl_time, crawl_date ) # 保存到存储后端（SQLite） if self.storage_manager.save_news_data(news_data): print(f"数据已保存到存储后端: {self.storage_manager.backend_name}") # 保存 TXT 快照（如果启用） txt_file = self.storage_manager.save_txt_snapshot(news_data) if txt_file: print(f"TXT 快照已保存: {txt_file}") # 兼容：同时保存到原有 TXT 格式（确保向后兼容） if self.ctx.config["STORAGE"]["FORMATS"]["TXT"]: title_file = self.ctx.save_titles(results, id_to_name, failed_ids) print(f"标题已保存到: {title_file}") return results, id_to_name, failed_ids def _execute_mode_strategy( self, mode_strategy: Dict, results: Dict, id_to_name: Dict, failed_ids: List ) -> Optional[str]: """执行模式特定逻辑""" # 获取当前监控平台ID列表 current_platform_ids = self.ctx.platform_ids new_titles = self.ctx.detect_new_titles(current_platform_ids) time_info = self.ctx.format_time() if self.ctx.config["STORAGE"]["FORMATS"]["TXT"]: self.ctx.save_titles(results, id_to_name, failed_ids) word_groups, filter_words, global_filters = self.ctx.load_frequency_words() # current模式下，实时推送需要使用完整的历史数据来保证统计信息的完整性 if self.report_mode == "current": # 加载完整的历史数据（已按当前平台过滤） analysis_data = self._load_analysis_data() if analysis_data: ( all_results, historical_id_to_name, historical_title_info, historical_new_titles, _, _, _, ) = analysis_data print( f"current模式：使用过滤后的历史数据，包含平台：{list(all_results.keys())}" ) stats, html_file = self._run_analysis_pipeline( all_results, self.report_mode, historical_title_info, historical_new_titles, word_groups, filter_words, historical_id_to_name, failed_ids=failed_ids, global_filters=global_filters, ) combined_id_to_name = {**historical_id_to_name, **id_to_name} if html_file: print(f"HTML报告已生成: {html_file}") # 发送实时通知（使用完整历史数据的统计结果） summary_html = None if mode_strategy["should_send_realtime"]: self._send_notification_if_needed( stats, mode_strategy["realtime_report_type"], self.report_mode, failed_ids=failed_ids, new_titles=historical_new_titles, id_to_name=combined_id_to_name, html_file_path=html_file, ) else: print("❌ 严重错误：无法读取刚保存的数据文件") raise RuntimeError("数据一致性检查失败：保存后立即读取失败") else: title_info = self._prepare_current_title_info(results, time_info) stats, html_file = self._run_analysis_pipeline( results, self.report_mode, title_info, new_titles, word_groups, filter_words, id_to_name, failed_ids=failed_ids, global_filters=global_filters, ) if html_file: print(f"HTML报告已生成: {html_file}") # 发送实时通知（如果需要） summary_html = None if mode_strategy["should_send_realtime"]: self._send_notification_if_needed( stats, mode_strategy["realtime_report_type"], self.report_mode, failed_ids=failed_ids, new_titles=new_titles, id_to_name=id_to_name, html_file_path=html_file, ) # 生成汇总报告（如果需要） summary_html = None if mode_strategy["should_generate_summary"]: if mode_strategy["should_send_realtime"]: # 如果已经发送了实时通知，汇总只生成HTML不发送通知 summary_html = self._generate_summary_html( mode_strategy["summary_mode"] ) else: # daily模式：直接生成汇总报告并发送通知 summary_html = self._generate_summary_report(mode_strategy) # 打开浏览器（仅在非容器环境） if self._should_open_browser() and html_file: if summary_html: summary_url = "file://" + str(Path(summary_html).resolve()) print(f"正在打开汇总报告: {summary_url}") webbrowser.open(summary_url) else: file_url = "file://" + str(Path(html_file).resolve()) print(f"正在打开HTML报告: {file_url}") webbrowser.open(file_url) elif self.is_docker_container and html_file: if summary_html: print(f"汇总报告已生成（Docker环境）: {summary_html}") else: print(f"HTML报告已生成（Docker环境）: {html_file}") return summary_html def run(self) -> None: """执行分析流程""" try: self._initialize_and_check_config() mode_strategy = self._get_mode_strategy() results, id_to_name, failed_ids = self._crawl_data() self._execute_mode_strategy(mode_strategy, results, id_to_name, failed_ids) except Exception as e: print(f"分析流程执行出错: {e}") raise finally: # 清理资源（包括过期数据清理和数据库连接关闭） self.ctx.cleanup() def main(): """主程序入口""" try: analyzer = NewsAnalyzer() analyzer.run() except FileNotFoundError as e: print(f"❌ 配置文件错误: {e}") print("\n请确保以下文件存在:") print(" • config/config.yaml") print(" • config/frequency_words.txt") print("\n参考项目文档进行正确配置") except Exception as e: print(f"❌ 程序运行错误: {e}") raise if __name__ == "__main__": main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SsdSalesman/TrendRadar'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

__main__.py•26.2 KiB