# search_brave_with_summary
Search the web using the Brave Search API and get summarized results to quickly find relevant information.
## Instructions

Search the web using the Brave Search API.
## Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | The search query to run against the Brave Search API. | |
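Only `query` is accepted. As an illustration, a direct call to the handler (defined below) looks like this; the query string is made up:

```python
# Hypothetical invocation of the tool handler with its single required argument.
result = search_brave_with_summary(query="brave search api pricing")
print(result)  # "Summary:\n..." and/or "Search Results:\n- ..."
```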
## Implementation Reference
- **mcp2brave.py:419-421** (handler): The handler function for the `search_brave_with_summary` tool, registered via `@mcp.tool()`. It takes a query string and delegates to the internal `_do_search_with_summary` helper.

```python
def search_brave_with_summary(query: str) -> str:
    """Search the web using Brave Search API"""
    return _do_search_with_summary(query)
```
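For context, the registration presumably follows the standard FastMCP pattern; this is a sketch under that assumption, since the cited lines show only the handler itself (the server name is hypothetical):

```python
# Assumed FastMCP registration context; only the @mcp.tool() pattern is
# confirmed by the reference above, the rest is a sketch.
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("mcp2brave")

@mcp.tool()
def search_brave_with_summary(query: str) -> str:
    """Search the web using Brave Search API"""
    return _do_search_with_summary(query)
```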
- **mcp2brave.py:185-281** (helper): Core logic for the search tool. Performs a web search via the Brave API, uses the API's built-in summarizer when available or generates a summary by fetching and extracting content from the top results, then formats the output with the summary and result links.

```python
def _do_search_with_summary(query: str) -> str:
    """Internal function to handle the search logic with summary support"""
    try:
        query = query.encode('utf-8').decode('utf-8')
        url = "https://api.search.brave.com/res/v1/web/search"
        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": API_KEY
        }
        params = {
            "q": query,
            "count": 5,
            "result_filter": "web",
            "enable_summarizer": True,
            "format": "json"
        }

        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json()

        logger.debug("API Response Structure:")
        logger.debug(f"Response Keys: {list(data.keys())}")

        # Process search results
        summary_text = ""
        search_results = []

        # Get web search results
        if 'web' in data and 'results' in data['web']:
            results = data['web']['results']

            # Get the summary
            if 'summarizer' in data:
                logger.debug("Found official summarizer data")
                summary = data.get('summarizer', {})
                summary_text = summary.get('text', '')
            else:
                logger.debug("No summarizer found, generating summary from top results")
                # Use the content of the top results as the summary
                try:
                    summaries = []
                    for result in results[:2]:  # Only process the first two results
                        url = result.get('url')
                        if url:
                            logger.debug(f"Fetching content from: {url}")
                            content = _get_url_content_direct(url)
                            # Extract the text content from the HTML
                            raw_content = content.split('---\n\n')[-1]
                            text_content = _extract_text_from_html(raw_content)
                            if text_content:
                                # Add title and source information
                                title = result.get('title', 'No title')
                                date = result.get('age', '') or result.get('published_time', '')
                                summaries.append(f"### {title}")
                                if date:
                                    summaries.append(f"Published: {date}")
                                summaries.append(text_content)

                    if summaries:
                        summary_text = "\n\n".join([
                            "Generated summary from top results:",
                            *summaries
                        ])
                        logger.debug("Successfully generated summary from content")
                    else:
                        summary_text = results[0].get('description', '')
                except Exception as e:
                    logger.error(f"Error generating summary from content: {str(e)}")
                    summary_text = results[0].get('description', '')

            # Format the search results for display
            for result in results:
                title = result.get('title', 'No title').encode('utf-8').decode('utf-8')
                url = result.get('url', 'No URL')
                description = result.get('description', 'No description').encode('utf-8').decode('utf-8')
                search_results.append(f"- {title}\n URL: {url}\n Description: {description}\n")

        # Combine the output
        output = []
        if summary_text:
            output.append(f"Summary:\n{summary_text}\n")
        if search_results:
            output.append("Search Results:\n" + "\n".join(search_results))

        logger.debug(f"Has summary: {bool(summary_text)}")
        logger.debug(f"Number of results: {len(search_results)}")

        return "\n".join(output) if output else "No results found for your query."

    except Exception as e:
        logger.error(f"Search error: {str(e)}")
        logger.exception("Detailed error trace:")
        return f"Error performing search: {str(e)}"
```
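A minimal standalone sketch of the same request path, assuming the key comes from a `BRAVE_API_KEY` environment variable (the variable name is an assumption; the module stores it in `API_KEY`):

```python
# Minimal sketch of the Brave web-search request used above.
# BRAVE_API_KEY is an assumed environment variable name.
import os
import requests

def brave_search_raw(query: str) -> dict:
    response = requests.get(
        "https://api.search.brave.com/res/v1/web/search",
        headers={
            "Accept": "application/json",
            "X-Subscription-Token": os.environ["BRAVE_API_KEY"],
        },
        params={"q": query, "count": 5, "result_filter": "web", "enable_summarizer": True},
        timeout=10,
    )
    response.raise_for_status()
    return response.json()

# The helper checks for an official summary first, then falls back to page content:
data = brave_search_raw("brave search api summarizer")
summary = data.get("summarizer", {}).get("text", "")
results = data.get("web", {}).get("results", [])
```

The helper prefers the API's own summarizer and only fetches pages when no `summarizer` key is present, which keeps the common path to a single HTTP request.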
- **mcp2brave.py:136-184** (helper): Extracts meaningful text from HTML content. It removes scripts, styles, and navigation elements, prioritizes `<article>`/`<main>` content, then cleans the text and truncates it to roughly 1,000 characters. Used when generating summaries from fetched pages.

```python
def _extract_text_from_html(html_content: str) -> str:
    """Extract meaningful text from HTML content"""
    try:
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')

        # Remove unwanted elements
        # (note: these are matched as tag names, so '.advertisement' never
        # matches a class attribute)
        for element in soup(['script', 'style', 'header', 'footer', 'nav', 'aside', 'iframe', 'ad', '.advertisement']):
            element.decompose()

        # Prefer the main article content
        article = soup.find('article')
        if article:
            content = article
        else:
            # Try to find the main content area
            # (note: find() also treats these as tag names, so in practice
            # only 'main' can match here)
            content = soup.find(['main', '.content', '#content', '.post-content', '.article-content'])
            if not content:
                content = soup

        # Get the text
        text = content.get_text(separator='\n')

        # Clean the text
        lines = []
        for line in text.split('\n'):
            line = line.strip()
            # Skip empty lines and lines that are too short
            if line and len(line) > 30:
                lines.append(line)

        # Join the text, limiting it to 1000 characters
        cleaned_text = ' '.join(lines)
        if len(cleaned_text) > 1000:
            # Try to cut at a sentence boundary
            end_pos = cleaned_text.rfind('. ', 0, 1000)
            if end_pos > 0:
                cleaned_text = cleaned_text[:end_pos + 1]
            else:
                cleaned_text = cleaned_text[:1000]

        return cleaned_text
    except Exception as e:
        logger.error(f"Error extracting text from HTML: {str(e)}")
        # If the HTML cannot be parsed, return part of the raw content
        text = html_content.replace('<', ' <').replace('>', '> ').split()
        return ' '.join(text)[:500]
```
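A quick usage sketch; the HTML here is made up for illustration:

```python
# Boilerplate elements (nav, footer) are stripped and <article> text wins;
# the short nav and footer lines also fall below the 30-character filter.
sample_html = """
<html><body>
  <nav>Home | About</nav>
  <article>
    <p>This sentence is long enough to survive the 30-character line filter applied above.</p>
  </article>
  <footer>Copyright notice</footer>
</body></html>
"""
print(_extract_text_from_html(sample_html))
# -> "This sentence is long enough to survive the 30-character line filter applied above."
```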
- **mcp2brave.py:282-355** (helper): Fetches a page directly with `requests`, extracts the main text with BeautifulSoup (mirroring `_extract_text_from_html`), prepends metadata, and truncates the result. Called when the API provides no summarizer output.

```python
def _get_url_content_direct(url: str) -> str:
    """Internal function to get content directly using requests"""
    try:
        logger.debug(f"Directly fetching content from URL: {url}")
        response = requests.get(url, timeout=10, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
        response.raise_for_status()

        # Try to detect the encoding
        if 'charset' in response.headers.get('content-type', '').lower():
            response.encoding = response.apparent_encoding

        try:
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove unwanted elements
            for element in soup(['script', 'style', 'header', 'footer', 'nav', 'aside', 'iframe', 'ad', '.advertisement']):
                element.decompose()

            # Try to find the main content area
            main_content = None
            possible_content_elements = [
                soup.find('article'),
                soup.find('main'),
                soup.find(class_='content'),
                soup.find(id='content'),
                soup.find(class_='post-content'),
                soup.find(class_='article-content'),
                soup.find(class_='entry-content'),
                soup.find(class_='main-content'),
                soup.select_one('div[class*="content"]'),  # any class containing "content"
            ]

            for element in possible_content_elements:
                if element:
                    main_content = element
                    break

            if not main_content:
                main_content = soup

            text = main_content.get_text(separator='\n')

            lines = []
            for line in text.split('\n'):
                line = line.strip()
                if line and len(line) > 30:
                    lines.append(line)

            cleaned_text = ' '.join(lines)
            if len(cleaned_text) > 1000:
                end_pos = cleaned_text.rfind('. ', 0, 1000)
                if end_pos > 0:
                    cleaned_text = cleaned_text[:end_pos + 1]
                else:
                    cleaned_text = cleaned_text[:1000]

            metadata = f"URL: {url}\n"
            metadata += f"Content Length: {len(response.text)} characters\n"
            metadata += f"Content Type: {response.headers.get('content-type', 'Unknown')}\n"
            metadata += "---\n\n"

            return f"{metadata}{cleaned_text}"
        except Exception as e:
            logger.error(f"Error extracting text from HTML: {str(e)}")
            return f"Error extracting text: {str(e)}"
    except Exception as e:
        logger.error(f"Error fetching URL content directly: {str(e)}")
        return f"Error getting content: {str(e)}"
```
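Usage sketch; the URL is illustrative. The `---\n\n` marker separating metadata from body is the same one `_do_search_with_summary` splits on:

```python
# Fetch and extract a page; the return value is "URL/Content Length/Content Type"
# metadata, a "---" divider, then up to ~1000 characters of cleaned body text.
content = _get_url_content_direct("https://example.com/article")
metadata, _, body = content.partition("---\n\n")
print(metadata)
print(body[:200])
```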