# search_brave_with_summary
Searches the web using the Brave Search API and generates a concise summary of the results for quick understanding.
## Instructions
Search the web using Brave Search API
## Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | The search query string | |
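A minimal invocation sketch; the query string is a hypothetical example, and the commented output shape follows the formatting logic in `_do_search_with_summary` below:

```python
# Hypothetical call; in practice an MCP client invokes the tool with
# {"query": "..."} and the server routes it to this handler.
result = search_brave_with_summary("latest developments in quantum computing")
print(result)
# Expected shape (per the formatting code below):
# Summary:
# <summarizer text, or a summary generated from the top results>
#
# Search Results:
# - <title>
#  URL: <url>
#  Description: <description>
```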
## Implementation Reference
- **mcp2brave.py:419-421 (handler)**: The handler function for the `search_brave_with_summary` tool. It is registered via the `@mcp.tool()` decorator and executes the tool logic by calling the internal helper `_do_search_with_summary`.

  ```python
  def search_brave_with_summary(query: str) -> str:
      """Search the web using Brave Search API"""
      return _do_search_with_summary(query)
  ```
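  For context, a minimal sketch of how such a handler is typically registered with FastMCP; the server name `mcp2brave` is an assumption, not taken from the source:

  ```python
  from mcp.server.fastmcp import FastMCP

  mcp = FastMCP("mcp2brave")  # assumed server name

  @mcp.tool()
  def search_brave_with_summary(query: str) -> str:
      """Search the web using Brave Search API"""
      return _do_search_with_summary(query)
  ```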
- **mcp2brave.py:185-281 (helper)**: Core helper implementing the Brave Search API call, response parsing, optional summarization (via the API's summarizer or by fetching and extracting content from the top results), and formatting of the results.

  ```python
  def _do_search_with_summary(query: str) -> str:
      """Internal function to handle the search logic with summary support"""
      try:
          query = query.encode('utf-8').decode('utf-8')
          url = "https://api.search.brave.com/res/v1/web/search"
          headers = {
              "Accept": "application/json",
              "X-Subscription-Token": API_KEY
          }
          params = {
              "q": query,
              "count": 5,
              "result_filter": "web",
              "enable_summarizer": True,
              "format": "json"
          }

          response = requests.get(url, headers=headers, params=params)
          response.raise_for_status()
          data = response.json()

          logger.debug("API Response Structure:")
          logger.debug(f"Response Keys: {list(data.keys())}")

          # Process the search results
          summary_text = ""
          search_results = []

          # Get the web search results
          if 'web' in data and 'results' in data['web']:
              results = data['web']['results']

              # Get the summary
              if 'summarizer' in data:
                  logger.debug("Found official summarizer data")
                  summary = data.get('summarizer', {})
                  summary_text = summary.get('text', '')
              else:
                  logger.debug("No summarizer found, generating summary from top results")
                  # Use the content of the top two results as the summary
                  try:
                      summaries = []
                      for result in results[:2]:  # only process the top two results
                          url = result.get('url')
                          if url:
                              logger.debug(f"Fetching content from: {url}")
                              content = _get_url_content_direct(url)
                              # Extract the text content from the HTML
                              raw_content = content.split('---\n\n')[-1]
                              text_content = _extract_text_from_html(raw_content)
                              if text_content:
                                  # Add title and source information
                                  title = result.get('title', 'No title')
                                  date = result.get('age', '') or result.get('published_time', '')
                                  summaries.append(f"### {title}")
                                  if date:
                                      summaries.append(f"Published: {date}")
                                  summaries.append(text_content)

                      if summaries:
                          summary_text = "\n\n".join([
                              "Generated summary from top results:",
                              *summaries
                          ])
                          logger.debug("Successfully generated summary from content")
                      else:
                          summary_text = results[0].get('description', '')
                  except Exception as e:
                      logger.error(f"Error generating summary from content: {str(e)}")
                      summary_text = results[0].get('description', '')

              # Format the search results for display
              for result in results:
                  title = result.get('title', 'No title').encode('utf-8').decode('utf-8')
                  url = result.get('url', 'No URL')
                  description = result.get('description', 'No description').encode('utf-8').decode('utf-8')
                  search_results.append(f"- {title}\n URL: {url}\n Description: {description}\n")

          # Assemble the output
          output = []
          if summary_text:
              output.append(f"Summary:\n{summary_text}\n")
          if search_results:
              output.append("Search Results:\n" + "\n".join(search_results))

          logger.debug(f"Has summary: {bool(summary_text)}")
          logger.debug(f"Number of results: {len(search_results)}")

          return "\n".join(output) if output else "No results found for your query."

      except Exception as e:
          logger.error(f"Search error: {str(e)}")
          logger.exception("Detailed error trace:")
          return f"Error performing search: {str(e)}"
  ```
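  For reference, a sketch of the underlying API request in isolation, assuming the subscription token is exported in the environment (the variable name `BRAVE_API_KEY` is an assumption; the helper above references it as the module-level `API_KEY`):

  ```python
  import os
  import requests

  headers = {
      "Accept": "application/json",
      "X-Subscription-Token": os.environ["BRAVE_API_KEY"],  # assumed env var name
  }
  params = {
      "q": "open source vector databases",  # hypothetical query
      "count": 5,
      "result_filter": "web",
      "enable_summarizer": True,
      "format": "json",
  }
  resp = requests.get("https://api.search.brave.com/res/v1/web/search",
                      headers=headers, params=params)
  resp.raise_for_status()
  data = resp.json()
  print(list(data.keys()))  # the helper branches on these keys, e.g. 'web', 'summarizer'
  ```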
- **mcp2brave.py:136-184 (helper)**: Helper function that extracts meaningful text content from HTML, used for generating summaries from fetched web pages.

  ```python
  def _extract_text_from_html(html_content: str) -> str:
      """Extract meaningful text from HTML content"""
      try:
          from bs4 import BeautifulSoup
          soup = BeautifulSoup(html_content, 'html.parser')

          # Remove unwanted elements (note: '.advertisement' is matched by
          # BeautifulSoup as a tag name here, not as a CSS class selector)
          for element in soup(['script', 'style', 'header', 'footer', 'nav',
                               'aside', 'iframe', 'ad', '.advertisement']):
              element.decompose()

          # Prefer the main article content
          article = soup.find('article')
          if article:
              content = article
          else:
              # Try to find the main content area (the '.'- and '#'-prefixed
              # entries are likewise matched as tag names, not CSS selectors)
              content = soup.find(['main', '.content', '#content',
                                   '.post-content', '.article-content'])
              if not content:
                  content = soup

          # Get the text
          text = content.get_text(separator='\n')

          # Clean the text
          lines = []
          for line in text.split('\n'):
              line = line.strip()
              # Skip empty lines and lines that are too short
              if line and len(line) > 30:
                  lines.append(line)

          # Combine the text, capped at 1000 characters
          cleaned_text = ' '.join(lines)
          if len(cleaned_text) > 1000:
              # Try to truncate at a sentence boundary
              end_pos = cleaned_text.rfind('. ', 0, 1000)
              if end_pos > 0:
                  cleaned_text = cleaned_text[:end_pos + 1]
              else:
                  cleaned_text = cleaned_text[:1000]

          return cleaned_text
      except Exception as e:
          logger.error(f"Error extracting text from HTML: {str(e)}")
          # If the HTML cannot be processed, return part of the raw content
          text = html_content.replace('<', ' <').replace('>', '> ').split()
          return ' '.join(text)[:500]
  ```
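  A quick usage sketch; the HTML sample is fabricated for illustration:

  ```python
  sample_html = """
  <html><body>
    <nav>Navigation links that get stripped</nav>
    <article>
      <p>This paragraph is long enough to survive the 30-character line filter,
      so it is kept in the extracted output.</p>
    </article>
    <footer>Footer text that gets stripped</footer>
  </body></html>
  """
  print(_extract_text_from_html(sample_html))
  # -> the article paragraph text, cleaned and capped at 1000 characters
  ```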
- **mcp2brave.py:282-355 (helper)**: Helper function that fetches webpage content directly over HTTP and extracts cleaned text with BeautifulSoup, used when generating custom summaries.

  ```python
  def _get_url_content_direct(url: str) -> str:
      """Internal function to get content directly using requests"""
      try:
          logger.debug(f"Directly fetching content from URL: {url}")
          response = requests.get(url, timeout=10, headers={
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          })
          response.raise_for_status()

          # Try to detect the encoding
          if 'charset' in response.headers.get('content-type', '').lower():
              response.encoding = response.apparent_encoding

          try:
              from bs4 import BeautifulSoup
              soup = BeautifulSoup(response.text, 'html.parser')

              # Remove unwanted elements
              for element in soup(['script', 'style', 'header', 'footer', 'nav',
                                   'aside', 'iframe', 'ad', '.advertisement']):
                  element.decompose()

              # Try to find the main content area
              main_content = None
              possible_content_elements = [
                  soup.find('article'),
                  soup.find('main'),
                  soup.find(class_='content'),
                  soup.find(id='content'),
                  soup.find(class_='post-content'),
                  soup.find(class_='article-content'),
                  soup.find(class_='entry-content'),
                  soup.find(class_='main-content'),
                  soup.select_one('div[class*="content"]'),  # any class containing "content"
              ]

              for element in possible_content_elements:
                  if element:
                      main_content = element
                      break

              if not main_content:
                  main_content = soup

              text = main_content.get_text(separator='\n')

              lines = []
              for line in text.split('\n'):
                  line = line.strip()
                  if line and len(line) > 30:
                      lines.append(line)

              cleaned_text = ' '.join(lines)
              if len(cleaned_text) > 1000:
                  end_pos = cleaned_text.rfind('. ', 0, 1000)
                  if end_pos > 0:
                      cleaned_text = cleaned_text[:end_pos + 1]
                  else:
                      cleaned_text = cleaned_text[:1000]

              metadata = f"URL: {url}\n"
              metadata += f"Content Length: {len(response.text)} characters\n"
              metadata += f"Content Type: {response.headers.get('content-type', 'Unknown')}\n"
              metadata += "---\n\n"

              return f"{metadata}{cleaned_text}"
          except Exception as e:
              logger.error(f"Error extracting text from HTML: {str(e)}")
              return f"Error extracting text: {str(e)}"

      except Exception as e:
          logger.error(f"Error fetching URL content directly: {str(e)}")
          return f"Error getting content: {str(e)}"
  ```
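  Usage sketch (the URL is a hypothetical example):

  ```python
  content = _get_url_content_direct("https://example.com/some-article")
  # The return value is a metadata header (URL, content length, content type),
  # a "---" separator, then up to ~1000 characters of cleaned page text.
  metadata, _, body = content.partition('---\n\n')
  print(metadata)
  print(body[:200])
  ```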