Skip to main content
Glama
brightdata_html_search.py — 5.24 kB
"""Streamlit app: Google search through a Bright Data SERP proxy.

Fetches Google result pages via an authenticated HTTP proxy, extracts the
organic results with BeautifulSoup, renders a simulated interest-trend
chart, and offers CSV / Excel / PNG / PDF downloads of the data.
"""

import ssl
import urllib.parse
import urllib.request
from io import BytesIO

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from fpdf import FPDF

# ========== PROXY CONFIGURATION ==========
# SECURITY NOTE: proxy credentials are hard-coded below. Move them to an
# environment variable or st.secrets before sharing/deploying this file.
PROXY = 'http://brd-customer-hl_c103e9b9-zone-serp_api1:prz02bbteqnk@brd.superproxy.io:33335'

# Browser-like request headers so Google serves a normal HTML results page.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
    'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
}


# ========== SEARCH ==========
def proxy_search_google(query: str) -> bytes:
    """Fetch the raw Google SERP HTML for *query* through the proxy.

    Returns the response body as bytes.
    """
    # WARNING: this disables TLS certificate verification process-wide.
    # It is presumably required by the intercepting proxy, but it weakens
    # every other HTTPS request made by this process as well.
    ssl._create_default_https_context = ssl._create_unverified_context
    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({'http': PROXY, 'https': PROXY})
    )
    opener.addheaders = list(HEADERS.items())
    encoded_query = urllib.parse.quote(query)
    url = f'https://www.google.com/search?q={encoded_query}&hl=pt'
    response = opener.open(url)
    return response.read()


# ========== HTML PARSER ==========
def extract_results_from_html(html) -> pd.DataFrame:
    """Parse Google SERP HTML into a DataFrame of title/link/snippet rows.

    Returns an empty DataFrame when no organic results are found (e.g. when
    Google returned a CAPTCHA or consent page instead of results).
    """
    soup = BeautifulSoup(html, 'html.parser')
    results = []
    # 'div.g' wraps each organic result; '.VwiC3b' / '.IsZvec' are the
    # snippet containers Google currently emits — these class names are
    # unstable and may change without notice.
    for result in soup.select('div.g'):
        title_elem = result.select_one('h3')
        link_elem = result.select_one('a[href]')
        snippet_elem = result.select_one('.VwiC3b') or result.select_one('.IsZvec')
        if title_elem and link_elem:
            results.append({
                "title": title_elem.get_text(),
                "link": link_elem['href'],
                "snippet": snippet_elem.get_text() if snippet_elem else ""
            })
    return pd.DataFrame(results)


# ========== TREND CHART ==========
def plot_trend(query) -> BytesIO:
    """Render a simulated interest-over-time chart for *query* as a PNG.

    The data is hard-coded (not real trend data). Returns a BytesIO
    positioned at the start of the PNG bytes.
    """
    trend_data = pd.DataFrame({
        # NOTE: 'M' (month-end) is deprecated in pandas >= 2.2 in favour of
        # 'ME'; kept here for compatibility with older pandas versions.
        "Month": pd.date_range(start="2024-01-01", periods=6, freq='M'),
        "Interest": [20, 35, 55, 70, 85, 100]
    })
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(trend_data["Month"], trend_data["Interest"], marker='o')
    ax.set_title(f"Tendência de Interesse: {query}")
    ax.set_xlabel("Mês")
    ax.set_ylabel("Interesse")
    ax.grid(True)
    buf = BytesIO()
    plt.tight_layout()
    fig.savefig(buf, format="png")
    # FIX: close the figure so repeated Streamlit reruns don't accumulate
    # open matplotlib figures (memory leak).
    plt.close(fig)
    buf.seek(0)
    return buf


# ========== PDF ==========
class PDF(FPDF):
    """FPDF subclass that renders the search results as a simple report."""

    def header(self):
        # Page header repeated automatically on every page by FPDF.
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, "Resultados da Pesquisa", ln=True, align="C")

    def chapter_body(self, df):
        # One numbered paragraph per result: title, link, snippet.
        self.set_font("Arial", "", 10)
        for idx, row in df.iterrows():
            self.multi_cell(
                0, 8,
                f"{idx+1}. {row['title']}\n{row['link']}\n{row['snippet']}\n",
                border=0
            )
            self.ln(2)

    def create_pdf(self, df):
        self.add_page()
        self.chapter_body(df)


def generate_pdf(df) -> BytesIO:
    """Build a PDF report from the results DataFrame and return its bytes."""
    # The core FPDF fonts are Latin-1 only: first map common smart
    # punctuation to ASCII, then (FIX) drop any remaining non-Latin-1
    # characters instead of letting FPDF raise UnicodeEncodeError
    # mid-render on snippets containing e.g. emoji or CJK text.
    clean_df = df.replace(
        {u"\u2019": "'", u"\u201c": '"', u"\u201d": '"'}, regex=True
    )
    for col in clean_df.columns:
        clean_df[col] = clean_df[col].map(
            lambda v: v.encode("latin-1", "replace").decode("latin-1")
            if isinstance(v, str) else v
        )
    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.create_pdf(clean_df)
    buf = BytesIO()
    pdf.output(buf)
    buf.seek(0)
    return buf


# ========== STREAMLIT UI ==========
st.set_page_config(page_title="Google via Proxy Bright Data", layout="wide")
st.title("🔎 Pesquisa no Google via Proxy (Bright Data)")

query = st.text_input("Digite o termo de busca", "geoai")

if st.button("Pesquisar"):
    with st.spinner("Buscando dados via proxy Bright Data..."):
        try:
            html = proxy_search_google(query)
            df_links = extract_results_from_html(html)
            if df_links.empty:
                st.warning("Nenhum resultado encontrado.")
                with st.expander("🔍 Ver HTML bruto retornado"):
                    # FIX: decode the bytes payload so st.code shows readable
                    # HTML instead of a Python bytes repr.
                    st.code(html[:3000].decode("utf-8", errors="replace"),
                            language="html")
            else:
                st.success(f"{len(df_links)} resultados encontrados.")
                st.dataframe(df_links, use_container_width=True)

                st.subheader("📈 Tendência do Tema")
                trend_buf = plot_trend(query)
                st.image(trend_buf, caption="Gráfico simulado de interesse")

                st.subheader("📥 Baixar dados")
                csv_buf = BytesIO()
                df_links.to_csv(csv_buf, index=False)
                st.download_button("⬇️ CSV", data=csv_buf.getvalue(),
                                   file_name="links.csv", mime="text/csv")

                excel_buf = BytesIO()
                df_links.to_excel(excel_buf, index=False)
                st.download_button(
                    "⬇️ Excel", data=excel_buf.getvalue(),
                    file_name="links.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

                st.download_button("⬇️ PNG (Gráfico)", data=trend_buf.getvalue(),
                                   file_name="trend.png", mime="image/png")

                pdf_buf = generate_pdf(df_links)
                st.download_button("⬇️ PDF", data=pdf_buf.getvalue(),
                                   file_name="links.pdf", mime="application/pdf")
        except Exception as e:
            st.error(f"Erro ao consultar via proxy: {e}")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sandovalmedeiros/mcp_agentic_rag'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.