Skip to main content
Glama
summarize_statics.ts (2.31 kB)
import type { CrawledPage } from "@hyperbrowser/sdk/types";
import fs from "fs";
import { OpenAI } from "openai";

/** Shape of the input file: one entry per crawled page, keyed by pathname. */
type BasicSummary = {
  pathname: string;
  data: CrawledPage | undefined;
}[];

/** An input entry together with the summary generated for it. */
type Summary = {
  pathname: string;
  summary: string;
  data: CrawledPage | undefined;
};

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

/** Number of pages summarized concurrently per batch. */
const BATCH_SIZE = 5;

/** Chat model used to generate the summaries. */
const SUMMARY_MODEL = "gpt-4o-mini";

/** System prompt that frames every summarization request. */
const SYSTEM_PROMPT =
  "You are a helpful assistant that generates brief summaries. The information is about Hyperbrowser, a tool for web scraping and automation, and contains information about the documentation of the tool. You should provide the summary so that it describes the topic of the page with respect to hyperbrowser and the content of the page.";

/**
 * Produces the summary record for a single crawled page.
 *
 * Entries without markdown content are not sent to the model; they receive the
 * placeholder summary "No summary available".
 */
const summarizeItem = async (item: BasicSummary[number]): Promise<Summary> => {
  if (!item.data?.markdown) {
    return {
      pathname: item.pathname,
      summary: "No summary available",
      data: item.data,
    };
  }

  const completion = await openai.chat.completions.create({
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      {
        role: "user",
        // The markdown must be interpolated with `${...}`; a misplaced `$`
        // previously sent the literal text "{item.data.markdown}" instead of
        // the page content.
        content: `Please summarize the following content in less than 20 words:\n\ntitle: ${item.data.metadata?.title}\n\nmarkdown: ${item.data.markdown}`,
      },
    ],
    model: SUMMARY_MODEL,
  });

  return {
    pathname: item.pathname,
    data: item.data,
    summary: completion.choices[0]?.message?.content || "",
  };
};

/**
 * Reads crawled pages from a JSON file, generates a short summary for each page
 * with the OpenAI chat completions API, and writes the results to another file.
 *
 * Pages are processed in batches of {@link BATCH_SIZE}; the requests within a
 * batch run concurrently and batches run one after another.
 *
 * @param inputPath - Path of a UTF-8 JSON file containing an array of
 *   `{ pathname, data }` entries.
 * @param outputPath - Path the summaries are written to as JSON indented with
 *   two spaces.
 * @returns The generated summaries, in the same order as the input entries.
 * @throws If the input file cannot be read or parsed, if it does not contain a
 *   JSON array, or if any summarization request fails. In those cases nothing
 *   is written to `outputPath`.
 */
export const summarize = async (inputPath: string, outputPath: string) => {
  const parsed: unknown = JSON.parse(fs.readFileSync(inputPath, "utf8"));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected ${inputPath} to contain a JSON array of pages`);
  }
  const urlToDataMap: BasicSummary = parsed;

  const summaries: Summary[] = [];
  const totalBatches = Math.ceil(urlToDataMap.length / BATCH_SIZE);

  for (let i = 0; i < urlToDataMap.length; i += BATCH_SIZE) {
    const currentBatch = Math.floor(i / BATCH_SIZE) + 1;
    console.log(`Processing batch ${currentBatch}/${totalBatches}...`);

    const batch = urlToDataMap.slice(i, i + BATCH_SIZE);
    const batchResults = await Promise.all(batch.map(summarizeItem));
    summaries.push(...batchResults);
  }

  console.log("Processing complete! \nWriting results to file...");
  fs.writeFileSync(outputPath, JSON.stringify(summaries, null, 2));
  return summaries;
};

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BACH-AI-Tools/bach-hyperbrowser-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.