extract_github
Extract real-time GitHub repository data including README, stars, forks, language, topics, and last commit. Returns timestamped information for accurate, current insights.
Instructions
Extract real-time data from a GitHub repository — README, stars, forks, language, topics, last commit. Returns timestamped freshcontext.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | Full GitHub repo URL e.g. https://github.com/owner/repo | |
| max_length | No | Max content length | 6000 |
Implementation Reference
- src/adapters/github.ts:5-54 (handler): The `githubAdapter` function performs the actual scraping and data extraction of GitHub repository details using Playwright.
/**
 * Scrapes a GitHub repository page with headless Chromium (Playwright) and
 * returns its key signals as a plain-text report.
 *
 * The URL is first passed through `validateUrl(options.url, "github")`; the
 * value it returns is the one actually navigated to. Navigation waits for
 * `domcontentloaded` with a 20 000 ms timeout.
 *
 * @param options - Extraction options; only `options.url` is read here.
 * @returns An `AdapterResult` whose `raw` holds description, stars, forks,
 *   language, last commit, topics and README text. `content_date` is the
 *   `datetime` attribute of the first `<relative-time>` element on the page
 *   (treated as the last-commit time), or `null` when none is found.
 *   `freshness_confidence` is `"high"` when that timestamp exists and
 *   `"medium"` otherwise.
 * @throws Propagates any error raised by `validateUrl` or by Playwright
 *   (launch, navigation, evaluation). The browser is closed before the error
 *   leaves this function.
 */
export async function githubAdapter(options: ExtractOptions): Promise<AdapterResult> {
  // Shape of the object returned by the in-page script below.
  type RepoSignals = {
    readme: string | null;
    stars: string | null;
    forks: string | null;
    lastCommit: string | null;
    description: string | null;
    topics: string[];
    language: string | null;
  };

  const safeUrl = validateUrl(options.url, "github");
  options = { ...options, url: safeUrl };

  const browser = await chromium.launch({ headless: true });
  let typedData: RepoSignals;
  try {
    const page = await browser.newPage();

    // Spoof a real browser UA to avoid bot detection
    await page.setExtraHTTPHeaders({
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    });

    await page.goto(options.url, { waitUntil: "domcontentloaded", timeout: 20000 });

    // Extract key repo signals. The script is passed as a string and uses no
    // inner named functions, to avoid esbuild __name injection.
    const data = await page.evaluate(`(function() {
      var readme = (
        document.querySelector('[data-target="readme-toc.content"]') ||
        document.querySelector('.markdown-body') ||
        {}
      ).textContent || null;
      var starsEl =
        document.querySelector('[id="repo-stars-counter-star"]') ||
        document.querySelector('.Counter.js-social-count');
      var stars = starsEl ? starsEl.textContent.trim() : null;
      var forksEl = document.querySelector('[id="repo-network-counter"]');
      var forks = forksEl ? forksEl.textContent.trim() : null;
      var commitEl = document.querySelector('relative-time');
      var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
      var descEl = document.querySelector('.f4.my-3');
      var description = descEl ? descEl.textContent.trim() : null;
      var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) {
        return t.textContent.trim();
      });
      var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
      var language = langEl ? langEl.textContent.trim() : null;
      return {
        readme: readme,
        stars: stars,
        forks: forks,
        lastCommit: lastCommit,
        description: description,
        topics: topics,
        language: language
      };
    })()`);

    typedData = data as RepoSignals;
  } finally {
    // Close the browser even when navigation or evaluation throws, so a
    // failed scrape does not leave a Chromium process running.
    await browser.close();
  }

  // An empty topic list joins to "", which `??` would not replace; check the
  // length so repositories without topics are reported as "none".
  const topicsLine = typedData.topics?.length ? typedData.topics.join(", ") : "none";

  const raw = [
    `Description: ${typedData.description ?? "N/A"}`,
    `Stars: ${typedData.stars ?? "N/A"} | Forks: ${typedData.forks ?? "N/A"}`,
    `Language: ${typedData.language ?? "N/A"}`,
    `Last commit: ${typedData.lastCommit ?? "N/A"}`,
    `Topics: ${topicsLine}`,
    `\n--- README ---\n${typedData.readme ?? "No README found"}`,
  ].join("\n");

  return {
    raw,
    content_date: typedData.lastCommit ?? null,
    freshness_confidence: typedData.lastCommit ? "high" : "medium",
  };
}
- src/server.ts:28-49 (registration)Tool registration for `extract_github` in `src/server.ts`, which calls `githubAdapter`.
// ─── Tool: extract_github ────────────────────────────────────────────────────
// Registers the `extract_github` tool. The handler runs `githubAdapter` on the
// requested URL, stamps the result via `stampFreshness`, and returns it as a
// single text content item rendered by `formatForLLM`. Any error thrown along
// the way is turned into text by `formatSecurityError` instead of propagating.
server.registerTool(
  "extract_github",
  {
    description:
      "Extract real-time data from a GitHub repository — README, stars, forks, language, topics, last commit. Returns timestamped freshcontext.",
    inputSchema: z.object({
      url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
      max_length: z.number().optional().default(6000).describe("Max content length"),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async (args) => {
    const { url, max_length: maxLength } = args;

    // Success and failure both end in one text payload; only the text differs.
    let text: string;
    try {
      const adapterResult = await githubAdapter({ url, maxLength });
      const stamped = stampFreshness(adapterResult, { url, maxLength }, "github");
      text = formatForLLM(stamped);
    } catch (err) {
      text = formatSecurityError(err);
    }

    return { content: [{ type: "text", text }] };
  }
);