extract_yc
Extract structured data from Y Combinator company listings to analyze startups by name, batch, tags, and description with freshness timestamps.
Instructions
Scrape YC company listings. Use https://www.ycombinator.com/companies?query=KEYWORD to find startups in a space. Returns name, batch, tags, description per company with freshness timestamp.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | YC companies URL e.g. https://www.ycombinator.com/companies?query=mcp | |
| max_length | No | Maximum length, in characters, of the returned text; longer output is truncated | 6000 |
Implementation Reference
- src/server.ts:98-118 (handler) The MCP tool registration and handler implementation for "extract_yc" in src/server.ts.
// Registers the "extract_yc" MCP tool: hands the requested URL to ycAdapter,
// passes the result through stampFreshness, and returns formatForLLM's text.
server.registerTool(
  "extract_yc",
  {
    description:
      "Scrape YC company listings. Use https://www.ycombinator.com/companies?query=KEYWORD to find startups in a space. Returns name, batch, tags, description per company with freshness timestamp.",
    inputSchema: z.object({
      url: z
        .string()
        .url()
        .describe("YC companies URL e.g. https://www.ycombinator.com/companies?query=mcp"),
      // The adapter feeds this value to String.prototype.slice(0, n). A negative
      // n would silently trim characters from the END of the output instead of
      // capping its length, so only positive integers are accepted.
      max_length: z
        .number()
        .int()
        .positive()
        .optional()
        .default(6000)
        .describe("Maximum length, in characters, of the returned text (default 6000)"),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ url, max_length }) => {
    try {
      const result = await ycAdapter({ url, maxLength: max_length });
      const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
    } catch (err) {
      // Every failure is rendered by formatSecurityError (defined elsewhere).
      // isError tells the MCP client that the call failed, so the message is
      // not mistaken for scraped content.
      return {
        content: [{ type: "text", text: formatSecurityError(err) }],
        isError: true,
      };
    }
  }
);
- src/adapters/yc.ts:5-103 (handler) The core logic for scraping YC data, implemented in ycAdapter.
/**
 * Scrapes a Y Combinator company-directory page with headless Chromium and
 * renders the companies found as a plain-text listing.
 *
 * At most 25 company cards are read from the page. Each entry lists name,
 * batch, tags, description and link; the joined text is cut to
 * `options.maxLength` characters (6000 when unset).
 *
 * @param options - `url` is the page to load (after being passed through
 *   `validateUrl`, defined elsewhere); `maxLength` caps the output length.
 * @returns The listing with today's UTC date as `content_date` and "high"
 *   freshness confidence, or a "No YC companies found" message with a `null`
 *   date and "low" confidence when nothing could be extracted.
 * @throws Whatever `validateUrl`, the browser launch, page creation,
 *   navigation or in-page evaluation throw; none of it is caught here. The
 *   browser is closed before such an error propagates.
 */
export async function ycAdapter(options: ExtractOptions): Promise<AdapterResult> {
  const safeUrl = validateUrl(options.url, "yc");
  options = { ...options, url: safeUrl };

  const browser = await chromium.launch({ headless: true });
  let data: unknown;
  try {
    const page = await browser.newPage();

    // YC company directory is React-rendered — wait for network to settle
    await page.goto(options.url, { waitUntil: "networkidle", timeout: 30000 });

    // Wait for company cards to appear. Best-effort: a timeout here is ignored
    // on purpose so the extraction below can still report "nothing found".
    await page.waitForSelector('a[href*="/companies/"]', { timeout: 15000 }).catch(() => null);

    // The script is passed as a string and evaluated inside the page.
    data = await page.evaluate(`(function() {
      // YC company cards — robust multi-strategy extraction
      var results = [];

      // Strategy 1: structured company divs with name + description + batch
      var cards = Array.from(document.querySelectorAll('div[class*="_company_"]'));
      if (cards.length === 0) {
        // Strategy 2: anchor links to /companies/* pages
        cards = Array.from(document.querySelectorAll('a[href*="/companies/"]'))
          .filter(function(el) { return el.querySelector('span, p, div'); });
      }

      cards.slice(0, 25).forEach(function(el) {
        var allText = el.innerText || el.textContent || "";
        var lines = allText.split('\\n').map(function(l) { return l.trim(); }).filter(Boolean);

        // Try to find structured spans
        var spans = Array.from(el.querySelectorAll('span'));
        var name = null, description = null, batch = null;
        var tags = [];
        spans.forEach(function(s) {
          var t = s.textContent.trim();
          if (!t) return;
          if (s.className && s.className.toString().includes('Name')) name = t;
          else if (s.className && s.className.toString().includes('Desc')) description = t;
          else if (s.className && s.className.toString().includes('Batch')) batch = t;
          else if (s.className && s.className.toString().includes('Tag')) tags.push(t);
        });

        // Fallback to line parsing
        if (!name && lines.length > 0) name = lines[0];
        if (!description && lines.length > 1) description = lines[1];

        var link = el.tagName === 'A' ? el.getAttribute('href') : (el.querySelector('a') ? el.querySelector('a').getAttribute('href') : null);

        if (name && name.length > 1 && name.length < 80) {
          results.push({ name, description, batch, tags, link });
        }
      });
      return results;
    })()`);
  } finally {
    // Always release the Chromium process, including when navigation or
    // evaluation throws; otherwise every failed request leaks a browser.
    await browser.close();
  }

  const typedData = data as Array<{
    name: string | null;
    description: string | null;
    batch: string | null;
    tags: string[];
    link: string | null;
  }>;

  if (!typedData.length) {
    return {
      raw: "No YC companies found — page may have changed structure. Try visiting: " + options.url,
      content_date: null,
      freshness_confidence: "low",
    };
  }

  const raw = typedData
    .map((r, i) =>
      [
        `[${i + 1}] ${r.name ?? "Unknown"}`,
        `Batch: ${r.batch ?? "Unknown"}`,
        `Tags: ${r.tags?.join(", ") || "none"}`,
        `Description: ${r.description ?? "N/A"}`,
        // Relative hrefs are resolved against the YC site root.
        `Link: ${r.link ? (r.link.startsWith("http") ? r.link : "https://www.ycombinator.com" + r.link) : "N/A"}`,
      ].join("\n")
    )
    .join("\n\n")
    .slice(0, options.maxLength ?? 6000);

  return {
    raw,
    // Date portion (YYYY-MM-DD) of the current UTC timestamp.
    content_date: new Date().toISOString().split("T")[0],
    freshness_confidence: "high",
  };
}