Skip to main content
Glama

LinkedIn Scraper MCP Server

by superyuser
client.ts (16.7 kB)
import { Builder, WebDriver, WebElement, By, until, Key } from 'selenium-webdriver';
import chrome from 'selenium-webdriver/chrome.js';
import { ProfileData, ExperienceItem, EducationItem, ScrapeResult } from './types.js';

/** Default wait (ms) for elements that must be present (login form, profile header). */
const DEFAULT_TIMEOUT = 15000;
/** Lower bound (ms) of the randomized pause between scroll steps. */
const SCROLL_PAUSE_MIN = 600;
/** Upper bound (ms) of the randomized pause between scroll steps. */
const SCROLL_PAUSE_MAX = 1100;
/** Default wait (ms) used by the "safe" finders for optional page sections. */
const SECTION_TIMEOUT = 5000;

/**
 * Selenium-based scraper that logs in to LinkedIn and extracts the header,
 * about, experience, education, skills and contact info of one profile.
 *
 * The active WebDriver is stored on the instance for the duration of a
 * `scrapeProfile` call, so a single instance must not run several scrapes
 * concurrently.
 */
export class LinkedInScraperClient {
  private driver?: WebDriver;

  /**
   * Creates a Chrome WebDriver instance with LinkedIn-optimized settings.
   *
   * @param headless - when true, Chrome is started with `--headless=new`.
   * @returns a driver whose page-load timeout is set to 45 seconds.
   */
  private async createDriver(headless: boolean = false): Promise<WebDriver> {
    const options = new chrome.Options();

    if (headless) {
      options.addArguments('--headless=new');
    }

    // Stability and anti-detection settings
    options.addArguments(
      '--disable-blink-features=AutomationControlled',
      '--no-sandbox',
      '--disable-dev-shm-usage',
      '--start-maximized',
      '--disable-infobars',
      '--disable-gpu',
      '--lang=en-US',
      '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    );

    const driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(options)
      .build();

    await driver.manage().setTimeouts({ pageLoad: 45000 });
    return driver;
  }

  /**
   * Resolves after a random delay in the range [min, max) milliseconds.
   */
  private async randomSleep(min: number, max: number): Promise<void> {
    const delay = Math.random() * (max - min) + min;
    return new Promise(resolve => setTimeout(resolve, delay));
  }

  /**
   * Waits for a single element.
   *
   * @returns the element, or `null` when the wait fails (e.g. times out).
   * @throws Error when no driver has been created yet.
   */
  private async safeFind(by: By, timeout: number = SECTION_TIMEOUT): Promise<WebElement | null> {
    if (!this.driver) throw new Error('Driver not initialized');
    try {
      return await this.driver.wait(until.elementLocated(by), timeout);
    } catch {
      return null;
    }
  }

  /**
   * Waits until at least one element matches, then returns every match.
   *
   * @returns the matching elements, or an empty array when the wait fails.
   * @throws Error when no driver has been created yet.
   */
  private async safeFindAll(by: By, timeout: number = SECTION_TIMEOUT): Promise<WebElement[]> {
    if (!this.driver) throw new Error('Driver not initialized');
    try {
      await this.driver.wait(until.elementLocated(by), timeout);
      return await this.driver.findElements(by);
    } catch {
      return [];
    }
  }

  /**
   * Reads the trimmed text of an element.
   *
   * @returns the text, or `null` when it is empty or cannot be read.
   */
  private async getText(element: WebElement): Promise<string | null> {
    try {
      const text = await element.getText();
      return text.trim() || null;
    } catch {
      return null;
    }
  }

  /**
   * Scrolls the page down in `steps` increments with a random pause after each.
   * Every increment is one sixth of the current document height, independent
   * of `steps`.
   */
  private async scrollPage(steps: number = 6): Promise<void> {
    if (!this.driver) return;
    for (let i = 0; i < steps; i++) {
      await this.driver.executeScript('window.scrollBy(0, document.body.scrollHeight/6);');
      await this.randomSleep(SCROLL_PAUSE_MIN, SCROLL_PAUSE_MAX);
    }
  }

  /**
   * Scrolls the given element to the vertical center of the viewport.
   * Failures are ignored: scrolling is best-effort.
   */
  private async scrollToElement(element: WebElement): Promise<void> {
    if (!this.driver) return;
    try {
      await this.driver.executeScript("arguments[0].scrollIntoView({block:'center'});", element);
      await this.randomSleep(300, 700);
    } catch {
      // Ignore scroll errors
    }
  }

  /**
   * Finds the first `<section>` containing an `<h2>` whose text contains
   * `headingText`, compared case-insensitively.
   *
   * `headingText` is interpolated into a single-quoted XPath literal, so it
   * must not contain a single quote.
   */
  private async findSectionByHeading(headingText: string): Promise<WebElement | null> {
    const xpath = `//section[.//h2[contains(translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '${headingText.toLowerCase()}')]]`;
    return await this.safeFind(By.xpath(xpath), 3000);
  }

  /**
   * Clicks every "see more" / "show more" button inside a section.
   * Individual click failures and lookup failures are ignored.
   */
  private async expandSeeMoreInSection(section: WebElement): Promise<void> {
    if (!this.driver) return;
    try {
      const buttons = await section.findElements(By.xpath(".//button[.//span[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'see more') or contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'show more')]]"));
      for (const btn of buttons) {
        try {
          // Script click avoids "element not interactable" on overlapped buttons.
          await this.driver.executeScript("arguments[0].click();", btn);
          await this.randomSleep(200, 500);
        } catch {
          // Ignore click errors
        }
      }
    } catch {
      // Ignore expansion errors
    }
  }

  /**
   * Logs in through the LinkedIn login form and waits for a URL containing
   * "feed".
   *
   * @throws Error when no driver exists; rejects when the form is not found
   *   or the post-login wait times out.
   */
  private async linkedinLogin(email: string, password: string): Promise<void> {
    if (!this.driver) throw new Error('Driver not initialized');

    await this.driver.get('https://www.linkedin.com/login');
    await this.driver.wait(until.elementLocated(By.id('username')), DEFAULT_TIMEOUT);

    const usernameField = await this.driver.findElement(By.id('username'));
    const passwordField = await this.driver.findElement(By.id('password'));
    const submitButton = await this.driver.findElement(By.xpath('//button[@type="submit"]'));

    await usernameField.sendKeys(email);
    await passwordField.sendKeys(password);
    await submitButton.click();

    await this.driver.wait(until.urlContains('feed'), DEFAULT_TIMEOUT);
  }

  /**
   * Extracts header information (name, headline, location) into `data`.
   */
  private async extractHeader(data: ProfileData): Promise<void> {
    if (!this.driver) return;

    // Name (h1)
    const h1 = await this.safeFind(By.tagName('h1'), DEFAULT_TIMEOUT);
    if (h1) {
      data.name = await this.getText(h1);
    }

    // Headline
    const headline = await this.safeFind(By.css('div.text-body-medium.break-words'), 5000);
    if (headline) {
      data.headline = await this.getText(headline);
    }

    // Location
    const location = await this.safeFind(By.xpath("//div[contains(@class,'mt2')]//span[contains(@class,'text-body-small')]"), 5000);
    if (location) {
      data.location = await this.getText(location);
    }
  }

  /**
   * Extracts the "About" section text into `data.about`, if the section exists.
   */
  private async extractAbout(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('About');
    if (!section) return;

    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);

    const aboutEl = await section
      .findElement(By.xpath(".//div[contains(@class,'inline-show-more-text') or contains(@class,'display-flex') or contains(@class,'break-words')]"))
      .catch(() => null);
    if (aboutEl) {
      data.about = await this.getText(aboutEl);
    }
  }

  /**
   * Extracts the "Experience" section; each list item with any readable field
   * is appended to `data.experiences`.
   */
  private async extractExperience(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('Experience');
    if (!section) return;

    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);

    const items = await section.findElements(By.xpath(".//li[.//div[contains(@class,'display-flex')]]"));

    for (const item of items) {
      try {
        const exp: ExperienceItem = {};

        // Title
        const titleElements = await item.findElements(By.xpath(".//span[contains(@class,'mr1')]/span|.//div[contains(@class,'t-bold')]/span|.//a//div[contains(@class,'t-bold')]/span"));
        const titleEl = titleElements[0];
        if (titleEl) {
          exp.title = await this.getText(titleEl);
        }

        // Company
        const companyElements = await item.findElements(By.xpath(".//span[contains(@class,'t-normal')]/span|.//span[contains(@class,'t-14 t-normal')]/span"));
        const companyEl = companyElements[0];
        if (companyEl) {
          exp.company = await this.getText(companyEl);
        }

        // Date range and location (first match is the date range, second the location)
        const dateElements = await item.findElements(By.xpath(".//span[contains(@class,'t-14 t-normal t-black--light')]/span|.//span[contains(@class,'t-14 t-normal t-black--light')]"));
        const [dateEl, locationEl] = dateElements;
        if (dateEl) {
          exp.date_range = await this.getText(dateEl);
        }
        if (locationEl) {
          exp.location = await this.getText(locationEl);
        }

        // Description
        const descElements = await item.findElements(By.xpath(".//div[contains(@class,'inline-show-more-text') or contains(@class,'show-more-less-text')]"));
        const descEl = descElements[0];
        if (descEl) {
          exp.description = await this.getText(descEl);
        }

        // Only add if has content
        if (exp.title || exp.company || exp.date_range || exp.location || exp.description) {
          data.experiences.push(exp);
        }
      } catch {
        // A broken/stale item must not abort the remaining items.
        continue;
      }
    }
  }

  /**
   * Extracts the "Education" section; each list item with any readable field
   * is appended to `data.education`.
   */
  private async extractEducation(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('Education');
    if (!section) return;

    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);

    const items = await section.findElements(By.xpath(".//li[.//div[contains(@class,'display-flex')]]"));

    for (const item of items) {
      try {
        const edu: EducationItem = {};

        // School
        const schoolElements = await item.findElements(By.xpath(".//span[contains(@class,'mr1')]/span|.//div[contains(@class,'t-bold')]/span"));
        const schoolEl = schoolElements[0];
        if (schoolEl) {
          edu.school = await this.getText(schoolEl);
        }

        // Degree and field, separated by a middle dot ("Degree · Field")
        const degreeElements = await item.findElements(By.xpath(".//span[contains(@class,'t-14 t-normal')]/span|.//span[contains(@class,'t-14 t-normal')]"));
        const degreeEl = degreeElements[0];
        if (degreeEl) {
          const degreeText = await this.getText(degreeEl);
          if (degreeText) {
            const [degree, field] = degreeText.split('·');
            if (degree !== undefined) edu.degree = degree.trim();
            if (field !== undefined) edu.field = field.trim();
          }
        }

        // Date range
        const dateElements = await item.findElements(By.xpath(".//span[contains(@class,'t-14 t-normal t-black--light')]/span|.//span[contains(@class,'t-14 t-normal t-black--light')]"));
        const dateEl = dateElements[0];
        if (dateEl) {
          edu.date_range = await this.getText(dateEl);
        }

        // Description
        const descElements = await item.findElements(By.xpath(".//div[contains(@class,'inline-show-more-text') or contains(@class,'show-more-less-text')]"));
        const descEl = descElements[0];
        if (descEl) {
          edu.description = await this.getText(descEl);
        }

        // Only add if has content
        if (edu.school || edu.degree || edu.field || edu.date_range || edu.description) {
          data.education.push(edu);
        }
      } catch {
        // A broken/stale item must not abort the remaining items.
        continue;
      }
    }
  }

  /**
   * Extracts the "Skills" section into `data.skills`, skipping names already
   * present (compared case-insensitively).
   */
  private async extractSkills(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('Skills');
    if (!section) return;

    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);

    const skillElements = await section.findElements(By.xpath(".//span[contains(@class,'mr1')]/span|.//a//span[contains(@class,'mr1')]/span|.//span[contains(@class,'t-bold')]/span"));

    // Lower-cased names seen so far; avoids rescanning data.skills per element.
    const seen = new Set(data.skills.map(skill => skill.toLowerCase()));

    for (const element of skillElements) {
      const skillText = await this.getText(element);
      if (!skillText) continue;

      const key = skillText.toLowerCase();
      if (!seen.has(key)) {
        seen.add(key);
        data.skills.push(skillText);
      }
    }
  }

  /**
   * Opens the contact-info modal (when a link to it exists) and extracts
   * non-LinkedIn website links into `data.websites` and the first `mailto:`
   * address into `data.email`. All failures are ignored: contact info is
   * optional.
   */
  private async extractContactInfo(data: ProfileData): Promise<void> {
    if (!this.driver) return;

    // Try to click contact info link
    try {
      const contactLink = await this.safeFind(By.xpath("//a[contains(@href,'contact-info')] | //a[.//span[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'contact info')]]"), 2000);
      if (!contactLink) return;

      await this.driver.executeScript("arguments[0].click();", contactLink);

      // Wait for modal
      const modal = await this.safeFind(By.xpath("//div[@role='dialog' or @role='alertdialog']"), 5000);
      if (!modal) return;

      try {
        // Extract websites
        const links = await modal.findElements(By.xpath(".//a[@href and not(contains(@href,'mailto:'))]"));
        for (const link of links) {
          const href: string | null = await link.getAttribute('href');
          if (href && !href.includes('linkedin.com')) {
            data.websites.push(href);
          }
        }

        // Extract email
        const mailtoLinks = await modal.findElements(By.xpath(".//a[starts-with(@href,'mailto:')]"));
        const firstMailto = mailtoLinks[0];
        if (firstMailto) {
          // The attribute can come back null at runtime; guard before using it.
          const href: string | null = await firstMailto.getAttribute('href');
          if (href) {
            data.email = href.replace('mailto:', '').trim();
          }
        }
      } finally {
        // Close modal even when extraction failed part-way.
        await this.driver.actions().sendKeys(Key.ESCAPE).perform();
        await this.randomSleep(200, 400);
      }
    } catch {
      // Ignore contact info extraction errors
    }
  }

  /**
   * Strips the query string and fragment from a profile URL and guarantees
   * exactly the trailing slash state "ends with '/'" on the remaining path.
   */
  private normalizeProfileUrl(url: string): string {
    const trimmed = url.trim();
    const base = trimmed.split(/[?#]/)[0] ?? trimmed;
    // Check the *cleaned* URL: the raw one may end with query characters.
    return base.endsWith('/') ? base : `${base}/`;
  }

  /**
   * Main scraping method: starts Chrome, logs in, opens the profile and
   * extracts all supported sections.
   *
   * @param url - profile URL; query string and fragment are ignored.
   * @param email - LinkedIn account e-mail used to log in.
   * @param password - LinkedIn account password.
   * @param headless - run Chrome headless when true.
   * @returns a result with `success: true` and the profile, or
   *   `success: false` and an error message. The browser is always closed.
   */
  async scrapeProfile(url: string, email: string, password: string, headless: boolean = false): Promise<ScrapeResult> {
    try {
      this.driver = await this.createDriver(headless);

      // Initialize profile data
      const data: ProfileData = {
        url,
        name: undefined,
        headline: undefined,
        location: undefined,
        about: undefined,
        experiences: [],
        education: [],
        skills: [],
        websites: [],
        email: undefined
      };

      // Login
      await this.linkedinLogin(email, password);

      // Navigate to profile
      const cleanUrl = this.normalizeProfileUrl(url);
      await this.driver.get(cleanUrl);

      await this.driver.wait(until.elementLocated(By.tagName('body')), DEFAULT_TIMEOUT);
      await this.randomSleep(1200, 2000);

      // Check for auth issues
      const currentUrl = await this.driver.getCurrentUrl();
      if (currentUrl.includes('checkpoint') || currentUrl.includes('login')) {
        throw new Error('Authentication failed or hit checkpoint wall');
      }

      // Progressive scroll to load sections
      await this.scrollPage(8);

      // Extract all sections
      await this.extractHeader(data);
      await this.extractAbout(data);
      await this.extractExperience(data);
      await this.extractEducation(data);
      await this.extractSkills(data);
      await this.extractContactInfo(data);

      return {
        success: true,
        profile: data,
        timestamp: new Date().toISOString()
      };
    } catch (error: unknown) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        timestamp: new Date().toISOString()
      };
    } finally {
      const driver = this.driver;
      if (driver) {
        // Drop the reference first so a failed quit never leaves a dead driver behind.
        this.driver = undefined;
        try {
          await this.randomSleep(500, 1200);
          await driver.quit();
        } catch {
          // A rejection here would replace the ScrapeResult being returned;
          // shutdown is best-effort.
        }
      }
    }
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/superyuser/linkedin-scraper-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.