// client.ts
import { Builder, WebDriver, By, until, Key } from 'selenium-webdriver';
import chrome from 'selenium-webdriver/chrome.js';
import { ProfileData, ExperienceItem, EducationItem, ScrapeResult } from './types.js';
/** Wait budget in milliseconds for page-level conditions (login form, post-login redirect, profile body and name). */
const DEFAULT_TIMEOUT = 15_000;

/** Lower bound in milliseconds of the randomized pause between scroll steps. */
const SCROLL_PAUSE_MIN = 600;

/** Upper bound in milliseconds of the randomized pause between scroll steps. */
const SCROLL_PAUSE_MAX = 1_100;

/** Default wait budget in milliseconds for the element finders. */
const SECTION_TIMEOUT = 5_000;
/**
 * Selenium-driven client that signs in to LinkedIn and scrapes one profile
 * page into a `ProfileData` object.
 *
 * Every `scrapeProfile` call starts a fresh Chrome session and shuts it down
 * before the call resolves. The session is held in a single instance field, so
 * overlapping calls on the same client would share it.
 */
export class LinkedInScraperClient {
  /** Error message used whenever LinkedIn shows a login or checkpoint page instead of content. */
  private static readonly AUTH_WALL_MESSAGE = 'Authentication failed or hit checkpoint wall';

  /** Browser session of the scrape currently in progress, if any. */
  private driver?: WebDriver;

  /**
   * Removes the query string and fragment from a profile URL and guarantees
   * exactly one trailing slash.
   *
   * @param url Profile URL as supplied by the caller.
   * @returns The cleaned URL, e.g. `.../in/jane/?trk=x` becomes `.../in/jane/`.
   */
  private static normalizeProfileUrl(url: string): string {
    const [base = ''] = url.trim().split(/[?#]/);
    // Strip existing slashes first so a URL that already ends in "/" (before
    // its query string) does not end up with "//".
    return `${base.replace(/\/+$/, '')}/`;
  }

  /**
   * Tells whether a URL points at a login or checkpoint page.
   *
   * Only the path is inspected, and paths under `/in/` are never treated as an
   * auth wall: a profile slug or a query parameter that merely contains
   * "login" must not be reported as an authentication failure.
   *
   * @param currentUrl URL the browser ended up on.
   */
  private static isAuthWallUrl(currentUrl: string): boolean {
    let pathname = currentUrl;
    try {
      pathname = new URL(currentUrl).pathname;
    } catch {
      // Not an absolute URL: fall back to inspecting the raw string.
    }
    const segments = pathname.toLowerCase().split('/').filter(Boolean);
    if (segments[0] === 'in') return false;
    return segments.some(segment => segment.includes('checkpoint') || segment.includes('login'));
  }

  /**
   * Renders a string as an XPath 1.0 string literal, switching quote style or
   * falling back to `concat(...)` when the value itself contains quotes.
   *
   * @param value Raw text to embed in an XPath expression.
   */
  private static xpathLiteral(value: string): string {
    if (!value.includes("'")) return `'${value}'`;
    if (!value.includes('"')) return `"${value}"`;
    const pieces = value.split("'").map(piece => `'${piece}'`);
    return `concat(${pieces.join(`, "'", `)})`;
  }

  /**
   * Extracts the address from a `mailto:` link, dropping any `?subject=...`
   * style parameters.
   *
   * @param href Value of the link's `href` attribute; may be missing.
   * @returns The address, or `undefined` when there is none.
   */
  private static emailFromMailto(href: string | null | undefined): string | undefined {
    if (!href) return undefined;
    const [address = ''] = href.replace(/^mailto:/i, '').split('?');
    const trimmed = address.trim();
    return trimmed === '' ? undefined : trimmed;
  }

  /**
   * Returns the active session.
   *
   * @throws Error when no session has been created yet.
   */
  private requireDriver(): WebDriver {
    if (!this.driver) throw new Error('Driver not initialized');
    return this.driver;
  }

  /**
   * Shuts a session down, ignoring teardown failures so they can neither leak
   * out of a `finally` block nor replace an already computed result.
   */
  private async quitQuietly(driver: WebDriver): Promise<void> {
    try {
      await driver.quit();
    } catch {
      // Nothing useful can be done if the browser refuses to close.
    }
  }

  /**
   * Creates a Chrome WebDriver session with the scraper's fixed set of
   * stability and anti-detection flags and a 45 s page-load timeout.
   *
   * @param headless Run Chrome with `--headless=new` when true.
   * @returns The ready-to-use session.
   */
  private async createDriver(headless: boolean = false): Promise<WebDriver> {
    const options = new chrome.Options();
    if (headless) {
      options.addArguments('--headless=new');
    }
    // Stability and anti-detection settings
    options.addArguments(
      '--disable-blink-features=AutomationControlled',
      '--no-sandbox',
      '--disable-dev-shm-usage',
      '--start-maximized',
      '--disable-infobars',
      '--disable-gpu',
      '--lang=en-US',
      '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    );
    const driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(options)
      .build();
    try {
      await driver.manage().setTimeouts({ pageLoad: 45000 });
    } catch (error) {
      // The browser is already running at this point; close it so a failed
      // setup does not leave an orphaned Chrome process behind.
      await this.quitQuietly(driver);
      throw error;
    }
    return driver;
  }

  /**
   * Resolves after a random delay between `min` and `max` milliseconds.
   */
  private randomSleep(min: number, max: number): Promise<void> {
    const delay = min + Math.random() * (max - min);
    return new Promise(resolve => setTimeout(resolve, delay));
  }

  /**
   * Waits for an element to appear.
   *
   * @param by Locator to wait for.
   * @param timeout Maximum wait in milliseconds.
   * @returns The element, or `null` when the wait fails for any reason.
   * @throws Error when no session has been created yet.
   */
  private async safeFind(by: By, timeout: number = SECTION_TIMEOUT) {
    const driver = this.requireDriver();
    try {
      return await driver.wait(until.elementLocated(by), timeout);
    } catch {
      return null;
    }
  }

  /**
   * Waits for at least one match of a locator, then returns all matches.
   *
   * @param by Locator to wait for.
   * @param timeout Maximum wait in milliseconds for the first match.
   * @returns All matching elements, or an empty array when the lookup fails.
   * @throws Error when no session has been created yet.
   */
  private async safeFindAll(by: By, timeout: number = SECTION_TIMEOUT) {
    const driver = this.requireDriver();
    try {
      await driver.wait(until.elementLocated(by), timeout);
      return await driver.findElements(by);
    } catch {
      return [];
    }
  }

  /**
   * Reads an element's text without ever throwing.
   *
   * @returns The trimmed text, or `null` when it is empty or cannot be read.
   */
  private async getText(element: { getText(): Promise<string> }): Promise<string | null> {
    try {
      const text = await element.getText();
      // `||` is intentional: an empty string is reported as "no text".
      return text.trim() || null;
    } catch {
      return null;
    }
  }

  /**
   * Scrolls the page down in several steps, pausing a random interval after
   * each one.
   *
   * Each step moves by one sixth of the current document height, independent
   * of `steps`; more steps therefore scroll further rather than finer.
   *
   * @param steps Number of scroll steps. Does nothing without a session.
   */
  private async scrollPage(steps: number = 6): Promise<void> {
    const driver = this.driver;
    if (!driver) return;
    for (let step = 0; step < steps; step++) {
      await driver.executeScript('window.scrollBy(0, document.body.scrollHeight/6);');
      await this.randomSleep(SCROLL_PAUSE_MIN, SCROLL_PAUSE_MAX);
    }
  }

  /**
   * Centers an element in the viewport and pauses briefly. Failures are
   * ignored because scrolling is only a convenience for later lookups.
   */
  private async scrollToElement(element: unknown): Promise<void> {
    const driver = this.driver;
    if (!driver) return;
    try {
      await driver.executeScript("arguments[0].scrollIntoView({block:'center'});", element);
      await this.randomSleep(300, 700);
    } catch {
      // Ignore scroll errors
    }
  }

  /**
   * Finds the first `<section>` whose `<h2>` contains the given text,
   * compared case-insensitively.
   *
   * @param headingText Heading to look for, e.g. `'About'`.
   * @returns The section element, or `null` if none appears within 3 s.
   */
  private async findSectionByHeading(headingText: string) {
    // Quote the heading as a proper XPath literal so an apostrophe in it
    // cannot break the expression.
    const needle = LinkedInScraperClient.xpathLiteral(headingText.toLowerCase());
    const xpath = `//section[.//h2[contains(translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), ${needle})]]`;
    return await this.safeFind(By.xpath(xpath), 3000);
  }

  /**
   * Clicks every "see more" / "show more" button inside a section so that
   * truncated text is expanded. Entirely best-effort: any failure is ignored.
   */
  private async expandSeeMoreInSection(section: { findElements(locator: By): Promise<unknown[]> }): Promise<void> {
    const driver = this.driver;
    if (!driver) return;
    try {
      const buttons = await section.findElements(By.xpath(".//button[.//span[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'see more') or contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'show more')]]"));
      for (const button of buttons) {
        try {
          // Click through script rather than WebElement.click().
          await driver.executeScript("arguments[0].click();", button);
          await this.randomSleep(200, 500);
        } catch {
          // Ignore click errors
        }
      }
    } catch {
      // Ignore expansion errors
    }
  }

  /**
   * Signs in through the LinkedIn login form and waits for the redirect to
   * the feed.
   *
   * @param email Account e-mail typed into the username field.
   * @param password Account password.
   * @throws Error with the auth-wall message when the browser is still on a
   *   login or checkpoint page after the wait; otherwise the original
   *   WebDriver error is rethrown.
   */
  private async linkedinLogin(email: string, password: string): Promise<void> {
    const driver = this.requireDriver();
    await driver.get('https://www.linkedin.com/login');
    await driver.wait(until.elementLocated(By.id('username')), DEFAULT_TIMEOUT);
    const usernameField = await driver.findElement(By.id('username'));
    const passwordField = await driver.findElement(By.id('password'));
    const submitButton = await driver.findElement(By.xpath('//button[@type="submit"]'));
    await usernameField.sendKeys(email);
    await passwordField.sendKeys(password);
    await submitButton.click();
    try {
      await driver.wait(until.urlContains('feed'), DEFAULT_TIMEOUT);
    } catch (error) {
      // Turn an opaque wait timeout into the same auth error the profile
      // check reports when LinkedIn kept us on a login/checkpoint page.
      const landedOn = await driver.getCurrentUrl();
      if (LinkedInScraperClient.isAuthWallUrl(landedOn)) {
        throw new Error(LinkedInScraperClient.AUTH_WALL_MESSAGE);
      }
      throw error;
    }
  }

  /**
   * Fills `name`, `headline` and `location` from the top of the profile.
   * Fields whose element cannot be found are left untouched.
   */
  private async extractHeader(data: ProfileData): Promise<void> {
    if (!this.driver) return;

    // Name (h1)
    const nameEl = await this.safeFind(By.tagName('h1'), DEFAULT_TIMEOUT);
    if (nameEl) {
      data.name = await this.getText(nameEl);
    }

    // Headline
    const headlineEl = await this.safeFind(By.css('div.text-body-medium.break-words'), 5000);
    if (headlineEl) {
      data.headline = await this.getText(headlineEl);
    }

    // Location
    const locationEl = await this.safeFind(By.xpath("//div[contains(@class,'mt2')]//span[contains(@class,'text-body-small')]"), 5000);
    if (locationEl) {
      data.location = await this.getText(locationEl);
    }
  }

  /**
   * Fills `about` from the "About" section, if the profile has one.
   */
  private async extractAbout(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('About');
    if (!section) return;
    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);
    try {
      const [aboutEl] = await section.findElements(By.xpath(".//div[contains(@class,'inline-show-more-text') or contains(@class,'display-flex') or contains(@class,'break-words')]"));
      if (aboutEl) {
        data.about = await this.getText(aboutEl);
      }
    } catch {
      // A failed lookup simply leaves `about` unset.
    }
  }

  /**
   * Appends one entry per item of the "Experience" section to
   * `data.experiences`. Items that yield no data at all are skipped, and an
   * item that fails mid-way does not stop the remaining ones.
   */
  private async extractExperience(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('Experience');
    if (!section) return;
    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);

    // Locators are the same for every item, so build them once.
    const titleBy = By.xpath(".//span[contains(@class,'mr1')]/span|.//div[contains(@class,'t-bold')]/span|.//a//div[contains(@class,'t-bold')]/span");
    const companyBy = By.xpath(".//span[contains(@class,'t-normal')]/span|.//span[contains(@class,'t-14 t-normal')]/span");
    // Match only the light-grey spans themselves, not their child spans too.
    // With both in one union the results come back in document order, so the
    // second hit would be a child of the date span (the date again) instead of
    // the next sibling span that holds the location.
    const dateAndLocationBy = By.xpath(".//span[contains(@class,'t-14 t-normal t-black--light')]");
    const descriptionBy = By.xpath(".//div[contains(@class,'inline-show-more-text') or contains(@class,'show-more-less-text')]");

    const items = await section.findElements(By.xpath(".//li[.//div[contains(@class,'display-flex')]]"));
    for (const item of items) {
      try {
        const exp: ExperienceItem = {};

        const [titleEl] = await item.findElements(titleBy);
        if (titleEl) {
          exp.title = await this.getText(titleEl);
        }

        const [companyEl] = await item.findElements(companyBy);
        if (companyEl) {
          exp.company = await this.getText(companyEl);
        }

        const [dateEl, locationEl] = await item.findElements(dateAndLocationBy);
        if (dateEl) {
          exp.date_range = await this.getText(dateEl);
        }
        if (locationEl) {
          exp.location = await this.getText(locationEl);
        }

        const [descriptionEl] = await item.findElements(descriptionBy);
        if (descriptionEl) {
          exp.description = await this.getText(descriptionEl);
        }

        // Only add if has content
        if (exp.title || exp.company || exp.date_range || exp.location || exp.description) {
          data.experiences.push(exp);
        }
      } catch {
        // Skip this item (e.g. it went stale) and carry on with the next one.
      }
    }
  }

  /**
   * Appends one entry per item of the "Education" section to
   * `data.education`. The degree line is split on "·" into degree and field.
   * Items that yield no data at all are skipped, and an item that fails
   * mid-way does not stop the remaining ones.
   */
  private async extractEducation(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('Education');
    if (!section) return;
    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);

    // Locators are the same for every item, so build them once.
    const schoolBy = By.xpath(".//span[contains(@class,'mr1')]/span|.//div[contains(@class,'t-bold')]/span");
    const degreeBy = By.xpath(".//span[contains(@class,'t-14 t-normal')]/span|.//span[contains(@class,'t-14 t-normal')]");
    const dateBy = By.xpath(".//span[contains(@class,'t-14 t-normal t-black--light')]/span|.//span[contains(@class,'t-14 t-normal t-black--light')]");
    const descriptionBy = By.xpath(".//div[contains(@class,'inline-show-more-text') or contains(@class,'show-more-less-text')]");

    const items = await section.findElements(By.xpath(".//li[.//div[contains(@class,'display-flex')]]"));
    for (const item of items) {
      try {
        const edu: EducationItem = {};

        const [schoolEl] = await item.findElements(schoolBy);
        if (schoolEl) {
          edu.school = await this.getText(schoolEl);
        }

        const [degreeEl] = await item.findElements(degreeBy);
        if (degreeEl) {
          const degreeText = await this.getText(degreeEl);
          if (degreeText) {
            const [degree = '', field] = degreeText.split('·');
            edu.degree = degree.trim();
            if (field !== undefined) edu.field = field.trim();
          }
        }

        const [dateEl] = await item.findElements(dateBy);
        if (dateEl) {
          edu.date_range = await this.getText(dateEl);
        }

        const [descriptionEl] = await item.findElements(descriptionBy);
        if (descriptionEl) {
          edu.description = await this.getText(descriptionEl);
        }

        // Only add if has content
        if (edu.school || edu.degree || edu.field || edu.date_range || edu.description) {
          data.education.push(edu);
        }
      } catch {
        // Skip this item (e.g. it went stale) and carry on with the next one.
      }
    }
  }

  /**
   * Appends the names found in the "Skills" section to `data.skills`,
   * skipping names already present (compared case-insensitively).
   */
  private async extractSkills(data: ProfileData): Promise<void> {
    const section = await this.findSectionByHeading('Skills');
    if (!section) return;
    await this.scrollToElement(section);
    await this.expandSeeMoreInSection(section);
    const skillElements = await section.findElements(By.xpath(".//span[contains(@class,'mr1')]/span|.//a//span[contains(@class,'mr1')]/span|.//span[contains(@class,'t-bold')]/span"));

    // Track lower-cased names in a set instead of rescanning the array per skill.
    const seen = new Set(data.skills.map(skill => skill.toLowerCase()));
    for (const element of skillElements) {
      const skillText = await this.getText(element);
      if (!skillText) continue;
      const key = skillText.toLowerCase();
      if (seen.has(key)) continue;
      seen.add(key);
      data.skills.push(skillText);
    }
  }

  /**
   * Opens the "Contact info" dialog and collects external website links into
   * `data.websites` and the first `mailto:` address into `data.email`.
   *
   * Entirely best-effort: any failure leaves `data` as far as it got. Once the
   * dialog has been found it is always dismissed with Escape, even when
   * reading its contents fails.
   */
  private async extractContactInfo(data: ProfileData): Promise<void> {
    const driver = this.driver;
    if (!driver) return;
    try {
      // Try to click contact info link
      const contactLink = await this.safeFind(By.xpath("//a[contains(@href,'contact-info')] | //a[.//span[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'),'contact info')]]"), 2000);
      if (!contactLink) return;
      await driver.executeScript("arguments[0].click();", contactLink);

      // Wait for modal
      const modal = await this.safeFind(By.xpath("//div[@role='dialog' or @role='alertdialog']"), 5000);
      if (!modal) return;

      try {
        // Extract websites
        const links = await modal.findElements(By.xpath(".//a[@href and not(contains(@href,'mailto:'))]"));
        for (const link of links) {
          const href = await link.getAttribute('href');
          if (href && !href.includes('linkedin.com') && !data.websites.includes(href)) {
            data.websites.push(href);
          }
        }

        // Extract email
        const [mailtoLink] = await modal.findElements(By.xpath(".//a[starts-with(@href,'mailto:')]"));
        if (mailtoLink) {
          const address = LinkedInScraperClient.emailFromMailto(await mailtoLink.getAttribute('href'));
          if (address !== undefined) {
            data.email = address;
          }
        }
      } finally {
        // Close modal
        await driver.actions().sendKeys(Key.ESCAPE).perform();
        await this.randomSleep(200, 400);
      }
    } catch {
      // Ignore contact info extraction errors
    }
  }

  /**
   * Logs in, opens the profile and extracts every supported section.
   *
   * Never rejects: failures are reported through the returned object, and the
   * browser session is always shut down before returning.
   *
   * @param url Profile URL; its query string and fragment are ignored when navigating.
   * @param email LinkedIn account e-mail used to sign in.
   * @param password LinkedIn account password.
   * @param headless Run Chrome headless when true.
   * @returns `{ success: true, profile, timestamp }` on success, otherwise
   *   `{ success: false, error, timestamp }`.
   */
  async scrapeProfile(url: string, email: string, password: string, headless: boolean = false): Promise<ScrapeResult> {
    try {
      const driver = await this.createDriver(headless);
      this.driver = driver;

      // Initialize profile data
      const data: ProfileData = {
        url,
        name: undefined,
        headline: undefined,
        location: undefined,
        about: undefined,
        experiences: [],
        education: [],
        skills: [],
        websites: [],
        email: undefined
      };

      // Login
      await this.linkedinLogin(email, password);

      // Navigate to profile
      const cleanUrl = LinkedInScraperClient.normalizeProfileUrl(url);
      await driver.get(cleanUrl);
      await driver.wait(until.elementLocated(By.tagName('body')), DEFAULT_TIMEOUT);
      await this.randomSleep(1200, 2000);

      // Check for auth issues
      const currentUrl = await driver.getCurrentUrl();
      if (LinkedInScraperClient.isAuthWallUrl(currentUrl)) {
        throw new Error(LinkedInScraperClient.AUTH_WALL_MESSAGE);
      }

      // Progressive scroll to load sections
      await this.scrollPage(8);

      // Extract all sections
      await this.extractHeader(data);
      await this.extractAbout(data);
      await this.extractExperience(data);
      await this.extractEducation(data);
      await this.extractSkills(data);
      await this.extractContactInfo(data);

      return {
        success: true,
        profile: data,
        timestamp: new Date().toISOString()
      };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        timestamp: new Date().toISOString()
      };
    } finally {
      // Detach the session first so the client is reusable even if teardown
      // misbehaves; quitting quietly keeps a teardown error from replacing
      // the result computed above.
      const driver = this.driver;
      this.driver = undefined;
      if (driver) {
        await this.randomSleep(500, 1200);
        await this.quitQuietly(driver);
      }
    }
  }
}