import puppeteer from 'puppeteer-core';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
/** Substring used to recognise Browserless.io endpoints in a CDP URL. */
const BROWSERLESS_HOST_MARKER = 'browserless.io';

/** Message used whenever a method needs a page but none has been attached yet. */
const NOT_CONNECTED_MESSAGE = 'Browser not connected. Call connect() first.';

/**
 * Tells whether a URL uses the ws:// or wss:// scheme.
 * @param {string} url
 * @returns {boolean}
 */
function isWebSocketUrl(url) {
  return url.startsWith('ws://') || url.startsWith('wss://');
}

/**
 * Derives the plain (non-unblock) Browserless.io WebSocket URL from a CDP URL.
 * A leading http:// or https:// becomes wss://, and the first "/chrome"
 * occurrence is removed. ws:// and wss:// URLs keep their scheme.
 * @param {string} cdpUrl
 * @returns {string}
 */
function toBrowserlessWsUrl(cdpUrl) {
  return cdpUrl.replace(/^https?:\/\//, 'wss://').replace('/chrome', '');
}

/**
 * Extracts the `token` query parameter and the host name from a Browserless.io URL.
 * Falls back to regular-expression parsing when the URL constructor rejects the input.
 * @param {string} cdpUrl
 * @returns {{ token: (string|null), hostname: (string|null) }}
 */
function parseBrowserlessUrl(cdpUrl) {
  try {
    // ws:// -> http:// and wss:// -> https:// before handing the string to URL.
    const parsed = new URL(cdpUrl.replace(/^ws/, 'http'));
    return {
      token: parsed.searchParams.get('token'),
      hostname: parsed.hostname || parsed.host,
    };
  } catch (parseError) {
    const hostMatch = cdpUrl.match(/wss?:\/\/([^\/\?]+)/);
    const tokenMatch = cdpUrl.match(/[?&]token=([^&]+)/);
    return {
      token: tokenMatch ? tokenMatch[1] : null,
      hostname: hostMatch ? hostMatch[1] : null,
    };
  }
}

/**
 * Calls the Browserless.io `/unblock` endpoint and returns the WebSocket
 * endpoint found in its JSON response.
 * @param {string} cdpUrl - Browserless.io URL carrying a `token` query parameter.
 * @returns {Promise<string|null>} The endpoint, or null when the response has none.
 * @throws {Error} When token/hostname cannot be extracted or the HTTP call is not OK.
 */
async function requestBrowserlessUnblockEndpoint(cdpUrl) {
  const { token, hostname } = parseBrowserlessUrl(cdpUrl);
  if (!token) {
    throw new Error('Token is required for Browserless.io /unblock API. Make sure your CDP URL includes ?token=YOUR_TOKEN');
  }
  if (!hostname) {
    throw new Error('Could not extract hostname from CDP URL');
  }

  const unblockUrl = `https://${hostname}/unblock?blockAds=false&timeout=60000&proxy=residential&token=${encodeURIComponent(token)}`;
  const response = await fetch(unblockUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      url: 'https://www.linkedin.com',
      emulateMediaType: '',
      setJavaScriptEnabled: true,
      browserWSEndpoint: true, // ask for a WebSocket endpoint Puppeteer can attach to
      content: false,
      cookies: false,
      screenshot: false,
      ttl: 60000,
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    if (response.status === 401) {
      throw new Error(`Browserless.io authentication failed (401). Check: 1) Token is correct, 2) /unblock endpoint is available on your plan, 3) Token hasn't expired. Error: ${errorText.substring(0, 200)}`);
    }
    throw new Error(`Browserless.io /unblock API failed (${response.status}): ${errorText.substring(0, 200)}`);
  }

  const payload = await response.json();
  // The endpoint is read from the top level first, then from a nested `data` object.
  return payload?.browserWSEndpoint || payload?.data?.browserWSEndpoint || null;
}

/**
 * Drives a LinkedIn session through a Chrome instance reached over CDP.
 *
 * Apart from setupSession() throwing when no page is attached, the async
 * methods report failures as `{ success: false, error }` result objects
 * rather than throwing.
 */
class LinkedInAutomation {
  constructor() {
    this.browser = null;
    this.page = null;
    this.isConnected = false;
  }

  /**
   * Connects to a browser and prepares the first page (viewport, user agent,
   * headers and an init script that overrides several navigator properties).
   *
   * Endpoint selection:
   *  - Browserless.io URL with `useUnblock`: ask `/unblock` for an endpoint and
   *    fall back to the plain WebSocket URL if that fails;
   *  - any ws:// or wss:// URL: used directly as `browserWSEndpoint`;
   *  - other Browserless.io URL: converted to its wss:// form;
   *  - anything else: passed as `browserURL` (e.g. http://localhost:9222).
   *
   * @param {string} cdpUrl - WebSocket or HTTP address of the browser.
   * @param {boolean} [useUnblock=false] - Use the Browserless.io `/unblock` API.
   * @param {boolean} [useStealth=false] - Append `stealth=true` to Browserless.io endpoints.
   * @returns {Promise<{success: boolean, message?: string, error?: string}>}
   */
  async connect(cdpUrl, useUnblock = false, useStealth = false) {
    try {
      if (typeof cdpUrl !== 'string' || cdpUrl.length === 0) {
        throw new Error('cdpUrl must be a non-empty string');
      }

      const connectOptions = { defaultViewport: null };
      const isBrowserless = cdpUrl.includes(BROWSERLESS_HOST_MARKER);

      if (isBrowserless && useUnblock) {
        let unblockedEndpoint = null;
        try {
          unblockedEndpoint = await requestBrowserlessUnblockEndpoint(cdpUrl);
        } catch (unblockError) {
          // Best effort: the plain connection below is still attempted.
          console.warn(`Browserless.io /unblock failed, falling back to standard connection: ${unblockError.message}`);
        }
        connectOptions.browserWSEndpoint = unblockedEndpoint || toBrowserlessWsUrl(cdpUrl);
      } else if (isWebSocketUrl(cdpUrl)) {
        // A WebSocket URL must never be handed over as browserURL, whatever useUnblock says.
        connectOptions.browserWSEndpoint = cdpUrl;
      } else if (isBrowserless) {
        connectOptions.browserWSEndpoint = toBrowserlessWsUrl(cdpUrl);
      } else {
        connectOptions.browserURL = cdpUrl;
      }

      if (useStealth && isBrowserless && connectOptions.browserWSEndpoint) {
        try {
          const endpoint = new URL(connectOptions.browserWSEndpoint);
          endpoint.searchParams.set('stealth', 'true');
          connectOptions.browserWSEndpoint = endpoint.toString();
        } catch (e) {
          // Unparseable endpoint: append the parameter textually instead.
          const separator = connectOptions.browserWSEndpoint.includes('?') ? '&' : '?';
          connectOptions.browserWSEndpoint += `${separator}stealth=true`;
        }
      }

      this.browser = await puppeteer.connect(connectOptions);
      const pages = await this.browser.pages();
      this.page = pages[0] || await this.browser.newPage();

      await this.page.setViewport({
        width: 1920,
        height: 1080,
        deviceScaleFactor: 1,
      });
      await this.page.setUserAgent(
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36'
      );
      await this.page.setExtraHTTPHeaders({
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
      });
      await this.page.setJavaScriptEnabled(true);

      // Runs inside the page before its own scripts; it must stay self-contained.
      await this.page.evaluateOnNewDocument(() => {
        Object.defineProperty(navigator, 'webdriver', {
          get: () => false,
        });
        window.chrome = {
          runtime: {},
        };
        const permissions = window.navigator.permissions;
        if (permissions && typeof permissions.query === 'function') {
          // Bind the native method: calling it detached from `permissions` throws.
          const originalQuery = permissions.query.bind(permissions);
          permissions.query = (parameters) => (
            parameters?.name === 'notifications'
              ? Promise.resolve({ state: Notification.permission })
              : originalQuery(parameters)
          );
        }
        Object.defineProperty(navigator, 'plugins', {
          get: () => [1, 2, 3, 4, 5],
        });
        Object.defineProperty(navigator, 'languages', {
          get: () => ['en-US', 'en'],
        });
      });

      this.isConnected = true;
      return { success: true, message: 'Connected to browser' };
    } catch (error) {
      const errorMessage = error.message || String(error);
      if (typeof cdpUrl === 'string' && cdpUrl.includes(BROWSERLESS_HOST_MARKER)) {
        return {
          success: false,
          error: `Browserless.io connection failed: ${errorMessage}. Make sure you're using the correct WebSocket endpoint format: wss://production-sfo.browserless.io?token=YOUR_TOKEN`,
        };
      }
      return { success: false, error: `Browser connection failed: ${errorMessage}` };
    }
  }

  /**
   * Installs the `li_at` cookie, navigates to the LinkedIn feed and checks
   * whether the resulting page looks like a logged-in session.
   *
   * @param {string} liAtCookie - Value for the `li_at` cookie.
   * @returns {Promise<object>} `{ success: true, message, cookies }` on success,
   *   otherwise `{ success: false, error, ... }` with optional `debug`/`details`.
   * @throws {Error} When no page is attached (connect() has not succeeded).
   */
  async setupSession(liAtCookie) {
    if (!this.page) {
      throw new Error(NOT_CONNECTED_MESSAGE);
    }
    if (typeof liAtCookie !== 'string' || liAtCookie.trim() === '') {
      return { success: false, error: 'A non-empty li_at cookie value is required.' };
    }

    try {
      // The cookie is set before navigating so the first request already carries it.
      await this.page.setCookie({
        name: 'li_at',
        value: liAtCookie,
        domain: '.linkedin.com',
        path: '/',
        httpOnly: true,
        secure: true,
        sameSite: 'None',
      });

      let navigationSucceeded = false;
      let currentUrl = '';
      let navigationError = null;

      try {
        // Homepage first, then the feed.
        await this.page.goto('https://www.linkedin.com/', {
          waitUntil: 'domcontentloaded',
          timeout: 60000,
        });
        currentUrl = this.page.url();
        if (currentUrl.startsWith('chrome-error://')) {
          throw new Error(`Chrome error page: ${currentUrl}. Network connectivity issue.`);
        }
        await this.page.goto('https://www.linkedin.com/feed/', {
          waitUntil: 'domcontentloaded',
          timeout: 60000,
        });
        navigationSucceeded = true;
        currentUrl = this.page.url();
      } catch (gotoError) {
        navigationError = gotoError;
        try {
          currentUrl = this.page.url();
          if (currentUrl.includes('linkedin.com')) {
            // goto failed, but the page is already on LinkedIn.
            navigationSucceeded = true;
            console.log(`Navigation timeout, but reached LinkedIn: ${currentUrl}`);
          } else {
            // One retry, giving up on waiting after 10 seconds.
            let capTimer;
            const waitCap = new Promise((resolve) => {
              capTimer = setTimeout(resolve, 10000);
            });
            try {
              await Promise.race([
                this.page.goto('https://www.linkedin.com/feed/', {
                  waitUntil: 'load',
                  timeout: 60000,
                }),
                waitCap,
              ]);
            } finally {
              // Do not leave the cap timer pending once the race is decided.
              clearTimeout(capTimer);
            }
            currentUrl = this.page.url();
            navigationSucceeded = currentUrl.includes('linkedin.com');
          }
        } catch (fallbackError) {
          currentUrl = this.page.url();
          navigationSucceeded = currentUrl.includes('linkedin.com');
        }
      }

      if (currentUrl.startsWith('chrome-error://') || currentUrl.startsWith('chrome://error')) {
        let errorDetails = 'Unknown error';
        try {
          errorDetails = await this.page.evaluate(() => {
            const errorTextEl = document.querySelector('.error-code');
            return errorTextEl ? errorTextEl.textContent : document.body.innerText.substring(0, 200);
          });
        } catch (e) {
          // The error page could not be inspected; report its URL instead.
          errorDetails = currentUrl;
        }
        return {
          success: false,
          error: `Chrome error page encountered. This usually indicates a network connectivity issue from Browserless.io to LinkedIn. Error: ${errorDetails}. Possible causes: LinkedIn blocking cloud browser IPs, DNS resolution failure, or network timeout.`,
          debug: {
            currentUrl,
            errorDetails,
          },
        };
      }

      if (!navigationSucceeded || !currentUrl.includes('linkedin.com')) {
        let pageInfo = {};
        try {
          pageInfo = await this.page.evaluate(() => ({
            url: window.location.href,
            title: document.title,
            readyState: document.readyState,
            hasBody: !!document.body,
          }));
        } catch (e) {
          pageInfo = { error: e.message };
        }
        return {
          success: false,
          error: `Failed to navigate to LinkedIn. Current URL: ${currentUrl || 'unknown'}. Network timeout or connection issue from Browserless.io to LinkedIn.`,
          debug: {
            currentUrl,
            pageInfo,
            navigationError: navigationError ? navigationError.message : undefined,
            suggestion: 'LinkedIn may be blocking cloud browser connections. Try using a local browser or check Browserless.io network connectivity.',
          },
        };
      }

      // Short pause before inspecting the page, then re-read the URL in case it changed.
      await new Promise((resolve) => setTimeout(resolve, 2000));
      currentUrl = this.page.url();

      if (currentUrl.includes('linkedin.com/login') || currentUrl.includes('linkedin.com/checkpoint')) {
        return {
          success: false,
          error: `Authentication failed - redirected to login/checkpoint page. Current URL: ${currentUrl}. Cookie may be expired or invalid.`,
        };
      }

      const feedSelectors = [
        '[data-view-name="feed-full-update"]',
        '[data-testid="feed-update"]',
        '.feed-container',
        '.feed-shared-update-v2',
        'main[role="main"]',
        '.scaffold-finite-scroll__content',
      ];
      let loggedIn = false;
      let lastError = null;
      try {
        // Wait for whichever selector shows up first; the budget equals the
        // former one-after-another total (5s per selector).
        await this.page.waitForSelector(feedSelectors.join(', '), {
          timeout: 5000 * feedSelectors.length,
        });
        loggedIn = true;
      } catch (error) {
        lastError = error;
      }

      if (!loggedIn) {
        try {
          loggedIn = await this.page.evaluate(() => !!(
            document.querySelector('[data-control-name="nav.settings"]') ||
            document.querySelector('[data-testid="nav-settings"]') ||
            document.querySelector('.global-nav__me') ||
            document.querySelector('a[href*="/in/"]') ||
            document.querySelector('a[href*="/me/"]')
          ));
        } catch (e) {
          // Best-effort probe: a failure here just leaves loggedIn false.
          loggedIn = false;
        }
      }

      if (loggedIn) {
        const cookies = await this.page.cookies();
        return {
          success: true,
          message: 'Session authenticated',
          cookies: cookies.filter((c) => c.domain.includes('linkedin.com')),
        };
      }

      const pageContent = await this.page.evaluate(() => ({
        url: window.location.href,
        title: document.title,
      }));
      return {
        success: false,
        error: `Authentication verification failed. Page URL: ${pageContent.url}, Title: ${pageContent.title}. Cookie may be expired or LinkedIn may have changed their page structure.`,
        debug: {
          url: pageContent.url,
          title: pageContent.title,
          error: lastError?.message,
        },
      };
    } catch (error) {
      return {
        success: false,
        error: `Session setup failed: ${error.message}`,
        details: error.stack,
      };
    }
  }

  /**
   * Runs a LinkedIn people search and scrapes the visible result cards.
   *
   * Only `keywords`, `location` and `limit` are applied. `location` is sent
   * verbatim as the `geoUrn` query parameter. NOTE(review): callers may also
   * pass `industry`, `jobTitle` and `company`; they are currently not used to
   * build the search URL.
   *
   * @param {{keywords?: string, location?: string, limit?: number}} params
   * @returns {Promise<{success: boolean, profiles?: object[], error?: string}>}
   */
  async searchLeads(params) {
    if (!this.page) {
      return { success: false, error: NOT_CONNECTED_MESSAGE };
    }
    const { keywords, location, limit = 25 } = params ?? {};

    try {
      const searchUrl = new URL('https://www.linkedin.com/search/results/people/');
      if (keywords) searchUrl.searchParams.append('keywords', keywords);
      if (location) searchUrl.searchParams.append('geoUrn', location);

      await this.page.goto(searchUrl.toString(), { waitUntil: 'networkidle2', timeout: 30000 });
      await this.page.waitForSelector('.reusable-search__result-container', { timeout: 10000 });

      // Executed in the page: collects at most `maxResults` cards that have a name link.
      const profiles = await this.page.evaluate((maxResults) => {
        const cards = Array.from(document.querySelectorAll('.reusable-search__result-container'));
        const collected = [];
        for (const card of cards.slice(0, Math.max(0, maxResults))) {
          const nameLink = card.querySelector('.entity-result__title-text a');
          if (!nameLink) continue;
          const headlineEl = card.querySelector('.entity-result__primary-subtitle');
          const locationEl = card.querySelector('.entity-result__secondary-subtitle');
          collected.push({
            name: nameLink.innerText.trim(),
            profile_url: nameLink.href,
            headline: headlineEl ? headlineEl.innerText.trim() : '',
            location: locationEl ? locationEl.innerText.trim() : '',
          });
        }
        return collected;
      }, limit);

      // Randomised pause between actions.
      await this.randomDelay(2000, 5000);
      return { success: true, profiles };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }

  /**
   * Opens a profile page and extracts name, headline, about, location,
   * connection count, experience, education and up to ten skills.
   * The `activity` array is part of the result but is never filled here.
   *
   * @param {string} profileUrl
   * @returns {Promise<{success: boolean, data?: object, error?: string}>}
   */
  async analyzeProfile(profileUrl) {
    if (!this.page) {
      return { success: false, error: NOT_CONNECTED_MESSAGE };
    }
    try {
      await this.page.goto(profileUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await this.page.waitForSelector('.pv-top-card', { timeout: 10000 });

      // Executed in the page; must not reference anything from this module.
      const profileData = await this.page.evaluate(() => {
        const textOf = (root, selector) => root.querySelector(selector)?.innerText.trim();
        const data = {
          name: textOf(document, '.pv-top-card--list li:first-child') ?? '',
          headline: textOf(document, '.pv-top-card--list li:nth-child(2)') ?? '',
          about: textOf(document, '#about ~ .pvs-list__outer-container .visually-hidden') ?? '',
          location: textOf(document, '.pv-top-card--list-bullet li:first-child') ?? '',
          connections: 0,
          experience: [],
          education: [],
          skills: [],
          activity: [],
        };

        const connectionsText = textOf(document, '.pv-top-card--list-bullet li:last-child');
        if (connectionsText) {
          // Accept thousands separators ("1,234") as well as plain digits ("500+").
          const match = connectionsText.match(/(\d[\d,]*)/);
          if (match) data.connections = Number.parseInt(match[1].replace(/,/g, ''), 10);
        }

        document.querySelectorAll('#experience ~ .pvs-list__container .pvs-entity').forEach((item) => {
          const title = textOf(item, '.t-bold span');
          const company = textOf(item, '.t-14 span');
          const duration = textOf(item, '.t-black--light span');
          if (title) data.experience.push({ title, company, duration });
        });

        document.querySelectorAll('#education ~ .pvs-list__container .pvs-entity').forEach((item) => {
          const school = textOf(item, '.t-bold span');
          const degree = textOf(item, '.t-14 span');
          if (school) data.education.push({ school, degree });
        });

        // Only the first ten skill entries are considered.
        Array.from(document.querySelectorAll('#skills ~ .pvs-list__container .pvs-entity'))
          .slice(0, 10)
          .forEach((item) => {
            const skill = textOf(item, '.t-bold span');
            if (skill) data.skills.push(skill);
          });

        return data;
      });

      await this.randomDelay(1000, 3000);
      return { success: true, data: profileData };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }

  /**
   * Sends either a connection request with a note or a direct message.
   *
   * @param {string} profileUrl - Profile page to open first.
   * @param {string} messageText - Text typed into the note or message box.
   * @param {boolean} [isConnectionRequest=false] - true: connect with note; false: direct message.
   * @returns {Promise<{success: boolean, message?: string, error?: string}>}
   */
  async sendMessage(profileUrl, messageText, isConnectionRequest = false) {
    if (!this.page) {
      return { success: false, error: NOT_CONNECTED_MESSAGE };
    }
    try {
      await this.page.goto(profileUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await this.randomDelay(1000, 2000);

      if (isConnectionRequest) {
        const connectButton = await this.page.$('button[aria-label*="Connect"]');
        if (!connectButton) {
          return { success: false, error: 'Connect button not found' };
        }
        await connectButton.click();

        const addNoteButton = await this.page.waitForSelector('button[aria-label="Add a note"]', { timeout: 5000 });
        if (!addNoteButton) {
          return { success: false, error: 'Add a note button not found' };
        }
        await addNoteButton.click();

        await this.page.waitForSelector('textarea[name="message"]', { timeout: 5000 });
        await this.page.type('textarea[name="message"]', messageText, { delay: 50 });
        await this.randomDelay(500, 1000);

        const inviteButton = await this.page.$('button[aria-label="Send invitation"]');
        if (!inviteButton) {
          return { success: false, error: 'Send invitation button not found' };
        }
        await inviteButton.click();
        await this.randomDelay(2000, 3000);
        return { success: true, message: 'Connection request sent with message' };
      }

      const messageButton = await this.page.$('button[aria-label*="Message"]');
      if (!messageButton) {
        return { success: false, error: 'Message button not found - may not be connected' };
      }
      await messageButton.click();

      const messageBox = await this.page.waitForSelector('.msg-form__contenteditable', { timeout: 5000 });
      if (!messageBox) {
        return { success: false, error: 'Message box not found' };
      }
      await messageBox.click();
      await this.page.keyboard.type(messageText, { delay: 50 });
      await this.randomDelay(500, 1000);

      const sendButton = await this.page.$('button[type="submit"].msg-form__send-button');
      if (!sendButton) {
        return { success: false, error: 'Send button not found' };
      }
      await sendButton.click();
      await this.randomDelay(2000, 3000);
      return { success: true, message: 'Message sent successfully' };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }

  /**
   * Opens the messaging page and looks for a conversation whose profile link
   * points at `profileUrl`, reporting its unread badge if present.
   * Links are compared by host (without a leading "www.") and path (without
   * trailing slashes); query strings and fragments are ignored.
   *
   * @param {string} profileUrl
   * @returns {Promise<{success: boolean, data?: {hasUnread: boolean, unreadCount: number}, error?: string}>}
   */
  async checkForResponse(profileUrl) {
    if (!this.page) {
      return { success: false, error: NOT_CONNECTED_MESSAGE };
    }
    try {
      await this.page.goto('https://www.linkedin.com/messaging/', {
        waitUntil: 'networkidle2',
        timeout: 30000,
      });
      await this.page.waitForSelector('.msg-conversations-container', { timeout: 10000 });

      // Executed in the page; the comparison helper has to live inside the callback.
      const conversationState = await this.page.evaluate((targetUrl) => {
        const canonical = (href) => {
          try {
            const { hostname, pathname } = new URL(href, window.location.origin);
            return `${hostname.replace(/^www\./, '')}${pathname.replace(/\/+$/, '')}`;
          } catch {
            return String(href);
          }
        };
        const wanted = canonical(targetUrl);
        for (const item of document.querySelectorAll('.msg-conversation-listitem')) {
          const link = item.querySelector('a[href*="/in/"]');
          if (!link || canonical(link.href) !== wanted) continue;
          const badge = item.querySelector('.msg-conversation-card__unread-count');
          const parsedCount = badge ? Number.parseInt(badge.innerText, 10) : 0;
          return {
            hasUnread: !!badge,
            // A badge without a readable number is reported as 0 rather than NaN.
            unreadCount: Number.isNaN(parsedCount) ? 0 : parsedCount,
          };
        }
        return { hasUnread: false, unreadCount: 0 };
      }, profileUrl);

      return { success: true, data: conversationState };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }

  /**
   * Resolves after a random whole number of milliseconds between `min` and
   * `max` (both inclusive when min <= max).
   * @param {number} min
   * @param {number} max
   * @returns {Promise<void>}
   */
  randomDelay(min, max) {
    const delay = Math.floor(Math.random() * (max - min + 1)) + min;
    return new Promise((resolve) => setTimeout(resolve, delay));
  }

  /**
   * Detaches from the browser, if one is attached, and clears the instance
   * state so later calls report "not connected" instead of using a stale page.
   * State is cleared even when the underlying disconnect call throws.
   * @returns {Promise<void>}
   */
  async disconnect() {
    if (!this.browser) {
      return;
    }
    try {
      await this.browser.disconnect();
    } finally {
      this.browser = null;
      this.page = null;
      this.isConnected = false;
    }
  }
}
export default LinkedInAutomation;