browser-client.tsā¢28.3 kB
import { chromium, Browser, Page, BrowserContext } from 'playwright';
import { writeFileSync, readFileSync, existsSync } from 'fs';
import { join } from 'path';
export interface MediumArticle {
title: string;
content: string;
url?: string;
publishDate?: string;
tags?: string[];
claps?: number;
}
export interface PublishOptions {
title: string;
content: string;
tags?: string[];
isDraft?: boolean;
}
export class BrowserMediumClient {
private browser: Browser | null = null;
private context: BrowserContext | null = null;
private page: Page | null = null;
private sessionPath = join(process.cwd(), 'medium-session.json');
async initialize(): Promise<void> {
this.browser = await chromium.launch({
headless: false, // Keep visible for login
slowMo: 100, // Slow down for reliability
args: [
'--no-first-run',
'--no-default-browser-check',
'--disable-blink-features=AutomationControlled',
'--disable-features=VizDisplayCompositor',
'--disable-web-security',
'--disable-features=TranslateUI',
'--disable-ipc-flooding-protection',
'--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
]
});
// Load existing session if available
const contextOptions: any = {
viewport: { width: 1280, height: 720 },
userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
extraHTTPHeaders: {
'Accept-Language': 'en-US,en;q=0.9'
}
};
if (existsSync(this.sessionPath)) {
try {
const sessionData = JSON.parse(readFileSync(this.sessionPath, 'utf8'));
contextOptions.storageState = sessionData;
} catch (error) {
console.error('Failed to load session:', error);
}
}
this.context = await this.browser.newContext(contextOptions);
// Add script to remove webdriver property
await this.context.addInitScript(() => {
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined,
});
// Remove automation indicators
delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Array;
delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Promise;
delete (window as any).cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
});
this.page = await this.context.newPage();
}
async ensureLoggedIn(): Promise<boolean> {
if (!this.page) throw new Error('Browser not initialized');
// First check if we have a saved session
if (existsSync(this.sessionPath)) {
console.error('š¾ Found existing session file, testing login status...');
}
// Try a simpler page first to check login status
await this.page.goto('https://medium.com');
await this.page.waitForLoadState('networkidle');
// Check if we're logged in by looking for user-specific elements
try {
// Try multiple selectors for logged-in state
const loginSelectors = [
'[data-testid="headerUserButton"]',
'.avatar',
'[data-testid="user-menu"]',
'button[aria-label*="user"]',
'img[alt*="avatar"]',
'[data-testid="write-button"]', // Write button only appears when logged in
'a[href="/me/stories"]'
];
let isLoggedIn = false;
for (const selector of loginSelectors) {
try {
await this.page.waitForSelector(selector, { timeout: 3000 });
console.error(`ā
Login detected using selector: ${selector}`);
isLoggedIn = true;
break;
} catch {
// Try next selector
}
}
if (isLoggedIn) {
console.error('ā
Already logged in to Medium');
await this.saveSession();
return true;
} else {
throw new Error('Not logged in');
}
} catch {
console.error('ā Not logged in. Please log in manually...');
// Navigate to login page
await this.page.goto('https://medium.com/m/signin');
// Wait for user to complete login
console.error('ā³ Waiting for you to complete login in the browser...');
console.error('');
console.error('š LOGIN INSTRUCTIONS:');
console.error(' 1. In the opened browser, choose "Sign in with email"');
console.error(' 2. Use your Medium email/password (avoid Google login if possible)');
console.error(' 3. If you must use Google login:');
console.error(' - Try clicking "Sign in with Google"');
console.error(' - If blocked, manually navigate to medium.com in a regular browser');
console.error(' - Login there first, then come back to this automated browser');
console.error(' 4. Complete any 2FA if prompted');
console.error(' 5. The script will continue automatically once logged in...');
console.error('');
// Wait for successful login (user button appears)
try {
await this.page.waitForSelector('[data-testid="headerUserButton"], .avatar, [data-testid="user-menu"]', { timeout: 300000 }); // 5 minutes
console.error('ā
Login successful!');
await this.saveSession();
return true;
} catch (error) {
console.error('ā Login timeout. Please try again.');
return false;
}
}
}
async saveSession(): Promise<void> {
if (!this.context) return;
try {
const sessionData = await this.context.storageState();
writeFileSync(this.sessionPath, JSON.stringify(sessionData, null, 2));
console.error('š¾ Session saved for future use');
} catch (error) {
console.error('Failed to save session:', error);
}
}
async getUserArticles(): Promise<MediumArticle[]> {
if (!this.page) throw new Error('Browser not initialized');
await this.ensureLoggedIn();
// Navigate to user's stories
await this.page.goto('https://medium.com/me/stories/public');
await this.page.waitForLoadState('networkidle');
// Extract article information
const articles = await this.page.evaluate(() => {
const articleElements = document.querySelectorAll('[data-testid="story-preview"]');
const articles: MediumArticle[] = [];
articleElements.forEach(element => {
try {
const titleElement = element.querySelector('h3, h2, [data-testid="story-title"]');
const linkElement = element.querySelector('a[href*="/"]');
const dateElement = element.querySelector('[data-testid="story-publish-date"], time');
if (titleElement && linkElement) {
articles.push({
title: titleElement.textContent?.trim() || '',
content: '', // We'll need to fetch full content separately
url: (linkElement as HTMLAnchorElement).href,
publishDate: dateElement?.textContent?.trim() || '',
tags: []
});
}
} catch (error) {
console.error('Error extracting article:', error);
}
});
return articles;
});
return articles;
}
async getArticleContent(url: string, requireLogin: boolean = true): Promise<string> {
if (!this.page) throw new Error('Browser not initialized');
console.error(`š Fetching article content from: ${url}`);
// Check if we have a saved session first
let isLoggedIn = false;
if (existsSync(this.sessionPath)) {
console.error('š¾ Found saved session, checking if still valid...');
// Quick check: try to access Medium homepage and look for login indicators
try {
await this.page.goto('https://medium.com');
await this.page.waitForLoadState('networkidle');
// Try to find login indicators quickly
const loginIndicators = [
'[data-testid="headerUserButton"]',
'[data-testid="write-button"]',
'a[href="/me/stories"]'
];
for (const selector of loginIndicators) {
try {
await this.page.waitForSelector(selector, { timeout: 2000 });
console.error('ā
Session is still valid, user is logged in');
isLoggedIn = true;
break;
} catch {
// Try next selector
}
}
} catch (error) {
console.error('ā ļø Could not verify session validity');
}
}
if (!isLoggedIn && requireLogin) {
console.error('š Not logged in. Attempting login for full content access...');
isLoggedIn = await this.ensureLoggedIn();
} else if (!isLoggedIn && !requireLogin) {
console.error('š Skipping login as requested. Will get preview content only.');
}
if (!isLoggedIn) {
console.error('ā ļø Warning: Login failed or skipped. You may only get partial content (preview).');
} else {
console.error('ā
Ready to fetch full article content with login session');
}
try {
console.error(`š Navigating to article: ${url}`);
await this.page.goto(url, { waitUntil: 'networkidle' });
// Wait a bit more for dynamic content
await this.page.waitForTimeout(3000);
console.error('š Page loaded, extracting content...');
// Extract article content with multiple strategies
const content = await this.page.evaluate(() => {
const log = (...args: any[]) => {
// Silent in browser context to avoid JSON interference
};
log('š Starting content extraction...');
// Strategy 1: Try modern Medium article selectors
const modernSelectors = [
'article section p',
'article div[data-testid="story-content"] p',
'[data-testid="story-content"] p',
'article section div p',
'article p'
];
// Strategy 2: Try classic Medium selectors
const classicSelectors = [
'.postArticle-content p',
'.section-content p',
'.graf--p',
'.postArticle p'
];
// Strategy 3: Generic content selectors
const genericSelectors = [
'main p',
'[role="main"] p',
'.story p',
'.post p'
];
const allSelectors = [...modernSelectors, ...classicSelectors, ...genericSelectors];
let extractedContent = '';
// Try each selector strategy
for (const selector of allSelectors) {
const elements = document.querySelectorAll(selector);
log(`šÆ Selector "${selector}" found ${elements.length} paragraphs`);
if (elements.length > 3) { // Need at least a few paragraphs for meaningful content
const paragraphs: string[] = [];
elements.forEach((element, index) => {
const text = element.textContent?.trim();
if (text && text.length > 20) { // Filter out very short paragraphs
paragraphs.push(text);
}
});
if (paragraphs.length > 2) { // Need meaningful content
extractedContent = paragraphs.join('\n\n');
log(`ā
Successfully extracted ${paragraphs.length} paragraphs using: ${selector}`);
break;
}
}
}
// Fallback: Try to get any substantial text content
if (!extractedContent) {
log('š Trying fallback content extraction...');
const fallbackSelectors = [
'article',
'main',
'[role="main"]',
'.story',
'.post'
];
for (const selector of fallbackSelectors) {
const element = document.querySelector(selector);
if (element) {
const text = element.textContent?.trim();
if (text && text.length > 200) {
// Clean up the text a bit
extractedContent = text
.replace(/\s+/g, ' ') // Normalize whitespace
.replace(/(.{100})/g, '$1\n\n') // Add paragraph breaks
.substring(0, 5000); // Limit length
log(`ā
Fallback extraction successful using: ${selector}`);
break;
}
}
}
}
// Debug info if still no content
if (!extractedContent) {
log('ā No content found. Page analysis:');
log('Page title:', document.title);
log('Page URL:', window.location.href);
log('Body text length:', document.body.textContent?.length || 0);
// Check if we hit a paywall or login requirement
const paywallIndicators = [
'sign up',
'subscribe',
'member-only',
'paywall',
'premium',
'upgrade'
];
const pageText = document.body.textContent?.toLowerCase() || '';
const foundIndicators = paywallIndicators.filter(indicator =>
pageText.includes(indicator)
);
if (foundIndicators.length > 0) {
log('š« Possible paywall detected:', foundIndicators);
return `Content may be behind a paywall or require login. Found indicators: ${foundIndicators.join(', ')}`;
}
return 'Unable to extract article content. The article may be behind a paywall, require login, or use an unsupported layout.';
}
// Check if we might be getting only a preview (very short content)
if (extractedContent.length < 500) {
log('ā ļø Warning: Content seems short, might be preview only');
// Look for "continue reading" or member-only indicators
const previewIndicators = [
'continue reading',
'read more',
'member-only story',
'this story is for members only',
'become a member',
'sign up to continue',
'subscribe to read'
];
const pageText = document.body.textContent?.toLowerCase() || '';
const foundPreviewIndicators = previewIndicators.filter(indicator =>
pageText.includes(indicator)
);
if (foundPreviewIndicators.length > 0) {
log('š Preview-only content detected:', foundPreviewIndicators);
extractedContent = `[PREVIEW ONLY - Login required for full content]\n\n${extractedContent}\n\n[This appears to be only a preview. The full article requires Medium membership or login. Found indicators: ${foundPreviewIndicators.join(', ')}]`;
}
}
log(`š Final content length: ${extractedContent.length} characters`);
return extractedContent;
});
console.error(`ā
Content extraction completed. Length: ${content.length} characters`);
return content;
} catch (error) {
console.error('ā Error fetching article content:', error);
throw new Error(`Failed to fetch article content: ${error}`);
}
}
async publishArticle(options: PublishOptions): Promise<{ success: boolean; url?: string; error?: string }> {
if (!this.page) throw new Error('Browser not initialized');
await this.ensureLoggedIn();
try {
// Navigate to the new story page
await this.page.goto('https://medium.com/new-story');
await this.page.waitForLoadState('networkidle');
// Wait for the editor to load
await this.page.waitForSelector('[data-testid="richTextEditor"]', { timeout: 10000 });
// Add title
const titleSelector = '[data-testid="richTextEditor"] h1, [placeholder*="Title"], .graf--title';
await this.page.waitForSelector(titleSelector);
await this.page.click(titleSelector);
await this.page.fill(titleSelector, options.title);
// Add content
const contentSelector = '[data-testid="richTextEditor"] p, .graf--p';
await this.page.waitForSelector(contentSelector);
await this.page.click(contentSelector);
// Split content into paragraphs and add them
const paragraphs = options.content.split('\n\n').filter(p => p.trim());
for (let i = 0; i < paragraphs.length; i++) {
if (i > 0) {
await this.page.keyboard.press('Enter');
await this.page.keyboard.press('Enter');
}
await this.page.keyboard.type(paragraphs[i]);
}
// Add tags if provided
if (options.tags && options.tags.length > 0) {
// Look for publish button to access settings
const publishButton = await this.page.locator('button:has-text("Publish"), [data-testid="publish-button"]').first();
if (await publishButton.isVisible()) {
await publishButton.click();
// Wait for publish modal and add tags
await this.page.waitForSelector('[data-testid="tag-input"], input[placeholder*="tag"]', { timeout: 5000 });
for (const tag of options.tags) {
await this.page.fill('[data-testid="tag-input"], input[placeholder*="tag"]', tag);
await this.page.keyboard.press('Enter');
}
}
}
if (options.isDraft) {
// Save as draft
const saveButton = await this.page.locator('button:has-text("Save draft"), [data-testid="save-draft"]').first();
if (await saveButton.isVisible()) {
await saveButton.click();
}
return { success: true };
} else {
// Publish the article
const finalPublishButton = await this.page.locator('button:has-text("Publish now"), [data-testid="publish-now"]').first();
if (await finalPublishButton.isVisible()) {
await finalPublishButton.click();
// Wait for success and get URL
await this.page.waitForLoadState('networkidle');
const currentUrl = this.page.url();
return { success: true, url: currentUrl };
}
}
return { success: false, error: 'Could not find publish button' };
} catch (error) {
return { success: false, error: `Publishing failed: ${error}` };
}
}
async searchMediumArticles(keywords: string[]): Promise<MediumArticle[]> {
if (!this.page) throw new Error('Browser not initialized');
const searchQuery = keywords.join(' ');
console.error(`š Searching Medium for: "${searchQuery}"`);
// Try to use saved session if available (but don't force login for search)
if (existsSync(this.sessionPath)) {
console.error('š¾ Using saved session for search...');
}
await this.page.goto(`https://medium.com/search?q=${encodeURIComponent(searchQuery)}`);
await this.page.waitForLoadState('networkidle');
// Wait a bit more for dynamic content to load
await this.page.waitForTimeout(2000);
console.error('š Current page URL:', this.page.url());
const articles = await this.page.evaluate((searchQuery) => {
// Remove console.log from browser context to avoid JSON interference
const log = (...args: any[]) => {
// Silent in browser context
};
log('š Starting search extraction for:', searchQuery);
// Try multiple selectors for different Medium layouts
const possibleSelectors = [
// Modern Medium selectors
'article',
'[data-testid="story-preview"]',
'[data-testid="story-card"]',
'.js-postListItem',
'.postArticle',
'.streamItem',
'.js-streamItem',
// Fallback selectors
'div[role="article"]',
'.story-preview',
'.post-preview'
];
const articles: any[] = [];
let elementsFound = 0;
for (const selector of possibleSelectors) {
const elements = document.querySelectorAll(selector);
log(`šÆ Selector "${selector}" found ${elements.length} elements`);
if (elements.length > 0) {
elementsFound += elements.length;
elements.forEach((element, index) => {
try {
// Try multiple title selectors
const titleSelectors = [
'h1', 'h2', 'h3', 'h4',
'[data-testid="story-title"]',
'.graf--title',
'.story-title',
'.post-title',
'a[data-action="show-post"]'
];
let titleElement = null;
let titleText = '';
for (const titleSel of titleSelectors) {
titleElement = element.querySelector(titleSel);
if (titleElement && titleElement.textContent?.trim()) {
titleText = titleElement.textContent.trim();
break;
}
}
// Try multiple approaches to find the actual article URL
let linkUrl = '';
// Strategy 1: Look for data-href attribute (most reliable for articles)
const dataHrefElement = element.querySelector('[data-href]');
if (dataHrefElement) {
const dataHref = dataHrefElement.getAttribute('data-href');
if (dataHref && dataHref.includes('medium.com') && dataHref.includes('-')) {
linkUrl = dataHref;
}
}
// Strategy 2: Look for direct article links if data-href didn't work
if (!linkUrl) {
const linkSelectors = [
'a[href*="medium.com"][href*="-"]', // Article URLs usually have dashes
'a[href*="/@"][href*="-"]', // Author articles with dashes
'a[href*="medium.com"]',
'a[href*="/"]',
'a'
];
for (const linkSel of linkSelectors) {
const linkElement = element.querySelector(linkSel);
if (linkElement && (linkElement as HTMLAnchorElement).href) {
let href = (linkElement as HTMLAnchorElement).href;
// Clean up and validate the URL
if (href) {
// If it's a redirect URL, extract the actual article URL
if (href.includes('redirect=')) {
const redirectMatch = href.match(/redirect=([^&]+)/);
if (redirectMatch) {
href = decodeURIComponent(redirectMatch[1]);
}
}
// Check if it's a valid article URL (prioritize actual articles)
const isValidArticleUrl = (
href.includes('medium.com') &&
!href.includes('/search?') && // Don't include search pages themselves
!href.includes('/signin') &&
!href.includes('/bookmark') &&
!href.includes('/signup') &&
// Prioritize URLs that look like actual articles
(href.includes('-') || // Article slugs usually have dashes
href.includes('/@') ||
href.match(/\/[a-f0-9]{8,}/)) // Article IDs (8+ chars)
);
if (isValidArticleUrl) {
// Clean the URL but preserve the path
if (href.includes('?')) {
// Extract the actual article URL from redirect parameters
if (href.includes('redirect=')) {
const redirectMatch = href.match(/redirect=([^&]+)/);
if (redirectMatch) {
linkUrl = decodeURIComponent(redirectMatch[1]);
}
} else {
// Just remove query parameters for cleaner URLs
linkUrl = href.split('?')[0];
}
} else {
linkUrl = href;
}
break;
}
}
}
}
}
// Try to get author info
const authorSelectors = [
'[data-testid="story-author"]',
'.postMetaInline-authorLockup',
'.story-author',
'.author-name'
];
let authorText = '';
for (const authorSel of authorSelectors) {
const authorElement = element.querySelector(authorSel);
if (authorElement && authorElement.textContent?.trim()) {
authorText = authorElement.textContent.trim();
break;
}
}
// Try to get snippet/preview
const snippetSelectors = [
'.story-excerpt',
'.post-excerpt',
'.graf--p',
'p'
];
let snippetText = '';
for (const snippetSel of snippetSelectors) {
const snippetElement = element.querySelector(snippetSel);
if (snippetElement && snippetElement.textContent?.trim()) {
snippetText = snippetElement.textContent.trim().substring(0, 200);
break;
}
}
log(`š Article ${index + 1}:`, {
title: titleText,
url: linkUrl,
author: authorText,
snippet: snippetText.substring(0, 50) + '...'
});
if (titleText && linkUrl) {
articles.push({
title: titleText,
content: snippetText,
url: linkUrl,
publishDate: '',
tags: [],
claps: 0
});
}
} catch (error) {
log('ā Error extracting article:', error);
}
});
// If we found articles with this selector, we can break
if (articles.length > 0) {
log(`ā
Successfully extracted ${articles.length} articles using selector: ${selector}`);
break;
}
}
}
log(`š Total elements found: ${elementsFound}, Articles extracted: ${articles.length}`);
// If no articles found, let's debug what's on the page
if (articles.length === 0) {
log('š Debug: Page structure analysis');
log('Page title:', document.title);
log('Page text content preview:', document.body.textContent?.substring(0, 500));
// Look for any text that might indicate search results
const searchResultIndicators = [
'No stories found',
'No results',
'Try different keywords',
'stories found',
'results for'
];
const pageText = document.body.textContent?.toLowerCase() || '';
for (const indicator of searchResultIndicators) {
if (pageText.includes(indicator.toLowerCase())) {
log(`š Found indicator: "${indicator}"`);
}
}
}
return articles;
}, searchQuery);
console.error(`š Search completed. Found ${articles.length} articles`);
return articles;
}
async close(): Promise<void> {
if (this.browser) {
await this.browser.close();
this.browser = null;
this.context = null;
this.page = null;
}
}
}