import puppeteer, { Browser, Page } from 'puppeteer';
import { execSync } from 'child_process';
import { platform } from 'os';
/**
 * Options for {@link ChromeBrowserManager.getPageText}.
 *
 * Every field is optional; an empty object extracts text from whatever page
 * is currently loaded.
 */
export interface PageTextOptions {
  /** Page to open before extracting. When omitted, no navigation happens. */
  url?: string;

  /** CSS selector limiting extraction to the matching elements. */
  selector?: string;

  /** Selector to wait for after navigation; only forwarded when `url` is set. */
  waitForSelector?: string;

  /** Timeout in milliseconds forwarded to the navigation step. */
  timeout?: number;

  /** Whether elements hidden through `display` / `visibility` are kept. */
  includeHidden?: boolean;

  /** Which DOM text property is read. */
  textType?: 'all' | 'visible' | 'innerText' | 'textContent';
}
/**
 * Options for {@link ChromeBrowserManager.getCurrentPageText}, which reads
 * text from the already-loaded page without navigating.
 */
export interface CurrentPageTextOptions {
  /** CSS selector limiting extraction to the matching elements. */
  selector?: string;

  /** Whether elements hidden through `display` / `visibility` are kept. */
  includeHidden?: boolean;

  /** Which DOM text property is read. */
  textType?: 'all' | 'visible' | 'innerText' | 'textContent';
}
/** Options for {@link ChromeBrowserManager.navigateToPage}. */
export interface NavigateOptions {
  /** Address to load. */
  url: string;

  /** Selector that must appear after the page load before the call returns. */
  waitForSelector?: string;

  /** Timeout in milliseconds for both the page load and the selector wait. */
  timeout?: number;
}
/**
 * Outcome of a text-extraction call.
 *
 * On failure only `success` and `error` are populated; on success the text,
 * page identity and extraction metadata are filled in.
 */
export interface PageTextResult {
  /** `true` when the extraction completed. */
  success: boolean;

  /** Extracted text. */
  text?: string;

  /** URL of the page the text was read from. */
  url?: string;

  /** Title of the page the text was read from. */
  title?: string;

  /** Human-readable failure description. */
  error?: string;

  /** Details about the extraction itself. */
  metadata?: {
    /** Length of `text`. */
    characterCount: number;
    /** Number of whitespace-separated tokens in `text`. */
    wordCount: number;
    /** ISO-8601 timestamp of the extraction. */
    extractedAt: string;
    /** Selector that was used, if any. */
    selector?: string;
    /** Text type that was applied. */
    textType: string;
  };
}
/**
 * Outcome of {@link ChromeBrowserManager.getPageInfo}: identity of the
 * current page plus the browser environment it is rendered in.
 */
export interface PageInfoResult {
  /** `true` when the page could be inspected. */
  success: boolean;

  /** Current page URL. */
  url?: string;

  /** Current page title. */
  title?: string;

  /** Human-readable failure description. */
  error?: string;

  /** Browser environment details. */
  metadata?: {
    /** `navigator.userAgent` as seen by the page. */
    userAgent: string;
    /** Viewport dimensions. */
    viewport: { width: number; height: number };
    /** ISO-8601 timestamp of the inspection. */
    timestamp: string;
  };
}
/**
 * Outcome of {@link ChromeBrowserManager.navigateToPage}: where the browser
 * ended up and the environment the page is rendered in.
 */
export interface NavigateResult {
  /** `true` when navigation (and the optional selector wait) completed. */
  success: boolean;

  /** URL reported by the page after navigation. */
  url?: string;

  /** Title of the loaded page. */
  title?: string;

  /** Human-readable failure description. */
  error?: string;

  /** Browser environment details. */
  metadata?: {
    /** `navigator.userAgent` as seen by the page. */
    userAgent: string;
    /** Viewport dimensions. */
    viewport: { width: number; height: number };
    /** ISO-8601 timestamp taken after navigation. */
    timestamp: string;
  };
}
/** Outcome of {@link ChromeBrowserManager.closeBrowser}. */
export interface CloseResult {
  /** `true` when the browser was released without error. */
  success: boolean;

  /** Description of what was done; set on success. */
  message?: string;

  /** Human-readable failure description; set on failure. */
  error?: string;
}
/**
 * Controls a single Chrome tab through Puppeteer.
 *
 * The manager first tries to attach to a Chrome instance that already exposes
 * the DevTools protocol on port 9222 and only launches its own (headful)
 * browser when that fails. All public methods report failures through the
 * `success` / `error` fields of their result instead of throwing.
 */
export class ChromeBrowserManager {
  /** DevTools endpoint probed before a new browser is launched. */
  private static readonly DEBUG_URL = 'http://localhost:9222';

  /** Timeout (ms) for navigation and selector waits when the caller gives none. */
  private static readonly DEFAULT_TIMEOUT_MS = 30000;

  /** Viewport forced onto the managed tab and reported when Puppeteer has none. */
  private static readonly VIEWPORT = { width: 1920, height: 1080 };

  /** User agent forced onto the managed tab. */
  private static readonly USER_AGENT =
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

  private browser?: Browser;
  private page?: Page;

  /** True when `browser` was attached to rather than launched by this manager. */
  private isConnectedToExisting = false;

  /**
   * True when `page` was opened by this manager. A tab adopted from the
   * user's own Chrome session is never closed by {@link closeBrowser}.
   */
  private ownsPage = false;

  constructor() {
    // Intentionally empty: the browser is acquired lazily on first use.
  }

  /** Formats an unknown thrown value for inclusion in an error message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Makes sure a usable browser and tab are available.
   *
   * Handles are re-validated on every call: a tab that has been closed is
   * replaced, and a browser that has disconnected (see the `disconnected`
   * listener in {@link acquireBrowser}) is re-acquired.
   *
   * @throws When no browser can be connected to or launched, or when the tab
   *   cannot be configured.
   */
  private async ensureBrowser(): Promise<void> {
    if (this.browser && this.page && !this.page.isClosed()) {
      return;
    }

    if (!this.browser) {
      this.browser = await this.acquireBrowser();
    }
    const browser = this.browser;

    // When attached to the user's Chrome, reuse its first tab if there is
    // one; a browser we launched ourselves always gets a fresh tab.
    const adopted = this.isConnectedToExisting ? (await browser.pages())[0] : undefined;
    const page = adopted ?? (await browser.newPage());
    this.ownsPage = adopted === undefined;
    this.page = page;

    // Page configuration
    await page.setViewport({ ...ChromeBrowserManager.VIEWPORT });
    await page.setUserAgent(ChromeBrowserManager.USER_AGENT);
  }

  /**
   * Attaches to an existing Chrome instance, or launches a new one when the
   * connection attempt fails, and records which of the two happened.
   *
   * @returns The connected browser.
   * @throws When the fallback launch fails.
   */
  private async acquireBrowser(): Promise<Browser> {
    let browser: Browser;
    try {
      // First try to attach to an already running Chrome instance.
      console.log('🔍 尝试连接到现有Chrome实例...');
      browser = await puppeteer.connect({
        browserURL: ChromeBrowserManager.DEBUG_URL,
        defaultViewport: null
      });
      this.isConnectedToExisting = true;
      console.log('✅ 成功连接到现有Chrome实例');
    } catch {
      console.log('⚠️ 无法连接到现有Chrome实例,启动新的Chrome浏览器...');
      // Attaching failed: start a browser of our own.
      // NOTE(review): `--no-sandbox` and `--disable-web-security` weaken the
      // browser's isolation; confirm they are really required before this is
      // pointed at untrusted pages.
      browser = await puppeteer.launch({
        headless: false, // show the browser window
        defaultViewport: null,
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-web-security',
          '--disable-features=VizDisplayCompositor',
          '--remote-debugging-port=9222', // enable remote debugging
          '--start-maximized'
        ]
      });
      this.isConnectedToExisting = false;
      console.log('✅ 成功启动新的Chrome浏览器');
    }

    // Forget the handles as soon as the session goes away (Chrome closed or
    // crashed) so the next call re-acquires a browser instead of failing on
    // a dead connection. The identity check ignores events from a browser
    // that has already been replaced or released.
    browser.on('disconnected', () => {
      if (this.browser === browser) {
        this.browser = undefined;
        this.page = undefined;
        this.ownsPage = false;
      }
    });
    return browser;
  }

  /**
   * Returns the managed tab, acquiring the browser first if necessary.
   *
   * @throws When the browser cannot be acquired or no tab is available.
   */
  private async requirePage(): Promise<Page> {
    await this.ensureBrowser();
    if (!this.page) {
      throw new Error('页面实例未初始化');
    }
    return this.page;
  }

  /** Collects URL, title, user agent and viewport of `page`. */
  private async describePage(page: Page): Promise<{
    url: string;
    title: string;
    metadata: {
      userAgent: string;
      viewport: { width: number; height: number };
      timestamp: string;
    };
  }> {
    const url = page.url();
    const title = await page.title();
    const userAgent = await page.evaluate(() => navigator.userAgent);
    const viewport = page.viewport();
    return {
      url,
      title,
      metadata: {
        userAgent,
        viewport: viewport ?? { ...ChromeBrowserManager.VIEWPORT },
        timestamp: new Date().toISOString()
      }
    };
  }

  /**
   * Starts Chrome through the system shell with remote debugging enabled on
   * port 9222, for cases where the user wants to drive the browser manually
   * and attach to it later.
   *
   * NOTE(review): on non-Windows platforms the command is backgrounded, so a
   * wrong Chrome path is not detected here and `success` only means the
   * command was issued.
   *
   * @returns `success: true` after the command was issued and a fixed
   *   3-second start-up delay has elapsed; `success: false` with `error`
   *   when the platform is unsupported or the command fails.
   */
  async launchChromeManually(): Promise<{ success: boolean; message?: string; error?: string }> {
    try {
      const chromePath = this.getChromePath();
      console.log('🚀 手动启动Chrome浏览器...');

      // Launch Chrome with a system command.
      const command = `"${chromePath}" --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug`;
      if (platform() === 'win32') {
        execSync(`start "" ${command}`, { stdio: 'ignore' });
      } else {
        execSync(`${command} &`, { stdio: 'ignore' });
      }

      // Give Chrome time to start.
      await new Promise(resolve => setTimeout(resolve, 3000));

      return {
        success: true,
        message: 'Chrome浏览器已手动启动,可以通过远程调试端口9222连接'
      };
    } catch (error) {
      return {
        success: false,
        error: `启动Chrome失败: ${ChromeBrowserManager.describeError(error)}`
      };
    }
  }

  /**
   * Returns the default Chrome executable path for the current platform.
   *
   * @throws On platforms other than macOS, Windows and Linux.
   */
  private getChromePath(): string {
    const os = platform();
    switch (os) {
      case 'darwin': // macOS
        return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
      case 'win32': // Windows
        return 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';
      case 'linux': // Linux
        return '/usr/bin/google-chrome';
      default:
        throw new Error(`不支持的操作系统: ${os}`);
    }
  }

  /**
   * Loads `options.url` in the managed tab and optionally waits for a
   * selector to appear.
   *
   * @param options Target URL, optional selector to wait for, and timeout in
   *   milliseconds (default 30000). An explicit `timeout` of `0` is passed
   *   through unchanged, which Puppeteer treats as "no timeout".
   * @returns Page identity and environment on success; `success: false` with
   *   `error` otherwise.
   */
  async navigateToPage(options: NavigateOptions): Promise<NavigateResult> {
    try {
      const page = await this.requirePage();
      // `??` rather than `||` so that a deliberate 0 is not replaced.
      const timeout = options.timeout ?? ChromeBrowserManager.DEFAULT_TIMEOUT_MS;

      console.log(`🌐 导航到: ${options.url}`);
      await page.goto(options.url, { waitUntil: 'networkidle2', timeout });

      if (options.waitForSelector) {
        console.log(`⏳ 等待元素: ${options.waitForSelector}`);
        await page.waitForSelector(options.waitForSelector, { timeout });
      }

      return { success: true, ...(await this.describePage(page)) };
    } catch (error) {
      return {
        success: false,
        error: `导航失败: ${ChromeBrowserManager.describeError(error)}`
      };
    }
  }

  /**
   * Extracts page text, navigating to `options.url` first when one is given.
   *
   * @param options Navigation and extraction options; see
   *   {@link getCurrentPageText} for the extraction semantics.
   * @returns The extraction result, or the navigation error when navigation
   *   failed.
   */
  async getPageText(options: PageTextOptions): Promise<PageTextResult> {
    try {
      if (options.url) {
        const navigateResult = await this.navigateToPage({
          url: options.url,
          waitForSelector: options.waitForSelector,
          timeout: options.timeout
        });
        if (!navigateResult.success) {
          return { success: false, error: navigateResult.error };
        }
      }

      return await this.getCurrentPageText({
        selector: options.selector,
        includeHidden: options.includeHidden,
        textType: options.textType
      });
    } catch (error) {
      return {
        success: false,
        error: `获取页面文本失败: ${ChromeBrowserManager.describeError(error)}`
      };
    }
  }

  /**
   * Extracts text from the currently loaded page.
   *
   * `textType` values `'textContent'` and `'all'` read `textContent`;
   * `'innerText'` and `'visible'` (the default) read `innerText`. Unless
   * `includeHidden` is set, elements whose computed `display` is `none` or
   * whose `visibility` is `hidden` are excluded: with a selector such
   * elements are skipped, without one they are hidden for the duration of
   * the read and then restored, so the page is left as it was found.
   *
   * @param options Optional selector, hidden-element handling and text type.
   * @returns The text with character/word counts, or `success: false` with
   *   `error`.
   */
  async getCurrentPageText(options: CurrentPageTextOptions = {}): Promise<PageTextResult> {
    try {
      const page = await this.requirePage();
      const { selector, includeHidden = false, textType = 'visible' } = options;
      console.log(`📝 提取文本 - 选择器: ${selector || '全页面'}, 类型: ${textType}`);

      // The callbacks below are serialised and run inside the page, so they
      // must not close over anything and are kept to plain constructs
      // (forEach, no destructuring) that need no compiler helpers.
      let text: string;
      if (selector) {
        // Text of the elements matching the selector, one per line.
        text = await page.evaluate(
          (sel, type, hidden) => {
            const useTextContent = type === 'textContent' || type === 'all';
            let result = '';
            document.querySelectorAll(sel).forEach(element => {
              if (!hidden && element instanceof HTMLElement) {
                const style = window.getComputedStyle(element);
                if (style.display === 'none' || style.visibility === 'hidden') {
                  return;
                }
              }
              const piece = useTextContent
                ? element.textContent
                : (element as HTMLElement).innerText;
              result += (piece || '') + '\n';
            });
            return result.trim();
          },
          selector,
          textType,
          includeHidden
        );
      } else {
        // Text of the whole page.
        text = await page.evaluate(
          (type, hidden) => {
            const body = document.body;
            const touched: Array<{
              element: HTMLElement;
              hadStyleAttr: boolean;
              value: string;
              priority: string;
            }> = [];
            try {
              if (!hidden) {
                // Pass 1 only reads computed styles, pass 2 only writes, so
                // the browser does not recompute styles after every write.
                const toHide: HTMLElement[] = [];
                body.querySelectorAll('*').forEach(element => {
                  if (element instanceof HTMLElement) {
                    const style = window.getComputedStyle(element);
                    if (style.display === 'none' || style.visibility === 'hidden') {
                      toHide.push(element);
                    }
                  }
                });
                toHide.forEach(element => {
                  touched.push({
                    element,
                    hadStyleAttr: element.hasAttribute('style'),
                    value: element.style.getPropertyValue('display'),
                    priority: element.style.getPropertyPriority('display')
                  });
                  element.style.display = 'none';
                });
              }
              const useTextContent = type === 'textContent' || type === 'all';
              return (useTextContent ? body.textContent : body.innerText) || '';
            } finally {
              // Put every inline `display` back so extraction leaves no trace
              // in the live page.
              touched.forEach(entry => {
                if (entry.value) {
                  entry.element.style.setProperty('display', entry.value, entry.priority);
                } else {
                  entry.element.style.removeProperty('display');
                }
                if (!entry.hadStyleAttr) {
                  entry.element.removeAttribute('style');
                }
              });
            }
          },
          textType,
          includeHidden
        );
      }

      const url = page.url();
      const title = await page.title();
      return {
        success: true,
        text,
        url,
        title,
        metadata: {
          characterCount: text.length,
          wordCount: text.split(/\s+/).filter(word => word.length > 0).length,
          extractedAt: new Date().toISOString(),
          selector,
          textType
        }
      };
    } catch (error) {
      return {
        success: false,
        error: `获取当前页面文本失败: ${ChromeBrowserManager.describeError(error)}`
      };
    }
  }

  /**
   * Reports URL, title, user agent and viewport of the current page.
   *
   * @returns Page details, or `success: false` with `error`.
   */
  async getPageInfo(): Promise<PageInfoResult> {
    try {
      const page = await this.requirePage();
      return { success: true, ...(await this.describePage(page)) };
    } catch (error) {
      return {
        success: false,
        error: `获取页面信息失败: ${ChromeBrowserManager.describeError(error)}`
      };
    }
  }

  /**
   * Releases the browser.
   *
   * A browser launched by this manager is closed; a browser that was merely
   * attached to is only disconnected from, and a tab adopted from it is left
   * open. The manager's handles are always cleared, even when releasing
   * fails, so that a later call starts from a clean state.
   *
   * @returns `success: true` with a message, or `success: false` with `error`
   *   when closing/disconnecting the browser failed.
   */
  async closeBrowser(): Promise<CloseResult> {
    const browser = this.browser;
    const page = this.page;
    const attached = this.isConnectedToExisting;
    const closeTab = this.ownsPage;

    // Drop the handles up front: whatever happens below, the manager must
    // not keep pointing at a session that is being torn down.
    this.browser = undefined;
    this.page = undefined;
    this.ownsPage = false;

    try {
      if (page && closeTab && !page.isClosed()) {
        try {
          await page.close();
        } catch (error) {
          // A tab that cannot be closed (e.g. already gone) must not prevent
          // the browser itself from being released.
          console.warn(`⚠️ 关闭页面失败: ${ChromeBrowserManager.describeError(error)}`);
        }
      }

      if (browser) {
        if (attached) {
          // Attached to an existing instance: only drop the connection.
          await browser.disconnect();
        } else {
          // Only a browser we launched ourselves is actually closed.
          await browser.close();
        }
      }

      return {
        success: true,
        message: attached ? '已断开与Chrome浏览器的连接' : '浏览器已成功关闭'
      };
    } catch (error) {
      return {
        success: false,
        error: `关闭浏览器失败: ${ChromeBrowserManager.describeError(error)}`
      };
    }
  }

  /**
   * Reports whether a browser and tab are currently held and responsive.
   * Never acquires a browser and never throws.
   *
   * @returns `connected: true` with URL and title, or `connected: false`.
   */
  async getBrowserStatus(): Promise<{ connected: boolean; url?: string; title?: string }> {
    try {
      if (!this.browser || !this.page) {
        return { connected: false };
      }
      const url = this.page.url();
      const title = await this.page.title();
      return { connected: true, url, title };
    } catch {
      return { connected: false };
    }
  }
}