import puppeteer from "puppeteer-core";
// @ts-ignore
import chromium from "chromium";
import { YuqueConfig, YuqueDoc, YuqueDocListItem } from "./types.js";
// Shared Chromium handle, created lazily by getBrowser().
let browser: any = null;
// Launch that is currently in flight, if any, so that concurrent callers of
// getBrowser() share one launch instead of each starting their own Chromium.
let browserLaunch: Promise<any> | null = null;
/**
 * Returns the shared headless browser, launching it on first use.
 *
 * Concurrent callers that arrive while the launch is still pending all receive
 * the same instance. If the launch fails, the rejection is propagated and the
 * next call starts a fresh attempt.
 *
 * @returns The shared Puppeteer browser instance.
 */
async function getBrowser() {
  if (browser) {
    return browser;
  }
  if (!browserLaunch) {
    browserLaunch = puppeteer
      .launch({
        executablePath: chromium.path,
        headless: true,
        args: [
          "--no-sandbox",
          "--disable-setuid-sandbox",
          "--disable-dev-shm-usage",
          "--disable-gpu",
        ],
      })
      .then((launched: any) => {
        browser = launched;
        // If Chromium exits or the connection drops, forget the handle so the
        // next call relaunches instead of returning a dead instance.
        launched.once("disconnected", () => {
          if (browser === launched) {
            browser = null;
          }
        });
        return launched;
      })
      .finally(() => {
        // Success or failure, the launch is no longer pending.
        browserLaunch = null;
      });
  }
  return browserLaunch;
}
/**
 * Converts a raw `Cookie` header string into cookie objects for Puppeteer.
 *
 * The string is split on ";". Each piece is split at its first "=" into a
 * name and a value, both trimmed. Pieces with no "=" or with nothing before
 * it are skipped. Every cookie is scoped to `domain` with path "/".
 *
 * @param cookieString - Cookie pairs in `name=value; name2=value2` form.
 * @param domain - Domain assigned to every resulting cookie.
 * @returns One `{ name, value, domain, path }` object per valid pair.
 */
function parseCookies(cookieString: string, domain: string): any[] {
  const parsed: any[] = [];
  for (const segment of cookieString.split(";")) {
    const entry = segment.trim();
    const separatorAt = entry.indexOf("=");
    // -1 means there is no "=" at all; 0 means the name is empty.
    if (separatorAt <= 0) {
      continue;
    }
    parsed.push({
      name: entry.slice(0, separatorAt).trim(),
      // Everything after the first "=" belongs to the value, including later "=".
      value: entry.slice(separatorAt + 1).trim(),
      domain: domain,
      path: "/",
    });
  }
  return parsed;
}
/**
 * Fetches a single Yuque document by rendering its page in the headless browser.
 *
 * Only the title, text content and inner HTML are read from the page. Every
 * other `YuqueDoc` field is a placeholder: zero ids, empty strings, and the
 * current time for all timestamps.
 *
 * @param namespace - Knowledge-base path used in the URL; the segment after
 *   the first "/" becomes `book.slug`.
 * @param slug - Document slug appended after the namespace in the URL.
 * @param config - Supplies `baseUrl` and the raw `cookie` string sent with the request.
 * @returns The scraped document, or `null` if any step throws (the error is
 *   logged to stderr).
 */
export async function fetchYuqueDocByBrowser(
  namespace: string,
  slug: string,
  config: YuqueConfig
): Promise<YuqueDoc | null> {
  let page = null;
  try {
    const browserInstance = await getBrowser();
    page = await browserInstance.newPage();
    // Install the cookies for the configured host before navigating.
    const domain = new URL(config.baseUrl).hostname;
    const cookies = parseCookies(config.cookie, domain);
    await page.setCookie(...cookies);
    // Open the document page.
    const url = `${config.baseUrl}/${namespace}/${slug}`;
    await page.goto(url, {
      waitUntil: "networkidle2",
      timeout: 30000,
    });
    // Wait until any of the known content containers shows up. A single
    // comma-joined selector resolves as soon as the first one appears, rather
    // than burning a full timeout on every earlier selector in the list.
    const contentSelectors = [
      "[data-lake-id]",
      ".lark-content",
      ".lake-content",
      ".doc-content",
      "article",
    ];
    try {
      await page.waitForSelector(contentSelectors.join(", "), {
        timeout: 5000 * contentSelectors.length,
      });
    } catch (waitError) {
      // None appeared in time; extraction below still runs and may find content.
    }
    // Extract title and content. This callback is executed inside the page,
    // so it must not reference anything from the surrounding module scope.
    const docData: any = await page.evaluate(() => {
      // Title: try several sources and keep the first non-empty one.
      let title = "未知标题";
      // Source 1: the browser document title.
      // @ts-ignore
      if (document.title && document.title.trim().length > 0) {
        // @ts-ignore
        title = document.title.trim();
      }
      // Source 2: the first <h1> that has text.
      if (title === "未知标题") {
        // @ts-ignore
        const h1s = document.querySelectorAll("h1");
        for (const h1 of h1s) {
          const text = h1.textContent?.trim() || "";
          if (text.length > 0) {
            title = text;
            break;
          }
        }
      }
      // Source 3: known title elements.
      if (title === "未知标题") {
        const titleSelectors = [
          ".doc-title",
          ".lark-title",
          "[data-testid='doc-title']",
          ".index-module_articleTitle",
        ];
        for (const selector of titleSelectors) {
          // @ts-ignore
          const elem = document.querySelector(selector);
          if (elem && elem.textContent && elem.textContent.trim().length > 0) {
            title = elem.textContent.trim();
            break;
          }
        }
      }
      // Content: walk the candidate containers in order.
      const contentSelectors = [
        "[data-lake-id]",
        ".lark-content",
        ".lake-content",
        ".doc-content",
        "article",
        "main",
      ];
      let body = "";
      let body_html = "";
      for (const selector of contentSelectors) {
        // @ts-ignore
        const elem = document.querySelector(selector);
        if (elem) {
          body = elem.textContent?.trim() || "";
          body_html = elem.innerHTML || "";
          // Stop at the first container with a meaningful amount of text;
          // otherwise the last matching container wins.
          if (body.length > 100) break;
        }
      }
      return {
        title,
        body,
        body_html,
      };
    });
    // One timestamp for all date fields so they agree with each other.
    const now = new Date().toISOString();
    // Assemble the result; fields the page does not expose are placeholders.
    const doc: YuqueDoc = {
      id: 0,
      slug: slug,
      title: docData.title,
      book_id: 0,
      book: {
        id: 0,
        type: "Book",
        slug: namespace.split("/")[1] || "",
        name: namespace,
        namespace: namespace,
      },
      user_id: 0,
      user: {
        id: 0,
        type: "User",
        login: "",
        name: "",
        avatar_url: "",
      },
      format: "markdown",
      body: docData.body,
      body_draft: "",
      body_html: docData.body_html,
      public: 1,
      status: 1,
      view_status: 0,
      created_at: now,
      updated_at: now,
      published_at: now,
      first_published_at: now,
      // Character count of the extracted text, not a true word count.
      word_count: docData.body.length,
      cover: null,
      description: "",
      custom_description: "",
      hits: 0,
      likes_count: 0,
      comments_count: 0,
      content_updated_at: now,
    };
    return doc;
  } catch (error) {
    // Still a best-effort null result, but leave a trace on stderr.
    console.error(
      `[yuque-browser] Failed to fetch document ${namespace}/${slug}:`,
      error
    );
    return null;
  } finally {
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        // A failed close (for example the browser already went away) must not
        // replace the value being returned with a rejection.
      }
    }
  }
}
/**
 * Lists the documents of a knowledge base by scraping links from its rendered page.
 *
 * A link counts as a document when its `href` is relative, has no query
 * string, has at least three path segments, and those segments start with the
 * segments of `namespace`. The last path segment is used as the slug, and
 * entries are de-duplicated by slug (the last occurrence wins). Every field
 * other than `slug` and `title` is a placeholder.
 *
 * @param namespace - Knowledge-base path appended to `config.baseUrl`.
 * @param config - Supplies `baseUrl` and the raw `cookie` string sent with the request.
 * @returns The scraped list, or an empty array if any step throws (the error
 *   is logged to stderr).
 */
export async function listYuqueDocsByBrowser(
  namespace: string,
  config: YuqueConfig
): Promise<YuqueDocListItem[]> {
  let page = null;
  try {
    const browserInstance = await getBrowser();
    page = await browserInstance.newPage();
    // Install the cookies for the configured host before navigating.
    const domain = new URL(config.baseUrl).hostname;
    const cookies = parseCookies(config.cookie, domain);
    await page.setCookie(...cookies);
    // Open the knowledge-base page.
    const url = `${config.baseUrl}/${namespace}`;
    await page.goto(url, {
      waitUntil: "networkidle2",
      timeout: 30000,
    });
    // Give the document list a chance to render.
    try {
      await page.waitForSelector(".book-item, .doc-item, a[href*='/" + namespace + "/']", { timeout: 10000 });
    } catch (e) {
      // Timed out; still try to extract whatever is on the page.
    }
    // Path segments of the namespace, passed into the page so the link filter
    // can reject links that belong to other knowledge bases or to site chrome.
    const namespaceParts = namespace
      .split("/")
      .filter((part: string) => part.length > 0);
    // This callback is executed inside the page, so it only sees its arguments.
    const docs: any[] = await page.evaluate((nsParts: string[]) => {
      const items: any[] = [];
      // @ts-ignore
      const links = document.querySelectorAll('a[href]');
      links.forEach((link: any) => {
        const href = link.getAttribute('href');
        const text = link.textContent?.trim();
        // Keep only relative links without a query string.
        if (href && text && href.includes('/') && !href.startsWith('http') && !href.includes('?')) {
          const parts = href.split('/').filter((p: string) => p);
          const underNamespace =
            parts.length > nsParts.length &&
            nsParts.every((part: string, i: number) => parts[i] === part);
          if (parts.length >= 3 && underNamespace) {
            items.push({
              title: text,
              slug: parts[parts.length - 1],
              href: href,
            });
          }
        }
      });
      return items;
    }, namespaceParts);
    // De-duplicate by slug; a later link with the same slug replaces an earlier one.
    const uniqueDocs = Array.from(
      new Map(docs.map((doc: any) => [doc.slug, doc])).values()
    );
    // One timestamp for all date fields so they agree with each other.
    const now = new Date().toISOString();
    // Convert to the list-item shape; fields the page does not expose are placeholders.
    return uniqueDocs.map((doc, index) => ({
      id: index,
      slug: doc.slug,
      title: doc.title,
      description: "",
      user_id: 0,
      book_id: 0,
      format: "markdown",
      public: 1,
      status: 1,
      created_at: now,
      updated_at: now,
      published_at: now,
      word_count: 0,
      cover: null,
      hits: 0,
      likes_count: 0,
      comments_count: 0,
    }));
  } catch (error) {
    // Still a best-effort empty result, but leave a trace on stderr.
    console.error(
      `[yuque-browser] Failed to list documents for ${namespace}:`,
      error
    );
    return [];
  } finally {
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        // A failed close (for example the browser already went away) must not
        // replace the value being returned with a rejection.
      }
    }
  }
}
/**
 * Searches a knowledge base by document title, using the browser-based listing.
 *
 * The full list is loaded through `listYuqueDocsByBrowser` and filtered
 * locally: a document matches when its title contains `query`, ignoring case.
 *
 * @param namespace - Knowledge base to search.
 * @param query - Text to look for in document titles.
 * @param config - Passed through to `listYuqueDocsByBrowser`.
 * @returns The matching list items, in listing order.
 */
export async function searchYuqueDocsByBrowser(
  namespace: string,
  query: string,
  config: YuqueConfig
): Promise<YuqueDocListItem[]> {
  const matches: YuqueDocListItem[] = [];
  const candidates = await listYuqueDocsByBrowser(namespace, config);
  for (const candidate of candidates) {
    const haystack = candidate.title.toLowerCase();
    if (haystack.includes(query.toLowerCase())) {
      matches.push(candidate);
    }
  }
  return matches;
}
/**
 * Closes the shared browser, if one is open.
 *
 * The module-level reference is cleared before the close is awaited. This
 * way a failing `close()` cannot leave a dead handle cached, and code running
 * while the close is in progress does not pick up the instance being shut
 * down. An error from `close()` still propagates to the caller.
 */
export async function closeBrowser() {
  if (!browser) {
    return;
  }
  const closing = browser;
  browser = null;
  await closing.close();
}