Skip to main content
Glama
cool-papers.ts (6.45 kB)
import axios from 'axios';
import * as cheerio from 'cheerio';
import type { AnyNode } from 'domhandler';
import * as fs from 'fs';
import * as path from 'path';
import { promisify } from 'util';
import { pipeline as streamPipeline } from 'stream';
import {
  DownloadOptions,
  DownloadResult,
  KimiQA,
  PaperSource,
  PaperSummary,
  SearchOptions,
  SearchResult,
} from './types.js';

/** Origin that every search, detail and Kimi request is sent to. */
const BASE_URL = 'https://papers.cool';

/** Promise-returning wrapper around the callback-style `stream.pipeline`. */
const pipeline = promisify(streamPipeline);

/**
 * Builds the search URL for the given options.
 *
 * `query` is always sent; `show`, `skip` and `sort` are only added when the
 * corresponding option is a number, so omitted options leave the parameter
 * out of the query string entirely.
 */
function buildSearchUrl(options: SearchOptions): string {
  const params = new URLSearchParams();
  params.set('query', options.query);

  if (typeof options.maxResults === 'number') {
    params.set('show', options.maxResults.toString());
  }
  if (typeof options.skip === 'number') {
    params.set('skip', options.skip.toString());
  }
  if (typeof options.sort === 'number') {
    params.set('sort', options.sort.toString());
  }

  return `${BASE_URL}/${options.source}/search?${params.toString()}`;
}

/**
 * Extracts a non-negative integer from free text.
 *
 * Every non-digit character is dropped before parsing, so `"1,234"` yields
 * `1234`; note that separate numbers in the same text are concatenated.
 *
 * @returns the parsed value, or `undefined` when the text is empty or
 *   contains no digits.
 */
function parseIntFromText(text: string | undefined): number | undefined {
  if (!text) return undefined;

  const digits = text.replace(/[^\d]/g, '');
  if (!digits) return undefined;

  const value = parseInt(digits, 10);
  return Number.isNaN(value) ? undefined : value;
}

/**
 * Splits a comma-separated keyword string into trimmed, non-empty entries.
 *
 * @returns an empty array when `value` is missing or empty.
 */
function normaliseKeywords(value: string | undefined): string[] {
  if (!value) return [];
  return value
    .split(',')
    .map((keyword) => keyword.trim())
    .filter(Boolean);
}

/**
 * Turns an `href` into an absolute URL.
 *
 * Values that already start with `http` are returned untouched. Anything else
 * is resolved against {@link BASE_URL} with the WHATWG `URL` resolver, so
 * root-relative (`/a/b`), path-relative (`a/b`) and protocol-relative
 * (`//host/a`) references all produce a well-formed URL. Plain string
 * concatenation would yield `https://papers.coola/b` for a path-relative
 * reference.
 *
 * @returns `undefined` when `href` is missing or empty.
 */
function ensureAbsoluteUrl(href?: string): string | undefined {
  if (!href) return undefined;
  if (href.startsWith('http')) return href;

  try {
    return new URL(href, BASE_URL).toString();
  } catch {
    // Unparseable reference: keep the historical best-effort concatenation
    // rather than failing the whole parse.
    return `${BASE_URL}${href}`;
  }
}

/**
 * Converts a single paper panel element into a {@link PaperSummary}.
 *
 * Missing pieces degrade to neutral values rather than throwing: `id`, `title`
 * and `detailUrl` fall back to `''`, `rank` to `0`, and the star counts /
 * `pdfUrl` / `publishTime` to `undefined`.
 *
 * @param source - source the paper was listed under; copied into the result.
 * @param paperElement - the panel node to read from.
 */
function parsePaperElement(
  source: PaperSource,
  paperElement: AnyNode
): PaperSummary {
  const $paper = cheerio.load(paperElement);
  const root = $paper.root().children().first();

  const id = root.attr('id') ?? '';
  const rank =
    parseIntFromText(root.find('span.index').first().text().trim()) ?? 0;

  const titleLink = root.find('a.title-link').first();
  const title = titleLink.text().trim();
  const detailUrl = ensureAbsoluteUrl(titleLink.attr('href')) ?? '';

  // First absolute link inside the title heading; the detail page is the
  // fallback when the heading has none.
  const externalHref = root
    .find('h2.title a')
    .toArray()
    .map((anchor) => $paper(anchor).attr('href'))
    .find((href) => href !== undefined && href.startsWith('http'));
  const externalUrl = externalHref ?? detailUrl;

  // The PDF location is read from the anchor's `data` attribute, not `href`.
  const pdfAnchor = root.find('a.title-pdf').first();
  const pdfUrl = pdfAnchor.attr('data');
  const pdfStars = parseIntFromText(pdfAnchor.find('sup').text());
  const kimiStars = parseIntFromText(
    root.find('a.title-kimi sup').first().text()
  );

  const authors = root
    .find('p.metainfo.authors a.author')
    .map((_, anchor) => $paper(anchor).text().trim())
    .get();
  const abstract = root.find('p.summary').text().trim();
  const subjects = root
    .find('p.metainfo.subjects a')
    .map((_, anchor) => $paper(anchor).text().trim())
    .get();
  const publishTime = root.find('p.metainfo.date span.date-data').text().trim();
  const keywords = normaliseKeywords(root.attr('keywords'));

  return {
    id,
    source,
    rank,
    title,
    detailUrl,
    externalUrl,
    pdfUrl,
    pdfStars,
    kimiStars,
    authors,
    abstract,
    subjects,
    publishTime: publishTime || undefined,
    relatedKeywords: keywords,
  };
}

/**
 * Parses a results page into a {@link SearchResult}.
 *
 * `total` is taken from the digits of the first `p.info` element (0 when
 * absent); one summary is produced per `div.papers div.panel.paper` element,
 * in document order.
 *
 * @param source - source the page was fetched for; copied into the result.
 * @param query - query string to echo back in the result.
 * @param html - raw HTML of the page.
 */
function parseSearchHtml(
  source: PaperSource,
  query: string,
  html: string
): SearchResult {
  const $ = cheerio.load(html);
  const total = parseIntFromText($('p.info').first().text()) ?? 0;
  const papers: PaperSummary[] = $('div.papers div.panel.paper')
    .toArray()
    .map((element) => parsePaperElement(source, element));

  return {
    source,
    query,
    total,
    papers,
  };
}

/**
 * Runs a search and returns the parsed results.
 *
 * @throws whatever `axios.get` rejects with when the request fails.
 */
export async function searchPapers(options: SearchOptions): Promise<SearchResult> {
  const url = buildSearchUrl(options);
  const { data } = await axios.get<string>(url, {
    responseType: 'text',
  });
  return parseSearchHtml(options.source, options.query, data);
}

/**
 * Fetches the page for one paper and returns the summary whose `id` matches.
 *
 * @throws Error when the fetched page contains no paper with that id.
 */
async function getPaperDetail(
  source: PaperSource,
  paperId: string
): Promise<PaperSummary> {
  const url = `${BASE_URL}/${source}/${paperId}`;
  const { data } = await axios.get<string>(url, { responseType: 'text' });
  const result = parseSearchHtml(source, paperId, data);
  const paper = result.papers.find((item) => item.id === paperId);
  if (!paper) {
    throw new Error(`Paper ${paperId} not found on ${source}`);
  }
  return paper;
}

/**
 * Downloads a paper's PDF into `options.downloadFolder`.
 *
 * The file is named `options.filename` when that is non-blank, otherwise
 * `<paperId>.pdf` with every character outside `[a-zA-Z0-9-_]` replaced by
 * `_`. The folder is created (recursively) when missing.
 *
 * @returns the PDF URL, the absolute path written, and the number of bytes
 *   received.
 * @throws Error when the paper has no PDF link, or when the requested file
 *   name would resolve to a location outside the download folder.
 */
export async function downloadPaper(
  options: DownloadOptions
): Promise<DownloadResult> {
  const paper = await getPaperDetail(options.source, options.paperId);
  if (!paper.pdfUrl) {
    throw new Error(`Paper ${options.paperId} does not provide a PDF link`);
  }

  const safeId = options.paperId.replace(/[^a-zA-Z0-9-_]/g, '_');
  const requestedName = options.filename?.trim();
  const filename = requestedName ? requestedName : `${safeId}.pdf`;

  const destinationDir = path.resolve(options.downloadFolder);
  const filePath = path.resolve(destinationDir, filename);

  // The file name is caller-supplied: reject `../x`, absolute paths and `.`
  // so the write can never land outside (or on top of) the download folder.
  const relativePath = path.relative(destinationDir, filePath);
  const escapesFolder =
    relativePath === '' ||
    relativePath === '..' ||
    relativePath.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativePath);
  if (escapesFolder) {
    throw new Error(
      `Filename "${filename}" resolves outside the download folder`
    );
  }

  await fs.promises.mkdir(destinationDir, { recursive: true });

  const response = await axios.get(paper.pdfUrl, {
    responseType: 'stream',
  });

  let totalBytes = 0;
  response.data.on('data', (chunk: Buffer) => {
    totalBytes += chunk.length;
  });

  const writer = fs.createWriteStream(filePath);
  let fileOpened = false;
  writer.once('open', () => {
    fileOpened = true;
  });

  try {
    await pipeline(response.data, writer);
  } catch (error) {
    // Do not leave a truncated file behind. Only remove it when this call
    // actually opened it, so a file we failed to open is left alone.
    if (fileOpened) {
      await fs.promises.unlink(filePath).catch(() => undefined);
    }
    throw error;
  }

  return {
    pdfUrl: paper.pdfUrl,
    filePath,
    fileSize: totalBytes,
  };
}

/**
 * Fetches the Kimi page for a paper and returns its question/answer pairs.
 *
 * Each `p.faq-q` element is paired with the `div.faq-a` element immediately
 * following it; pairs where either side is empty after trimming are skipped.
 */
export async function getKimiAnalysis(
  source: PaperSource,
  paperId: string
): Promise<KimiQA[]> {
  const url = `${BASE_URL}/${source}/kimi?paper=${encodeURIComponent(paperId)}`;
  const { data } = await axios.get<string>(url, { responseType: 'text' });
  const $ = cheerio.load(data);

  const questions: KimiQA[] = [];
  $('p.faq-q').each((_, questionElement) => {
    const questionText = $(questionElement).text().trim();
    const answerElement = $(questionElement).next('div.faq-a');
    // Drop whitespace that trails a line just before its newline.
    const answerText = answerElement.text().trim().replace(/\s+\n/g, '\n');
    if (questionText && answerText) {
      questions.push({
        question: questionText,
        answer: answerText,
      });
    }
  });
  return questions;
}

/**
 * Public entry point for retrieving a single paper's summary.
 *
 * @throws Error when the paper cannot be found on the given source.
 */
export async function fetchPaperDetail(
  source: PaperSource,
  paperId: string
): Promise<PaperSummary> {
  return getPaperDetail(source, paperId);
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/han-517/scholar-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.