Skip to main content
Glama
doc.ts (4.04 kB)
import fs from 'node:fs/promises';
import fssync from 'node:fs';
import path from 'node:path';
import { pathToFileURL } from 'node:url';
import MiniSearch from 'minisearch';
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
import { CONFIG } from '../config.js';

/** Call shape this module relies on from the lazily imported `pdf-parse` package. */
type PdfParseFn = (buf: Buffer) => Promise<{ text?: unknown }>;

/** One indexed document; `id` and `path` both hold the file path. */
type DocRecord = { id: string; path: string; title: string; text: string };

/** An index restored from disk together with the full document records. */
type LoadedIndex = { mini: MiniSearch<DocRecord>; docs: DocRecord[] };

/** A single search result returned by {@link docFind}. */
type DocHit = { path: string; score: number; snippet: string };

/** Location of the serialized index, resolved against the process working directory. */
const INDEX_PATH = path.resolve('.cache/index.json');

/**
 * MiniSearch options shared by index creation and index loading.
 * MiniSearch requires the same options at load time as were used when the
 * index was serialized, so they are defined exactly once.
 */
const INDEX_OPTIONS = {
  fields: ['title', 'text'],
  storeFields: ['path', 'title'],
  searchOptions: { boost: { title: 2 } },
};

/** File extensions that are picked up by the indexer. */
const INDEXABLE_FILE = /(\.txt|\.md|\.html?|\.pdf)$/i;

/** Number of characters of context kept on each side of a match in a snippet. */
const SNIPPET_RADIUS = 80;

/** Cached `pdf-parse` entry point; populated on first successful import. */
let _pdfParse: PdfParseFn | null = null;

/**
 * Extracts text from a PDF buffer, importing `pdf-parse` on first use.
 *
 * Best-effort by design: any failure (package missing, malformed PDF)
 * yields an empty string instead of throwing, so this function never rejects.
 *
 * @param buf - Raw PDF bytes.
 * @returns The extracted text, or `''` on any failure.
 */
async function pdfParseLazy(buf: Buffer): Promise<{ text: string }> {
  try {
    let parse = _pdfParse;
    if (!parse) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- accept either a default export or the module itself
      const mod: any = await import('pdf-parse');
      parse = (mod.default || mod) as PdfParseFn;
      _pdfParse = parse;
    }
    const out = await parse(buf);
    return { text: String(out.text || '') };
  } catch {
    return { text: '' };
  }
}

/**
 * Recursively collects every indexable file (.txt, .md, .htm/.html, .pdf)
 * below `root`.
 *
 * @param root - Directory to walk.
 * @returns Paths of all matching files, each joined onto `root`.
 * @throws If any directory in the tree cannot be read.
 */
async function collectFiles(root: string): Promise<string[]> {
  const found: string[] = [];

  async function walk(dir: string): Promise<void> {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        await walk(full);
      } else if (INDEXABLE_FILE.test(entry.name)) {
        found.push(full);
      }
    }
  }

  await walk(root);
  return found;
}

/**
 * Reads a file and converts it to a title plus plain text.
 *
 * - PDF: text via {@link pdfParseLazy}; title is the file name.
 * - HTML: article title/text via Readability, falling back to the body text
 *   and finally to the raw markup.
 * - Anything else: decoded as UTF-8; title is the file name.
 *
 * @param p - Path of the file to read.
 * @throws If the file cannot be read.
 */
async function fileToText(p: string): Promise<{ title: string; text: string }> {
  const buf = await fs.readFile(p);
  const name = path.basename(p);

  if (/\.pdf$/i.test(p)) {
    // pdfParseLazy never throws, so no extra guard is needed here.
    const parsed = await pdfParseLazy(buf);
    return { title: name, text: parsed.text || '' };
  }

  const raw = buf.toString('utf-8');
  if (/\.html?$/i.test(p)) {
    // pathToFileURL resolves relative paths and escapes special characters,
    // which plain 'file://' + p concatenation does not.
    const dom = new JSDOM(raw, { url: pathToFileURL(p).href });
    try {
      const doc = dom.window.document;
      const article = new Readability(doc).parse();
      return {
        title: article?.title || name,
        text: article?.textContent || doc.body?.textContent || raw,
      };
    } finally {
      // Release the jsdom window once the text has been extracted.
      dom.window.close();
    }
  }

  return { title: name, text: raw };
}

/**
 * Builds the full-text index for every indexable file under `root`
 * (default: `CONFIG.sandboxDir`) and writes it to `INDEX_PATH`.
 *
 * The cache file stores both the document records (needed for snippets) and
 * the serialized MiniSearch index.
 *
 * @param root - Optional directory to index; resolved to an absolute path.
 * @returns `{ ok: true, indexed }` where `indexed` is the number of documents.
 * @throws If the tree cannot be walked, a file cannot be read, or the cache
 *         file cannot be written.
 */
export async function indexBuild(root?: string): Promise<{ ok: boolean; indexed: number }> {
  const base = root ? path.resolve(root) : CONFIG.sandboxDir;
  const files = await collectFiles(base);

  const docs: DocRecord[] = [];
  for (const file of files) {
    const { title, text } = await fileToText(file);
    docs.push({ id: file, path: file, title, text });
  }

  const mini = new MiniSearch<DocRecord>(INDEX_OPTIONS);
  mini.addAll(docs);

  const payload = { docs, index: mini.toJSON() };
  // A failure to create the directory is ignored here; writeFile below will
  // reject with the real error if the location is unusable.
  await fs.mkdir(path.dirname(INDEX_PATH), { recursive: true }).catch(() => {});
  await fs.writeFile(INDEX_PATH, JSON.stringify(payload));
  return { ok: true, indexed: docs.length };
}

/**
 * Loads the cached index from `INDEX_PATH`.
 *
 * @returns The restored index and documents, or `null` when the cache file is
 *          missing, unreadable, or malformed (callers treat that as "rebuild").
 */
function loadIndex(): LoadedIndex | null {
  if (!fssync.existsSync(INDEX_PATH)) return null;
  try {
    const payload: unknown = JSON.parse(fssync.readFileSync(INDEX_PATH, 'utf-8'));
    if (typeof payload !== 'object' || payload === null) return null;

    const { docs, index } = payload as { docs?: unknown; index?: unknown };
    if (!Array.isArray(docs) || index == null) return null;

    // MiniSearch.loadJSON expects a JSON *string*. The cache stores the index
    // as a nested object, so it has to be re-serialized before loading.
    const serialized = typeof index === 'string' ? index : JSON.stringify(index);
    const mini = MiniSearch.loadJSON<DocRecord>(serialized, INDEX_OPTIONS);
    return { mini, docs: docs as DocRecord[] };
  } catch {
    // Corrupt or truncated cache: report "no index" so it gets rebuilt.
    return null;
  }
}

/**
 * Cuts a whitespace-normalized excerpt of `text` around the first needle that
 * occurs in it (case-insensitive). Falls back to the start of the text when
 * none of the needles occur.
 */
function makeSnippet(text: string, needles: readonly string[]): string {
  const haystack = text.toLowerCase();
  for (const needle of needles) {
    if (!needle) continue;
    const at = haystack.indexOf(needle.toLowerCase());
    if (at >= 0) {
      const start = Math.max(0, at - SNIPPET_RADIUS);
      const end = Math.min(text.length, at + needle.length + SNIPPET_RADIUS);
      return text.slice(start, end).replace(/\s+/g, ' ');
    }
  }
  return text.slice(0, 2 * SNIPPET_RADIUS).replace(/\s+/g, ' ');
}

/**
 * Searches the index (building it first if no usable cache exists) and
 * returns the best matches with a short text excerpt.
 *
 * Search uses prefix matching, fuzzy matching (0.2) and a 2x title boost.
 *
 * @param q - Query string.
 * @param top - Maximum number of results (default 5); values below 1 yield none.
 * @returns Matches in the order MiniSearch returned them.
 */
export async function docFind(q: string, top = 5): Promise<DocHit[]> {
  let loaded = loadIndex();
  if (!loaded) {
    await indexBuild(CONFIG.sandboxDir);
    loaded = loadIndex();
  }
  if (!loaded) return [];

  const { mini, docs } = loaded;
  const byId = new Map<string, DocRecord>();
  for (const doc of docs) byId.set(doc.id, doc);

  const limit = Math.max(0, Math.floor(top));
  const results = mini
    .search(q, { prefix: true, fuzzy: 0.2, boost: { title: 2 } })
    .slice(0, limit);

  const hits: DocHit[] = [];
  for (const r of results) {
    const doc = byId.get(r.id);
    if (!doc) continue; // index entry without a stored document: skip it
    // Prefix/fuzzy hits may not contain the literal query, so also try the
    // terms MiniSearch reports as matched.
    const needles = [q, ...(r.terms ?? [])];
    hits.push({ path: doc.path, score: r.score, snippet: makeSnippet(doc.text || '', needles) });
  }
  return hits;
}

/** Returns true when `target` is `dir` itself or lies somewhere below it. */
function isInsideDir(dir: string, target: string): boolean {
  const rel = path.relative(dir, target);
  return rel !== '..' && !rel.startsWith('..' + path.sep) && !path.isAbsolute(rel);
}

/**
 * Reads a single document from inside the sandbox directory.
 *
 * PDFs are converted to text via {@link pdfParseLazy}; every other file is
 * returned as its UTF-8 decoded content.
 *
 * @param p - Path of the file; resolved against the working directory.
 * @returns The resolved path and the document text.
 * @throws Error('Access outside sandbox is not allowed') when the resolved
 *         path is not within `CONFIG.sandboxDir`; also rejects if the file
 *         cannot be read.
 */
export async function docRead(p: string): Promise<{ path: string; text: string }> {
  const full = path.resolve(p);
  const sandboxRoot = path.resolve(CONFIG.sandboxDir);
  // A plain startsWith() check would accept sibling directories such as
  // "<sandbox>-other"; compare on path-segment boundaries instead.
  // NOTE(review): symlinks inside the sandbox are not resolved here; confirm
  // whether fs.realpath is needed for the threat model.
  if (!isInsideDir(sandboxRoot, full)) {
    throw new Error('Access outside sandbox is not allowed');
  }

  const buf = await fs.readFile(full);
  const text = /\.pdf$/i.test(full)
    ? (await pdfParseLazy(buf)).text || ''
    : buf.toString('utf-8');
  return { path: full, text };
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/khanhs-234/tool4lm'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.