Skip to main content
Glama
by microsoft
parsers.test.ts6.52 kB
import { describe, beforeEach, test } from "node:test" import assert from "node:assert/strict" import { createParsers } from "./parsers" import { MarkdownTrace } from "./trace" import { XLSXParse } from "./xlsx" import { readFile } from "fs/promises" import { resolve } from "path" import { TestHost } from "./testhost" import { estimateTokens } from "./tokens" import { writeFile } from "fs/promises" describe("parsers", async () => { let trace: MarkdownTrace let model: string let parsers: Awaited<ReturnType<typeof createParsers>> beforeEach(async () => { trace = new MarkdownTrace({}) model = "test model" parsers = await createParsers({ trace, model }) TestHost.install() }) test("JSON5", () => { const result = parsers.JSON5('{"key": "value"}') assert.deepStrictEqual(result, { key: "value" }) }) test("JSONL", () => { const result = parsers.JSONL('{"key": "value"}\n{"key2": "value2"}') assert.deepStrictEqual(result[0], { key: "value" }) assert.deepStrictEqual(result[1], { key2: "value2" }) }) test("YAML", () => { const result = parsers.YAML("key: value") assert.deepStrictEqual(result, { key: "value" }) }) test("XML parser", () => { const result = parsers.XML("<key>value</key>") assert.deepStrictEqual(result, { key: "value" }) }) test("TOML", () => { const result = parsers.TOML('key = "value"') assert.equal(result.key, "value") }) await test("PDF", async () => { const result = await parsers.PDF({ filename: "../sample/src/rag/loremipsum.pdf", }) assert(result.file.content.includes("Lorem")) }) await test("prompty", async () => { const result = await parsers.prompty({ filename: "../sample/src/chat.prompty", }) assert(result) assert(result.messages.length === 2) }) await test("PDF-image", async () => { const result = await parsers.PDF( { filename: "../sample/src/rag/loremipsum.pdf" }, { renderAsImage: true } ) let i = 1 for (const img of result.images) { await writeFile(`./loremipsum.temp.${i++}.png`, img) } assert(result.file.content.includes("Lorem")) }) await test("DOCX - markdown", async () => { const result = await parsers.DOCX( { filename: "../sample/src/rag/Document.docx", }, { format: "markdown" } ) assert(result.file.content.includes("Microsoft")) }) await test("DOCX - html", async () => { const result = await parsers.DOCX( { filename: "../sample/src/rag/Document.docx", }, { format: "html" } ) assert(result.file.content.includes("Microsoft")) }) await test("DOCX - text", async () => { const result = await parsers.DOCX( { filename: "../sample/src/rag/Document.docx", }, { format: "text" } ) assert(result.file.content.includes("Microsoft")) }) test("CSV", () => { const result = parsers.CSV("key,value\n1,2") assert.deepStrictEqual(result, [{ key: "1", value: "2" }]) }) test("XLSX", async () => { const result = await XLSXParse( await readFile(resolve("./src/parsers.test.xlsx")) ) assert.deepStrictEqual(result, [ { name: "Sheet1", rows: [{ key: 1, value: 2 }] }, ]) }) test("frontmatter", () => { const result = parsers.frontmatter("---\nkey: value\n---\n") assert.deepStrictEqual(result, { key: "value" }) }) test("zip", async () => { const result = await parsers.unzip( { filename: "./src/parsers.test.zip", content: undefined, }, { glob: "*.md" } ) assert(result.find((f) => f.filename === "markdown.md")) assert(!result.find((f) => f.filename === "loremipsum.pdf")) }) test("math", async () => { const res = await parsers.math("1 + 3") assert.strictEqual(res, 4) }) test("validateJSON", () => { const res = parsers.validateJSON( { type: "object", properties: { key: { type: "string" }, }, required: ["key"], }, { key: "value" } ) assert.strictEqual(res.pathValid, true) }) // write test about hash test("hash", async () => { const result = await parsers.hash( { test: "test string", arr: [1, 2, "32"], v: new Uint8Array(123) }, { length: 20, version: false } ) assert.strictEqual(result, "43ebfdc72c65bbf157ff") // Example hash value }) test("dedent", () => { const indentedText = ` This is an indented line This is more indented Back to first level ` const result = parsers.dedent(indentedText) assert.strictEqual( result, `This is an indented line This is more indented Back to first level` ) }) test("unthink", () => { const text = "I think the answer is 42. <think>Actually, it should be 43</think>" const result = parsers.unthink(text) assert.strictEqual(result, "I think the answer is 42. ") }) test("tokens", () => { const result = parsers.tokens("Hello world") assert(typeof result === "number") assert(result > 0) }) test("transcription", () => { const vttContent = `WEBVTT 1 00:00:00.000 --> 00:00:05.000 Hello world 2 00:00:05.500 --> 00:00:10.000 This is a test` const result = parsers.transcription(vttContent) assert.deepStrictEqual(result[0], { id: "1", start: 0, end: 5000, text: "Hello world", }) assert.deepStrictEqual(result[1], { id: "2", start: 5500, end: 10000, text: "This is a test", }) }) test("unfence", () => { const fencedText = '```json\n{"key": "value"}\n```' const result = parsers.unfence(fencedText, "json") assert.strictEqual(result, '{"key": "value"}') }) })

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/microsoft/genaiscript'

If you have feedback or need assistance with the MCP directory API, please join our Discord server