GenAIScript

Official

Overview Schema Related Servers Score Discussions

file.ts•11.5 KiB

/**
 * This module provides functions to handle file content resolution, rendering,
 * and data URI conversion. It includes support for various file formats like
 * PDF, DOCX, XLSX, and CSV.
 */
import { DOCXTryParse } from "./docx"
import { readText, tryStat } from "./fs"
import { lookupMime } from "./mime"
import { isBinaryMimeType } from "./binary"
import { createFetch } from "./fetch"
import { fileTypeFromBuffer } from "./filetype"
import { fromBase64, toBase64 } from "./base64"
import { host } from "./host"
import { TraceOptions } from "./trace"
import { parsePdf } from "./pdf"
import { XLSXParse } from "./xlsx"
import { dataToMarkdownTable, CSVTryParse } from "./csv"
import {
    CSV_REGEX,
    DOCX_MIME_TYPE,
    DOCX_REGEX,
    MAX_FILE_CONTENT_SIZE,
    PDF_MIME_TYPE,
    PDF_REGEX,
    XLSX_MIME_TYPE,
    XLSX_REGEX,
} from "./constants"
import { tidyData } from "./tidy"
import { CancellationOptions, checkCancelled } from "./cancellation"
import { prettyBytes } from "./pretty"
import { tryResolveResource } from "./resources"
import { genaiscriptDebug } from "./debug"
const dbg = genaiscriptDebug("file")

/**
 * Resolves the content of a file by decoding, fetching, or parsing it based on
 * its type or source. The `file` object is mutated in place and returned.
 *
 * Resolution order:
 * 1. A path that exists but is not a regular file is returned untouched.
 * 2. A file with `encoding === "base64"` gets its `size` set from the decoded
 *    bytes; PDF and XLSX payloads are additionally converted to text and the
 *    `encoding` field is removed.
 * 3. A file that already has `content`, or has no `filename`, is returned as is.
 * 4. Otherwise the filename is resolved as a resource, or parsed as
 *    PDF / DOCX / XLSX by extension, or read from disk as text or base64.
 *
 * @param file - The file object containing filename, content, type, and encoding.
 * @param options - Optional parameters:
 *   - trace - Object for logging operations.
 *   - cancellationToken - Token to cancel the operation.
 *   - maxFileSize - Maximum size of a binary file to load. Defaults to
 *     MAX_FILE_CONTENT_SIZE; a falsy value (e.g. 0) disables the limit.
 * @returns The same file object with resolved content or metadata. If the file
 * cannot be resolved, it is returned as is.
 */
export async function resolveFileContent(
    file: WorkspaceFile,
    options?: TraceOptions & { maxFileSize?: number } & CancellationOptions
): Promise<WorkspaceFile> {
    const {
        trace,
        cancellationToken,
        maxFileSize = MAX_FILE_CONTENT_SIZE,
    } = options || {}
    if (!file) return file

    checkCancelled(cancellationToken)

    const stats = await tryStat(file.filename)
    if (stats && !stats.isFile()) {
        dbg(`skip, not a file`)
        return file // ignore, this is a directory
    }

    // decode known files
    if (file.encoding === "base64") {
        dbg(`decode base64`)
        const bytes = fromBase64(file.content)
        file.size = bytes.length
        if (file.type === PDF_MIME_TYPE) {
            dbg(`file type is PDF`)
            const { content } = await parsePdf(bytes, options)
            delete file.encoding
            file.content = content
        } else if (file.type === XLSX_MIME_TYPE) {
            dbg(`file type is XLSX`)
            const sheets = await XLSXParse(bytes)
            delete file.encoding
            file.content = JSON.stringify(sheets, null, 2)
        }
        // any other base64 payload keeps its encoded content
        return file
    }

    const { filename } = file
    // If file content is already available or filename is missing, return the file as is.
    if (file.content) {
        return file
    }
    if (!filename) {
        dbg(`file has no content and no filename`)
        return file
    }

    dbg(`resolving ${filename}`)
    const res = await tryResolveResource(filename, { trace, cancellationToken })
    // Handle uris files
    if (res) {
        dbg(`resolved file uri`)
        // only the first resolved file is used
        const resFile = res.files[0]
        file.type = resFile.type
        file.content = resFile.content
        file.size = resFile.size
        file.encoding = resFile.encoding
    }
    // Handle PDF files
    else if (PDF_REGEX.test(filename)) {
        dbg(`file is pdf`)
        const stat = await tryStat(filename)
        const { content } = await parsePdf(filename, options)
        file.type = PDF_MIME_TYPE
        file.content = content
        file.size = stat?.size
    }
    // Handle DOCX files
    else if (DOCX_REGEX.test(filename)) {
        dbg(`file is docx`)
        const stat = await tryStat(filename)
        const docx = await DOCXTryParse(filename, options)
        file.type = DOCX_MIME_TYPE
        file.content = docx.file?.content
        // prefer the size reported by the parser, fall back to the size on disk
        file.size = docx.file?.size || stat?.size
    }
    // Handle XLSX files
    else if (XLSX_REGEX.test(filename)) {
        dbg(`file is xlsx`)
        const stat = await tryStat(filename)
        const bytes = await host.readFile(filename)
        const sheets = await XLSXParse(bytes)
        file.type = XLSX_MIME_TYPE
        file.content = JSON.stringify(sheets, null, 2)
        file.size = stat?.size
    }
    // Handle other file types
    else {
        // an explicit file.type wins over the extension-based lookup
        const mime = file.type || lookupMime(filename)
        const isBinary = isBinaryMimeType(mime)
        dbg(`mime %s binary %s`, mime, isBinary)
        file.type = mime
        const info = await tryStat(filename)
        file.size = info?.size
        if (!info) {
            dbg(`file not found: ${filename}`)
            return file
        }
        if (!info.isFile()) {
            dbg(`skip, not a file`)
            return file // ignore, this is a directory
        }
        if (!isBinary) {
            dbg(`text ${prettyBytes(info.size)}`)
            file.content = await readText(filename)
        } else {
            dbg(`binary ${prettyBytes(info.size)}`)
            // binary files at or above the limit keep their metadata but get no content
            if (!maxFileSize || info.size < maxFileSize) {
                const bytes: Uint8Array = await host.readFile(filename)
                file.encoding = "base64"
                file.content = toBase64(bytes)
                file.size = bytes.length
            }
        }
    }

    return file
}

/**
 * Converts input into a WorkspaceFile structure.
 * @param fileOrFilename - A filename string or an object representing a WorkspaceFile.
 * @returns A new object holding only `filename` when a string is given,
 * otherwise the original WorkspaceFile object (not a copy).
 */
export function toWorkspaceFile(fileOrFilename: string | WorkspaceFile) {
    return typeof fileOrFilename === "string"
        ? { filename: fileOrFilename }
        : fileOrFilename
}

/**
 * Resolves the contents of multiple files asynchronously.
 * Files are processed one after the other with resolveFileContent, which
 * updates each file object in place; cancellation is checked after each file.
 * @param files - List of files to process and resolve.
 * @param options - Optional parameters:
 *   - cancellationToken - Token to cancel the operation if needed.
 *   - trace - Object for logging and tracing operations.
 */
export async function resolveFileContents(
    files: WorkspaceFile[],
    options?: CancellationOptions & TraceOptions
) {
    const { cancellationToken } = options || {}
    for (const file of files) {
        await resolveFileContent(file, options)
        checkCancelled(cancellationToken)
    }
}

/**
 * Renders the content of a file into a markdown format if applicable.
 * Supports rendering for CSV and XLSX file types by converting their contents
 * into readable markdown tables.
 *
 * @param file - The file object containing filename and content. For XLSX
 * files the content is parsed as a JSON array of workbook sheets.
 * @param options - Options for tracing operations and filtering the file data
 * during rendering; forwarded to the CSV parser, tidyData and (for CSV) the
 * markdown table generator.
 * @returns An object containing the filename and rendered content, or a
 * shallow copy of the original file object if rendering is not applicable
 * (no content, unsupported extension, or CSV that fails to parse).
 */
export async function renderFileContent(
    file: WorkspaceFile,
    options: TraceOptions & DataFilter
) {
    const { filename, content } = file

    // Render CSV content
    if (content && CSV_REGEX.test(filename)) {
        dbg(`rendering CSV content`)
        let csv = CSVTryParse(content, options)
        if (csv) {
            csv = tidyData(csv, options)
            return { filename, content: dataToMarkdownTable(csv, options) }
        }
        // unparsable CSV falls through and is returned unrendered
    }
    // Render XLSX content
    else if (content && XLSX_REGEX.test(filename)) {
        dbg(`rendering XLSX content`)
        const sheets = JSON.parse(content) as WorkbookSheet[]
        // One "## name" section per sheet. An empty workbook renders as an
        // empty string; the previous fallback dereferenced sheets[0] in
        // exactly that case and threw.
        const rendered = sheets
            .map(
                ({ name, rows }) =>
                    `## ${name}\n${dataToMarkdownTable(tidyData(rows, options))}\n`
            )
            .join("\n")
        return { filename, content: rendered }
    }

    return { ...file }
}

/**
 * Converts a data URI into a binary buffer.
 *
 * @param filename - The string to be inspected and potentially decoded. Only
 * base64 data URIs of the form `data:<type>;base64,<payload>` are accepted.
 * @returns The decoded payload, or undefined if the input does not start with
 * `data:`.
 * @throws Will throw an error if the input starts with `data:` but does not
 * match the expected base64 form.
 */
export function dataUriToBuffer(filename: string) {
    if (/^data:/i.test(filename)) {
        dbg(`converting data URI to buffer`)
        const matches = filename.match(/^data:[^;]+;base64,(.*)$/i)
        if (!matches) {
            dbg(`invalid data URI format`)
            throw new Error("Invalid data URI format")
        }
        return fromBase64(matches[1])
    }
    return undefined
}

/**
 * Resolves and returns the file content as bytes.
 * @param filename - The file name, URL, data URI, or WorkspaceFile object to
 * resolve. A WorkspaceFile with both `encoding` and `content` is decoded
 * directly; otherwise its `filename` is resolved like a string. Strings may be
 * data URIs, http(s) URLs, or local paths.
 * @param options - Optional parameters for tracing operations and fetch
 * configuration; passed to createFetch for remote URLs.
 * @returns A Uint8Array containing the file content as bytes. Resolves to
 * undefined when a local path does not refer to a file.
 * @throws Will throw if the input is a malformed data URI (see dataUriToBuffer).
 */
export async function resolveFileBytes(
    filename: string | WorkspaceFile,
    options?: TraceOptions & CancellationOptions
): Promise<Uint8Array> {
    if (typeof filename === "object") {
        if (filename.encoding && filename.content) {
            dbg(`resolving file bytes`)
            return new Uint8Array(
                Buffer.from(filename.content, filename.encoding)
            )
        }
        filename = filename.filename
    }

    const dataBytes = dataUriToBuffer(filename)
    if (dataBytes) {
        return dataBytes
    }

    // Fetch file from URL
    if (/^https?:\/\//i.test(filename)) {
        dbg(`fetching file from URL: ${filename}`)
        const fetch = await createFetch(options)
        const resp = await fetch(filename)
        // NOTE(review): the response status is not checked here, so the body
        // of an error response would be returned as file bytes - confirm intended
        const buffer = await resp.arrayBuffer()
        return new Uint8Array(buffer)
    }
    // Read file from local storage
    else {
        dbg(`reading file %s`, filename)
        const stat = await host.statFile(filename)
        if (stat?.type !== "file") return undefined
        const buf = await host.readFile(filename)
        return new Uint8Array(buf)
    }
}

/**
 * Converts a file to a Data URI format.
 * @param filename - The file name, URL, or data URI to convert. Supports local
 * files, remote URLs, and data URIs.
 * @param options - Optional parameters for tracing operations and fetch
 * configuration:
 *   - cancellationToken - Token checked after the bytes have been resolved.
 *   - mime - MIME type to use instead of detecting one.
 * @returns An object with the data URI (`uri`), the MIME type (`mimeType`) and
 * the base64 payload (`data`). Returns undefined if the file bytes cannot be
 * resolved or no MIME type can be determined.
 */
export async function resolveFileDataUri(
    filename: string,
    options?: TraceOptions & CancellationOptions & { mime?: string }
) {
    const { cancellationToken, mime } = options || {}
    const bytes = await resolveFileBytes(filename, options)
    checkCancelled(cancellationToken)
    // resolveFileBytes yields undefined for paths that are not files; there is
    // nothing to encode in that case
    if (!bytes) {
        dbg(`no bytes resolved for ${filename}`)
        return undefined
    }
    // precedence: explicit option, then content sniffing, then file extension
    const uriMime =
        mime || (await fileTypeFromBuffer(bytes))?.mime || lookupMime(filename)
    if (!uriMime) {
        dbg(`no mime type found for ${filename}`)
        return undefined
    }
    const b64 = toBase64(bytes)
    return {
        uri: `data:${uriMime};base64,${b64}`,
        mimeType: uriMime,
        data: b64,
    }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/microsoft/genaiscript'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

file.ts•11.5 KiB