import fs from "node:fs/promises";
import path from "node:path";
import { logger } from "../../utils/logger";
import { FileFetcher } from "../fetcher";
import { FetchStatus, type RawContent } from "../fetcher/types";
import { PipelineFactory } from "../pipelines/PipelineFactory";
import type { ContentPipeline, PipelineResult } from "../pipelines/types";
import type { QueueItem, ScraperOptions } from "../types";
import { BaseScraperStrategy, type ProcessItemResult } from "./BaseScraperStrategy";
/**
* LocalFileStrategy crawls and scrapes local files and folders addressed by `file://` URLs.
*
* All files with a MIME type of `text/*` are processed. This includes HTML, Markdown,
* plain text, and source code files such as `.js`, `.ts`, `.tsx`, `.css`, etc.
* Binary files, PDFs, images, and other non-text formats are ignored.
*
* Supports include/exclude filters and percent-encoded paths.
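*
* @example Illustrative usage; the paths shown are hypothetical.
* const strategy = new LocalFileStrategy();
* strategy.canHandle("file:///home/user/docs"); // true
* strategy.canHandle("https://example.com"); // false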
*/
export class LocalFileStrategy extends BaseScraperStrategy {
private readonly fileFetcher = new FileFetcher();
private readonly pipelines: ContentPipeline[];
constructor() {
super();
this.pipelines = PipelineFactory.createStandardPipelines();
}
canHandle(url: string): boolean {
return url.startsWith("file://");
}
async processItem(
item: QueueItem,
options: ScraperOptions,
_signal?: AbortSignal,
): Promise<ProcessItemResult> {
// Convert the file URL to a filesystem path, accepting both file:// and file:/// forms
let filePath = item.url.replace(/^file:\/\/\/?/, "");
filePath = decodeURIComponent(filePath);
// Restore the leading slash on Unix-like systems (the scheme regex may consume it)
if (!filePath.startsWith("/") && process.platform !== "win32") {
filePath = `/${filePath}`;
}
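// e.g. "file:///tmp/My%20Docs/readme.md" resolves to "/tmp/My Docs/readme.md" (illustrative path)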
let stats: Awaited<ReturnType<typeof fs.stat>>;
try {
stats = await fs.stat(filePath);
} catch (error) {
// File not found
if ((error as NodeJS.ErrnoException).code === "ENOENT") {
logger.info(`✓ File deleted or not available: ${filePath}`);
return {
url: item.url,
links: [],
status: FetchStatus.NOT_FOUND,
};
}
throw error;
}
if (stats.isDirectory()) {
const contents = await fs.readdir(filePath);
// Only return links that pass shouldProcessUrl
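// e.g. an entry "notes.md" under "/docs" yields "file:///docs/notes.md" (illustrative names)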
const links = contents
.map((name) => `file://${path.join(filePath, name)}`)
.filter((url) => this.shouldProcessUrl(url, options));
return { url: item.url, links, status: FetchStatus.SUCCESS };
}
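// Pass the stored etag so the fetcher can report NOT_MODIFIED when the file is unchanged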
const rawContent: RawContent = await this.fileFetcher.fetch(item.url, {
etag: item.etag,
});
// Handle NOT_MODIFIED status (file hasn't changed)
if (rawContent.status === FetchStatus.NOT_MODIFIED) {
logger.debug(`✓ File unchanged: ${filePath}`);
return { url: rawContent.source, links: [], status: FetchStatus.NOT_MODIFIED };
}
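// Try the pipelines in order; the first whose canProcess() accepts the MIME type handles the file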
let processed: PipelineResult | undefined;
for (const pipeline of this.pipelines) {
if (pipeline.canProcess(rawContent.mimeType, rawContent.content)) {
logger.debug(
`Selected ${pipeline.constructor.name} for content type "${rawContent.mimeType}" (${filePath})`,
);
processed = await pipeline.process(rawContent, options, this.fileFetcher);
break;
}
}
if (!processed) {
logger.warn(
`⚠️ Unsupported content type "${rawContent.mimeType}" for file ${filePath}. Skipping processing.`,
);
return { url: rawContent.source, links: [], status: FetchStatus.SUCCESS };
}
for (const err of processed.errors ?? []) {
logger.warn(`⚠️ Processing error for ${filePath}: ${err.message}`);
}
// Fall back to the filename when the pipeline yields no usable title
const filename = path.basename(filePath);
const title = processed.title?.trim() || filename || null;
// For local files we don't follow links found in the content, so return an empty links array
return {
url: rawContent.source,
title,
etag: rawContent.etag,
lastModified: rawContent.lastModified,
contentType: rawContent.mimeType,
content: processed,
links: [],
status: FetchStatus.SUCCESS,
};
}
/**
* Clean up resources used by this strategy, specifically the pipeline browser instances.
*/
async cleanup(): Promise<void> {
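// allSettled ensures one failing close() doesn't prevent the remaining pipelines from closing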
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
}
}