Skip to main content
Glama

docs-mcp-server

HtmlMetadataExtractorMiddleware.ts (2.09 kB)
import { logger } from "../../utils/logger";
import type { ContentProcessorMiddleware, MiddlewareContext } from "./types";

/**
 * Pipeline step that derives a page title from parsed HTML and stores it in
 * `context.metadata.title`.
 *
 * Relies on `context.dom` (a Cheerio API object) having been set by an earlier
 * middleware such as HtmlCheerioParserMiddleware. When it is absent, the step
 * logs a warning and hands control to the next middleware without writing a title.
 */
export class HtmlMetadataExtractorMiddleware implements ContentProcessorMiddleware {
  /**
   * Resolves the document title and records it on the context, then continues
   * the middleware chain.
   *
   * Title resolution order: first `<title>` element, then first `<h1>`, then
   * the literal "Untitled". Runs of whitespace in the result are collapsed to
   * single spaces.
   *
   * @param context The current processing context; `metadata.title` is written
   *   on success, and `errors` receives an entry if extraction throws.
   * @param next Function to call the next middleware; always awaited, whether
   *   or not a title was extracted.
   */
  async process(context: MiddlewareContext, next: () => Promise<void>): Promise<void> {
    const dom = context.dom;

    if (dom) {
      try {
        // Short-circuit chain: the <h1> lookup only runs when <title> yields
        // an empty string, and "Untitled" only when both are empty.
        const rawTitle =
          dom("title").first().text().trim() ||
          dom("h1").first().text().trim() ||
          "Untitled";

        // Collapse internal whitespace (newlines, tabs, repeated spaces).
        const cleanTitle = rawTitle.replace(/\s+/g, " ").trim();

        context.metadata.title = cleanTitle;
        logger.debug(`Extracted title: "${cleanTitle}" from ${context.source}`);
      } catch (err) {
        // Extraction failures are recorded rather than rethrown, so the
        // remaining middleware still gets a chance to run.
        logger.error(`❌ Error extracting metadata from ${context.source}: ${err}`);
        const reason = err instanceof Error ? err.message : String(err);
        context.errors.push(new Error(`Failed to extract metadata from HTML: ${reason}`));
      }
    } else {
      // No parsed DOM available: nothing to extract from.
      logger.warn(
        `⏭️ Skipping ${this.constructor.name}: context.dom is missing. Ensure HtmlCheerioParserMiddleware runs before this.`,
      );
    }

    // Single hand-off point for both the processed and the skipped path.
    await next();
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.