Skip to main content
Glama
blog.ts (8.69 kB)
import { parseDocument } from "htmlparser2";
import { getElementsByTagName, textContent } from "domutils";
import type { Element } from "domhandler";
import type { BlogPostData, BlogPostsData } from "./types.js";
import { cacheOrFetch, CACHE_CONFIGS } from "../utils/cache.js";

/** Number of posts returned by `getBlogPostsData` when no usable limit is given. */
const DEFAULT_BLOG_POST_LIMIT = 5;

/** Upper bound on the number of posts `getBlogPostsData` will request. */
const MAX_BLOG_POST_LIMIT = 20;

/**
 * Collapse every run of whitespace (including newlines) into a single space
 * and trim both ends.
 */
function collapseWhitespace(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Extract field content from RSS item element, handling CDATA and parsing quirks.
 *
 * Only the first descendant named `tagName` is considered. Text children and
 * comment children are concatenated; a comment whose data looks like
 * `[CDATA[...]]` has that wrapper removed first.
 *
 * @param element - Element to search in (typically an RSS `<item>`).
 * @param tagName - Tag name of the field to read.
 * @returns The trimmed field text, or `null` when the tag is absent or empty.
 */
export function extractFieldFromElement(element: Element, tagName: string): string | null {
  const [targetElement] = getElementsByTagName(tagName, element);
  if (!targetElement) return null;

  let content = "";

  // Handle different node types
  if (targetElement.children && targetElement.children.length > 0) {
    for (const child of targetElement.children) {
      if (child.type === "text") {
        content += child.data || "";
      } else if (child.type === "comment") {
        // Handle CDATA parsed as comment: the data then reads "[CDATA[...]]".
        // Note that ordinary comments are appended as-is.
        let data = child.data || "";
        if (data.startsWith("[CDATA[") && data.endsWith("]]")) {
          data = data.slice(7, -2);
        }
        content += data;
      }
    }
  }

  // Fallback to textContent for elements with no children (like link)
  if (!content && tagName === "link") {
    content = textContent(targetElement).trim();

    // If textContent is empty, the URL may have been parsed as the text node
    // that follows the element (presumably because <link> is treated as a
    // void element by the parser), so look at the next sibling.
    if (!content && targetElement.next?.type === "text") {
      content = targetElement.next.data?.trim() || "";
    }
  }

  content = content.trim();

  // Handle CDATA sections - remove CDATA wrapper if present
  if (content.startsWith("<![CDATA[") && content.endsWith("]]>")) {
    content = content.slice(9, -3).trim();
  }

  return content || null;
}

/**
 * Extract blog post data from RSS item element.
 *
 * `pubDate` is looked up under both `pubDate` and `pubdate`. Fields that are
 * missing or empty are `null`.
 *
 * @param item - An RSS `<item>` element.
 */
export function extractBlogPostFromItem(item: Element): BlogPostData {
  return {
    title: extractFieldFromElement(item, "title"),
    link: extractFieldFromElement(item, "link"),
    description: extractFieldFromElement(item, "description"),
    pubDate: extractFieldFromElement(item, "pubDate") ?? extractFieldFromElement(item, "pubdate"),
  };
}

/**
 * Parse RSS XML content and extract blog posts.
 *
 * @param xml - Raw RSS document.
 * @param limit - Maximum number of posts to extract, taken from the start of the feed.
 * @returns The extracted posts (with `null`/`undefined` fields removed) and the
 *   total number of `<item>` elements found in the document.
 */
export function parseRSSContent(
  xml: string,
  limit = 1,
): { posts: BlogPostData[]; totalFound: number } {
  const doc = parseDocument(xml);
  const items = getElementsByTagName("item", doc);

  if (items.length === 0) {
    return { posts: [], totalFound: 0 };
  }

  const postsToFetch = Math.min(limit, items.length);
  const posts: BlogPostData[] = [];

  for (let i = 0; i < postsToFetch; i++) {
    const item = items[i];
    const blogPost = extractBlogPostFromItem(item);

    // Remove null values to clean up the output
    const cleanedPost = Object.fromEntries(
      Object.entries(blogPost).filter(([_, value]) => value !== null && value !== undefined),
    ) as BlogPostData;

    posts.push(cleanedPost);
  }

  return {
    posts,
    totalFound: items.length,
  };
}

/**
 * Fetch and parse RSS feed from URL.
 *
 * @param url - Feed URL to fetch.
 * @param limit - Maximum number of posts to extract.
 * @throws Error when the response status is not OK.
 */
export async function fetchAndParseRSS(
  url: string,
  limit = 1,
): Promise<{ posts: BlogPostData[]; totalFound: number }> {
  const response = await fetch(url);

  if (!response.ok) {
    throw new Error(`Failed to fetch RSS feed: ${response.status} ${response.statusText}`);
  }

  const xml = await response.text();
  return parseRSSContent(xml, limit);
}

/**
 * Fetch blog posts data (internal, not cached).
 *
 * @param limit - Maximum number of posts to retrieve.
 * @throws Error prefixed with "Error fetching blog posts:" wrapping any failure
 *   raised while fetching or parsing the feed.
 */
async function fetchBlogPostsData(limit: number): Promise<BlogPostsData> {
  const feedUrl = "https://blog.duyet.net/rss.xml";

  try {
    const result = await fetchAndParseRSS(feedUrl, limit);

    return {
      posts: result.posts,
      totalFound: result.totalFound,
      retrieved: result.posts.length,
      feedUrl,
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    throw new Error(`Error fetching blog posts: ${errorMessage}`);
  }
}

/**
 * Get blog posts data with caching (cache policy comes from
 * `CACHE_CONFIGS.BLOG`; documented in this file as a 30 minute TTL).
 * This is the public API that should be used by tools/resources.
 *
 * @param limit - Requested number of posts. Clamped to 1..20 and truncated to
 *   an integer; `NaN` falls back to the default so the cache key and the
 *   number of returned posts are always well defined.
 */
export async function getBlogPostsData(limit = DEFAULT_BLOG_POST_LIMIT): Promise<BlogPostsData> {
  const requested = Number.isNaN(limit) ? DEFAULT_BLOG_POST_LIMIT : limit;
  const limitNum = Math.trunc(Math.min(Math.max(requested, 1), MAX_BLOG_POST_LIMIT));
  const cacheKey = `blog-posts-${limitNum}`;

  return cacheOrFetch(
    cacheKey,
    CACHE_CONFIGS.BLOG,
    () => fetchBlogPostsData(limitNum),
  );
}

/**
 * Format blog posts for MCP response.
 *
 * Produces a heading followed by one numbered, fenced JSON block per post.
 *
 * @param posts - Posts to render.
 */
export function formatBlogPostsForMCP(posts: BlogPostData[]): string {
  const postList = posts
    .map((post, index) => {
      const postData = JSON.stringify(post, null, 2);
      return `${index + 1}. Blog Post:\n\`\`\`json\n${postData}\n\`\`\``;
    })
    .join("\n\n");

  // Only exactly one post is singular; zero posts reads "0 blog posts".
  const plural = posts.length === 1 ? "" : "s";
  return `Latest ${posts.length} blog post${plural}:\n\n${postList}`;
}

/**
 * Format blog posts for tool JSON response.
 *
 * @param data - Blog posts payload; only `posts`, `totalFound` and `retrieved`
 *   are serialized.
 */
export function formatBlogPostsForTool(data: BlogPostsData): string {
  return JSON.stringify(
    {
      posts: data.posts,
      totalFound: data.totalFound,
      retrieved: data.retrieved,
    },
    null,
    2,
  );
}

/**
 * Extract article content from blog post HTML.
 *
 * - `title` is the trimmed text of the first `<h1>`, or `null` when there is none.
 * - `content` is taken from the first `<article>`, or otherwise from the first
 *   `<div>` whose class contains "content", "post", "article" or "entry".
 *   Paragraphs are whitespace-normalized individually and separated by a
 *   blank line. When the container has no non-empty `<p>`, its whole text is
 *   used, split on blank lines.
 * - `metadata` is read from `<meta>` tags; tags without a non-empty `content`
 *   attribute are ignored.
 *
 * @param html - Raw HTML of the blog post page.
 */
export function extractArticleContent(html: string): {
  title: string | null;
  content: string;
  metadata: {
    author?: string;
    publishDate?: string;
    tags?: string[];
  };
} {
  const doc = parseDocument(html);

  // Extract title
  const titleElements = getElementsByTagName("h1", doc);
  const title = titleElements.length > 0 ? textContent(titleElements[0]).trim() : null;

  // Try main article tag first, then fall back to common content classes
  const contentClassHints = ["content", "post", "article", "entry"];
  const container: Element | undefined =
    getElementsByTagName("article", doc)[0] ??
    getElementsByTagName("div", doc).find((el) => {
      const className = el.attribs?.class || "";
      return contentClassHints.some((hint) => className.includes(hint));
    });

  // Extract text content. Whitespace is normalized per paragraph so that the
  // blank-line separators between paragraphs survive the cleanup.
  let content = "";
  if (container) {
    content = getElementsByTagName("p", container)
      .map((p) => collapseWhitespace(textContent(p)))
      .filter((text) => text.length > 0)
      .join("\n\n");

    // If no paragraphs, get all text
    if (!content) {
      content = textContent(container)
        .split(/\n\s*\n/)
        .map((chunk) => collapseWhitespace(chunk))
        .filter((text) => text.length > 0)
        .join("\n\n");
    }
  }

  // Extract metadata
  const metadata: { author?: string; publishDate?: string; tags?: string[] } = {};
  const metaElements = getElementsByTagName("meta", doc);

  // Content of the first matching <meta> that actually carries a value.
  const firstMetaContent = (matches: (meta: Element) => boolean): string | undefined =>
    metaElements.find((meta) => matches(meta) && Boolean(meta.attribs?.content))?.attribs.content;

  // Try to find author
  const author = firstMetaContent(
    (meta) => meta.attribs?.name === "author" || meta.attribs?.property === "article:author",
  );
  if (author) {
    metadata.author = author;
  }

  // Try to find publish date
  const publishDate = firstMetaContent(
    (meta) => meta.attribs?.property === "article:published_time",
  );
  if (publishDate) {
    metadata.publishDate = publishDate;
  }

  // Try to find tags
  const tags = metaElements
    .filter((meta) => meta.attribs?.property === "article:tag")
    .map((meta) => meta.attribs.content)
    .filter((tag): tag is string => typeof tag === "string" && tag.length > 0);
  if (tags.length > 0) {
    metadata.tags = tags;
  }

  return { title, content, metadata };
}

/**
 * Fetch blog post content (internal, not cached).
 *
 * @param url - Absolute URL of the post; its hostname must be
 *   `blog.duyet.net` or `duyet.net`.
 * @throws TypeError when `url` cannot be parsed by `new URL`.
 * @throws Error when the hostname is not allowed or the response is not OK.
 */
async function fetchBlogPostContentInternal(url: string): Promise<{
  url: string;
  title: string | null;
  content: string;
  metadata: {
    author?: string;
    publishDate?: string;
    tags?: string[];
  };
  contentLength: number;
}> {
  // Validate URL
  const blogUrl = new URL(url);
  if (blogUrl.hostname !== "blog.duyet.net" && blogUrl.hostname !== "duyet.net") {
    throw new Error("Only blog.duyet.net and duyet.net URLs are supported");
  }

  const response = await fetch(url);

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  }

  const html = await response.text();
  const { title, content, metadata } = extractArticleContent(html);

  return {
    url,
    title,
    content,
    metadata,
    contentLength: content.length,
  };
}

/**
 * Fetch and extract blog post content from URL with caching (cache policy
 * comes from `CACHE_CONFIGS.BLOG`; documented in this file as a 30 minute TTL).
 * This is the public API that should be used by tools/resources.
 *
 * @param url - Absolute URL of the blog post.
 */
export async function fetchBlogPostContent(url: string): Promise<{
  url: string;
  title: string | null;
  content: string;
  metadata: {
    author?: string;
    publishDate?: string;
    tags?: string[];
  };
  contentLength: number;
}> {
  // Use the URI-encoded URL as cache key
  const cacheKey = `blog-post-${encodeURIComponent(url)}`;

  return cacheOrFetch(
    cacheKey,
    CACHE_CONFIGS.BLOG,
    () => fetchBlogPostContentInternal(url),
  );
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/duyet/duyet-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.