"""
Medium scraper for Writer Context Protocol.
"""
import logging
import re
from email.utils import parsedate_to_datetime
from typing import List
from urllib.parse import urlparse

import feedparser
import httpx
from bs4 import BeautifulSoup

from scrapers.base import BaseScraper, Post

logger = logging.getLogger(__name__)


class MediumScraper(BaseScraper):
    """Scraper for Medium blogs."""

    async def scrape(self) -> List[Post]:
        """
        Scrape posts from a Medium blog.

        Returns:
            A list of Post objects with content and metadata.
        """
        # Extract username from URL
        parsed_url = urlparse(self.url)
        path_parts = parsed_url.path.strip('/').split('/')
        if parsed_url.netloc == 'medium.com':
            username = path_parts[0] if path_parts else ''
        else:
            username = parsed_url.netloc.split('.')[0]
        if username.startswith('@'):
            username = username[1:]

        # Construct RSS URL
        rss_url = f'https://medium.com/feed/@{username}'
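        # Examples of the resulting URL -> feed mapping:
        #   https://medium.com/@jane  -> https://medium.com/feed/@jane
        #   https://jane.medium.com/  -> https://medium.com/feed/@jane
        # Note: this assumes a user blog. Publication URLs
        # (https://medium.com/<publication>) would be rewritten to an
        # @-prefixed feed here, while Medium serves publication feeds at
        # /feed/<publication> without the @ prefix.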
        try:
            logger.info(f"Fetching RSS feed from: {rss_url}")
            async with httpx.AsyncClient() as client:
                response = await client.get(rss_url)
                response.raise_for_status()
                logger.info(f"RSS feed fetched successfully. Status code: {response.status_code}")
            feed = feedparser.parse(response.text)
            logger.info(f"Number of entries in feed: {len(feed.entries)}")
        except Exception as e:
            logger.error(f"Error fetching RSS feed: {str(e)}")
            return []
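        # Fields relied on below, as feedparser normalizes them for a Medium
        # feed: entry.title, entry.link, entry.published, and
        # entry.content[0].value for the content:encoded HTML body.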
        posts = []
        for entry in feed.entries[:self.max_posts]:
            try:
                # Medium delivers the full post body as content:encoded;
                # fall back to the RSS description (entry.summary) if absent.
                html = entry.content[0].value if hasattr(entry, 'content') else entry.summary
                soup = BeautifulSoup(html, 'html.parser')
                cleaned_text = self._clean_content(soup.get_text(separator=' ', strip=True))
                # Parse the RFC 2822 pubDate used by RSS, e.g.
                # "Mon, 01 Jan 2024 12:00:00 GMT". strptime's %z does not
                # accept named zones such as GMT, so use the stdlib's
                # RFC 2822 parser instead.
                pub_date = None
                if hasattr(entry, 'published'):
                    try:
                        pub_date = parsedate_to_datetime(entry.published)
                    except (TypeError, ValueError):
                        logger.warning(f"Could not parse date: {entry.published}")
                post = Post(
                    title=self._clean_content(entry.title),
                    url=entry.link,
                    content=cleaned_text,
                    date=pub_date,
                    # Medium's RSS feed does not carry a subtitle field
                    subtitle='',
                )
                posts.append(post)
            except Exception as e:
                logger.error(f"Error processing Medium post {entry.get('link', 'unknown')}: {str(e)}")

        logger.info(f"Scraped {len(posts)} posts from Medium")
        return posts
    @staticmethod
    def _clean_content(content: str) -> str:
        """Remove extra whitespace and newlines, normalizing the text."""
        return re.sub(r'\s+', ' ', content).strip()
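

# Minimal usage sketch. This assumes BaseScraper's constructor accepts the
# blog `url` and a `max_posts` limit (only the attributes `self.url` and
# `self.max_posts` are visible from this module, so the exact signature may
# differ):
#
#     import asyncio
#
#     async def main():
#         scraper = MediumScraper(url='https://medium.com/@username', max_posts=5)
#         for post in await scraper.scrape():
#             print(post.date, post.title, post.url)
#
#     asyncio.run(main())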