We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""
Article-by-article paywall detection.
Detects whether individual news articles are behind a paywall by analyzing:
1. Content truncation patterns
2. Paywall-specific phrases in summaries
3. Source characteristics (metered vs hard paywall)
"""
import re
from typing import Tuple
# Source identifiers grouped by paywall policy. detect_paywall() looks a
# source_id up in these sets to pick which heuristics to apply.

# Hard paywall: nearly every article requires a subscription.
HARD_PAYWALL_SOURCES = {
    "globe",      # Globe & Mail
    "thehill",    # Hill Times
    "ipolitics",  # iPolitics
}

# Metered paywall: a mix of free and subscriber-only articles.
METERED_PAYWALL_SOURCES = {
    "toronto_star", "national_post",
    "winnipeg_free_press", "vancouver_sun",
    "ottawa_citizen", "montreal_gazette",
    "calgary_herald", "edmonton_journal",
}

# Never paywalled.
FREE_SOURCES = {
    "cbc", "ctv", "global_news",
    "canadaland", "thenarwhal", "legisinfo",
}
# Regex fragments that signal an explicit paywall prompt in article text.
# They are OR-ed together below and matched case-insensitively.
PAYWALL_PHRASES = [
    # --- English prompts ---
    r"subscribe\s+to\s+(continue|read|access)",
    r"subscription\s+required",
    r"subscribers\s+only",
    r"for\s+subscribers",
    r"sign\s+in\s+to\s+read",
    r"unlock\s+(this|full)\s+(story|article)",
    r"to\s+read\s+this\s+article",
    r"exclusive\s+to\s+subscribers",
    r"premium\s+(content|article)",
    r"members(\s+|-)only",
    r"already\s+a\s+subscriber\?",
    r"start\s+your\s+free\s+trial",
    r"get\s+full\s+access",
    r"limited\s+time\s+offer",
    r"become\s+a\s+(member|subscriber)",
    r"join\s+now\s+to\s+read",
    # --- French prompts ---
    r"abonnez-vous",
    r"réservé\s+aux\s+abonnés",
    r"contenu\s+réservé",
    r"pour\s+lire\s+cet\s+article",
    r"déjà\s+abonné\s*\?",
    r"accès\s+complet",
]

# Single alternation compiled once at import time, so each article costs
# one search rather than one search per phrase.
PAYWALL_PATTERN = re.compile("|".join(PAYWALL_PHRASES), flags=re.IGNORECASE)
# Markers that a summary was cut off. Each fragment is anchored with `$`,
# so it only matches at the very end of the text, where paywalled feeds
# typically leave a teaser hanging.
TRUNCATION_INDICATORS = [
    r"\.\.\.$",                  # three ASCII dots
    r"…$",                       # single ellipsis character
    r"\[\.\.\.\]$",              # bracketed dots
    r"\[more\]$",
    r"read\s+more\.?$",
    r"continue\s+reading\.?$",
]

# Case-insensitive alternation of all truncation markers, compiled once.
TRUNCATION_PATTERN = re.compile("|".join(TRUNCATION_INDICATORS), flags=re.IGNORECASE)
# "Free to read" markers that override the hard-paywall assumption.
# Compiled once at import time instead of being rebuilt on every call.
_FREE_INDICATOR_PATTERN = re.compile(
    r"free\s+to\s+read|no\s+subscription|open\s+access",
    re.IGNORECASE,
)


def detect_paywall(
    source_id: str,
    title: str,
    summary: str,
    full_content: str = "",
) -> Tuple[bool, float, str]:
    """
    Detect if an article is likely behind a paywall.

    Checks run in priority order and the first one that applies decides:
    always-free source, explicit paywall phrase, hard-paywall source,
    metered-paywall source, and finally content signals for unknown sources.

    Args:
        source_id: The news source identifier
        title: Article title (None is treated as empty)
        summary: Article summary/description from RSS (None is treated as
            empty; surrounding whitespace is ignored)
        full_content: Full article text if available (optional)

    Returns:
        Tuple of (is_paywalled, confidence, reason)
        - is_paywalled: True if article appears to be paywalled
        - confidence: 0.0-1.0 confidence score
        - reason: Human-readable explanation
    """
    # Feed entries may lack a title or summary. Normalise None to "" so the
    # length checks below cannot raise and the literal text "None" never
    # ends up in the searched string.
    title = title or ""
    full_content = full_content or ""
    # Strip the summary: the truncation patterns are anchored with `$`, so
    # trailing spaces would otherwise hide a closing "..." or "Read more",
    # and padding would inflate the length thresholds.
    summary = (summary or "").strip()

    text_to_check = f"{title} {summary} {full_content}".strip()

    # Always-free sources win over any phrase match.
    if source_id in FREE_SOURCES:
        return False, 1.0, "Source is always free"

    # An explicit subscription prompt is the strongest signal.
    if PAYWALL_PATTERN.search(text_to_check):
        return True, 0.95, "Contains paywall phrase"

    # Hard paywall sources - assume paywalled unless clearly marked free.
    if source_id in HARD_PAYWALL_SOURCES:
        if _FREE_INDICATOR_PATTERN.search(text_to_check):
            return False, 0.7, "Hard paywall source but marked as free"
        return True, 0.85, "Source has hard paywall"

    # Used by both the metered and unknown-source branches; compute once.
    truncated = TRUNCATION_PATTERN.search(summary) is not None

    # Metered paywall sources - rely on summary length and truncation.
    if source_id in METERED_PAYWALL_SOURCES:
        # Very short summary often indicates paywall
        if len(summary) < 100:
            return True, 0.7, "Metered source with very short summary"
        if truncated:
            return True, 0.75, "Metered source with truncated content"
        # Truncation was ruled out above, so a long summary is complete.
        if len(summary) > 300:
            return False, 0.6, "Metered source with full summary"
        # Default for metered sources - slight lean toward paywalled
        return True, 0.55, "Metered source (uncertain)"

    # Unknown source - check content signals only
    if truncated and len(summary) < 150:
        return True, 0.5, "Unknown source with truncated content"
    return False, 0.5, "No paywall indicators detected"
def is_paywalled(
    source_id: str,
    title: str,
    summary: str,
    full_content: str = "",
    threshold: float = 0.6,
) -> bool:
    """
    Simple boolean check for paywall status.

    Runs detect_paywall() and reports True only when it both flags the
    article as paywalled and is at least `threshold` confident.

    Args:
        source_id: The news source identifier
        title: Article title
        summary: Article summary/description from RSS
        full_content: Full article text if available (optional); passed
            through to detect_paywall() so phrases in the body are checked too
        threshold: Minimum confidence needed to report the article as
            paywalled (defaults to 0.6)

    Returns:
        True if the article is considered paywalled, False otherwise.
    """
    paywalled, confidence, _ = detect_paywall(
        source_id, title, summary, full_content
    )
    return paywalled and confidence >= threshold