V2.ai Insights Scraper MCP

v2-ai-mcp
src
v2_ai_mcp

scraper.py•5.01 KiB

"""Fetch blog posts from V2.ai (HTML scraping) and, when configured, Contentful."""

import logging
import os
import re

import requests
from bs4 import BeautifulSoup

# Diagnostics go through logging (stderr by default) instead of print().
# NOTE(review): this module presumably runs inside an MCP server; if that
# server uses the stdio transport, anything printed to stdout would corrupt
# the protocol stream -- confirm the transport in the server entry point.
logger = logging.getLogger(__name__)

# NOTE(review): the author is hard-coded for the single post scraped by
# fetch_blog_posts(); any other URL will be reported with this author too.
_KNOWN_AUTHOR = "Ashley Rodan"

# The one V2.ai post used when Contentful yields nothing.
_DEFAULT_POST_URL = (
    "https://www.v2.ai/insights/adopting-AI-assistants-while-balancing-risks"
)

# Date formats searched for in the text around the <h1>, in priority order.
_DATE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\b[A-Za-z]+ \d{1,2}, \d{4}\b",  # Month DD, YYYY (most common)
        r"\b\d{1,2} [A-Za-z]+ \d{4}\b",  # DD Month YYYY
        r"\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b",  # MM/DD/YYYY or MM-DD-YYYY
        r"\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b",  # YYYY/MM/DD or YYYY-MM-DD
    )
)

# get_text() without a separator can glue the author's name onto the month
# ("...RodanJuly 4, 2025"), so the first pattern may capture it; this strips
# everything up to and including the surname.
_AUTHOR_PREFIX = re.compile(r".*?Rodan\s*")

# CSS selectors tried, in order, when no date is found near the title.
_DATE_SELECTORS = (
    "time",
    "[datetime]",
    ".date",
    ".published",
    ".post-date",
    ".meta-date",
    ".publish-date",
)

# CSS selectors tried, in order, to locate the main article body.
_CONTENT_SELECTORS = (
    "main",
    ".content",
    ".post-content",
    ".article-content",
    "article",
    ".entry-content",
)

# Tags removed from the document before the article text is collected.
_NON_CONTENT_TAGS = ["script", "style", "nav", "header", "footer"]


def _extract_date(soup, heading) -> str:
    """Return the post date as text, or "Date not found".

    The text of the heading's parent element is searched with
    ``_DATE_PATTERNS`` first; if that yields nothing, the elements matched by
    ``_DATE_SELECTORS`` are consulted (``datetime`` attribute preferred over
    element text).
    """
    date = None

    if heading is not None and heading.parent is not None:
        nearby_text = heading.parent.get_text()
        for pattern in _DATE_PATTERNS:
            match = pattern.search(nearby_text)
            if match:
                date = match.group().strip()
                if "Rodan" in date:
                    date = _AUTHOR_PREFIX.sub("", date)
                # Only the first matching pattern is used.
                break

    if not date:
        for selector in _DATE_SELECTORS:
            element = soup.select_one(selector)
            if element is None:
                continue
            date_text = element.get("datetime") or element.get_text(strip=True)
            if date_text:
                date = date_text
                break

    return date or "Date not found"


def _join_paragraphs(paragraphs) -> str:
    """Join the non-empty, stripped text of *paragraphs* with blank lines."""
    texts = (paragraph.get_text(strip=True) for paragraph in paragraphs)
    return "\n\n".join(text for text in texts if text)


def _extract_content(soup) -> str:
    """Return the article text, or "Content not found".

    Mutates *soup*: script/style/nav/header/footer elements are removed
    first, so call this after anything that needs the untouched document.
    """
    for element in soup(_NON_CONTENT_TAGS):
        element.decompose()

    content = ""
    for selector in _CONTENT_SELECTORS:
        container = soup.select_one(selector)
        if container is None:
            continue
        paragraphs = container.find_all("p")
        if paragraphs:
            # The first container that has any <p> wins, even if its
            # paragraphs turn out to be empty; the fallback below covers that.
            content = _join_paragraphs(paragraphs)
            break

    if not content:
        # Fallback: every paragraph left in the document.
        content = _join_paragraphs(soup.find_all("p"))

    return content or "Content not found"


def fetch_blog_post(url: str) -> dict:
    """Fetch and parse a single blog post from V2.ai.

    Args:
        url: Address of the post to download.

    Returns:
        A dict with the keys ``title``, ``date``, ``author``, ``content`` and
        ``url``. If the HTTP request fails, the same keys are returned with
        ``title`` set to "Error fetching post", empty ``date``/``author`` and
        the error message in ``content``; no exception is raised for
        ``requests.RequestException``.
    """
    # Only the network call can raise RequestException, so only it is guarded.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        html = response.content
    except requests.RequestException as e:
        return {
            "title": "Error fetching post",
            "date": "",
            "author": "",
            "content": f"Error: {e}",
            "url": url,
        }

    soup = BeautifulSoup(html, "html.parser")

    heading = soup.find("h1")
    title = heading.get_text(strip=True) if heading is not None else "No title found"

    # The date must be read before _extract_content() strips header/footer.
    date = _extract_date(soup, heading)
    content = _extract_content(soup)

    return {
        "title": title,
        "date": date,
        "author": _KNOWN_AUTHOR,
        "content": content,
        "url": url,
    }


def fetch_blog_posts() -> list:
    """Fetch blog posts from available sources (V2.ai and/or Contentful).

    Contentful is queried first when both ``CONTENTFUL_SPACE_ID`` and
    ``CONTENTFUL_ACCESS_TOKEN`` are set (content type taken from
    ``CONTENTFUL_CONTENT_TYPE``, default "blogPost", limit 10). If that
    yields no posts, or fails, the single known V2.ai post is scraped instead.

    Returns:
        A non-empty list of posts.
    """
    posts = []

    if os.getenv("CONTENTFUL_SPACE_ID") and os.getenv("CONTENTFUL_ACCESS_TOKEN"):
        try:
            # Imported lazily so a missing or broken Contentful client only
            # disables this source instead of breaking the whole module.
            from .contentful_client import fetch_contentful_posts

            contentful_posts = fetch_contentful_posts(
                content_type=os.getenv("CONTENTFUL_CONTENT_TYPE", "blogPost"),
                limit=10,
            )
            posts.extend(contentful_posts)
        except Exception as e:
            # Best-effort source: report on stderr and fall back to scraping.
            logger.warning("Error fetching from Contentful: %s", e)

    if not posts:
        posts.append(fetch_blog_post(_DEFAULT_POST_URL))

    return posts

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/V2-Digital/v2-ai-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

scraper.py•5.01 KiB