Glama
by Malayke
hn_parser.py (12.5 kB)
import requests
from bs4 import BeautifulSoup
from typing import List, Dict, Optional
import sys
import argparse


class HNComment:
    """Represents a Hacker News comment with its metadata and replies.

    Note: the parsing functions below work with plain dicts of the same
    shape (see to_dict()); this class is a convenience for library users.
    """

    def __init__(self, comment_id: str, author: str, time: str, text: str,
                 indent_level: int = 0, parent_id: Optional[str] = None):
        self.comment_id = comment_id
        self.author = author
        self.time = time
        self.text = text
        self.indent_level = indent_level
        self.parent_id = parent_id
        self.replies: List[HNComment] = []

    def to_dict(self) -> Dict:
        """Convert comment to dictionary format"""
        return {
            'id': self.comment_id,
            'author': self.author,
            'time': self.time,
            'text': self.text,
            'indent_level': self.indent_level,
            'parent_id': self.parent_id,
            'replies': [reply.to_dict() for reply in self.replies]
        }

    def __repr__(self) -> str:
        indent = "  " * self.indent_level
        return f"{indent}[{self.author}] {self.time}\n{indent}{self.text[:100]}..."


def parse_hn_comments(item_id: str) -> Dict:
    """
    Parse comments from a Hacker News item page.

    Args:
        item_id: The Hacker News item ID (e.g., '46130187')

    Returns:
        Dictionary containing story info and list of comments
    """
    url = f"https://news.ycombinator.com/item?id={item_id}"

    # Make HTTP request
    response = requests.get(url)
    response.raise_for_status()

    # Parse HTML
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract story information
    story_info = extract_story_info(soup)

    # Extract all comments (nested by reply level)
    comments = extract_comments(soup)

    return {
        'story': story_info,
        'comments': comments,
        # Count every comment, not just the top-level ones
        'total_comments': count_all_comments(comments)
    }


def count_all_comments(comments: List[Dict]) -> int:
    """Count comments at every nesting level, not just the top level."""
    return sum(1 + count_all_comments(c['replies']) for c in comments)


def extract_story_info(soup: BeautifulSoup) -> Dict:
    """Extract story title and metadata"""
    story_info = {}

    # Get title
    title_element = soup.find('span', class_='titleline')
    if title_element:
        link_element = title_element.find('a')
        if link_element:
            story_info['title'] = link_element.text
            story_info['url'] = link_element.get('href', '')

    # Get metadata (points, author, time)
    subtext = soup.find('td', class_='subtext')
    if subtext:
        # Points
        score_element = subtext.find('span', class_='score')
        story_info['points'] = score_element.text if score_element else 'N/A'

        # Author
        author_element = subtext.find('a', class_='hnuser')
        story_info['author'] = author_element.text if author_element else 'N/A'

        # Time
        age_element = subtext.find('span', class_='age')
        if age_element:
            story_info['time'] = age_element.get('title', age_element.text)

    return story_info


def extract_comments(soup: BeautifulSoup) -> List[Dict]:
    """
    Extract all comments from the page, maintaining hierarchy.

    Returns:
        List of comment dictionaries with nested replies
    """
    # Each comment is a <tr class="athing comtr"> row; nesting is
    # reconstructed from the indent spacers in build_nested_structure().
    comment_rows = soup.find_all('tr', class_='athing comtr')
    return build_nested_structure(comment_rows)


def build_nested_structure(comment_rows) -> List[Dict]:
    """
    Build a nested comment structure with replies properly organized.
    """
    all_comments = []
    comment_stack = []  # Stack to track parent comments at each level

    for row in comment_rows:
        comment_id = row.get('id', '')

        # Get indent level (HN renders nesting as a spacer image whose
        # width is indent_level * 40 pixels)
        indent_element = row.find('td', class_='ind')
        indent_level = 0
        if indent_element:
            indent_img = indent_element.find('img')
            if indent_img and indent_img.get('width'):
                indent_level = int(indent_img.get('width', 0)) // 40

        # Get author
        author_element = row.find('a', class_='hnuser')
        author = author_element.text if author_element else '[deleted]'

        # Get time
        age_element = row.find('span', class_='age')
        time = age_element.get('title', age_element.text) if age_element else 'N/A'

        # Get comment text
        text_element = row.find('div', class_='commtext')
        if text_element:
            # Replace truncated link text with the full URL from the href attribute
            for link in text_element.find_all('a'):
                href = link.get('href', '')
                if href and href.startswith('http'):
                    link.string = href

            # Remove the reply link and other UI elements
            for span in text_element.find_all('span', class_='reply'):
                span.decompose()

            text = text_element.get_text(separator='\n', strip=True)
        else:
            text = '[deleted]'

        # Create comment dict
        comment = {
            'id': comment_id,
            'author': author,
            'time': time,
            'text': text,
            'indent_level': indent_level,
            'replies': []
        }

        # Pop the stack back to the current indent level
        comment_stack = comment_stack[:indent_level]

        if indent_level == 0:
            # Top-level comment
            all_comments.append(comment)
            comment_stack = [comment]
        else:
            # Reply: attach to the nearest remaining ancestor
            if comment_stack:
                comment_stack[-1]['replies'].append(comment)
            comment_stack.append(comment)

    return all_comments


def print_comment(comment: Dict, indent: int = 0):
    """Pretty print a comment with proper indentation"""
    prefix = "  " * indent
    print(f"{prefix}{'─' * 60}")
    print(f"{prefix}Author: {comment['author']}")
    print(f"{prefix}Time: {comment['time']}")
    print(f"{prefix}ID: {comment['id']}")
    print(f"{prefix}Text:")

    # Print text with indentation
    for line in comment['text'].split('\n'):
        print(f"{prefix}  {line}")

    # Print replies
    if comment['replies']:
        print(f"{prefix}Replies: {len(comment['replies'])}")
        for reply in comment['replies']:
            print_comment(reply, indent + 1)


def print_comment_llm(comment: Dict, indent: int = 0):
    """Print comment in compact LLM-optimized format"""
    prefix = "  " * indent

    # Determine comment type based on indent
    comment_type = "REPLY" if indent > 0 else "COMMENT"

    # Print compact header
    print(f"{prefix}{comment_type} [{comment['author']} @ {comment['time']}] ID: {comment['id']}")

    # Print text directly without extra formatting
    for line in comment['text'].split('\n'):
        print(f"{prefix}{line}")

    # Print blank line only between top-level comments
    if indent == 0 and comment['replies']:
        print()

    # Recursively print replies
    for reply in comment['replies']:
        print_comment_llm(reply, indent + 1)


def main():
    """Main function to parse and display comments"""
    # Set up argument parser
    parser = argparse.ArgumentParser(
        description='Parse comments from Hacker News items',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python hn_parser.py 46130187
  python hn_parser.py https://news.ycombinator.com/item?id=46130187
  python hn_parser.py 46130187 --llm
"""
    )
    parser.add_argument('item_id', help='HN item ID or full URL')
    parser.add_argument('--llm', action='store_true',
                        help='Output in compact LLM-optimized format (saves tokens)')

    args = parser.parse_args()

    # Accept either a bare item ID or a full item URL
    arg = args.item_id
    if 'item?id=' in arg:
        # Extract the ID from the URL, dropping any extra query parameters
        item_id = arg.split('item?id=')[1].split('&')[0]
    else:
        item_id = arg

    if not args.llm:
        print(f"Fetching comments for item {item_id}...\n")

    try:
        result = parse_hn_comments(item_id)
        story = result['story']

        if args.llm:
            # LLM-optimized compact format
            print(f"STORY: {story.get('title', 'N/A')}")
            print(f"URL: {story.get('url', 'N/A')}")
            print(f"AUTHOR: {story.get('author', 'N/A')} | POINTS: {story.get('points', 'N/A')} | TIME: {story.get('time', 'N/A')}")
            print(f"TOTAL_COMMENTS: {result['total_comments']}")
            print()

            # Print comments in compact format
            for i, comment in enumerate(result['comments'], 1):
                print(f"COMMENT #{i}")
                print_comment_llm(comment)
                print()
        else:
            # Standard verbose format
            print("=" * 80)
            print("STORY INFORMATION")
            print("=" * 80)
            print(f"Title: {story.get('title', 'N/A')}")
            print(f"URL: {story.get('url', 'N/A')}")
            print(f"Author: {story.get('author', 'N/A')}")
            print(f"Points: {story.get('points', 'N/A')}")
            print(f"Time: {story.get('time', 'N/A')}")
            print(f"\nTotal Comments: {result['total_comments']}")
            print("=" * 80)
            print()

            # Display comments
            print("COMMENTS")
            print("=" * 80)
            for i, comment in enumerate(result['comments'], 1):
                print(f"\n[Comment {i}]")
                print_comment(comment)

            print("\n" + "=" * 80)
            print(f"Successfully parsed {result['total_comments']} comments!")

    except requests.RequestException as e:
        print(f"Error fetching Hacker News item: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Error parsing comments: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
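Beyond the CLI, the module can be used as a library. A minimal sketch (assuming the file is importable as hn_parser and the example item ID from the epilog still resolves):

# Minimal library-style usage; assumes this file is on sys.path as hn_parser.py.
from hn_parser import parse_hn_comments

result = parse_hn_comments('46130187')  # item ID from the examples above
print(result['story'].get('title', 'N/A'))
print(f"total comments: {result['total_comments']}")

# Walk the nested reply tree depth-first, mirroring the indent levels.
def walk(comments, depth=0):
    for c in comments:
        print('  ' * depth + f"{c['author']}: {c['text'][:60]}")
        walk(c['replies'], depth + 1)

walk(result['comments'])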


MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Malayke/hackernews-mcp'
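
The same endpoint can be queried from Python as well; a minimal sketch, assuming the API returns a JSON body (the response schema is not documented on this page):

import requests

# Fetch this server's entry from the Glama MCP directory API.
resp = requests.get('https://glama.ai/api/mcp/v1/servers/Malayke/hackernews-mcp')
resp.raise_for_status()
print(resp.json())  # assumes a JSON response; inspect resp.text if decoding fails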

If you have feedback or need assistance with the MCP directory API, please join our Discord server.