Skip to main content
Glama
northernvariables

FedMCP - Federal Parliamentary Information

scrape_seating_plan.py11.3 kB
#!/usr/bin/env python3 """ Scrape House of Commons seating plan from ourcommons.ca This script: 1. Fetches the floorplan page HTML 2. Extracts all seat buttons with aria-label (MP name) and data-person-id (PersonId) 3. Determines seat positions based on grid structure 4. Identifies government vs opposition benches 5. Outputs structured JSON file with complete seating layout Output: seating_plan.json """ import re import json import urllib.request from html.parser import HTMLParser from typing import List, Dict, Optional FLOORPLAN_URL = "https://www.ourcommons.ca/members/en/floorplan" class SeatParser(HTMLParser): """HTML parser to extract seat information from floorplan page""" def __init__(self): super().__init__() self.seats: List[Dict] = [] self.in_table = False self.in_caption = False self.current_caption = "" self.current_row_num = 0 self.current_side = "" # "left" (opposition) or "right" (government) self.seat_position = 0 # Position within the visual grid def handle_starttag(self, tag: str, attrs: List[tuple]): attrs_dict = dict(attrs) # Track table captions to determine side and row if tag == "caption": self.in_caption = True self.current_caption = "" # Extract seat buttons from gridview if tag == "button" and "floorplan-cell" in attrs_dict.get("class", ""): aria_label = attrs_dict.get("aria-label", "") data_person_id = attrs_dict.get("data-person-id", "") background_color = "" # Extract background color from style attribute style = attrs_dict.get("style", "") color_match = re.search(r'background-color:(#[0-9A-Fa-f]+)', style) if color_match: background_color = color_match.group(1) if aria_label and data_person_id: seat = { "mp_name": aria_label.strip(), "person_id": int(data_person_id), "party_color": background_color, "position_index": self.seat_position } self.seats.append(seat) self.seat_position += 1 def handle_data(self, data: str): if self.in_caption: self.current_caption += data.strip() def handle_endtag(self, tag: str): if tag == "caption": self.in_caption = False # Determine side and row from caption text self._parse_caption(self.current_caption) def _parse_caption(self, caption: str): """Extract row number and side from table caption""" caption_lower = caption.lower() # Determine side if "speaker's left" in caption_lower or "speaker's chair" in caption_lower: self.current_side = "opposition" elif "speaker's right" in caption_lower: self.current_side = "government" # Extract row number if "first row" in caption_lower: self.current_row_num = 1 elif "second row" in caption_lower: self.current_row_num = 2 elif "third row" in caption_lower: self.current_row_num = 3 elif "fourth row" in caption_lower: self.current_row_num = 4 elif "fifth row" in caption_lower: self.current_row_num = 5 elif "sixth row" in caption_lower: self.current_row_num = 6 def assign_visual_coordinates(seats: List[Dict]) -> List[Dict]: """ Assign SVG coordinates to seats based on their layout. Layout: - Opposition (top): y = 60-380 - Speaker (center): y = 400 (middle) - Government (bottom): y = 420-740 Width: 1400px, seats distributed evenly in rows """ # Use smaller spacing for compact layout SEATS_PER_ROW = 30 ROW_HEIGHT = 53 SEAT_WIDTH = 46 # Opposition benches (rows 1-6) at top opposition_y_start = 60 # Government benches (rows 1-6) at bottom government_y_start = 420 # Speaker at center speaker_x = 700 # Center of 1400px width speaker_y = 400 for i, seat in enumerate(seats): # Check if this is the Speaker (already marked by table parser) if seat.get("bench_section") == "speaker": seat["seat_visual_x"] = float(speaker_x) seat["seat_visual_y"] = float(speaker_y) continue # Determine row and column based on position row = i // SEATS_PER_ROW col = i % SEATS_PER_ROW # Calculate X coordinate (same for both sides) x = 100 + (col * SEAT_WIDTH) # First ~170 seats are opposition (top), rest are government (bottom) if i < 170: # Rough split - opposition side y = opposition_y_start + (row * ROW_HEIGHT) if not seat.get("bench_section"): # Only set if not already set seat["bench_section"] = "opposition" seat["seat_row"] = row + 1 else: # Government side y = government_y_start + ((row - 6) * ROW_HEIGHT) if not seat.get("bench_section"): seat["bench_section"] = "government" seat["seat_row"] = row - 5 seat["seat_column"] = col + 1 seat["seat_visual_x"] = float(x) seat["seat_visual_y"] = float(y) return seats def parse_table_structure(html: str) -> Dict[str, any]: """ Parse the table structure to get accurate row/side assignments Returns mapping of MP names to their row and side """ mp_locations = {} # Find all table sections table_pattern = r'<caption[^>]*>(.*?)</caption>.*?<tbody>(.*?)</tbody>' tables = re.findall(table_pattern, html, re.DOTALL) for caption, tbody in tables: # Determine side and row from caption caption_lower = caption.lower() side = "unknown" # Check for Speaker first if "speaker's chair" in caption_lower or "in the speaker" in caption_lower: side = "speaker" elif "speaker's left" in caption_lower: side = "opposition" elif "speaker's right" in caption_lower: side = "government" # Extract row number row_num = 0 if "first row" in caption_lower: row_num = 1 elif "second row" in caption_lower: row_num = 2 elif "third row" in caption_lower: row_num = 3 elif "fourth row" in caption_lower: row_num = 4 elif "fifth row" in caption_lower: row_num = 5 elif "sixth row" in caption_lower: row_num = 6 # Extract MP names from this table # Pattern: <td>name</td> name_pattern = r'<td>\s*(?:Hon\.\s+)?([A-Z][a-zA-Z\-]+\s+[A-Z][a-zA-Z\-]+(?:\s+[A-Z][a-zA-Z\-]+)?)\s*</td>' mp_names = re.findall(name_pattern, tbody) for idx, name in enumerate(mp_names, 1): name_clean = re.sub(r'\s+', ' ', name.strip()) mp_locations[name_clean] = { "side": side, "row": row_num if side != "speaker" else 0, "column": idx if side != "speaker" else 0 } return mp_locations def main(): print(f"Fetching seating plan from {FLOORPLAN_URL}...") try: # Fetch the page with urllib.request.urlopen(FLOORPLAN_URL) as response: html = response.read().decode('utf-8') print(f"✓ Downloaded {len(html)} bytes") # Parse the table structure for accurate positioning print("Parsing table structure for row/side assignments...") mp_locations = parse_table_structure(html) print(f"✓ Found {len(mp_locations)} MPs in table structure") # Parse seats from gridview buttons print("Parsing seat buttons from gridview...") parser = SeatParser() parser.feed(html) print(f"✓ Found {len(parser.seats)} seats with PersonId") # Match seats to table locations print("Matching seats to table positions...") matched_count = 0 speaker_identified = False for seat in parser.seats: mp_name = seat["mp_name"] if mp_name in mp_locations: loc = mp_locations[mp_name] seat["bench_section"] = loc["side"] seat["seat_row"] = loc["row"] seat["seat_column"] = loc["column"] matched_count += 1 if loc["side"] == "speaker": speaker_identified = True print(f"✓ Matched {matched_count} seats to table positions") # Fallback: Identify Speaker by name if table parsing didn't work if not speaker_identified: print("Table parsing didn't identify Speaker, using name matching...") # Common Speaker names to check speaker_candidates = ["Francis Scarpaleggia", "Greg Fergus"] for seat in parser.seats: if seat["mp_name"] in speaker_candidates: seat["bench_section"] = "speaker" seat["seat_row"] = 0 seat["seat_column"] = 0 speaker_identified = True print(f"✓ Identified Speaker: {seat['mp_name']}") break # Assign visual coordinates print("Calculating SVG coordinates...") seats = assign_visual_coordinates(parser.seats) # Add party mapping from colors party_colors = { "#002395": "Conservative", # Blue "#D71920": "Liberal", # Red "#33B2CC": "Bloc Québécois", # Light blue "#F37021": "NDP", # Orange "#3D9B35": "Green Party", # Green } for seat in seats: color = seat.get("party_color", "") seat["party_guess"] = party_colors.get(color, "Unknown") # Create output structure output = { "source_url": FLOORPLAN_URL, "total_seats": len(seats), "seats": seats } # Write to JSON file output_file = "seating_plan.json" with open(output_file, 'w', encoding='utf-8') as f: json.dump(output, f, indent=2, ensure_ascii=False) print(f"\n✓ Seating plan saved to {output_file}") print(f" Total seats: {len(seats)}") print(f" Opposition: {sum(1 for s in seats if s.get('bench_section') == 'opposition')}") print(f" Government: {sum(1 for s in seats if s.get('bench_section') == 'government')}") print(f" Speaker: {sum(1 for s in seats if s.get('bench_section') == 'speaker')}") # Show sample print("\nSample seats:") for seat in seats[:3]: print(f" - {seat['mp_name']} (ID: {seat['person_id']}, {seat.get('party_guess', 'Unknown')})") print(f" Position: Row {seat.get('seat_row', '?')}, Col {seat.get('seat_column', '?')}, {seat.get('bench_section', 'unknown')}") return 0 except Exception as e: print(f"✗ Error: {e}") import traceback traceback.print_exc() return 1 if __name__ == "__main__": exit(main())

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server