
FirstCycling MCP Server

by r-huijts
MIT License
endpoints.py (12.6 kB)
from ..endpoints import ParsedEndpoint
from ..parser import parse_date, parse_table, team_link_to_id, img_to_country_code, link_to_twitter_handle
import pandas as pd
import bs4
import io
import re


class RiderEndpoint(ParsedEndpoint):
    """
    Rider profile page response. Extends ParsedEndpoint.

    Attributes
    ----------
    years_active : list[int]
        List of years in which rider was active.
    header_details : dict
        Details from page header, including rider name and external links.
    sidebar_details : dict
        Details from right sidebar, including nation, date of birth, height, and more.
    """

    def _parse_soup(self):
        self._get_years_active()
        self._get_header_details()
        self._get_sidebar_details()

    def _get_years_active(self):
        # TODO: make this more robust, fails when there are too many active years (e.g. Anna van der Breggen)
        try:
            self.years_active = [int(a.text) for a in self.soup.find('p', {'class': "sidemeny2"}).find_all('a')]
        except ValueError:
            print("Warning: could not collect rider's years active.")
            self.years_active = []

    def _get_header_details(self):
        self.header_details = {}
        self.header_details['current_team'] = self.soup.p.text.strip() if self.soup.p.text.strip() else None
        left_p = self.soup.find('p', {'class': 'left'})
        self.header_details['twitter_handle'] = link_to_twitter_handle(left_p.a) if left_p and left_p.a else None

    def _get_sidebar_details(self):
        # TODO: load details from the sidebar
        self.sidebar_details = {}


class RiderYearResults(RiderEndpoint):
    """
    Rider's results in a certain year. Extends RiderEndpoint.

    Attributes
    ----------
    year_details : dict
        The year-specific rider details from the page, including the team, division, UCI points, and more.
    results_df : pd.DataFrame
        Table of rider's results from the year.
    """

    def _parse_soup(self):
        super()._parse_soup()
        self._get_year_details()
        self._get_year_results()

    def _get_year_details(self):
        # Find the table with the year details
        details_table = self.soup.find('table', {'class': 'tablesorter notOddEven'})
        spans = details_table.find_all('span')
        self.year_details = {}
        for span in spans:
            if span.img:  # Team details
                self.year_details['Team'] = span.text.split('(')[0].strip()
                self.year_details['Team ID'] = team_link_to_id(span.a)
                self.year_details['Team Country'] = img_to_country_code(span.img)
                self.year_details['Division'] = span.text.split('(')[1].split(')')[0]
            elif 'Ranking' in span.text:
                self.year_details['UCI Ranking'] = int(span.text.split(': ')[1].split()[0])
                self.year_details['UCI Points'] = float(span.text.split('(')[1].split('pts')[0])
            elif 'Wins' in span.text:
                self.year_details['UCI Wins'] = int(span.text.split(': ')[-1])
            elif 'Race days' in span.text:
                self.year_details['Race days'] = int(span.text.split(': ')[-1])
            elif 'Distance' in span.text:
                self.year_details['Distance'] = int(span.text.split(': ')[-1].replace('.', '').split('km')[0])

    def _get_year_results(self):
        # Find the table with the results
        table = self.soup.find('table', {'class': "sortTabell tablesorter"})
        self.results_df = parse_table(table)


class RiderVictories(RiderEndpoint):
    """
    Rider's victories. Extends RiderEndpoint.

    Attributes
    ----------
    results_df : pd.DataFrame
        Table of rider's victories.
""" def _parse_soup(self): super()._parse_soup() self._get_victories() def _get_victories(self): # Find table with victories table = self.soup.find('table', {'class': "sortTabell tablesorter"}) if table: # Check if the table has "No data" content no_data_text = table.get_text().strip() if "No data" in no_data_text: # Table exists but has no data self.results_df = pd.DataFrame() return try: # Try to parse using the parse_table function self.results_df = parse_table(table) if self.results_df is None: self.results_df = pd.DataFrame() # Empty DataFrame if no victories found except Exception as e: # If there's an error in parsing, handle it by creating a basic DataFrame manually print(f"Warning: Error parsing victories table: {str(e)}") # Fallback: Try to create a DataFrame directly from the HTML try: import io from dateutil.parser import parse # Parse the basic table html = str(table) self.results_df = pd.read_html(io.StringIO(html), decimal=',')[0] # Check if the table contains "No data" if (self.results_df.shape[0] == 1 and "No data" in self.results_df.iloc[0, 0]): self.results_df = pd.DataFrame() return # Clean up column names # The typical format is: Year | Date | Race | Category if 'Date.1' in self.results_df.columns: self.results_df.rename(columns={ 'Date': 'Year', 'Date.1': 'Date', 'Unnamed: 2': 'Month_Day' # This can be blank or contain additional date info }, inplace=True) # Convert Year to string self.results_df['Year'] = self.results_df['Year'].astype(str) # Handle date formatting - combine Year and Date if available if 'Month_Day' in self.results_df.columns: # Clean Month_Day column (keep only non-NaN values) self.results_df = self.results_df.drop('Month_Day', axis=1) # If Date column has decimal format (e.g., 22.04), treat as MM.DD format def format_date(row): try: if pd.notnull(row['Date']): date_str = row['Date'] if isinstance(date_str, float): # Convert float (22.04) to string and handle decimal date_parts = str(date_str).split('.') if len(date_parts) == 2: day = date_parts[0].zfill(2) month = date_parts[1].zfill(2) return f"{row['Year']}-{month}-{day}" return f"{row['Year']}-01-01" # Default date if format not recognized return f"{row['Year']}-01-01" # Default date if no Date value except: return f"{row['Year']}-01-01" # Default for any errors # Create formatted date column self.results_df['Date_Formatted'] = self.results_df.apply(format_date, axis=1) except Exception as e: # If all else fails, just return an empty DataFrame print(f"Warning: Error creating DataFrame from table HTML: {str(e)}") self.results_df = pd.DataFrame() else: # No table found self.results_df = pd.DataFrame() class RiderBestResults(RiderEndpoint): """ Rider's best results. Extends RiderEndpoint. Attributes ---------- results_df : pd.DataFrame Table of rider's best results. 
""" def _parse_soup(self): super()._parse_soup() self._get_best_results() def _get_best_results(self): # Find table with best results (note different class than victories table) table = self.soup.find('table', {'class': "tablesorter"}) if table: # Check if the table has "No data" content no_data_text = table.get_text().strip() if "No data" in no_data_text: # Table exists but has no data self.results_df = pd.DataFrame() return try: # Try to parse the table manually since the structure is different headers = [th.text.strip() for th in table.find('thead').find_all('th')] # Create empty lists to store row data rows_data = [] # Get all data rows tbody = table.find('tbody') if table.find('tbody') else table for tr in tbody.find_all('tr'): row_data = {} cells = tr.find_all('td') # Skip empty rows if not cells: continue # Map each cell to its header for i, cell in enumerate(cells): if i < len(headers): header = headers[i] row_data[header] = cell.text.strip() # Extract race ID if available if header == 'Race' and cell.find('a'): href = cell.find('a').get('href', '') import re race_id_match = re.search(r'r=(\d+)', href) if race_id_match: row_data['Race_ID'] = race_id_match.group(1) # Extract country code if available if cell.find('img'): img_src = cell.find('img').get('src', '') country_code = img_to_country_code(cell.find('img')) if country_code: row_data['Race_Country'] = country_code rows_data.append(row_data) # Create DataFrame from the collected data self.results_df = pd.DataFrame(rows_data) # If the DataFrame is empty after parsing, set to empty DataFrame if self.results_df.empty: self.results_df = pd.DataFrame() except Exception as e: # If there's an error in parsing, handle it by creating a basic DataFrame manually print(f"Warning: Error parsing best results table: {str(e)}") # Fallback: Try to create a DataFrame directly from the HTML try: # Parse the basic table html = str(table) self.results_df = pd.read_html(io.StringIO(html), decimal=',')[0] # Check if the table contains "No data" if self.results_df.empty or (self.results_df.shape[0] == 1 and any("No data" in str(cell) for cell in self.results_df.iloc[0])): self.results_df = pd.DataFrame() return except Exception as e: # If all else fails, just return an empty DataFrame print(f"Warning: Error creating DataFrame from table HTML: {str(e)}") self.results_df = pd.DataFrame() else: # No table found self.results_df = pd.DataFrame() class RiderMonumentResults(RiderEndpoint): """ Rider's results in monuments. Extends RiderEndpoint. Attributes ---------- results_df : pd.DataFrame Table of rider's monument results. 
""" def _parse_soup(self): super()._parse_soup() self._get_monument_results() def _get_monument_results(self): # Find table with monument results - first try with both classes table = self.soup.find('table', {'class': "tablesorter sortTabell"}) # If not found, try with just one class attribute if not table: table = self.soup.find('table', {'class': "tablesorter"}) if table: # Check if the table has "No data" content no_data_text = table.get_text().strip() if "No data" in no_data_text: # Table exists but has no data self.results_df = pd.DataFrame() return try: # Try to parse the table manually headers = [th.text.strip() for th in table.find('tr').find_all(['th', 'td'])] # Create empty lists to store row data rows_data = [] # Get all data rows (skip the header row) for tr in table.find_all('tr')[1:]: row_data = {} cells = tr.find_all(['td', 'th']) # Skip empty rows if not cells: continue # Map each cell to its header for i, cell in enumerate(cells): if i < len(headers): header = headers[i] row_data[header] = cell.text.strip() # Extract race ID if available if header == 'Race' and cell.find('a'): href = cell.find('a').get('href', '') import re race_id_match = re.search(r'r=(\d+)', href) if race_id_match: row_data['Race_ID'] = race_id_match.group(1) # Extract country code if available if cell.find('img'): img_src = cell.find('img').get('src', '') country_code = img_to_country_code(cell.find('img')) if country_code: row_data['Race_Country'] = country_code rows_data.append(row_data) # Create DataFrame from the collected data self.results_df = pd.DataFrame(rows_data) # If the DataFrame is empty after parsing, set to empty DataFrame if self.results_df.empty: self.results_df = pd.DataFrame() except Exception as e: # If there's an error in parsing, handle it by creating a basic DataFrame manually print(f"Warning: Error parsing monument results table: {str(e)}") # Fallback: Try to create a DataFrame directly from the HTML try: # Parse the basic table html = str(table) self.results_df = pd.read_html(io.StringIO(html), decimal=',')[0] # Check if the table contains "No data" if self.results_df.empty or (self.results_df.shape[0] == 1 and any("No data" in str(cell) for cell in self.results_df.iloc[0])): self.results_df = pd.DataFrame() return except Exception as e: # If all else fails, just return an empty DataFrame print(f"Warning: Error creating DataFrame from table HTML: {str(e)}") self.results_df = pd.DataFrame() else: # No table found self.results_df = pd.DataFrame()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/r-huijts/firstcycling-mcp'
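
The same request can be made from Python, for example with the requests package (a minimal sketch of the call shown above):

import requests

response = requests.get("https://glama.ai/api/mcp/v1/servers/r-huijts/firstcycling-mcp")
response.raise_for_status()
print(response.json())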

If you have feedback or need assistance with the MCP directory API, please join our Discord server.