Skip to main content
Glama

FirstCycling MCP Server

by r-huijts
MIT License
14
  • Apple
parser.py (3.79 kB)
"""
Parser
=========

Provides useful functions to parse API responses.
"""


# Parsing dates ----

def parse_date(date_text):
    """
    Parse a date string into a ``datetime.date``.

    The text is first handed to ``dateutil`` as-is. If dateutil rejects it,
    the text is split on ``-`` into year, month and day, and a month or day
    whose integer value is zero is replaced by ``'01'`` before parsing again.

    Raises whatever the fallback raises (e.g. ``ValueError`` when the text
    does not split into exactly three ``-``-separated parts).
    """
    from dateutil.parser import parse as date_parse, ParserError

    try:
        return date_parse(date_text).date()
    except ParserError:
        # Result with uncertain date, use January/1st by default
        year, month, day = date_text.split('-')
        month = '01' if not int(month) else month
        day = '01' if not int(day) else day
        fixed_date = year + '-' + month + '-' + day
        return date_parse(fixed_date).date()


# Parsing links ----

def get_url_parameters(url):
    """ Return the query-string parameters of ``url`` as a ``dict`` of strings. """
    # Adapted from https://stackoverflow.com/questions/21584545/url-query-parameters-to-dict-python
    from urllib import parse as url_parse
    return dict(url_parse.parse_qsl(url_parse.urlsplit(url).query))


def rider_link_to_id(a):
    """ Return the ``r`` query parameter of the link's ``href`` as an int. """
    return int(get_url_parameters(a['href'])['r'])


def team_link_to_id(a):
    """ Return the ``l`` query parameter of the link's ``href`` as an int. """
    return int(get_url_parameters(a['href'])['l'])


def race_link_to_race_id(a):
    """ Return the ``r`` query parameter of the link's ``href`` as an int. """
    return int(get_url_parameters(a['href'])['r'])


def race_link_to_stage_num(a):
    """ Return the ``e`` query parameter of the link's ``href`` as an int. """
    return int(get_url_parameters(a['href'])['e'])


def link_to_twitter_handle(a):
    """
    Return the piece at index 3 when the link's ``href`` is split on ``/``,
    e.g. ``'name'`` for ``https://twitter.com/name``.
    """
    return a['href'].split('/')[3]


# Parsing icons ----

def get_img_name(img):
    """ Return the last ``/``-separated component of the image's ``src``. """
    return img['src'].split('/')[-1]


def img_to_country_code(img):
    """ Obtain three-letter country code, or 'UCI' or 'OL' from html img tag """
    return get_img_name(img).split('.')[0]


def img_to_profile(img):
    """ Return profile type for image """
    # NOTE(review): profile_icon_map is not defined in the visible part of this
    # module; it must be supplied elsewhere or this raises NameError -- confirm.
    return profile_icon_map[get_img_name(img)]


# Parsing tables ----

def parse_table(table):
    """
    Convert HTML table from bs4 to pandas DataFrame. Return None if no data.

    ``table`` is a bs4 table tag, or ``None`` when the page had no table.
    The visible text is loaded with ``pandas.read_html``; the table is then
    walked again to add columns for information that only lives in tags
    (``*_ID`` from link query parameters, ``*_Country`` and ``Icon`` from
    image file names). Cells lacking the relevant ``<a>``/``<img>`` yield
    ``None``. Columns that end up entirely empty are dropped.
    """
    # TODO for rider results, format dates nicely with hidden column we are throwing away
    # A missing table means there is nothing to parse; bail out before
    # pandas is asked to read the string "None".
    if table is None:
        return None

    import pandas as pd
    import io

    # Check early if table contains "No data" text
    if "No data" in table.get_text():
        return None

    # Load pandas DataFrame from raw text only
    html = str(table)
    out_df = pd.read_html(io.StringIO(html), decimal=',')[0]

    if out_df.iat[0, 0] == 'No data':  # No data
        return None

    # Convert decimal points to thousands separator
    # NOTE: Cannot use thousands='.' in pd.read_html because will ruin other columns (e.g. CAT for races)
    thousands_cols = ['Points']
    for col in thousands_cols:
        if col in out_df:
            out_df[col] = out_df[col].astype(str).str.replace('.', '', regex=False).astype(int)

    # Parse soup to add information hidden in tags/links
    headers = [th.text for th in table.find_all('th')]
    # Only rows without a header cell are data rows
    trs = [tr for tr in table.find_all('tr') if tr.th is None]

    if 'Race.1' in out_df:
        # pandas de-duplicated two 'Race' headers; the first one is kept as the
        # country column, so mirror that in the header list used for soup_df
        out_df = out_df.rename(columns={'Race': 'Race_Country', 'Race.1': 'Race'})
        headers.insert(headers.index('Race'), 'Race_Country')

    soup_df = pd.DataFrame([tr.find_all('td') for tr in trs], columns=headers)

    # Add information hidden in tags
    for col, series in soup_df.items():
        if col in ('Rider', 'Winner', 'Second', 'Third'):
            # Guard per cell (as for Team/Race below) so one cell without a
            # link or flag neither crashes nor discards the whole column
            out_df[col + '_ID'] = series.apply(lambda td: rider_link_to_id(td.a) if td.a else None)
            out_df[col + '_Country'] = series.apply(lambda td: img_to_country_code(td.img) if td.img else None)

        elif col == 'Team':
            out_df['Team_ID'] = series.apply(lambda td: team_link_to_id(td.a) if td.a else None)
            out_df['Team_Country'] = series.apply(lambda td: img_to_country_code(td.img) if td.img else None)

        elif col == 'Race':
            # Kept as the raw query-string value (a str), not converted to int
            out_df['Race_ID'] = series.apply(lambda td: get_url_parameters(td.a['href'])['r'] if td.a else None)

        elif col == 'Race_Country':
            out_df['Race_Country'] = series.apply(lambda td: img_to_country_code(td.img) if td.img else None)

        elif col == '':
            try:
                out_df['Icon'] = series.apply(lambda td: get_img_name(td.img) if td.img else None)
            except AttributeError:
                # Best effort: skip the Icon column when a cell is not a tag
                pass

    # '-' marks an empty value; drop columns that carry no data at all
    out_df = out_df.replace({'-': None}).dropna(how='all', axis=1)

    # TODO Remove Unnamed columns
    return out_df

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/r-huijts/firstcycling-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.