Skip to main content
Glama
Aryan-Jhaveri

Canada's Food Guide MCP Server

recipe.py35.3 kB
"""Recipe fetching and MCP tool registration for Canada's Food Guide.

Scrapes recipe pages from https://food-guide.canada.ca/ and exposes
search / fetch / filter-discovery tools on a FastMCP server.
"""
import json
import os
import re
import sys
from typing import Any, Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from fastmcp import FastMCP

# Make both the src directory and the project root importable so the module
# works whether it is imported as part of the package or run as a script.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(script_dir)
src_dir = script_dir
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

try:
    from src.api.search import RecipeSearcher
    from src.models.filters import SearchFilters
    from src.db.queries import register_db_tools
    from src.config import DB_FILE
    from src.utils.url_builder import FoodGuideURLBuilder
except ImportError:
    try:
        from api.search import RecipeSearcher
        from models.filters import SearchFilters
        from db.queries import register_db_tools
        from config import DB_FILE
        from utils.url_builder import FoodGuideURLBuilder
    except ImportError as e:
        print(f"Error importing modules: {e}", file=sys.stderr)
        sys.exit(1)

try:
    # Try first with src prefix
    from src.models.recipe import Recipe
    from src.utils.url_builder import FoodGuideURLBuilder
except ImportError:
    try:
        # Next, try with parent directory
        from models.recipe import Recipe
        from utils.url_builder import FoodGuideURLBuilder
    except ImportError:
        # As a last resort, modify sys.path and try again.
        # BUG FIX: the original referenced an undefined name `parent_dir`
        # here, which would raise NameError; `script_dir` is what was meant.
        if script_dir not in sys.path:
            sys.path.insert(0, script_dir)
        if project_root not in sys.path:
            sys.path.insert(0, project_root)
        from models.recipe import Recipe
        from utils.url_builder import FoodGuideURLBuilder


class RecipeFetcher:
    """Fetches and parses individual recipe pages from Canada's Food Guide."""

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def fetch_recipe(self, recipe_url: str) -> Optional[Recipe]:
        """Fetch and parse a single recipe.

        Args:
            recipe_url: Full URL of the recipe page.

        Returns:
            A populated Recipe, or None if the page could not be fetched
            or parsed.
        """
        try:
            print(f"Fetching recipe from: {recipe_url}")
            # Timeout added so a stalled server cannot hang the tool forever.
            response = self.session.get(recipe_url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract recipe data
            title = self._extract_title(soup)
            ingredients = self._extract_ingredients(soup)
            instructions = self._extract_instructions(soup)
            categories = self.extract_categories(soup)
            tips = self._extract_tips(soup)
            recipe_highlights = self._extract_recipe_highlights(soup)

            # Extract metadata
            prep_time = self._extract_time(soup, 'prep')
            cook_time = self._extract_time(soup, 'cook')
            servings = self._extract_servings(soup)
            image_url = self._extract_image(soup)

            # Slug is the last path component of the URL
            slug = recipe_url.rstrip('/').split('/')[-1]

            return Recipe(
                title=title,
                slug=slug,
                url=recipe_url,
                ingredients=ingredients,
                instructions=instructions,
                prep_time=prep_time,
                cook_time=cook_time,
                servings=servings,
                categories=categories,
                tips=tips,
                recipe_highlights=recipe_highlights,
                image_url=image_url
            )
        except Exception as e:
            print(f"Error fetching recipe: {e}")
            return None

    def _extract_title(self, soup: BeautifulSoup) -> str:
        """Extract recipe title, trying several known heading selectors."""
        title_element = (
            soup.find('h1', class_='page-header__title') or
            soup.find('h1', id='wb-cont') or
            soup.find('h1', class_='gc-thickline') or
            soup.find('h1')
        )
        return title_element.get_text(strip=True) if title_element else "Unknown Recipe"

    def _extract_ingredients(self, soup: BeautifulSoup) -> List[str]:
        """
        Extract ingredients list, prioritizing a specific div class for
        Canada Food Guide patterns. This version extracts all textual
        content within the target div.
        """
        ingredients = []

        # 1. Prioritize the specific div class for ingredients
        ingredients_div = soup.find('div', class_='field--name-field-ingredients')
        if ingredients_div:
            # Iterate through all direct children and extract their text
            for child in ingredients_div.children:
                text = child.get_text(strip=True) if hasattr(child, 'get_text') else str(child).strip()
                if text and text not in ingredients:  # Avoid duplicates
                    # Keep lines from within <ul>/<ol> separate by walking
                    # their <li> tags individually.
                    if child.name == 'ul' or child.name == 'ol':
                        for li in child.find_all('li'):
                            li_text = li.get_text(strip=True)
                            if li_text and li_text not in ingredients:
                                ingredients.append(li_text)
                    elif text:
                        # Add non-list direct text content
                        ingredients.append(text)
        # BUG FIX: the original returned None (implicitly) when nothing was
        # found, violating the List[str] annotation; always return a list.
        return ingredients

    def _extract_instructions(self, soup: BeautifulSoup) -> List[str]:
        """Extract cooking instructions."""
        instructions = []

        # Look for an instructions/directions section heading
        instructions_heading = soup.find(
            ['h2', 'h3'], text=re.compile(r'Instructions|Directions|Method', re.I)
        )
        if instructions_heading:
            # Get the list container that follows the heading
            instructions_container = instructions_heading.find_next_sibling(['ol', 'ul'])
            if not instructions_container:
                parent = instructions_heading.parent
                if parent:
                    instructions_container = parent.find(['ol', 'ul'])
            if instructions_container:
                for li in instructions_container.find_all('li'):
                    text = li.get_text(strip=True)
                    if text:
                        instructions.append(text)

        # Alternative: scan numbered lists and keep the most plausible one
        if not instructions:
            for ol in soup.find_all('ol'):
                items = ol.find_all('li')
                if 2 < len(items) < 20:  # Reasonable number of steps
                    temp_instructions = []
                    for li in items:
                        text = li.get_text(strip=True)
                        if text and len(text) > 20:  # Instructions are usually longer
                            temp_instructions.append(text)
                    if len(temp_instructions) > len(instructions):
                        instructions = temp_instructions

        return instructions

    def _extract_time(self, soup: BeautifulSoup, time_type: str) -> Optional[str]:
        """
        Extract prep or cook time, prioritizing a specific div structure.

        Args:
            time_type: 'prep' or 'cook' (matched case-insensitively).
        """
        # 1. Prioritize extraction from the specific div structure:
        #    <div class="item col-xs-4"><div class="title">...</div>...</div>
        time_containers = soup.find_all('div', class_='item col-xs-4')
        for container in time_containers:
            title_div = container.find('div', class_='title')
            if title_div:
                title_text = title_div.get_text(strip=True)
                # e.g. "Prep time" / "Cook time"
                if time_type.lower() in title_text.lower():
                    # The actual time value is in the next sibling div
                    time_value_div = title_div.find_next_sibling('div')
                    if time_value_div:
                        time = time_value_div.get_text(strip=True)
                        # Basic validation that it looks like a duration
                        if re.search(r'\d+.*(?:min|hour)', time, re.I):
                            return time

        # 2. Fallback: scan generic elements for "<type> time: N min/hour"
        time_patterns = [
            re.compile(f'{time_type}.*time.*:.*?(\\d+.*(?:min|hour))', re.I),
            re.compile(f'{time_type}.*:.*?(\\d+.*(?:min|hour))', re.I),
        ]
        for element in soup.find_all(['p', 'span', 'div', 'li']):
            text = element.get_text(strip=True)
            for pattern in time_patterns:
                match = pattern.search(text)
                if match:
                    return match.group(1)
        return None

    def _extract_servings(self, soup: BeautifulSoup) -> Optional[int]:
        """Extract number of servings, or None if not found."""
        servings_patterns = [
            re.compile(r'(?:serves?|servings?|yield[s]?).*?(\d+)', re.I),
            re.compile(r'(\d+).*(?:servings?|portions?)', re.I),
        ]
        for element in soup.find_all(['p', 'span', 'div', 'li']):
            text = element.get_text(strip=True)
            for pattern in servings_patterns:
                match = pattern.search(text)
                if match:
                    try:
                        return int(match.group(1))
                    except (ValueError, TypeError):
                        # Narrowed from a bare except: only conversion
                        # failures should be ignored here.
                        pass
        return None

    def _extract_image(self, soup: BeautifulSoup) -> Optional[str]:
        """Extract the main recipe image URL, trying selectors in order of
        decreasing confidence."""
        # First, look for the featured image wrapper (most likely the main image)
        featured_wrapper = soup.find('div', class_='featured-image-wrapper')
        if featured_wrapper:
            img = featured_wrapper.find('img')
            if img:
                src = img.get('src', '')
                if src:
                    if not src.startswith('http'):
                        src = FoodGuideURLBuilder.BASE_URL + src
                    return src

        # Look for images that are clearly hosted on the Food Guide site,
        # preferring processed/sized images (path contains 'styles')
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if src and 'food-guide.canada.ca' in src:
                # Skip small images (likely icons, logos, buttons)
                if not any(skip in src.lower() for skip in ['icon', 'logo', 'button', 'nav']):
                    if 'styles' in src:
                        return src

        # Second pass on the same domain, without the 'styles' requirement
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if src and 'food-guide.canada.ca' in src:
                if not any(skip in src.lower() for skip in ['icon', 'logo', 'button', 'nav']):
                    return src

        # Look for images with recipe-related classes or attributes
        image_selectors = [
            {'class': re.compile('recipe.*image', re.I)},
            {'class': re.compile('food.*image', re.I)},
            {'class': re.compile('featured.*image', re.I)},
            {'class': re.compile('hero.*image', re.I)},
            {'alt': re.compile('recipe', re.I)},
        ]
        for selector in image_selectors:
            image_element = soup.find('img', selector)
            if image_element:
                src = image_element.get('src', '')
                if src:
                    if not src.startswith('http'):
                        src = FoodGuideURLBuilder.BASE_URL + src
                    return src

        # Look for images in common container classes
        container_selectors = [
            'div.container img',
            'div.image-container img',
            'div.recipe-image img',
            'div.hero-image img',
            'article img',
            'main img'
        ]
        for selector in container_selectors:
            img = soup.select_one(selector)
            if img:
                src = img.get('src', '')
                if src and not any(skip in src.lower() for skip in ['icon', 'logo', 'button', 'nav']):
                    if not src.startswith('http'):
                        src = FoodGuideURLBuilder.BASE_URL + src
                    return src

        # Final fallback: first substantial-looking image on the page
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if src and not any(skip in src.lower() for skip in ['icon', 'logo', 'button', 'nav', 'sprite']):
                if any(indicator in src.lower() for indicator in ['recipe', 'food', 'hero', 'main', 'featured']) or \
                        len(src) > 50:  # Longer URLs often indicate processed images
                    if not src.startswith('http'):
                        src = FoodGuideURLBuilder.BASE_URL + src
                    return src

        return None

    def extract_categories(self, soup: BeautifulSoup) -> List[str]:
        """
        Extracts a list of categories from the specific HTML structure.
        Looks for divs with class 'field--name-name' inside the
        'collection-name' divs.
        """
        category_divs = soup.select(
            'div.collection-name > div.field--name-name.field--type-string.field--label-hidden.field--item'
        )
        return [div.get_text(strip=True) for div in category_divs]

    def _extract_tips(self, soup: BeautifulSoup) -> List[str]:
        """Extract cooking tips from the recipe page."""
        tips = []

        # Look for the tips section with the specific class
        tips_div = soup.find('div', class_='field--name-field-cooking-tips')
        if tips_div:
            field_item = tips_div.find('div', class_='field--item')
            if field_item:
                for p in field_item.find_all('p'):
                    tip_text = p.get_text(strip=True)
                    if tip_text:
                        tips.append(tip_text)

        # Alternative: any section introduced by a "Tips" heading
        if not tips:
            tips_heading = soup.find(['h2', 'h3', 'h4'], text=re.compile(r'Tips?', re.I))
            if tips_heading:
                # Walk the siblings after the heading collecting content
                current = tips_heading.find_next_sibling()
                while current and current.name in ['p', 'ul', 'ol', 'div']:
                    if current.name == 'p':
                        tip_text = current.get_text(strip=True)
                        if tip_text:
                            tips.append(tip_text)
                    elif current.name in ['ul', 'ol']:
                        for li in current.find_all('li'):
                            tip_text = li.get_text(strip=True)
                            if tip_text:
                                tips.append(tip_text)
                    elif current.name == 'div' and 'field--item' in current.get('class', []):
                        for p in current.find_all('p'):
                            tip_text = p.get_text(strip=True)
                            if tip_text:
                                tips.append(tip_text)
                        break
                    current = current.find_next_sibling()

        return tips

    def _extract_recipe_highlights(self, soup: BeautifulSoup) -> List[dict[str, str]]:
        """Extract recipe highlight slides (image + caption carousel)."""
        highlights = []

        # Look for the recipe highlights section
        highlights_section = soup.find('section', class_=re.compile(r'.*instruction-steps.*'))
        if not highlights_section:
            # Alternative: section containing a "Recipe highlights" heading
            highlights_heading = soup.find(['h2', 'h3'], text=re.compile(r'Recipe highlights', re.I))
            if highlights_heading:
                highlights_section = highlights_heading.find_parent('section')
        # Also try to find slides anywhere if no specific section was found
        if not highlights_section:
            highlights_section = soup

        if highlights_section:
            # Slides can appear under several possible structures
            slide_selectors = [
                'div.slick__slide',      # Original selector
                'div.slide__content',    # Newer structure
                'div[class*="slide"]'    # Any div with "slide" in class name
            ]
            slides = []
            for selector in slide_selectors:
                found_slides = highlights_section.select(selector)
                if found_slides:
                    slides = found_slides
                    break

            for slide in slides:
                highlight = {}

                # Extract slide count, e.g. "1 of 2" or "Slide 1 of 2"
                slide_count_div = slide.find('div', class_='slide-count')
                if slide_count_div:
                    slide_count_text = slide_count_div.get_text(strip=True)
                    count_match = re.search(r'(\d+)', slide_count_text)
                    if count_match:
                        highlight['slide_count'] = count_match.group(1)
                    else:
                        highlight['slide_count'] = slide_count_text.replace('Slide', '').strip()

                # Extract caption text from several possible locations
                caption_selectors = [
                    'div.caption-text p',
                    'div.slide__description p',
                    'div.field-content p',
                    'p'  # fallback to any p tag in the slide
                ]
                caption_text = None
                for caption_selector in caption_selectors:
                    caption_element = slide.select_one(caption_selector)
                    if caption_element:
                        caption_text = caption_element.get_text(strip=True)
                        if caption_text and len(caption_text) > 10:  # Ensure substantial text
                            break
                if caption_text:
                    highlight['caption_text'] = caption_text

                # Extract image URL from several possible image structures
                img_selectors = [
                    'img.media__element',     # Known structure
                    'img.b-lazy',             # Lazy-loaded variant
                    'img.img-responsive',     # Responsive variant
                    'div.slide__media img',   # Images within slide media container
                    'div.media img',          # Images within media container
                    'img'                     # Fallback to any img tag
                ]
                img_element = None
                for img_selector in img_selectors:
                    img_element = slide.select_one(img_selector)
                    if img_element:
                        # Resolve lazy-loading data attributes first
                        src = self._get_actual_image_src(img_element)
                        # Skip placeholders and obvious non-content images
                        if src and not any(skip in src.lower() for skip in
                                           ['icon', 'logo', 'button', 'nav', 'data:image', 'svg+xml']):
                            break
                        img_element = None  # Reset if this image was skipped
                if img_element:
                    src = self._get_actual_image_src(img_element)
                    if src and not any(skip in src.lower() for skip in ['data:image', 'svg+xml']):
                        # Build full URL if it's relative
                        if not src.startswith('http'):
                            src = FoodGuideURLBuilder.BASE_URL + src
                        highlight['image_url'] = src

                # Keep slides that have either a caption OR an image —
                # some slides legitimately have only one of the two.
                if 'caption_text' in highlight or 'image_url' in highlight:
                    # Infer the count from position when it's missing
                    if 'slide_count' not in highlight:
                        highlight['slide_count'] = str(len(highlights) + 1)
                    highlights.append(highlight)

        return highlights

    def _get_actual_image_src(self, img_element) -> Optional[str]:
        """
        Extract the actual image source from an img element, handling lazy
        loading. Checks data attributes first, then falls back to src.
        """
        # Common lazy-loading attributes in order of preference
        lazy_attrs = [
            'data-src',
            'data-lazy-src',
            'data-original',
            'data-srcset',
            'srcset',
            'src'
        ]
        for attr in lazy_attrs:
            value = img_element.get(attr, '')
            if value:
                if attr in ['srcset', 'data-srcset']:
                    # srcset format: "url1 1x, url2 2x" — take the first URL
                    first_url = value.split(',')[0].split(' ')[0].strip()
                    if first_url and not any(skip in first_url.lower() for skip in ['data:image', 'svg+xml']):
                        return first_url
                else:
                    # Regular src/data-src attribute
                    if not any(skip in value.lower() for skip in ['data:image', 'svg+xml']):
                        return value
        return None


def register_recipe_tools(mcp: FastMCP):
    """Register all recipe-related tools with the MCP server."""

    @mcp.tool()
    def search_recipes(
        search_text: str = "",
        fruits: Optional[List[str]] = None,
        vegetables: Optional[List[str]] = None,
        proteins: Optional[List[str]] = None,
        whole_grains: Optional[List[str]] = None,
        meals: Optional[List[str]] = None,
        appliances: Optional[List[str]] = None,
        collections: Optional[List[str]] = None,
        max_pages: int = 5
    ) -> List[dict[str, str]]:
        """
        Search for Canadian recipes from Health Canada's official Food Guide website.

        This tool searches through thousands of government-verified, nutrition-focused
        recipes designed to help Canadians eat well according to official dietary
        guidelines. The search covers recipes that emphasize:
        - Vegetables and fruits as the foundation of meals
        - Whole grain foods for sustained energy
        - Protein foods including plant-based options
        - Culturally diverse Canadian cuisine
        - Family-friendly and accessible cooking methods

        Each recipe returned includes complete nutritional guidance, cooking tips from
        registered dietitians, and visual instruction steps to ensure cooking success.

        Args:
            search_text: Free-text search across recipe titles, ingredients, and
                descriptions (e.g., "quick breakfast", "salmon dinner", "vegetarian lunch")
            fruits: Filter by specific fruits (e.g., ["apple", "banana", "berries"]) -
                use list_filters to see all available options
            vegetables: Filter by specific vegetables (e.g., ["carrot", "broccoli",
                "spinach"]) - use list_filters to see all available options
            proteins: Filter by protein sources (e.g., ["chicken", "tofu", "beans",
                "fish"]) - use list_filters to see all available options
            whole_grains: Filter by grain types (e.g., ["rice", "quinoa", "oats"]) -
                use list_filters to see all available options
            meals: Filter by meal occasions (e.g., ["breakfast", "lunch", "dinner",
                "snack"]) - use list_filters to see all available options
            appliances: Filter by cooking equipment needed (e.g., ["oven", "stovetop",
                "slow_cooker"]) - use list_filters to see all available options
            collections: Filter by special dietary collections (e.g., ["vegetarian",
                "kid_friendly", "quick_meals"]) - use list_filters to see all available options
            max_pages: Maximum search result pages to process (1-10, default: 5).
                Each page contains ~12 recipes.

        Returns:
            List of recipe metadata dictionaries containing:
            - title: Recipe name as it appears on Canada's Food Guide
            - url: Direct link to the full recipe on food-guide.canada.ca
            - slug: URL-friendly recipe identifier for referencing

        Source: Health Canada's Food Guide - https://food-guide.canada.ca/
        """
        try:
            searcher = RecipeSearcher()
            filters = None
            filter_types = [
                (fruits, 'fruits'),
                (vegetables, 'vegetables'),
                (proteins, 'proteins'),
                (whole_grains, 'whole_grains'),
                (meals, 'meals_and_course'),
                (appliances, 'cooking_appliance')
            ]
            if any([fruits, vegetables, proteins, whole_grains, meals, appliances, collections]):
                filters = SearchFilters()
                for filter_list, filter_type in filter_types:
                    if filter_list:
                        for value in filter_list:
                            filters.add_filter(filter_type, value)
                if collections:
                    for value in collections:
                        filters.add_collection(value)

            results = searcher.search_recipes(
                search_text=search_text,
                filters=filters,
                max_pages=max_pages
            )

            # Add source attribution to each result
            for result in results:
                if 'url' in result and not result.get('source'):
                    result['source'] = 'Health Canada\'s Food Guide'
                    # Use proper URL builder instead of hardcoded URL
                    result['website'] = FoodGuideURLBuilder.BASE_URL

            return results
        except Exception as e:
            return [{"error": f"Search failed: {str(e)}"}]

    @mcp.tool()
    def get_recipe(url: str) -> Dict[str, Any]:
        """
        Retrieve complete recipe details from Health Canada's Food Guide website.

        This tool extracts comprehensive recipe information from official government
        sources, providing nutrition-focused recipes developed by registered dietitians
        and health professionals. Each recipe includes:
        - Complete ingredient lists with measurements
        - Step-by-step cooking instructions with visual guides
        - Nutritional benefits and dietary information
        - Preparation and cooking time estimates
        - Serving size recommendations
        - Professional cooking tips and techniques
        - Recipe highlight images showing key preparation steps
        - Food category classifications aligned with Canada's Food Guide

        All recipes are designed to support healthy eating according to Canadian
        dietary guidelines and promote food skills development.

        Args:
            url: Complete URL to a specific recipe on Canada's Food Guide website
                (must start with https://food-guide.canada.ca/)

        Returns:
            Comprehensive recipe dictionary containing:
            - title: Official recipe name
            - slug: URL identifier for the recipe
            - url: Source URL for attribution and reference
            - ingredients: Complete list of ingredients with measurements
            - instructions: Detailed step-by-step cooking directions
            - prep_time: Estimated preparation time
            - cook_time: Estimated cooking time
            - servings: Number of servings the recipe yields
            - categories: Food Guide category classifications
            - tips: Professional cooking tips and dietary guidance
            - recipe_highlights: Visual instruction steps with images and descriptions
            - image_url: Main recipe photo URL
            - source: "Health Canada's Food Guide" for proper attribution
            - website: "https://food-guide.canada.ca/" for reference

        Source: Health Canada's Food Guide - https://food-guide.canada.ca/ + the
        recipe slug URL builder
        """
        if not url or not url.startswith('https://food-guide.canada.ca/'):
            return {"error": "Invalid URL. Must be a Canada's Food Guide recipe URL."}
        try:
            fetcher = RecipeFetcher()
            recipe = fetcher.fetch_recipe(url)
            if not recipe:
                return {"error": "Recipe not found or could not be parsed"}
            recipe_data = {
                "title": recipe.title,
                "slug": getattr(recipe, 'slug', ''),
                "url": url,
                "ingredients": recipe.ingredients or [],
                "instructions": recipe.instructions or [],
                "prep_time": getattr(recipe, 'prep_time', ''),
                "cook_time": getattr(recipe, 'cook_time', ''),
                "servings": getattr(recipe, 'servings', None),
                "categories": getattr(recipe, 'categories', []),
                "tips": getattr(recipe, 'tips', []),
                "recipe_highlights": getattr(recipe, 'recipe_highlights', []),
                "image_url": getattr(recipe, 'image_url', ''),
                "source": "Health Canada's Food Guide",
                "website": "https://food-guide.canada.ca/",
                "attribution": "Recipe sourced from Canada's official Food Guide"
            }
            return recipe_data
        except Exception as e:
            return {"error": f"Failed to fetch recipe: {str(e)}"}

    @mcp.tool()
    def list_filters(filter_type: Optional[str] = None) -> Dict[str, List[str]]:
        """
        Discover available search filters for Canada's Food Guide recipes.

        This tool provides the complete catalog of filter options that can be used
        with the search_recipes tool to find recipes that match specific dietary
        needs, cooking methods, meal types, and food categories.

        Filters are organized according to Canada's Food Guide food groups and
        practical cooking considerations:

        Food Categories (aligned with Canada's Food Guide):
        - vegetables: All vegetable types featured in Canadian recipes
        - fruits: Fresh, frozen, and dried fruits used in cooking
        - proteins: Both animal and plant-based protein sources
        - whole_grains: Whole grain options promoted for optimal nutrition

        Practical Filters:
        - meal: Meal occasions and course types (breakfast, lunch, dinner, snacks)
        - cooking_appliance: Kitchen equipment needed (accommodates various cooking setups)
        - collections: Special dietary categories and cooking themes

        This information helps users discover the full range of recipe options
        available and construct precise searches that match their dietary
        preferences, available ingredients, cooking equipment, and meal planning needs.

        Args:
            filter_type: Specific filter category to retrieve (optional). Valid options:
                - "vegetables" - All vegetable filter options
                - "fruits" - All fruit filter options
                - "proteins" - All protein source filter options
                - "whole_grains" - All whole grain filter options
                - "meal" - All meal type and course filter options
                - "cooking_appliance" - All cooking equipment filter options
                - "collections" - All special dietary and theme collections
                If not specified, returns all filter categories.

        Returns:
            Dictionary mapping filter categories to their available values. Each
            category contains a list of specific filter options that can be used in
            recipe searches. Also includes source attribution for transparency.

        Source: Health Canada's Food Guide - https://food-guide.canada.ca/
        """
        try:
            filters = SearchFilters(auto_update=True)
            result = {}
            if filter_type:
                if filter_type in ["vegetables", "fruits", "proteins", "whole_grains",
                                   "meal", "cooking_appliance"]:
                    result[filter_type] = filters.get_available_filters(filter_type)
                elif filter_type == "collections":
                    result["collections"] = filters.get_available_collections()
                else:
                    return {"error": f"Invalid filter type: {filter_type}"}
            else:
                filter_types = [
                    "vegetables", "fruits", "proteins",
                    "whole_grains", "meal", "cooking_appliance"
                ]
                for ft in filter_types:
                    result[ft] = filters.get_available_filters(ft)
                result["collections"] = filters.get_available_collections()

            # Add source attribution
            result["source"] = "Health Canada's Food Guide"
            result["website"] = "https://food-guide.canada.ca/"
            result["note"] = "Filter options are dynamically updated from Canada's Food Guide recipe database to ensure current availability."
            return result
        except Exception as e:
            return {"error": f"Failed to fetch filters: {str(e)}"}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Aryan-Jhaveri/mcp-foodguidecanada'

If you have feedback or need assistance with the MCP directory API, please join our Discord server