Skip to main content
Glama
Sharan0402

Expense Tracker MCP Server

by Sharan0402
categorizer.py (6.78 kB)
"""Item categorization logic with static rules and LLM fallback.""" import re from typing import Optional # Static mapping of item types to patterns ITEM_TYPE_MAPPINGS = { # Dairy "milk": [ "milk", "2% milk", "whole milk", "skim milk", "1% milk", "dairy milk", "vitamin d milk", ], "oatmilk": ["oat milk", "oatly", "oat beverage", "oat drink", "oatmilk"], "eggs": ["eggs", "egg", "large eggs", "dozen eggs", "extra large eggs", "organic eggs"], "cheese": ["cheese", "cheddar", "mozzarella", "parmesan", "swiss", "gouda"], "yogurt": ["yogurt", "yoghurt", "greek yogurt", "yoghourt"], "butter": ["butter", "margarine", "spread"], # Grains "bread": ["bread", "loaf", "baguette", "sourdough", "wheat bread", "white bread"], "rice": ["rice", "basmati", "jasmine rice", "brown rice", "white rice", "long grain"], "lentils": ["lentils", "lentil", "dal", "red lentils", "green lentils"], "pasta": ["pasta", "spaghetti", "penne", "macaroni", "noodles", "fusilli"], "cereal": ["cereal", "granola", "oatmeal", "oats", "corn flakes"], # Produce "veggies": [ "vegetables", "veggie", "broccoli", "carrot", "carrots", "spinach", "lettuce", "cucumber", "tomato", "tomatoes", "onion", "onions", "pepper", "peppers", "bell pepper", "celery", "kale", "cabbage", ], "fruits": [ "fruit", "apple", "apples", "banana", "bananas", "orange", "oranges", "berries", "strawberries", "blueberries", "raspberries", "grapes", "pear", "pears", "mango", "mangoes", "peach", "peaches", ], "potatoes": ["potato", "potatoes", "russet", "sweet potato", "yam"], # Proteins "meat": [ "chicken", "beef", "pork", "turkey", "lamb", "steak", "ground beef", "sausage", "bacon", ], "fish": ["fish", "salmon", "tuna", "tilapia", "cod", "shrimp", "seafood"], # Snacks & Beverages "snacks": [ "chips", "crackers", "pretzels", "popcorn", "cookies", "candy", "chocolate", "nuts", "trail mix", ], "beverages": [ "soda", "juice", "water", "coffee", "tea", "sports drink", "energy drink", "cola", "sprite", ], # Pantry "oil": ["oil", "olive oil", 
"vegetable oil", "canola oil", "cooking oil"], "spices": ["spices", "spice", "salt", "pepper", "cumin", "turmeric", "paprika"], "sauce": ["sauce", "ketchup", "mustard", "mayo", "mayonnaise", "salsa", "soy sauce"], # Household (non-food) "cleaning": [ "cleaner", "detergent", "soap", "dish soap", "laundry", "bleach", "wipes", ], "paper": ["paper towel", "toilet paper", "tissue", "napkins"], } def normalize_text(text: str) -> str: """Normalize text for matching.""" # Convert to lowercase text = text.lower() # Remove extra whitespace text = " ".join(text.split()) # Remove common brand prefixes/suffixes text = re.sub(r"\bkirkland\b", "", text) text = re.sub(r"\bgreat value\b", "", text) text = re.sub(r"\borganics?\b", "", text) text = re.sub(r"\b(oz|lb|lbs|kg|g|ml|l)\b", "", text) # Clean up extra spaces again text = " ".join(text.split()) return text.strip() def deterministic_categorize(item_name: str) -> Optional[str]: """Try to categorize using static rules. Returns: item_type if found, None otherwise """ normalized = normalize_text(item_name) # Direct substring matching for item_type, patterns in ITEM_TYPE_MAPPINGS.items(): for pattern in patterns: pattern_normalized = normalize_text(pattern) # Check if pattern appears in item name or vice versa if pattern_normalized in normalized or normalized in pattern_normalized: return item_type # Fuzzy word-based matching words = set(normalized.split()) for item_type, patterns in ITEM_TYPE_MAPPINGS.items(): for pattern in patterns: pattern_words = set(normalize_text(pattern).split()) # If all pattern words appear in item name if pattern_words and pattern_words.issubset(words): return item_type return None async def llm_categorize(item_name: str, ctx) -> str: """Use LLM to categorize unknown items. 
Args: item_name: Raw item name from receipt ctx: FastMCP Context for LLM sampling Returns: item_type category """ # Get list of known categories categories = ", ".join(sorted(ITEM_TYPE_MAPPINGS.keys())) prompt = f"""Categorize this grocery/household item into ONE category. Item: "{item_name}" Available categories: {categories}, other Rules: - Return ONLY the category name (lowercase, no spaces between words) - If the item clearly fits a category, use it - If uncertain or it doesn't fit any category, return "other" - Do not explain, just return the category name Category:""" try: response = await ctx.sample( messages=[{"role": "user", "content": prompt}], max_tokens=20, temperature=0.3, # Low temperature for consistent categorization ) # Extract and validate category category = response.strip().lower() # Remove any punctuation or extra text category = re.sub(r"[^\w]", "", category) # Validate it's a known category if category in ITEM_TYPE_MAPPINGS or category == "other": return category # If LLM returned something weird, default to "other" return "other" except Exception as e: # If LLM fails, default to "other" print(f"LLM categorization failed for '{item_name}': {e}") return "other" async def categorize_item(item_name: str, ctx=None) -> str: """Main categorization function with hybrid approach. Args: item_name: Raw item name from receipt ctx: Optional FastMCP Context for LLM fallback Returns: item_type category (guaranteed to return a value) """ # Try deterministic rules first category = deterministic_categorize(item_name) if category: return category # Fall back to LLM if context is available if ctx: return await llm_categorize(item_name, ctx) # Ultimate fallback return "other"

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Sharan0402/expense-tracker-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.