Expense Tracker MCP Server

Overview Schema Related Servers Score Discussions

category_detection.py•8.36 KiB

"""Smart category detection for expense tracker.

Goals:
- Suggest best-fit category for a free-text query or expense description.
- Return confidence score + top alternatives.
- Offer an 'auto_assign' boolean when confidence is high so modules can auto-fill.
- Simple, dependency-free (stdlib only) implementation suitable for offline use.
"""

from typing import List, Dict, Optional, Tuple
import re
import difflib

# Basic keyword mappings. Extend this map as your domain grows.
# Each key is a category -> entry contains keywords, typical price range (min, max).
KEYWORD_MAP = {
    "food": {
        "keywords": ["food", "lunch", "dinner", "breakfast", "restaurant", "cafe", "coffee",
                     "tea", "snack", "grocery", "vegetable", "fruit", "meal", "takeout",
                     "delivery", "bakery", "supermarket"],
        "price_range": (10, 5000),
    },
    "transport": {
        "keywords": ["fuel", "petrol", "diesel", "uber", "ola", "taxi", "bus", "train",
                     "metro", "auto", "rickshaw", "parking", "toll", "commute", "cab"],
        "price_range": (20, 10000),
    },
    "shopping": {
        "keywords": ["shopping", "clothes", "shirt", "pant", "dress", "shoe", "watch",
                     "electronics", "phone", "laptop", "gadget", "appliance", "furniture"],
        "price_range": (200, 100000),
    },
    "entertainment": {
        "keywords": ["movie", "cinema", "netflix", "prime", "spotify", "music", "game",
                     "concert", "party", "ticket", "show"],
        "price_range": (50, 20000),
    },
    "utilities": {
        "keywords": ["electricity", "water", "internet", "wifi", "mobile bill", "mobile",
                     "phone bill", "broadband", "gas", "cylinder", "cable", "tv"],
        "price_range": (100, 50000),
    },
    "health": {
        "keywords": ["medicine", "doctor", "hospital", "pharmacy", "medical", "clinic",
                     "checkup", "lab", "diagnostic", "health", "insurance"],
        "price_range": (50, 100000),
    },
    "education": {
        "keywords": ["book", "course", "tuition", "class", "training", "seminar", "workshop",
                     "school", "college", "university", "exam"],
        "price_range": (100, 200000),
    },
    "housing": {
        "keywords": ["rent", "mortgage", "emi", "home", "apartment", "flat", "lease"],
        "price_range": (1000, 500000),
    },
    "personal_care": {
        "keywords": ["salon", "barber", "spa", "gym", "fitness", "yoga", "haircut", "beauty",
                     "skincare", "cosmetic", "toiletries"],
        "price_range": (50, 20000),
    },
    "subscriptions": {
        "keywords": ["subscription", "membership", "renewal", "software", "app", "monthly",
                     "yearly", "plan", "premium"],
        "price_range": (50, 5000),
    },
    "travel": {
        "keywords": ["flight", "airline", "hotel", "booking", "vacation", "tour", "travel",
                     "stay"],
        "price_range": (500, 500000),
    },
    "home": {
        "keywords": ["sofa", "mattress", "furniture", "home", "kitchen", "decor", "appliance"],
        "price_range": (500, 200000),
    },
    "pet": {
        "keywords": ["pet", "vet", "veterinary", "pet food", "dog food", "cat food",
                     "grooming"],
        "price_range": (50, 50000),
    },
    "gifts_donations": {
        "keywords": ["gift", "donation", "charity", "present", "donate"],
        "price_range": (50, 100000),
    },
    "finance_fees": {
        "keywords": ["fee", "bank", "interest", "charges", "transaction fee", "atm fee"],
        "price_range": (10, 10000),
    },
    "misc": {
        "keywords": ["misc", "miscellaneous", "other", "unknown"],
        "price_range": (0, 100000),
    },
}

# Flat keyword -> category lookup used by the fuzzy (typo-tolerant) pass.
# A keyword listed under several categories maps to the last category that
# declares it (KEYWORD_MAP insertion order).
_FLAT_KEY_TO_CAT = {
    keyword: category
    for category, entry in KEYWORD_MAP.items()
    for keyword in entry["keywords"]
}

# Precompiled splitter: any run of non-word characters separates tokens.
_TOKEN_SPLIT_RE = re.compile(r"\W+")


def _tokenize(text: str) -> List[str]:
    """Lower-case *text* and split it into word tokens of length >= 2.

    Empty or ``None`` input yields an empty list instead of raising.
    """
    if not text:
        return []
    return [tok for tok in _TOKEN_SPLIT_RE.split(text.lower()) if len(tok) >= 2]


def _matched_keywords(tokens: List[str], keywords: List[str]) -> List[str]:
    """Return the entries of *keywords* that occur in *tokens*.

    Single-word keywords must equal a token. Multi-word keywords must appear
    as a run of whole, consecutive tokens; the space padding prevents partial
    word hits such as "pet food" inside "carpet food".
    """
    token_set = set(tokens)
    padded_text = " " + " ".join(tokens) + " "
    matched = []
    for kw in keywords:
        if " " in kw:
            if " " + kw + " " in padded_text:
                matched.append(kw)
        elif kw in token_set:
            matched.append(kw)
    return matched


def _keyword_score(tokens: List[str], keywords: List[str]) -> float:
    """Return matched_keywords / total_keywords as a score in [0, 1]."""
    if not keywords:
        return 0.0
    return len(_matched_keywords(tokens, keywords)) / len(keywords)


def _amount_score(amount: Optional[float], price_range: Tuple[int, int]) -> float:
    """Score how well *amount* fits *price_range* (low, high).

    Returns:
        0.5 when no amount is given (neutral),
        1.0 when low <= amount <= high,
        0.6 when amount is near the range: up to 50% above ``high`` or down
            to 50% of ``low``,
        0.0 otherwise.
    """
    if amount is None:
        return 0.5  # neutral if no amount given
    low, high = price_range
    if low <= amount <= high:
        return 1.0
    if high < amount <= high * 1.5 or low * 0.5 <= amount < low:
        return 0.6
    return 0.0


def _no_match_result() -> Dict:
    """Return the neutral 'misc' result used when there is nothing to go on."""
    return {
        "detected_category": "misc",
        "confidence": 0.0,
        "confidence_label": "low",
        "alternatives": [],
        "auto_assign": False,
        "extracted_keywords": [],
    }


def detect_category(text: str,
                    amount: Optional[float] = None,
                    top_n: int = 3,
                    high_threshold: float = 0.65,
                    low_threshold: float = 0.25) -> Dict:
    """Detect category for given text and optional amount.

    Args:
        text: Free-text query or expense description.
        amount: Optional expense amount, compared against each category's
            typical price range.
        top_n: Number of ranked categories to consider, including the best
            one; ``top_n - 1`` alternatives are returned. Values below 1
            still yield the best match, with no alternatives.
        high_threshold: Minimum score for the "high" label and auto-assign.
        low_threshold: Minimum score for the "medium" label.

    Returns a dict:
    {
        "detected_category": "food",
        "confidence": 0.82,
        "confidence_label": "high",   # high | medium | low
        "alternatives": [ {"category": "health", "score": 0.3}, ... ],
        "auto_assign": True|False,
        "extracted_keywords": ["pizza", "delivery"]
    }

    When the text is empty, or no keyword matches it (exactly or fuzzily),
    the result is "misc" with confidence 0.0, because the amount alone cannot
    tell categories with overlapping price ranges apart.
    """
    tokens = _tokenize(text)
    if not tokens:
        return _no_match_result()

    scores = {}
    extracted_keywords = set()

    for cat, data in KEYWORD_MAP.items():
        kws = data.get("keywords", [])
        matched = _matched_keywords(tokens, kws)
        key_score = len(matched) / len(kws) if kws else 0.0
        amt_score = _amount_score(amount, data.get("price_range", (0, 1_000_000)))
        # Weighted combination: keywords are primary (0.7), amount is secondary (0.3).
        scores[cat] = 0.7 * key_score + 0.3 * amt_score
        # Collect matched keywords for explanation.
        extracted_keywords.update(matched)

    # Fuzzy matching of individual tokens against known keywords (typo
    # tolerance). Exact hits also pass the cutoff, so every matched keyword
    # earns its category the flat boost.
    known_keywords = list(_FLAT_KEY_TO_CAT)
    for tok in tokens:
        for close_kw in difflib.get_close_matches(tok, known_keywords, n=2, cutoff=0.8):
            cat = _FLAT_KEY_TO_CAT[close_kw]
            scores[cat] = min(1.0, scores.get(cat, 0) + 0.15)
            extracted_keywords.add(close_kw)

    # Without any keyword evidence the ranking would be decided purely by
    # amount ties and dict order, so report "misc" instead of a random guess.
    if not extracted_keywords:
        return _no_match_result()

    # Stable sort: ties keep KEYWORD_MAP order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    best_cat, best_score = ranked[0]
    alternatives = [
        {"category": c, "score": round(s, 3)}
        for c, s in ranked[1:max(top_n, 1)]
    ]

    # Map numeric score to label.
    if best_score >= high_threshold:
        label = "high"
    elif best_score >= low_threshold:
        label = "medium"
    else:
        label = "low"

    return {
        "detected_category": best_cat,
        "confidence": round(best_score, 3),
        "confidence_label": label,
        "alternatives": alternatives,
        "auto_assign": best_score >= high_threshold,
        "extracted_keywords": sorted(extracted_keywords),
    }


# Backwards-compatible alias
suggest_category = detect_category


if __name__ == "__main__":
    tests = [
        ("2 coffees and a sandwich from cafe", 350),
        ("Uber ride to airport", 850),
        ("monthly netflix subscription", 199),
        ("Headphones sony wireless", 4500),
        ("emergency dentist visit", 7200),
        ("rent for apartment", 15000),
        ("dog vet appointment and vaccination", 1200),
    ]
    for text, amt in tests:
        print(text, amt, "->", detect_category(text, amt))

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Khushi-c-sharma/expense-tracker-mcp-server-improvised'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

category_detection.py•8.36 KiB