Skip to main content
Glama
jezweb

Australian Postcodes MCP Server

validation_tools.py (13.8 kB)
"""Validation and fuzzy matching tools for suburb names."""

from typing import Dict, Any, Optional, List
import logging

from database import get_database
from utils.config import Config
from utils.fuzzy_match import (
    smart_match,
    phonetic_match,
    find_best_matches,
    handle_compound_words
)

logger = logging.getLogger(__name__)


async def find_similar_suburbs(
    misspelled: str,
    state: Optional[str] = None,
    threshold: Optional[float] = None
) -> Dict[str, Any]:
    """
    Find similar suburbs for a possibly misspelled name.

    An exact lookup is tried first. If it finds nothing, the candidate list
    for the (optional) state is run through ``smart_match``; if that also
    finds nothing, ``find_best_matches`` is tried with the threshold loosened
    to 70% of its value.

    Args:
        misspelled: Potentially misspelled suburb name
        state: Optional state filter (must be a key of ``Config.STATES``)
        threshold: Similarity threshold (0-1). ``None`` selects
            ``Config.FUZZY_THRESHOLD``; an explicit ``0`` is honoured.

    Returns:
        Dictionary with matched suburbs and confidence scores. ``status`` is
        ``"success"`` or ``"error"``; any exception is logged and reported
        as an error dictionary rather than raised.
    """
    try:
        misspelled = misspelled.strip()
        # Compare against None explicitly: ``threshold or default`` would
        # silently replace a caller-supplied 0.0 with the default.
        if threshold is None:
            threshold = Config.FUZZY_THRESHOLD

        # Validate state if provided
        if state:
            state = state.strip().upper()
            if state not in Config.STATES:
                return {
                    "status": "error",
                    "error": f"Invalid state: {state}",
                    "suggestion": f"Valid states are: {', '.join(Config.STATES.keys())}"
                }

        db = await get_database()

        # First check for exact match
        exact_results = await db.search_by_suburb(misspelled, state)
        if exact_results:
            return {
                "status": "success",
                "exact_match": True,
                "confidence": 1.0,
                "primary_result": exact_results[0],
                "all_results": exact_results,
                "validation_notes": ["Exact match found"]
            }

        # Get all suburbs for fuzzy matching
        all_suburbs = await db.get_all_suburbs(state)

        # Perform smart matching (fuzzy + phonetic)
        matches = smart_match(
            misspelled,
            all_suburbs,
            use_fuzzy=True,
            use_phonetic=True
        )

        if not matches:
            # Try with looser threshold
            matches = find_best_matches(
                misspelled,
                all_suburbs,
                threshold=threshold * 0.7,
                limit=Config.MAX_SUGGESTIONS
            )

        if not matches:
            return {
                "status": "error",
                "exact_match": False,
                "confidence": 0.0,
                "error": f"No similar suburbs found for '{misspelled}'",
                "suggestion": "Please check the spelling or try a different search"
            }

        # Get full details for matched suburbs; matches that cannot be
        # resolved to database rows are dropped.
        suggestions = []
        for match in matches:
            suburb_details = await db.search_by_suburb(match['match'], state)
            if suburb_details:
                suggestions.append({
                    "suburb": match['match'],
                    "confidence": match['confidence'],
                    "match_type": match.get('match_type', 'fuzzy'),
                    "postcodes": [r['postcode'] for r in suburb_details],
                    "states": list(set(r['state'] for r in suburb_details))
                })

        # Determine best match (first resolved suggestion, if any)
        best_match = suggestions[0] if suggestions else None

        # Only add the "Best match" note when there is one, so the list
        # never contains an empty-string entry.
        validation_notes = [f"Found {len(suggestions)} similar suburb(s)"]
        if best_match:
            validation_notes.append(
                f"Best match: {best_match['suburb']} ({best_match['confidence']:.2f} confidence)"
            )

        return {
            "status": "success",
            "exact_match": False,
            "confidence": best_match['confidence'] if best_match else 0.0,
            "primary_result": best_match,
            "suggestions": suggestions,
            "query": misspelled,
            "suggestion": f"Did you mean '{best_match['suburb']}'?" if best_match else None,
            "validation_notes": validation_notes
        }

    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Error finding similar suburbs: %s", e)
        return {
            "status": "error",
            "error": str(e)
        }


async def autocomplete_suburb(
    partial: str,
    state: Optional[str] = None,
    limit: int = 10
) -> Dict[str, Any]:
    """
    Provide autocomplete suggestions for partial suburb names.

    Case-insensitive prefix matches are listed first, followed by results
    from the database fuzzy search until ``limit`` suggestions are collected.

    Args:
        partial: Partial suburb name (minimum 2 characters after stripping)
        state: Optional state filter (must be a key of ``Config.STATES``)
        limit: Maximum number of suggestions

    Returns:
        Dictionary with autocomplete suggestions. Every error response
        carries an empty ``suggestions`` list. Prefix entries expose
        ``states``/``postcodes`` lists, whereas fuzzy entries expose the
        single ``state``/``postcode`` of the matched row.
    """
    try:
        partial = partial.strip()

        # Require minimum length
        if len(partial) < 2:
            return {
                "status": "error",
                "error": "Please enter at least 2 characters",
                "suggestions": []
            }

        # Validate state if provided
        if state:
            state = state.strip().upper()
            if state not in Config.STATES:
                return {
                    "status": "error",
                    "error": f"Invalid state: {state}",
                    # Same shape as the other error responses of this function.
                    "suggestions": []
                }

        db = await get_database()

        # Use fuzzy search; over-fetch because some rows are discarded as
        # duplicates of prefix matches below.
        results = await db.search_fuzzy(partial, state, limit=limit * 2)

        # Also get exact prefix matches (needle lower-cased once, not per item)
        all_suburbs = await db.get_all_suburbs(state)
        needle = partial.lower()
        prefix_matches = [
            s for s in all_suburbs
            if s.lower().startswith(needle)
        ]

        # De-duplicate (order-preserving) BEFORE applying the limit so that
        # repeated names cannot use up prefix slots.
        unique_prefix_matches = list(dict.fromkeys(prefix_matches))[:limit]

        # Combine and deduplicate
        suggestions = []
        seen = set()

        # Add prefix matches first (higher priority)
        for suburb in unique_prefix_matches:
            details = await db.search_by_suburb(suburb, state)
            if details:
                suggestions.append({
                    "suburb": suburb,
                    "match_type": "prefix",
                    "states": list(set(r['state'] for r in details)),
                    "postcodes": list(set(r['postcode'] for r in details))
                })
                seen.add(suburb)

        # Add fuzzy matches until the limit is reached
        for result in results:
            if result['locality'] not in seen and len(suggestions) < limit:
                suggestions.append({
                    "suburb": result['locality'],
                    "match_type": "fuzzy",
                    "state": result['state'],
                    "postcode": result['postcode']
                })
                seen.add(result['locality'])

        return {
            "status": "success",
            "query": partial,
            "suggestions": suggestions[:limit],
            "count": len(suggestions),
            "validation_notes": [f"Found {len(suggestions)} suggestion(s) for '{partial}'"]
        }

    except Exception as e:
        logger.exception("Error in autocomplete: %s", e)
        return {
            "status": "error",
            "error": str(e),
            "suggestions": []
        }


async def validate_spelling(suburb: str) -> Dict[str, Any]:
    """
    Check spelling and suggest corrections for suburb names.

    The name is considered correctly spelled when an exact database lookup
    returns rows; otherwise up to five fuzzy corrections are proposed.

    Args:
        suburb: Suburb name to validate

    Returns:
        Validation result with spelling suggestions. Exceptions are logged
        and reported as an error dictionary rather than raised.
    """
    try:
        suburb = suburb.strip()

        db = await get_database()

        # Check if it's already correct
        exact_results = await db.search_by_suburb(suburb)
        if exact_results:
            return {
                "status": "success",
                "spelling_correct": True,
                "suburb": suburb,
                "confidence": 1.0,
                "validation_notes": ["Spelling is correct"],
                "details": exact_results
            }

        # Find similar suburbs
        all_suburbs = await db.get_all_suburbs()

        # Use fuzzy matching to find corrections
        matches = find_best_matches(
            suburb,
            all_suburbs,
            threshold=0.7,  # Lower threshold for spelling corrections
            limit=5
        )

        if not matches:
            return {
                "status": "error",
                "spelling_correct": False,
                "error": f"No similar suburbs found for '{suburb}'",
                "confidence": 0.0
            }

        # Format corrections; matches without database rows are dropped
        corrections = []
        for match in matches:
            details = await db.search_by_suburb(match['match'])
            if details:
                corrections.append({
                    "suburb": match['match'],
                    "confidence": match['confidence'],
                    "states": list(set(r['state'] for r in details)),
                    "postcodes": list(set(r['postcode'] for r in details))
                })

        best_match = corrections[0] if corrections else None

        return {
            "status": "success",
            "spelling_correct": False,
            "query": suburb,
            "confidence": best_match['confidence'] if best_match else 0.0,
            "suggested_spelling": best_match['suburb'] if best_match else None,
            "corrections": corrections,
            "suggestion": f"Did you mean '{best_match['suburb']}'?" if best_match else None,
            "validation_notes": [
                "Spelling appears incorrect",
                f"Found {len(corrections)} possible correction(s)"
            ]
        }

    except Exception as e:
        logger.exception("Error validating spelling: %s", e)
        return {
            "status": "error",
            "error": str(e)
        }


async def phonetic_search(spoken_name: str) -> Dict[str, Any]:
    """
    Search for suburbs based on phonetic similarity (for voice input).

    Lookup order: exact match on the input, exact match on each compound-word
    variation (reported with confidence 0.95), phonetic match on the input,
    then phonetic match on the variations with the threshold reduced to 90%.

    Args:
        spoken_name: Suburb name as heard/spoken

    Returns:
        Phonetically similar suburbs. Exceptions are logged and reported as
        an error dictionary rather than raised.
    """
    try:
        spoken_name = spoken_name.strip()

        db = await get_database()

        # First check exact match
        exact_results = await db.search_by_suburb(spoken_name)
        if exact_results:
            return {
                "status": "success",
                "exact_match": True,
                "confidence": 1.0,
                "results": exact_results,
                "match_type": "exact",
                "validation_notes": ["Exact match found"]
            }

        # Handle compound word variations
        variations = handle_compound_words(spoken_name)

        for variant in variations:
            # Try exact match for variant
            variant_results = await db.search_by_suburb(variant)
            if variant_results:
                return {
                    "status": "success",
                    "exact_match": True,
                    "confidence": 0.95,  # Slightly lower for variant match
                    "results": variant_results,
                    "match_type": "compound_variant",
                    "original_query": spoken_name,
                    "matched_variant": variant,
                    "validation_notes": [f"Found match for variant: '{variant}'"]
                }

        # Get all suburbs for phonetic matching
        all_suburbs = await db.get_all_suburbs()

        # Perform phonetic matching
        phonetic_matches = phonetic_match(
            spoken_name,
            all_suburbs,
            threshold=Config.PHONETIC_THRESHOLD
        )

        if not phonetic_matches:
            # Try with variations, stopping at the first one that matches.
            # The first entry is skipped on the assumption that it is the
            # original query, which was already tried above.
            for variant in variations[1:]:
                phonetic_matches = phonetic_match(
                    variant,
                    all_suburbs,
                    threshold=Config.PHONETIC_THRESHOLD * 0.9
                )
                if phonetic_matches:
                    break

        if not phonetic_matches:
            return {
                "status": "error",
                "exact_match": False,
                "confidence": 0.0,
                "error": f"No phonetic matches found for '{spoken_name}'",
                "suggestion": "Please try spelling the suburb name differently"
            }

        # Get full details for matches; unresolved matches are dropped
        results = []
        for match in phonetic_matches:
            suburb_details = await db.search_by_suburb(match['match'])
            if suburb_details:
                results.append({
                    "suburb": match['match'],
                    "confidence": match['confidence'],
                    "match_type": "phonetic",
                    "postcodes": [r['postcode'] for r in suburb_details],
                    "states": list(set(r['state'] for r in suburb_details))
                })

        best_match = results[0] if results else None

        return {
            "status": "success",
            "exact_match": False,
            "confidence": best_match['confidence'] if best_match else 0.0,
            "spoken_input": spoken_name,
            "primary_result": best_match,
            "phonetic_matches": results,
            "suggestion": f"Did you say '{best_match['suburb']}'?" if best_match else None,
            "validation_notes": [
                f"Found {len(results)} phonetic match(es)",
                "These suburbs sound similar to what was spoken"
            ]
        }

    except Exception as e:
        logger.exception("Error in phonetic search: %s", e)
        return {
            "status": "error",
            "error": str(e)
        }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jezweb/australian-postcodes-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.