# data_manager.py (19 kB)
"""
Data Manager Module
Handles loading, parsing, validation, and querying of the best practices database.
"""
import json
import os
import logging
from typing import Dict, Any, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default file path; get_file_path() falls back to it when the
# BEST_PRACTICES_FILE environment variable is not set.
DEFAULT_BEST_PRACTICES_FILE = "data/python_best_practices.json"
class BestPracticesError(Exception):
    """Root of the exception hierarchy for best practices failures."""
class FileLoadError(BestPracticesError):
    """Signals that the best practices file could not be loaded."""
class ValidationError(BestPracticesError):
    """Signals that the JSON content failed validation."""
def get_file_path() -> str:
    """
    Resolve the location of the best practices JSON file.

    Returns:
        The value of the BEST_PRACTICES_FILE environment variable when it is
        set, otherwise DEFAULT_BEST_PRACTICES_FILE.
    """
    return os.environ.get("BEST_PRACTICES_FILE", DEFAULT_BEST_PRACTICES_FILE)
def load_best_practices() -> Dict[str, Any]:
    """
    Load and validate the best practices database from JSON file.

    The file is read again on every call (nothing is cached), so edits to it
    are picked up without restarting. The path comes from get_file_path().

    Returns:
        Dictionary containing the best practices data

    Raises:
        FileLoadError: If the file is missing, unreadable, or loading fails
            for any other unexpected reason
        ValidationError: If the file is not valid JSON or its structure is
            rejected by _validate_structure()
    """
    file_path = get_file_path()
    logger.debug(f"Loading best practices from {file_path}")
    try:
        # EAFP: open directly instead of checking os.path.exists() first.
        # The pre-check was racy, and the FileLoadError it raised inside this
        # try block was swallowed by the generic handler below and re-wrapped
        # as an "unexpected" error.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        _validate_structure(data)
    except (FileLoadError, ValidationError):
        # Already a domain error with a precise message; re-raise as-is.
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in {file_path}: {e}")
        raise ValidationError(
            f"Failed to parse JSON file at '{file_path}'. "
            f"Error at line {e.lineno}, column {e.colno}: {e.msg}"
        ) from e
    except FileNotFoundError as e:
        logger.error(f"File not found: {file_path}")
        raise FileLoadError(
            f"Best practices file not found at '{file_path}'. "
            f"Please ensure the file exists or set BEST_PRACTICES_FILE environment variable."
        ) from e
    except PermissionError as e:
        logger.error(f"Permission denied reading {file_path}")
        raise FileLoadError(
            f"Permission denied reading '{file_path}'. "
            f"Please check file permissions."
        ) from e
    except Exception as e:
        # Boundary handler: callers only catch FileLoadError/ValidationError,
        # so anything else (decode errors, directories, ...) is converted.
        logger.error(f"Unexpected error loading best practices: {e}")
        raise FileLoadError(
            f"Failed to load best practices file: {str(e)}"
        ) from e
    logger.debug(f"Successfully loaded best practices from {file_path}")
    return data
def _validate_structure(data: Dict[str, Any]) -> None:
    """
    Validate the structure of the best practices data.

    Expected layout: a dictionary with a 'python_best_practices' key mapping
    category names to dictionaries of topics. Every topic is a dictionary
    with a 'description' and an 'examples' or 'tools' dictionary.

    Args:
        data: The loaded JSON data

    Raises:
        ValidationError: If structure is invalid
    """
    # A JSON document may legally be a list, string, number or null; reject
    # those up front instead of failing with a TypeError on the "in" test.
    if not isinstance(data, dict):
        raise ValidationError(
            "Invalid structure: top-level JSON value must be an object. "
            "Expected format: {'python_best_practices': {...}}"
        )
    # Check for root key
    if "python_best_practices" not in data:
        raise ValidationError(
            "Invalid structure: missing 'python_best_practices' root key. "
            "Expected format: {'python_best_practices': {...}}"
        )
    practices = data["python_best_practices"]
    if not isinstance(practices, dict):
        raise ValidationError(
            "Invalid structure: 'python_best_practices' must be a dictionary"
        )
    # Validate categories exist
    if not practices:
        raise ValidationError(
            "Invalid structure: 'python_best_practices' contains no categories"
        )
    # Validate each category has topics
    for category_name, category_data in practices.items():
        if not isinstance(category_data, dict):
            raise ValidationError(
                f"Invalid structure: category '{category_name}' must be a dictionary"
            )
        if not category_data:
            # An empty category is tolerated; it is only reported.
            logger.warning(f"Category '{category_name}' contains no topics")
        # Validate each topic has required fields
        for topic_name, topic_data in category_data.items():
            if not isinstance(topic_data, dict):
                raise ValidationError(
                    f"Invalid structure: topic '{category_name}.{topic_name}' must be a dictionary"
                )
            if "description" not in topic_data:
                raise ValidationError(
                    f"Invalid structure: topic '{category_name}.{topic_name}' missing 'description' field"
                )
            # Check for either 'examples' or 'tools' field (some topics use 'tools' instead)
            if "examples" not in topic_data and "tools" not in topic_data:
                raise ValidationError(
                    f"Invalid structure: topic '{category_name}.{topic_name}' missing 'examples' or 'tools' field"
                )
            # Validate the examples/tools field is a dictionary. Fall back to
            # 'tools' only when that key is actually present, so an empty but
            # valid 'examples': {} is not turned into None and rejected.
            examples_field = topic_data.get("examples")
            if not examples_field and "tools" in topic_data:
                examples_field = topic_data["tools"]
            if not isinstance(examples_field, dict):
                raise ValidationError(
                    f"Invalid structure: topic '{category_name}.{topic_name}' 'examples'/'tools' must be a dictionary"
                )
    logger.debug("Best practices structure validation passed")
def get_fallback_guidance() -> Dict[str, Any]:
    """
    Build the static guidance returned when the database cannot be used.

    Returns:
        A freshly built dictionary holding an error code, a message, basic
        Python and FastAPI tips, and a troubleshooting suggestion
    """
    python_tips = [
        "Use type hints for all function parameters and return values",
        "Follow PEP 8 naming conventions (snake_case for functions/variables)",
        "Write docstrings for all public functions and classes",
        "Use specific exception types instead of bare except clauses",
        "Organize code into logical modules following single responsibility principle",
    ]
    fastapi_tips = [
        "Define response models using Pydantic for validation",
        "Use proper HTTP status codes (200, 201, 404, etc.)",
        "Implement dependency injection for shared resources",
        "Use async/await for I/O-bound operations",
        "Add proper error handling with HTTPException",
    ]
    guidance: Dict[str, Any] = {}
    guidance["error"] = "database_unavailable"
    guidance["message"] = "Best practices database is currently unavailable"
    guidance["fallback_guidance"] = {
        "python_basics": python_tips,
        "fastapi_basics": fastapi_tips,
    }
    guidance["suggestion"] = "Check that the best practices file exists and is readable"
    return guidance
def _searchable_text(value: Any) -> str:
    """Flatten a JSON-like value (dict keys and all leaf values) into plain text."""
    if isinstance(value, dict):
        parts = []
        for key, item in value.items():
            parts.append(str(key))
            parts.append(_searchable_text(item))
        return "\n".join(parts)
    if isinstance(value, (list, tuple)):
        return "\n".join(_searchable_text(item) for item in value)
    return value if isinstance(value, str) else str(value)


def search_practices(keyword: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Search for best practices by keyword across all categories.

    The match is a case-insensitive substring test against each topic's name,
    its description, and the text of its examples (or tools). Example text is
    searched in raw form, so keywords containing quotes or non-ASCII
    characters match what is actually stored.

    Args:
        keyword: Search term to find
        data: Optional pre-loaded data (if None, loads from file)

    Returns:
        On success, a dictionary with 'matches', 'matches_by_category' and
        'count'; when nothing matches it also has 'suggestions' (close topic
        names) and 'message'. An empty keyword gives an 'invalid_keyword'
        error dictionary, and a load failure gives a 'data_unavailable' one.
    """
    if not keyword or not keyword.strip():
        return {
            "error": "invalid_keyword",
            "message": "Search keyword cannot be empty",
            "suggestion": "Please provide a non-empty search term"
        }
    keyword_lower = keyword.lower().strip()
    # Load data if not provided
    if data is None:
        try:
            data = load_best_practices()
        except (FileLoadError, ValidationError) as e:
            logger.error(f"Failed to load data for search: {e}")
            return {
                "error": "data_unavailable",
                "message": str(e),
                "matches": [],
                "count": 0
            }
    practices = data.get("python_best_practices", {})
    if not isinstance(practices, dict):
        # Caller-supplied data may be unvalidated; treat it as empty.
        practices = {}
    matches = []
    # Search through all categories and topics
    for category_name, category_data in practices.items():
        if not isinstance(category_data, dict):
            continue
        for topic_name, topic_data in category_data.items():
            if not isinstance(topic_data, dict):
                continue
            description = topic_data.get("description", "")
            examples = topic_data.get("examples", {}) or topic_data.get("tools", {})
            # Search the raw text rather than a JSON dump: json.dumps escapes
            # quotes, newlines and non-ASCII characters, which made keywords
            # such as 'f"' or accented words impossible to find.
            haystacks = (
                topic_name,
                description if isinstance(description, str) else "",
                _searchable_text(examples),
            )
            if not any(keyword_lower in text.lower() for text in haystacks):
                continue
            # Up to three sibling topics from the same category
            related_topics = [t for t in category_data if t != topic_name][:3]
            matches.append({
                "category": category_name,
                "topic": topic_name,
                "description": description,
                "examples": list(examples.keys()) if isinstance(examples, dict) else [],
                "related_topics": related_topics
            })
    # Organize by category
    matches_by_category = {}
    for match in matches:
        matches_by_category.setdefault(match["category"], []).append(match)
    result = {
        "matches": matches,
        "matches_by_category": matches_by_category,
        "count": len(matches)
    }
    # If no matches, suggest similar topics
    if not matches:
        from difflib import get_close_matches
        all_topics = []
        for category_data in practices.values():
            if isinstance(category_data, dict):
                all_topics.extend(category_data.keys())
        result["suggestions"] = get_close_matches(keyword.strip(), all_topics, n=3, cutoff=0.4)
        result["message"] = f"No exact matches found for '{keyword}'"
    return result
def find_similar_topics(query: str, data: Optional[Dict[str, Any]] = None, n: int = 3) -> list:
    """
    Find similar topics using fuzzy string matching.

    Candidates are every category name plus every topic name. Each distinct
    name is considered once, so a topic present in several categories cannot
    occupy more than one suggestion slot.

    Args:
        query: Search query
        data: Optional pre-loaded data (if None, loads from file)
        n: Maximum number of suggestions to return; values <= 0 yield []

    Returns:
        List of similar names, best match first; empty when nothing is close
        enough or the data cannot be loaded
    """
    from difflib import get_close_matches
    # difflib raises ValueError for n <= 0; "no suggestions" is the safer answer.
    if n <= 0:
        return []
    if data is None:
        try:
            data = load_best_practices()
        except (FileLoadError, ValidationError):
            return []
    practices = data.get("python_best_practices", {})
    # Collect all topic names and category names
    all_names = []
    for category_name, category_data in practices.items():
        all_names.append(category_name)
        if isinstance(category_data, dict):
            all_names.extend(category_data.keys())
    # dict.fromkeys drops duplicates while keeping first-seen order
    unique_names = list(dict.fromkeys(all_names))
    return get_close_matches(query, unique_names, n=n, cutoff=0.4)
def get_category(category: str, topic: Optional[str] = None, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Look up one category, or a single topic inside it.

    Args:
        category: Category name
        topic: Optional topic within the category; when omitted (or empty)
            every topic of the category is summarised
        data: Optional pre-loaded data (if None, loads from file)

    Returns:
        The topic's details, or a summary of the category's topics. Error
        dictionaries are returned for a load failure ('data_unavailable'),
        an unknown category ('invalid_category') and an unknown topic
        ('invalid_topic').
    """
    if data is None:
        try:
            data = load_best_practices()
        except (FileLoadError, ValidationError) as load_error:
            return {"error": "data_unavailable", "message": str(load_error)}

    all_categories = data.get("python_best_practices", {})

    # Unknown category: offer close names and the full category list
    if category not in all_categories:
        return {
            "error": "invalid_category",
            "message": f"Category '{category}' not found",
            "suggestions": find_similar_topics(category, data),
            "available_categories": list(all_categories.keys()),
        }

    entries = all_categories[category]

    # No topic requested: summarise every well-formed topic in the category
    if not topic:
        summaries = []
        for entry_name, entry in entries.items():
            if isinstance(entry, dict):
                samples = entry.get("examples") or entry.get("tools", {})
                summaries.append({
                    "name": entry_name,
                    "description": entry.get("description", ""),
                    "examples": list(samples.keys()) if isinstance(samples, dict) else [],
                    "documentation_links": [],
                })
        return {
            "category": category,
            "topics": summaries,
            "count": len(summaries),
        }

    # Unknown topic within a known category
    if topic not in entries:
        return {
            "error": "invalid_topic",
            "message": f"Topic '{topic}' not found in category '{category}'",
            "suggestions": find_similar_topics(topic, data),
            "available_topics": list(entries.keys()),
        }

    entry = entries[topic]
    return {
        "category": category,
        "topic": topic,
        "description": entry.get("description", ""),
        "examples": entry.get("examples") or entry.get("tools", {}),
        "documentation_links": [],  # Could be added to JSON in future
    }
def list_all_categories(data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Summarise every best practice category.

    Args:
        data: Optional pre-loaded data (if None, loads from file)

    Returns:
        Dictionary with a 'categories' list (name, description, topic_count
        per category) and a 'count'; on load failure, a 'data_unavailable'
        error dictionary with an empty 'categories' list
    """
    if data is None:
        try:
            data = load_best_practices()
        except (FileLoadError, ValidationError) as load_error:
            return {
                "error": "data_unavailable",
                "message": str(load_error),
                "categories": [],
            }

    # Hand-written blurbs for the known categories; any other category gets
    # a generic description built from its name.
    known_descriptions = {
        "general_coding": "General Python coding best practices including naming, types, and error handling",
        "fastapi_specific": "FastAPI-specific patterns for routes, models, dependencies, and async operations",
        "performance": "Performance optimization techniques including caching and database optimization",
        "code_quality": "Code quality tools and practices for linting, formatting, and logging",
    }

    # Categories whose value is not a dictionary are left out
    summaries = [
        {
            "name": name,
            "description": known_descriptions.get(name, f"Best practices for {name}"),
            "topic_count": len(topics),
        }
        for name, topics in data.get("python_best_practices", {}).items()
        if isinstance(topics, dict)
    ]
    return {"categories": summaries, "count": len(summaries)}
def get_examples(topic: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Get code examples for a specific topic.

    The topic is looked up by exact name across all categories; the first
    category holding a well-formed (dictionary) entry for it wins. Examples
    come from the topic's 'examples' field, or 'tools' when 'examples' is
    missing or empty.

    Args:
        topic: Topic name to retrieve examples for
        data: Optional pre-loaded data (if None, loads from file)

    Returns:
        Dictionary with 'topic', 'category', 'description', 'examples' (a
        list of name/code/context entries) and 'count'. Returns a
        'topic_not_found' error dictionary when the topic does not exist, a
        message with suggestions when it has no examples, and a
        'data_unavailable' error dictionary when loading fails.
    """
    if data is None:
        try:
            data = load_best_practices()
        except (FileLoadError, ValidationError) as e:
            return {
                "error": "data_unavailable",
                "message": str(e)
            }
    practices = data.get("python_best_practices", {})
    # Search for topic across all categories
    found_category = None
    found_topic_data = None
    for category_name, category_data in practices.items():
        if not isinstance(category_data, dict):
            continue
        candidate = category_data.get(topic)
        # Only a dictionary counts as a usable topic entry; malformed values
        # are skipped, as they are elsewhere in this module.
        if isinstance(candidate, dict):
            found_category = category_name
            found_topic_data = candidate
            break
    # Compare with None rather than truthiness: an existing topic whose
    # entry is an empty dict must not be reported as "not found".
    if found_topic_data is None:
        # Topic not found, suggest similar
        suggestions = find_similar_topics(topic, data)
        # Find topics that have examples
        topics_with_examples = []
        for cat_data in practices.values():
            if not isinstance(cat_data, dict):
                continue
            for t_name, t_data in cat_data.items():
                if isinstance(t_data, dict) and (t_data.get("examples") or t_data.get("tools")):
                    topics_with_examples.append(t_name)
        return {
            "error": "topic_not_found",
            "message": f"Topic '{topic}' not found",
            "suggestions": suggestions[:3],
            "topics_with_examples": topics_with_examples[:5]
        }
    # Get examples
    examples_dict = found_topic_data.get("examples") or found_topic_data.get("tools", {})
    if not isinstance(examples_dict, dict) or not examples_dict:
        # No usable examples, suggest related topics
        related = find_similar_topics(topic, data, n=5)
        return {
            "topic": topic,
            "category": found_category,
            "message": f"No examples found for topic '{topic}'",
            "suggestions": related
        }
    description = found_topic_data.get("description", "")
    # Format examples with syntax highlighting markers
    formatted_examples = []
    for example_name, example_code in examples_dict.items():
        # Wrap plain strings in a markdown code fence; already-fenced strings
        # and non-string values are passed through unchanged.
        if isinstance(example_code, str) and not example_code.startswith("```"):
            formatted_code = f"```python\n{example_code}\n```"
        else:
            formatted_code = example_code
        formatted_examples.append({
            "name": example_name,
            "code": formatted_code,
            "context": description
        })
    return {
        "topic": topic,
        "category": found_category,
        "description": description,
        "examples": formatted_examples,
        "count": len(formatted_examples)
    }