# json_tools.py.new • 9.47 kB
"""
JSON Tools Module
Provides utilities for working with JSON files and data, including reading, writing,
validating, and manipulating JSON content with proper error handling.
"""
import json
from pathlib import Path
from typing import Any, Dict, List, Union, TypeVar, Optional
from datetime import datetime
T = TypeVar('T')
def read_json_file(file_path: Union[str, Path]) -> Union[Dict, List]:
    """
    Read and parse a JSON file, returning its contents as a Python dictionary or list.

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    skipped so that files saved with a BOM still parse.

    Args:
        file_path: Path to the JSON file (as string or Path object)

    Returns:
        Union[Dict, List]: Parsed JSON data as a Python dictionary or list

    Raises:
        FileNotFoundError: If the specified file does not exist
        json.JSONDecodeError: If the file contains invalid JSON
        PermissionError: If there are insufficient permissions to read the file
        OSError: For other file-related errors
    """
    file_path = Path(file_path) if isinstance(file_path, str) else file_path
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged and strips a BOM when present;
        # with plain 'utf-8' the BOM reaches the parser and is rejected.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        # Use e.msg rather than str(e): JSONDecodeError appends the
        # "line X column Y (char Z)" suffix itself, so passing str(e)
        # would make that suffix appear twice in the final message.
        raise json.JSONDecodeError(
            f"Invalid JSON in file {file_path}: {e.msg}",
            e.doc,
            e.pos
        ) from e
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {file_path}") from e
    except PermissionError as e:
        raise PermissionError(
            f"Permission denied when reading file: {file_path}"
        ) from e
    except OSError as e:
        raise OSError(f"Error reading file {file_path}: {str(e)}") from e
def write_json_file(
    data: Any,
    file_path: Union[str, Path],
    indent: Optional[int] = 2,
    ensure_ascii: bool = False,
    sort_keys: bool = False,
    create_dirs: bool = True
) -> None:
    """
    Write data to a JSON file with proper error handling.

    The data is serialized to a string before the target file is opened, so
    a serialization failure never truncates or partially overwrites an
    existing file (and never creates parent directories).

    Args:
        data: Data to be serialized to JSON
        file_path: Path where to save the JSON file
        indent: Number of spaces for indentation (use None for compact output)
        ensure_ascii: If False, non-ASCII characters will be output as-is
        sort_keys: If True, output dictionaries will be sorted by key
        create_dirs: If True, create parent directories if they don't exist

    Raises:
        PermissionError: If there are insufficient permissions to write to the file
        OSError: For other file-related errors
        TypeError: If the data is not JSON serializable
    """
    try:
        file_path = Path(file_path) if isinstance(file_path, str) else file_path
        # Serialize first: opening with 'w' truncates immediately, so doing
        # this after open() would destroy the old contents on failure.
        payload = json.dumps(
            data,
            indent=indent,
            ensure_ascii=ensure_ascii,
            sort_keys=sort_keys,
            default=_json_serializer
        )
        if create_dirs:
            file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
    except (TypeError, ValueError) as e:
        raise TypeError(f"Data is not JSON serializable: {str(e)}") from e
    except PermissionError as e:
        raise PermissionError(
            f"Permission denied when writing to file: {file_path}"
        ) from e
    except OSError as e:
        raise OSError(f"Error writing to file {file_path}: {str(e)}") from e
def validate_json(json_str: str) -> bool:
    """
    Validate if a string is valid JSON.

    This function never raises for bad input: anything that cannot be parsed
    as a JSON document yields False.

    Args:
        json_str: String to validate as JSON

    Returns:
        bool: True if the string is valid JSON, False otherwise (including
        when the input is None or another non-string type, bytes that cannot
        be decoded, or a document nested too deeply to parse)
    """
    try:
        json.loads(json_str)
    except (ValueError, TypeError, RecursionError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError is raised for None / non-string input; RecursionError
        # is raised by the parser on extremely deep nesting.
        return False
    return True
def merge_json(*json_objects: Dict) -> Dict:
    """
    Recursively combine any number of dictionaries into one.

    Arguments are applied left to right. When the same key holds a dictionary
    on both sides, the two are merged recursively; in every other case the
    later value replaces the earlier one. Arguments that are not dictionaries
    are skipped. The inputs themselves are not modified, but values that did
    not need merging are placed in the result by reference, not copied.

    Args:
        *json_objects: Variable number of dictionaries to merge

    Returns:
        Dict: A new dictionary containing the merged result

    Example:
        >>> a = {"a": 1, "b": {"x": 10}}
        >>> b = {"b": {"y": 20}, "c": 30}
        >>> merge_json(a, b)
        {'a': 1, 'b': {'x': 10, 'y': 20}, 'c': 30}
    """
    merged: Dict = {}
    for source in json_objects:
        if isinstance(source, dict):
            for name, incoming in source.items():
                # A missing key yields None, which is never a dict, so this
                # also covers the "key not present yet" case.
                current = merged.get(name)
                both_dicts = isinstance(current, dict) and isinstance(incoming, dict)
                merged[name] = merge_json(current, incoming) if both_dicts else incoming
    return merged
def json_to_string(
    data: Any,
    indent: int = 2,
    ensure_ascii: bool = False,
    sort_keys: bool = False
) -> str:
    """
    Serialize a Python object into a formatted JSON string.

    Values the standard encoder cannot handle are passed to the module's
    `_json_serializer` fallback.

    Args:
        data: Python object to convert to JSON
        indent: Number of spaces for indentation
        ensure_ascii: If False, non-ASCII characters will be output as-is
        sort_keys: If True, output dictionaries will be sorted by key

    Returns:
        str: Formatted JSON string

    Raises:
        TypeError: If the data is not JSON serializable
    """
    dump_options = {
        "indent": indent,
        "ensure_ascii": ensure_ascii,
        "sort_keys": sort_keys,
        "default": _json_serializer,
    }
    try:
        rendered = json.dumps(data, **dump_options)
    except (TypeError, ValueError) as e:
        # ValueError (e.g. a circular reference) is reported as TypeError too,
        # so callers only have one exception type to handle.
        raise TypeError(f"Data is not JSON serializable: {str(e)}") from e
    return rendered
def format_json_string(
    json_str: str,
    indent: int = 2,
    sort_keys: bool = False
) -> str:
    """
    Format a JSON string with proper indentation.

    The input is parsed and then re-serialized through `json_to_string`.

    Args:
        json_str: JSON string to format
        indent: Number of spaces for indentation
        sort_keys: Whether to sort dictionary keys

    Returns:
        str: Formatted JSON string

    Raises:
        json.JSONDecodeError: If the input is not valid JSON
    """
    # Only the parse step can raise JSONDecodeError, so only it is guarded.
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        # Use e.msg rather than str(e): JSONDecodeError appends the
        # "line X column Y (char Z)" suffix itself, so passing str(e)
        # would make that suffix appear twice in the final message.
        raise json.JSONDecodeError(
            f"Invalid JSON string: {e.msg}",
            e.doc,
            e.pos
        ) from e
    return json_to_string(data, indent=indent, sort_keys=sort_keys)
def extract_json_from_text(text: str) -> List[Dict]:
    """
    Extract JSON objects from a text string.

    The text is scanned left to right. At every '{' the JSON decoder tries
    to parse one complete object starting there. On success the object is
    collected and scanning resumes after its end, so nested objects are
    returned only as part of their parent. On failure scanning moves on to
    the next '{'. Because the real parser decides where an object ends,
    braces inside JSON string values (e.g. {"a": "}"}) are handled correctly.

    Args:
        text: Text potentially containing JSON objects

    Returns:
        List[Dict]: List of extracted JSON objects as dictionaries, in the
        order they appear in the text
    """
    decoder = json.JSONDecoder()
    found: List[Dict] = []
    pos = text.find('{')
    while pos != -1:
        try:
            # raw_decode parses a single value starting at `pos` and returns
            # the index just past it, ignoring any trailing text.
            obj, end = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            # Not an object here; try again from the next opening brace.
            pos = text.find('{', pos + 1)
            continue
        found.append(obj)
        pos = text.find('{', end)
    return found
def _json_serializer(obj: Any) -> Any:
    """
    Custom JSON serializer for objects not serializable by default.

    Conversions are tried in this order:
      1. date / time / datetime objects -> ISO 8601 string
      2. set / frozenset -> list
      3. objects exposing a callable ``to_json`` -> its return value
      4. objects with a ``__dict__`` -> that attribute dictionary

    The explicit ``to_json`` hook is checked before the generic ``__dict__``
    fallback; almost every class instance has a ``__dict__``, so the reverse
    order would make the hook unreachable.

    Args:
        obj: Object to serialize

    Returns:
        A JSON-serializable representation of the object

    Raises:
        TypeError: If the object type is not supported
    """
    # Function-scope import: the module imports only the `datetime` class.
    from datetime import date, time

    # datetime is a subclass of date, so it is covered by this check.
    if isinstance(obj, (date, time)):
        return obj.isoformat()
    if isinstance(obj, (set, frozenset)):
        return list(obj)
    to_json = getattr(obj, 'to_json', None)
    if callable(to_json):
        return to_json()
    if hasattr(obj, '__dict__'):
        return obj.__dict__
    raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
# Example usage
def _run_examples() -> None:
    """
    Walk through each helper in this module and print the results.

    Writes ``example.json`` into the current working directory, reads it
    back, then demonstrates validation, merging, formatting and extraction.
    """
    # Round-trip a sample document through the file helpers.
    sample = {
        "name": "Example",
        "value": 42,
        "nested": {
            "items": [1, 2, 3],
            "active": True
        },
        "timestamp": datetime.now()
    }
    write_json_file(sample, "example.json", indent=2)
    print("Data written to example.json")

    reloaded = read_json_file("example.json")
    print("Data loaded from file:")
    print(json_to_string(reloaded, indent=2))

    # Validation
    candidate = '{"test": "value"}'
    print(f"Is valid JSON: {validate_json(candidate)}")

    # Deep merge
    left = {"a": 1, "b": {"x": 10}}
    right = {"b": {"y": 20}, "c": 30}
    combined = merge_json(left, right)
    print("Merged JSON:")
    print(json_to_string(combined, indent=2))

    # Pretty-printing a compact string
    compact = '{"a":1,"b":2}'
    print("Formatted JSON:")
    print(format_json_string(compact, indent=2))

    # Pulling objects out of free text
    text_with_json = (
        '\n'
        'Some text before\n'
        '{"key1": "value1", "key2": [1, 2, 3]}\n'
        'More text in between\n'
        '{"another": "object"}\n'
    )
    print("Extracted JSON objects:")
    extracted = extract_json_from_text(text_with_json)
    for number, item in enumerate(extracted, 1):
        print(f"Object {number}:", json_to_string(item, indent=2))


if __name__ == "__main__":
    # Top-level boundary: report any failure from the demo instead of a traceback.
    try:
        _run_examples()
    except Exception as e:
        print(f"Error: {str(e)}")