"""Query utilities and helpers for iMessage database."""
import re
from datetime import datetime, timezone
from ..constants import COREDATA_EPOCH
def coredata_to_datetime(timestamp: int | None) -> datetime | None:
    """Convert a CoreData timestamp to a timezone-aware UTC datetime.

    CoreData timestamps count from 2001-01-01 00:00:00 UTC and may be stored
    in seconds or in nanoseconds. The unit is detected by magnitude: a value
    whose absolute value exceeds 1e12 is treated as nanoseconds.

    Args:
        timestamp: CoreData timestamp (seconds or nanoseconds since
            2001-01-01), or None.

    Returns:
        UTC datetime, or None if timestamp is None.
    """
    if timestamp is None:
        return None

    # Compare the magnitude rather than the signed value so nanosecond
    # timestamps before the 2001 epoch (large negative numbers) are scaled
    # down as well instead of being misread as seconds.
    if abs(timestamp) > 1e12:
        timestamp = timestamp / 1e9

    # Adding COREDATA_EPOCH moves the value onto the Unix time scale that
    # datetime.fromtimestamp expects.
    unix_ts = timestamp + COREDATA_EPOCH
    return datetime.fromtimestamp(unix_ts, tz=timezone.utc)
def datetime_to_coredata(dt: datetime) -> int:
    """Convert a Python datetime into a CoreData timestamp.

    Args:
        dt: Datetime to convert. A naive datetime (no tzinfo) is interpreted
            as UTC.

    Returns:
        Whole seconds since 2001-01-01; any fractional part is dropped by
        the int() conversion.
    """
    # Attach UTC to naive values so .timestamp() does not fall back to the
    # machine's local timezone.
    aware = dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
    return int(aware.timestamp() - COREDATA_EPOCH)
def normalize_handle(handle: str) -> str:
    """Normalize a phone number or email handle for database lookup.

    Surrounding whitespace is stripped first. After that:

    - A handle containing "@" is treated as an email and lowercased.
    - Anything else is treated as a phone number: every non-digit character
      is removed, and a "+" is put back in front only when the stripped
      input started with one.

    Args:
        handle: Phone number or email address.

    Returns:
        Normalized handle string.
    """
    cleaned = handle.strip()

    # Email branch: case-fold only, no other rewriting.
    if "@" in cleaned:
        return cleaned.lower()

    # Phone branch: keep digits, then restore the international prefix.
    digits_only = re.sub(r"\D", "", cleaned)
    return f"+{digits_only}" if cleaned.startswith("+") else digits_only
def normalize_for_contact_matching(handle: str, region: str = "US") -> str:
    """Normalize a phone number or email for contact matching.

    Emails (anything containing "@") are stripped and lowercased.

    Phone numbers are formatted as E.164 via the ``phonenumbers`` library
    when it is importable and the parsed number is valid. If the library is
    missing, the number cannot be parsed, or it is not valid, the handle is
    reduced to its digits with a leading "+" preserved.

    Args:
        handle: Phone number or email address.
        region: Default region for phone number parsing (default: "US").

    Returns:
        E.164 string for valid phone numbers, digits-only (optionally
        "+"-prefixed) for other phone-like input, lowercase for emails.
    """
    handle = handle.strip()

    # Check if it looks like an email
    if "@" in handle:
        return handle.lower()

    # phonenumbers is imported lazily so a missing install only disables the
    # E.164 path; the import failure is handled on its own so it cannot mask
    # unrelated errors.
    try:
        import phonenumbers
    except ImportError:
        phonenumbers = None

    if phonenumbers is not None:
        # Only the library's own parse failure triggers the fallback; any
        # other exception indicates a real bug and is allowed to propagate.
        try:
            parsed = phonenumbers.parse(handle, region)
        except phonenumbers.NumberParseException:
            parsed = None

        if parsed is not None and phonenumbers.is_valid_number(parsed):
            return phonenumbers.format_number(
                parsed, phonenumbers.PhoneNumberFormat.E164
            )

    # Fallback: strip to digits, preserve leading +
    has_plus = handle.startswith("+")
    digits = re.sub(r"\D", "", handle)
    return f"+{digits}" if has_plus else digits
def format_datetime_iso(dt: datetime | None) -> str | None:
"""Format datetime as ISO8601 string.
Args:
dt: Datetime to format
Returns:
ISO8601 formatted string, or None if dt is None
"""
if dt is None:
return None
return dt.isoformat()
def parse_chat_guid(guid: str) -> tuple[str, bool, str]:
    """Parse a chat GUID to extract service, type, and identifier.

    Chat GUIDs have the format ``service;type;identifier``:

    - service: "iMessage", "SMS", or "any"
    - type: "-" for 1:1 chats, "+" for group chats
    - identifier: phone number, email, or group chat ID

    Args:
        guid: Chat GUID string.

    Returns:
        Tuple of (service, is_group, identifier). When the GUID has fewer
        than three ";"-separated fields, returns ("unknown", False, guid).
    """
    # Split on the first two separators only, so an identifier that itself
    # contains ";" is returned whole instead of being cut at the next ";".
    parts = guid.split(";", 2)
    if len(parts) == 3:
        service, chat_type, identifier = parts
        return service, chat_type == "+", identifier

    # Fallback for unexpected format
    return "unknown", False, guid