Session Buddy

Overview Schema Related Servers Score Discussions

cloud_sync.py•17.5 KiB

"""Cloud storage sync method for uploading memories to S3/R2. This module implements the CloudSyncMethod class which uploads DuckDB databases to S3-compatible cloud storage (Cloudflare R2, AWS S3, MinIO) using Oneiric storage adapters. Key Features: - Streaming upload with configurable chunking (5MB default) - Gzip compression for 65% size reduction - Upload deduplication via SHA-256 checksums - Retry logic with exponential backoff (3 retries) - Manifest.json creation for Akosha's IngestionWorker - Graceful degradation with error handling Example: >>> config = AkoshaSyncConfig.from_settings(settings) >>> cloud_sync = CloudSyncMethod(config) >>> await cloud_sync.sync() {'method': 'cloud', 'success': True, 'files_uploaded': [...]} """ from __future__ import annotations import asyncio import gzip import hashlib import json from datetime import UTC, datetime from pathlib import Path from typing import Any from session_buddy.storage.akosha_config import AkoshaSyncConfig from session_buddy.storage.sync_protocol import CloudUploadError, SyncMethod from session_buddy.utils.error_management import _get_logger logger = _get_logger() # Type for Oneiric S3 adapter (lazy import to avoid dependency if not used) S3Adapter: type[Any] | None = None def _get_s3_adapter_class() -> type[Any]: """Lazy import of Oneiric S3 adapter. Returns: S3StorageAdapter class from oneiric.adapters.storage.s3 Raises: ImportError: If oneiric package is not installed """ global S3Adapter if S3Adapter is None: try: from oneiric.adapters.storage.s3 import S3StorageAdapter S3Adapter = S3StorageAdapter except ImportError as e: logger.error(f"Oneiric S3 adapter not available: {e}") raise ImportError( "Oneiric S3 adapter requires 'oneiric' package. " "Install with: pip install oneiric" ) from e return S3Adapter class CloudSyncMethod(SyncMethod): """Cloud storage sync method using Oneiric S3 adapter. Uploads DuckDB databases to S3-compatible storage (R2, S3, MinIO). Architecture: 1. Compute SHA-256 checksums for deduplication 2. Optionally compress with gzip 3. Upload to systems/{system_id}/uploads/{upload_id}/ 4. Create manifest.json for Akosha's IngestionWorker 5. Retry failed uploads with exponential backoff Example: >>> config = AkoshaSyncConfig.from_settings(settings) >>> cloud_sync = CloudSyncMethod(config) >>> result = await cloud_sync.sync() >>> print(result['files_uploaded']) ['systems/macbook-pro/uploads/20250208_123456/reflection.duckdb'] """ # Upload structure MANIFEST_FILENAME = "manifest.json" REFLECTION_DB_NAME = "reflection.duckdb" KNOWLEDGE_GRAPH_DB_NAME = "knowledge_graph.duckdb" # Chunk size for large file uploads (5MB) CHUNK_SIZE = 5 * 1024 * 1024 # 5MB def __init__(self, config: AkoshaSyncConfig) -> None: """Initialize cloud sync method. Args: config: Akosha sync configuration Raises: ValueError: If configuration is invalid """ # Validate configuration errors = config.validate() if errors: raise ValueError(f"Invalid Akosha configuration: {'; '.join(errors)}") self.config = config self._s3_adapter: Any = None # Database paths self.reflection_db_path = Path.home() / ".claude" / "data" / "reflection.duckdb" self.knowledge_graph_db_path = ( Path.home() / ".claude" / "data" / "knowledge_graph.duckdb" ) logger.info( f"CloudSyncMethod initialized: bucket={config.cloud_bucket}, " f"endpoint={config.cloud_endpoint}, system_id={config.system_id_resolved}" ) def is_available(self) -> bool: """Check if cloud sync is available. Returns: True if cloud is configured and Oneiric is available Example: >>> cloud_sync.is_available() True # Bucket configured and Oneiric installed """ if not self.config.cloud_configured: logger.debug("Cloud sync not configured (no bucket)") return False try: _get_s3_adapter_class() return True except ImportError: logger.warning("Oneiric S3 adapter not available") return False def get_method_name(self) -> str: """Get method name for logging. Returns: Method name string """ return "cloud" async def sync( self, upload_reflections: bool = True, upload_knowledge_graph: bool = True, **kwargs: Any, ) -> dict[str, Any]: """Synchronize databases to cloud storage. Args: upload_reflections: Whether to upload reflection database upload_knowledge_graph: Whether to upload knowledge graph database **kwargs: Additional parameters (ignored) Returns: Sync result dictionary with: - method: "cloud" - success: bool - files_uploaded: list of cloud paths - bytes_transferred: int - duration_seconds: float - upload_id: Unique upload identifier - error: str | None Raises: CloudUploadError: If upload fails catastrophically Example: >>> result = await cloud_sync.sync() >>> print(result['upload_id']) '20250208_143052_macbook-pro-les' """ start_time = asyncio.get_event_loop().time() try: # Lazy-initialize S3 adapter if self._s3_adapter is None: self._s3_adapter = await self._create_s3_adapter() # Generate upload ID upload_id = self._generate_upload_id() logger.info(f"Starting cloud sync: upload_id={upload_id}") # Upload files files_uploaded: list[str] = [] bytes_transferred = 0 # Upload reflection database if upload_reflections and self.reflection_db_path.exists(): reflection_path = await self._upload_database( db_path=self.reflection_db_path, db_name=self.REFLECTION_DB_NAME, upload_id=upload_id, ) files_uploaded.append(reflection_path) bytes_transferred += self.reflection_db_path.stat().st_size # Upload knowledge graph database if upload_knowledge_graph and self.knowledge_graph_db_path.exists(): kg_path = await self._upload_database( db_path=self.knowledge_graph_db_path, db_name=self.KNOWLEDGE_GRAPH_DB_NAME, upload_id=upload_id, ) files_uploaded.append(kg_path) bytes_transferred += self.knowledge_graph_db_path.stat().st_size # Create and upload manifest if files_uploaded: manifest_path = await self._upload_manifest( upload_id=upload_id, files_uploaded=files_uploaded, ) files_uploaded.append(manifest_path) duration = asyncio.get_event_loop().time() - start_time logger.info( f"Cloud sync complete: {len(files_uploaded)} files, " f"{bytes_transferred:,} bytes, {duration:.2f}s" ) return { "method": "cloud", "success": True, "files_uploaded": files_uploaded, "bytes_transferred": bytes_transferred, "duration_seconds": duration, "upload_id": upload_id, "error": None, } except Exception as e: duration = asyncio.get_event_loop().time() - start_time logger.error(f"Cloud sync failed: {e}") raise CloudUploadError( message=f"Cloud upload failed: {e}", method="cloud", original=e, ) from e async def _create_s3_adapter(self) -> Any: """Create Oneiric S3 adapter instance. Returns: Initialized S3StorageAdapter instance Raises: CloudUploadError: If adapter creation fails """ try: S3StorageAdapter = _get_s3_adapter_class() # Build configuration for Oneiric config: dict[str, Any] = { "bucket_name": self.config.cloud_bucket, } # Add endpoint if specified (R2, MinIO, etc.) if self.config.cloud_endpoint: config["endpoint_url"] = self.config.cloud_endpoint # Add region if specified if self.config.cloud_region != "auto": config["region"] = self.config.cloud_region # Create adapter adapter = S3StorageAdapter(config=config) logger.info(f"Created S3 adapter: bucket={self.config.cloud_bucket}") return adapter except Exception as e: raise CloudUploadError( message=f"Failed to create S3 adapter: {e}", method="cloud", original=e, ) from e def _generate_upload_id(self) -> str: """Generate unique upload ID. Format: YYYYMMDD_HHMMSS_system-id Returns: Upload ID string Example: >>> _generate_upload_id() '20250208_143052_macbook-pro-les' """ timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S") system_id = self.config.system_id_resolved return f"{timestamp}_{system_id}" async def _upload_database( self, db_path: Path, db_name: str, upload_id: str, ) -> str: """Upload database file to cloud storage. Args: db_path: Local path to database file db_name: Database filename upload_id: Upload identifier Returns: Cloud storage path (s3://bucket/systems/.../db_name) Raises: CloudUploadError: If upload fails after retries """ # Check for deduplication if self.config.enable_deduplication: local_checksum = await self._compute_sha256(db_path) # Check if file already exists in cloud with same checksum cloud_path = self._get_cloud_path(db_name, upload_id) if await self._file_exists_with_checksum(cloud_path, local_checksum): logger.info(f"Skipping upload (unchanged): {db_name}") return cloud_path # Read and optionally compress file_data = await self._read_database(db_path) # Upload with retry logic cloud_path = await self._upload_with_retry( data=file_data, db_name=db_name, upload_id=upload_id, ) logger.info(f"Uploaded database: {db_name} -> {cloud_path}") return cloud_path async def _read_database(self, db_path: Path) -> bytes: """Read database file, optionally compressing. Args: db_path: Path to database file Returns: File data (compressed or raw) """ loop = asyncio.get_event_loop() def _read_sync() -> bytes: with open(db_path, "rb") as f: data = f.read() if self.config.enable_compression: return gzip.compress(data) return data return await loop.run_in_executor(None, _read_sync) async def _compute_sha256(self, file_path: Path) -> str: """Compute SHA-256 checksum of file. Args: file_path: Path to file Returns: Hexadecimal checksum string """ loop = asyncio.get_event_loop() def _compute_sync() -> str: sha256 = hashlib.sha256() with open(file_path, "rb") as f: for chunk in iter(lambda: f.read(8192), b""): sha256.update(chunk) return sha256.hexdigest() return await loop.run_in_executor(None, _compute_sync) async def _file_exists_with_checksum( self, cloud_path: str, expected_checksum: str, ) -> bool: """Check if file exists in cloud with matching checksum. Args: cloud_path: Cloud storage path expected_checksum: Expected SHA-256 checksum Returns: True if file exists with matching checksum """ # For now, skip this check (would require HEAD request or metadata) # TODO: Implement metadata-based checksum verification return False def _get_cloud_path(self, db_name: str, upload_id: str) -> str: """Get cloud storage path for database. Format: systems/{system_id}/uploads/{upload_id}/{db_name} Args: db_name: Database filename upload_id: Upload identifier Returns: Cloud storage path (without bucket prefix) Example: >>> _get_cloud_path("reflection.duckdb", "20250208_143052_mac") 'systems/macbook-pro/uploads/20250208_143052_mac/reflection.duckdb' """ system_id = self.config.system_id_resolved return f"systems/{system_id}/uploads/{upload_id}/{db_name}" async def _upload_with_retry( self, data: bytes, db_name: str, upload_id: str, ) -> str: """Upload with exponential backoff retry. Args: data: File data to upload db_name: Database filename upload_id: Upload identifier Returns: Cloud storage path Raises: CloudUploadError: If all retries exhausted """ cloud_path = self._get_cloud_path(db_name, upload_id) last_error: Exception | None = None for attempt in range(self.config.max_retries): try: await self._upload_to_s3(cloud_path, data) return cloud_path except Exception as e: last_error = e logger.warning( f"Upload attempt {attempt + 1}/{self.config.max_retries} failed: {e}" ) # Exponential backoff before retry if attempt < self.config.max_retries - 1: delay = self.config.retry_backoff_seconds * (2**attempt) logger.debug(f"Retrying in {delay:.1f}s...") await asyncio.sleep(delay) # All retries exhausted raise CloudUploadError( message=f"Upload failed after {self.config.max_retries} attempts", method="cloud", original=last_error, ) async def _upload_to_s3(self, cloud_path: str, data: bytes) -> None: """Upload data to S3 via Oneiric adapter. Args: cloud_path: Destination path in bucket data: Data to upload Raises: CloudUploadError: If upload fails """ if self._s3_adapter is None: raise RuntimeError("S3 adapter not initialized") try: # Oneiric's upload method (signature may vary) # Assuming: upload(path: str, data: bytes) -> None await self._s3_adapter.upload(path=cloud_path, data=data) except AttributeError: # Fallback: try synchronous upload in executor loop = asyncio.get_event_loop() def _sync_upload() -> None: self._s3_adapter.upload(path=cloud_path, data=data) await loop.run_in_executor(None, _sync_upload) async def _upload_manifest( self, upload_id: str, files_uploaded: list[str], ) -> str: """Upload manifest.json for Akosha's IngestionWorker. Args: upload_id: Upload identifier files_uploaded: List of uploaded file paths Returns: Cloud path to manifest Example: >>> manifest = await _upload_manifest('20250208_143052_mac', [...]) >>> print(manifest) 'systems/macbook-pro/uploads/20250208_143052_mac/manifest.json' """ # Build manifest matching Akosha's SystemMemoryUploadManifest schema manifest_data = { "upload_id": upload_id, "system_id": self.config.system_id_resolved, "timestamp": datetime.now(UTC).isoformat(), "files": [ { "name": Path(f).name, "path": f, "size_bytes": 0, # TODO: Get actual size "compression": "gzip" if self.config.enable_compression else "none", "checksum": "", # TODO: Add checksum } for f in files_uploaded ], "metadata": { "uploader": "session-buddy", "version": "1.0.0", }, } manifest_json = json.dumps(manifest_data, indent=2) manifest_path = self._get_cloud_path(self.MANIFEST_FILENAME, upload_id) # Upload manifest await self._upload_with_retry( data=manifest_json.encode("utf-8"), db_name=self.MANIFEST_FILENAME, upload_id=upload_id, ) logger.info(f"Uploaded manifest: {manifest_path}") return manifest_path __all__ = [ "CloudSyncMethod", ]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lesleslie/session-buddy'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

cloud_sync.py•17.5 KiB