"""Storage module for BtToxin Pipeline Web Backend. Implements Redis + file hybrid storage strategy for task metadata. - Primary storage: /data/jobs//task_meta.json (persistent) - Cache: Redis task:{task_id}:meta (atomic operations, concurrent access) - Write order: file first, then Redis - Read order: Redis first, file on cache miss Requirements: 2.1, 2.3 """ import json import os from pathlib import Path from typing import Optional import redis from .config import settings from .models import TaskMeta, TaskStatus, PipelineStage class TaskStorage: """Hybrid storage for task metadata using Redis cache + file persistence.""" TASK_META_FILENAME = "task_meta.json" REDIS_META_PREFIX = "task:" REDIS_META_SUFFIX = ":meta" REDIS_TOKEN_SUFFIX = ":token_hash" REDIS_CACHE_TTL = 86400 # 24 hours cache TTL def __init__(self, redis_url: str = None, jobs_dir: str = None): """Initialize storage with Redis connection and jobs directory. Args: redis_url: Redis connection URL (defaults to settings.redis_url) jobs_dir: Base directory for job files (defaults to settings.jobs_dir) """ self.redis_url = redis_url or settings.redis_url self.jobs_dir = Path(jobs_dir or settings.jobs_dir) self._redis_client: Optional[redis.Redis] = None @property def redis(self) -> redis.Redis: """Lazy Redis client initialization.""" if self._redis_client is None: self._redis_client = redis.from_url( self.redis_url, decode_responses=True ) return self._redis_client def _get_task_dir(self, task_id: str) -> Path: """Get the directory path for a task.""" return self.jobs_dir / task_id def _get_meta_file_path(self, task_id: str) -> Path: """Get the task_meta.json file path for a task.""" return self._get_task_dir(task_id) / self.TASK_META_FILENAME def _get_redis_meta_key(self, task_id: str) -> str: """Get Redis key for task metadata.""" return f"{self.REDIS_META_PREFIX}{task_id}{self.REDIS_META_SUFFIX}" def _get_redis_token_key(self, task_id: str) -> str: """Get Redis key for token hash.""" return f"{self.REDIS_META_PREFIX}{task_id}{self.REDIS_TOKEN_SUFFIX}" # ==================== File Operations ==================== def _write_meta_file(self, task_meta: TaskMeta) -> None: """Write task metadata to file (persistent storage).""" file_path = self._get_meta_file_path(task_meta.task_id) file_path.parent.mkdir(parents=True, exist_ok=True) with open(file_path, "w", encoding="utf-8") as f: json.dump(task_meta.to_dict(), f, indent=2, ensure_ascii=False) def _read_meta_file(self, task_id: str) -> Optional[TaskMeta]: """Read task metadata from file.""" file_path = self._get_meta_file_path(task_id) if not file_path.exists(): return None try: with open(file_path, "r", encoding="utf-8") as f: data = json.load(f) return TaskMeta.from_dict(data) except (json.JSONDecodeError, KeyError, TypeError): return None # ==================== Redis Operations ==================== def _write_redis_cache(self, task_meta: TaskMeta) -> None: """Write task metadata to Redis cache.""" key = self._get_redis_meta_key(task_meta.task_id) self.redis.setex(key, self.REDIS_CACHE_TTL, task_meta.to_json()) # Also cache token hash separately for faster auth checks if task_meta.token_hash: token_key = self._get_redis_token_key(task_meta.task_id) self.redis.setex(token_key, self.REDIS_CACHE_TTL, task_meta.token_hash) def _read_redis_cache(self, task_id: str) -> Optional[TaskMeta]: """Read task metadata from Redis cache.""" meta_key = self._get_redis_meta_key(task_id) data = self.redis.get(meta_key) if data is None: return None try: return TaskMeta.from_json(data) except (json.JSONDecodeError, KeyError, TypeError): return None def _delete_redis_cache(self, task_id: str) -> None: """Delete task metadata from Redis cache.""" meta_key = self._get_redis_meta_key(task_id) token_key = self._get_redis_token_key(task_id) self.redis.delete(meta_key, token_key) # ==================== Public API ==================== def create_task(self, task_meta: TaskMeta) -> None: """Create a new task with metadata. Write order: file first (persistent), then Redis (cache). """ # Write to file first (persistent storage) self._write_meta_file(task_meta) # Then update Redis cache self._write_redis_cache(task_meta) def get_task(self, task_id: str) -> Optional[TaskMeta]: """Get task metadata. Read order: Redis first (fast), file on cache miss. """ # Try Redis cache first task_meta = self._read_redis_cache(task_id) if task_meta is not None: return task_meta # Cache miss - read from file task_meta = self._read_meta_file(task_id) if task_meta is not None: # Repopulate cache self._write_redis_cache(task_meta) return task_meta def update_task(self, task_meta: TaskMeta) -> None: """Update task metadata. Write order: file first (persistent), then Redis (cache). """ self._write_meta_file(task_meta) self._write_redis_cache(task_meta) def delete_task(self, task_id: str) -> bool: """Delete task and all associated files. Returns True if task existed and was deleted. """ import shutil task_dir = self._get_task_dir(task_id) existed = task_dir.exists() # Delete from Redis cache self._delete_redis_cache(task_id) # Delete task directory if existed: shutil.rmtree(task_dir, ignore_errors=True) return existed def update_status( self, task_id: str, status: TaskStatus, current_stage: Optional[PipelineStage] = None, progress: Optional[int] = None, error_message: Optional[str] = None, error_stage: Optional[str] = None, ) -> Optional[TaskMeta]: """Atomically update task status fields. Uses Redis for atomic update, then persists to file. Returns updated TaskMeta or None if task not found. """ task_meta = self.get_task(task_id) if task_meta is None: return None # Update fields task_meta.status = status if current_stage is not None: task_meta.current_stage = current_stage if progress is not None: task_meta.progress = progress if error_message is not None: task_meta.error_message = error_message if error_stage is not None: task_meta.error_stage = error_stage # Persist changes self.update_task(task_meta) return task_meta def get_token_hash(self, task_id: str) -> Optional[str]: """Get token hash for a task (fast path via Redis). Used for authentication checks. """ # Try Redis first token_key = self._get_redis_token_key(task_id) token_hash = self.redis.get(token_key) if token_hash is not None: return token_hash # Cache miss - get from task meta task_meta = self.get_task(task_id) if task_meta is not None: return task_meta.token_hash return None def task_exists(self, task_id: str) -> bool: """Check if a task exists.""" # Check Redis first meta_key = self._get_redis_meta_key(task_id) if self.redis.exists(meta_key): return True # Check file return self._get_meta_file_path(task_id).exists() def get_task_dir(self, task_id: str) -> Path: """Get the directory path for a task (public accessor).""" return self._get_task_dir(task_id) # Global storage instance storage = TaskStorage()