- 新增 README_CN.md 中文文档 - 新增 frontend/ Vue 3 前端项目 - 新增 web/ FastAPI 后端项目 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
239 lines
8.2 KiB
Python
239 lines
8.2 KiB
Python
"""Storage module for BtToxin Pipeline Web Backend.

Implements Redis + file hybrid storage strategy for task metadata.
- Primary storage: /data/jobs/<task_id>/task_meta.json (persistent)
- Cache: Redis task:{task_id}:meta (atomic operations, concurrent access)
- Write order: file first, then Redis
- Read order: Redis first, file on cache miss

Requirements: 2.1, 2.3
"""
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
import redis
|
|
|
|
from .config import settings
|
|
from .models import TaskMeta, TaskStatus, PipelineStage
|
|
|
|
|
|
class TaskStorage:
    """Hybrid storage for task metadata using Redis cache + file persistence.

    The JSON file ``<jobs_dir>/<task_id>/task_meta.json`` is the persistent
    copy; Redis holds a cached copy that expires after ``REDIS_CACHE_TTL``
    seconds. Writes go to the file first and then to Redis. Reads try Redis
    first and fall back to the file, repopulating the cache on a miss.

    NOTE(review): ``task_id`` is joined onto ``jobs_dir`` without any
    validation, so an empty id, ``".."`` or an absolute path would resolve
    outside the per-task directory (and ``delete_task`` removes whatever
    directory it resolves to). Presumably callers only pass server-generated
    ids -- confirm against the API layer.
    """

    TASK_META_FILENAME = "task_meta.json"
    REDIS_META_PREFIX = "task:"
    REDIS_META_SUFFIX = ":meta"
    REDIS_TOKEN_SUFFIX = ":token_hash"
    REDIS_CACHE_TTL = 86400  # 24 hours cache TTL

    def __init__(
        self,
        redis_url: Optional[str] = None,
        jobs_dir: Optional[str] = None,
    ):
        """Initialize storage with Redis connection and jobs directory.

        No Redis connection is opened here; see the ``redis`` property.

        Args:
            redis_url: Redis connection URL (defaults to settings.redis_url)
            jobs_dir: Base directory for job files (defaults to settings.jobs_dir)
        """
        self.redis_url = redis_url or settings.redis_url
        self.jobs_dir = Path(jobs_dir or settings.jobs_dir)
        self._redis_client: Optional["redis.Redis"] = None

    @property
    def redis(self) -> "redis.Redis":
        """Return the Redis client, creating it on first access.

        The client is created with ``decode_responses=True``, so values read
        back from Redis are ``str`` rather than ``bytes``.
        """
        if self._redis_client is None:
            self._redis_client = redis.from_url(
                self.redis_url,
                decode_responses=True,
            )
        return self._redis_client

    def _get_task_dir(self, task_id: str) -> Path:
        """Get the directory path for a task (``jobs_dir / task_id``)."""
        return self.jobs_dir / task_id

    def _get_meta_file_path(self, task_id: str) -> Path:
        """Get the task_meta.json file path for a task."""
        return self._get_task_dir(task_id) / self.TASK_META_FILENAME

    def _get_redis_meta_key(self, task_id: str) -> str:
        """Get Redis key for task metadata (``task:<task_id>:meta``)."""
        return f"{self.REDIS_META_PREFIX}{task_id}{self.REDIS_META_SUFFIX}"

    def _get_redis_token_key(self, task_id: str) -> str:
        """Get Redis key for token hash (``task:<task_id>:token_hash``)."""
        return f"{self.REDIS_META_PREFIX}{task_id}{self.REDIS_TOKEN_SUFFIX}"

    # ==================== File Operations ====================

    def _write_meta_file(self, task_meta: "TaskMeta") -> None:
        """Write task metadata to file (persistent storage).

        The JSON is written to a temporary sibling file and then moved over
        ``task_meta.json`` with ``os.replace``. If serialization or the write
        fails, the previous file content is left untouched and the temporary
        file is removed before the exception is re-raised.

        Args:
            task_meta: Object exposing ``task_id`` and ``to_dict()``.

        Raises:
            OSError: If the directory or file cannot be written.
            TypeError: If ``to_dict()`` returns data that is not JSON
                serializable.
        """
        file_path = self._get_meta_file_path(task_meta.task_id)
        file_path.parent.mkdir(parents=True, exist_ok=True)

        # A unique name in the same directory: concurrent writers do not
        # share a temp file, and the final rename stays on one filesystem.
        tmp_path = file_path.with_name(
            f".{file_path.name}.{os.getpid()}.{os.urandom(4).hex()}.tmp"
        )
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(task_meta.to_dict(), f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, file_path)
        except BaseException:
            # Do not leave a half-written temp file behind.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise

    def _read_meta_file(self, task_id: str) -> Optional["TaskMeta"]:
        """Read task metadata from file.

        Returns:
            The parsed TaskMeta, or None when the file is missing or its
            content cannot be decoded/parsed into a TaskMeta.
        """
        file_path = self._get_meta_file_path(task_id)
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            return TaskMeta.from_dict(data)
        except (FileNotFoundError, NotADirectoryError):
            # Missing task, or it was deleted between lookup and open.
            return None
        except (json.JSONDecodeError, UnicodeDecodeError, KeyError, TypeError):
            # Unreadable/corrupt metadata is reported as "not found".
            return None

    # ==================== Redis Operations ====================

    def _write_redis_cache(self, task_meta: "TaskMeta") -> None:
        """Write task metadata to Redis cache with a ``REDIS_CACHE_TTL`` expiry."""
        key = self._get_redis_meta_key(task_meta.task_id)
        self.redis.setex(key, self.REDIS_CACHE_TTL, task_meta.to_json())
        # Also cache token hash separately for faster auth checks
        if task_meta.token_hash:
            token_key = self._get_redis_token_key(task_meta.task_id)
            self.redis.setex(token_key, self.REDIS_CACHE_TTL, task_meta.token_hash)

    def _read_redis_cache(self, task_id: str) -> Optional["TaskMeta"]:
        """Read task metadata from Redis cache.

        Returns:
            The cached TaskMeta, or None on a cache miss or when the cached
            value cannot be parsed.
        """
        meta_key = self._get_redis_meta_key(task_id)
        data = self.redis.get(meta_key)
        if data is None:
            return None
        try:
            return TaskMeta.from_json(data)
        except (json.JSONDecodeError, KeyError, TypeError):
            return None

    def _delete_redis_cache(self, task_id: str) -> None:
        """Delete the metadata and token-hash keys of a task from Redis."""
        meta_key = self._get_redis_meta_key(task_id)
        token_key = self._get_redis_token_key(task_id)
        self.redis.delete(meta_key, token_key)

    # ==================== Public API ====================

    def create_task(self, task_meta: "TaskMeta") -> None:
        """Create a new task with metadata.

        Write order: file first (persistent), then Redis (cache).
        """
        # Write to file first (persistent storage)
        self._write_meta_file(task_meta)
        # Then update Redis cache
        self._write_redis_cache(task_meta)

    def get_task(self, task_id: str) -> Optional["TaskMeta"]:
        """Get task metadata.

        Read order: Redis first (fast), file on cache miss.

        Returns:
            The TaskMeta, or None if neither Redis nor the file has it.
        """
        # Try Redis cache first
        task_meta = self._read_redis_cache(task_id)
        if task_meta is not None:
            return task_meta

        # Cache miss - read from file
        task_meta = self._read_meta_file(task_id)
        if task_meta is not None:
            # Repopulate cache
            self._write_redis_cache(task_meta)
        return task_meta

    def update_task(self, task_meta: "TaskMeta") -> None:
        """Update task metadata.

        Write order: file first (persistent), then Redis (cache).
        """
        self._write_meta_file(task_meta)
        self._write_redis_cache(task_meta)

    def delete_task(self, task_id: str) -> bool:
        """Delete task and all associated files.

        The Redis keys are removed first, then the task directory. Errors
        while removing the directory are ignored, so a True result means the
        directory existed when checked, not that every file was removed.

        Returns True if the task directory existed.
        """
        import shutil

        task_dir = self._get_task_dir(task_id)
        existed = task_dir.exists()

        # Delete from Redis cache
        self._delete_redis_cache(task_id)

        # Delete task directory
        if existed:
            shutil.rmtree(task_dir, ignore_errors=True)

        return existed

    def update_status(
        self,
        task_id: str,
        status: "TaskStatus",
        current_stage: Optional["PipelineStage"] = None,
        progress: Optional[int] = None,
        error_message: Optional[str] = None,
        error_stage: Optional[str] = None,
    ) -> Optional["TaskMeta"]:
        """Update the status fields of a task.

        This is a read-modify-write: the task is loaded with ``get_task``,
        modified in memory and written back with ``update_task``. No lock or
        Redis transaction is involved, so two concurrent updates of the same
        task can overwrite each other (last writer wins).

        Args:
            task_id: Task to update.
            status: New status; always applied.
            current_stage: Applied only when not None.
            progress: Applied only when not None.
            error_message: Applied only when not None.
            error_stage: Applied only when not None.

        Returns:
            The updated TaskMeta, or None if the task was not found.
        """
        task_meta = self.get_task(task_id)
        if task_meta is None:
            return None

        # Update fields
        task_meta.status = status
        if current_stage is not None:
            task_meta.current_stage = current_stage
        if progress is not None:
            task_meta.progress = progress
        if error_message is not None:
            task_meta.error_message = error_message
        if error_stage is not None:
            task_meta.error_stage = error_stage

        # Persist changes
        self.update_task(task_meta)
        return task_meta

    def get_token_hash(self, task_id: str) -> Optional[str]:
        """Get token hash for a task (fast path via Redis).

        Used for authentication checks.

        Returns:
            The token hash, or None if the task is not found.
        """
        # Try Redis first
        token_key = self._get_redis_token_key(task_id)
        token_hash = self.redis.get(token_key)
        if token_hash is not None:
            return token_hash

        # Cache miss - get from task meta
        task_meta = self.get_task(task_id)
        if task_meta is not None:
            return task_meta.token_hash
        return None

    def task_exists(self, task_id: str) -> bool:
        """Check if a task exists in the Redis cache or on disk."""
        # Check Redis first
        meta_key = self._get_redis_meta_key(task_id)
        if self.redis.exists(meta_key):
            return True
        # Check file
        return self._get_meta_file_path(task_id).exists()

    def get_task_dir(self, task_id: str) -> Path:
        """Get the directory path for a task (public accessor)."""
        return self._get_task_dir(task_id)
|
|
|
|
|
|
# Global storage instance shared by importers of this module.
# Constructed with no arguments, so it takes its Redis URL and jobs
# directory from `settings`.
storage = TaskStorage()