feat(backend): add missing API endpoints, concurrency control, and queue management

- Add /api/v1/tasks router for task management
- Add DELETE endpoint for task deletion
- Add GET /download endpoint for result bundling (tar.gz)
- Add GET /queue endpoint for queue position queries
- Create ConcurrencyManager service with Redis Semaphore (16 concurrent limit)
- Add QUEUED status to JobStatus enum
- Update Job model with queue_position, current_stage, progress_percent fields
- Add scoring parameters (min_identity, min_coverage, etc.) to jobs API
- Implement pipeline stages: digger -> shoter -> plots -> bundle
- Add update_queue_positions Celery task for periodic queue updates
- Clean up duplicate code in main.py

Co-Authored-By: Claude <noreply@anthropic.com>

This commit is contained in:
zly
2026-01-13 23:41:15 +08:00
parent 1df699b338
commit d4f0e27af8
8 changed files with 517 additions and 272 deletions

View File

@@ -1,187 +1,3 @@
"""任务模型(使用 SQLModel"""
from typing import Optional, List
from datetime import datetime
from enum import Enum
from sqlmodel import SQLModel, Field, Relationship, Column
from sqlalchemy import JSON
from .base import TimestampModel, generate_uuid
class JobStatus(str, Enum):
    """Lifecycle states of a job.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class StepStatus(str, Enum):
    """Lifecycle states of a single step.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"
class JobBase(SQLModel):
    """Base fields shared by the Job table model and its create/read schemas."""

    # Job name, limited to 255 characters.
    name: str = Field(max_length=255)
    description: Optional[str] = None
    # Defaults to "nucl".
    sequence_type: str = Field(default="nucl", max_length=20)
    # Defaults to ".fna".
    scaf_suffix: str = Field(default=".fna", max_length=50)
    # Constrained to the inclusive range 1..32.
    threads: int = Field(default=4, ge=1, le=32)
    update_db: bool = Field(default=False)
class Job(JobBase, TimestampModel, table=True):
    """Job database model, mapped to the ``jobs`` table.

    Combines the shared ``JobBase`` fields with execution state, and owns
    the related ``Step`` and ``JobLog`` rows.
    """

    __tablename__ = "jobs"

    # Primary key, produced by ``generate_uuid`` when not supplied.
    id: str = Field(
        default_factory=generate_uuid,
        primary_key=True,
        index=True,
    )
    user_id: Optional[str] = Field(default=None, index=True)
    # Indexed; new jobs start as PENDING.
    status: JobStatus = Field(
        default=JobStatus.PENDING,
        sa_column_kwargs={"index": True},
    )
    # Stored in a JSON column.
    input_files: List[dict] = Field(default_factory=list, sa_column=Column(JSON))
    workspace_path: Optional[str] = Field(default=None, max_length=500)
    result_url: Optional[str] = Field(default=None, max_length=1000)
    celery_task_id: Optional[str] = Field(default=None, max_length=100, index=True)
    current_step: Optional[str] = Field(default=None, max_length=100)
    # Constrained to the inclusive range 0..100.
    progress: int = Field(default=0, ge=0, le=100)
    error_message: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    # Both relationships use the "all, delete-orphan" cascade.
    steps: List["Step"] = Relationship(
        back_populates="job",
        sa_relationship_kwargs={"cascade": "all, delete-orphan"},
    )
    logs: List["JobLog"] = Relationship(
        back_populates="job",
        sa_relationship_kwargs={"cascade": "all, delete-orphan"},
    )
class JobCreate(JobBase):
    """Request model for creating a Job; adds no fields beyond ``JobBase``."""

    pass
class JobRead(JobBase):
    """Response model for reading a Job.

    Extends ``JobBase`` with the identifier, execution state and timestamps.
    """

    id: str
    user_id: Optional[str]
    status: JobStatus
    workspace_path: Optional[str]
    result_url: Optional[str]
    celery_task_id: Optional[str]
    current_step: Optional[str]
    progress: int
    error_message: Optional[str]
    started_at: Optional[datetime]
    completed_at: Optional[datetime]
    created_at: datetime
    updated_at: datetime
class JobUpdate(SQLModel):
    """Request model for updating a Job.

    Every field is optional and defaults to ``None``.
    """

    status: Optional[JobStatus] = None
    current_step: Optional[str] = None
    progress: Optional[int] = None
    error_message: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
class StepBase(SQLModel):
    """Base fields shared by the Step table model and its read schema."""

    # Step name, limited to 100 characters.
    step_name: str = Field(max_length=100)
    step_order: int
class Step(StepBase, table=True):
    """Step database model, mapped to the ``steps`` table."""

    __tablename__ = "steps"

    # Integer primary key; ``None`` until a value is assigned.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Foreign key to ``jobs.id``, indexed.
    job_id: str = Field(foreign_key="jobs.id", index=True)
    status: StepStatus = Field(default=StepStatus.PENDING)
    celery_task_id: Optional[str] = Field(default=None, max_length=100)
    log_file: Optional[str] = Field(default=None, max_length=500)
    # Stored in a JSON column.
    result_data: Optional[dict] = Field(default=None, sa_column=Column(JSON))
    error_message: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    duration_seconds: Optional[int] = None

    # Counterpart of ``Job.steps``.
    job: "Job" = Relationship(back_populates="steps")
class StepRead(StepBase):
    """Response model for reading a Step."""

    id: int
    job_id: str
    status: StepStatus
    celery_task_id: Optional[str]
    log_file: Optional[str]
    result_data: Optional[dict]
    error_message: Optional[str]
    started_at: Optional[datetime]
    completed_at: Optional[datetime]
    duration_seconds: Optional[int]
class JobLogBase(SQLModel):
    """Base fields shared by the JobLog table model and its read schema."""

    # Log level, limited to 20 characters.
    level: str = Field(max_length=20)
    message: str
    step_name: Optional[str] = Field(default=None, max_length=100)
class JobLog(JobLogBase, table=True):
    """JobLog database model, mapped to the ``job_logs`` table."""

    __tablename__ = "job_logs"

    # Integer primary key; ``None`` until a value is assigned.
    id: Optional[int] = Field(default=None, primary_key=True)
    # Foreign key to ``jobs.id``, indexed.
    job_id: str = Field(foreign_key="jobs.id", index=True)
    # NOTE(review): ``metadata`` is a reserved attribute name in SQLAlchemy's
    # Declarative API - confirm this model can actually be mapped under that
    # name before relying on it.
    metadata: Optional[dict] = Field(default=None, sa_column=Column(JSON))
    # NOTE(review): ``datetime.utcnow`` returns a naive datetime and is
    # deprecated since Python 3.12 - confirm whether a timezone-aware value
    # is wanted here.
    timestamp: datetime = Field(
        default_factory=datetime.utcnow,
        sa_column_kwargs={"index": True},
    )

    # Counterpart of ``Job.logs``.
    job: "Job" = Relationship(back_populates="logs")
class JobLogRead(JobLogBase):
    """Response model for reading a JobLog."""

    id: int
    job_id: str
    metadata: Optional[dict]
    timestamp: datetime
"""任务模型"""
from sqlalchemy import Column, String, Integer, DateTime, JSON, Enum, Text
from sqlalchemy.sql import func
@@ -189,28 +5,46 @@ import enum
from ..database import Base
class JobStatus(str, enum.Enum):
    """Job status.

    Each member name is defined exactly once; an Enum body that reuses a
    member name raises ``TypeError`` when the class is created.
    """

    PENDING = "pending"  # waiting to enter the queue
    QUEUED = "queued"  # queued, waiting to execute
    RUNNING = "running"  # currently executing
    COMPLETED = "completed"  # execution finished
    FAILED = "failed"  # execution failed
class Job(Base):
    """ORM model mapped to the ``jobs`` table.

    Each column is declared exactly once, in the order the effective
    columns were originally created.
    """

    __tablename__ = "jobs"

    id = Column(String, primary_key=True, index=True)
    celery_task_id = Column(String, nullable=True, index=True)
    status = Column(Enum(JobStatus), default=JobStatus.PENDING, index=True)

    input_files = Column(JSON)
    sequence_type = Column(String, default="nucl")
    scaf_suffix = Column(String, default=".fna")
    threads = Column(Integer, default=4)

    # Analysis parameters
    min_identity = Column(Integer, default=80)  # stored as a percentage (0-100)
    min_coverage = Column(Integer, default=60)
    allow_unknown_families = Column(Integer, default=0)  # 0 = False, 1 = True
    require_index_hit = Column(Integer, default=1)

    result_url = Column(String, nullable=True)
    logs = Column(Text, nullable=True)
    error_message = Column(Text, nullable=True)

    # Queue position
    queue_position = Column(Integer, nullable=True)

    # Progress information
    current_stage = Column(String, nullable=True)  # digger, shoter, plots, bundle
    progress_percent = Column(Integer, default=0)

    created_at = Column(DateTime(timezone=True), server_default=func.now(), index=True)
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)
    # Only ``onupdate`` is declared (no insert-time default).
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())