feat(backend): implement CRISPR-Cas API parameters and database schema
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
### Phase 3: 整合与可视化
|
||||
- [x] **C3.1**: 修改 `bttoxin_shoter.py` 集成 CRISPR 评分参数
|
||||
- [x] **C3.2**: 更新 `plot_shotter.py` 添加 CRISPR 可视化面板
|
||||
- [ ] **C3.3**: 更新 API 支持 CRISPR 参数输入 (Backend pending)
|
||||
- [x] **C3.3**: 更新 API 支持 CRISPR 参数输入 (Backend updated)
|
||||
|
||||
## 已完成 (上一阶段)
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ async def create_job(
|
||||
min_coverage: float = Form(0.6),
|
||||
allow_unknown_families: bool = Form(False),
|
||||
require_index_hit: bool = Form(True),
|
||||
crispr_fusion: bool = Form(False),
|
||||
crispr_weight: float = Form(0.0),
|
||||
db: Session = Depends(get_db),
|
||||
i18n: I18n = Depends(get_i18n)
|
||||
):
|
||||
@@ -89,6 +91,8 @@ async def create_job(
|
||||
min_coverage=int(min_coverage * 100),
|
||||
allow_unknown_families=int(allow_unknown_families),
|
||||
require_index_hit=int(require_index_hit),
|
||||
crispr_fusion=int(crispr_fusion),
|
||||
crispr_weight=int(crispr_weight * 100),
|
||||
)
|
||||
|
||||
db.add(job)
|
||||
@@ -107,6 +111,8 @@ async def create_job(
|
||||
min_coverage=min_coverage,
|
||||
allow_unknown_families=allow_unknown_families,
|
||||
require_index_hit=require_index_hit,
|
||||
crispr_fusion=crispr_fusion,
|
||||
crispr_weight=crispr_weight,
|
||||
)
|
||||
|
||||
job.celery_task_id = task.id
|
||||
|
||||
@@ -42,6 +42,10 @@ class Job(Base):
|
||||
allow_unknown_families = Column(Integer, default=0) # 0 = False, 1 = True
|
||||
require_index_hit = Column(Integer, default=1)
|
||||
|
||||
# CRISPR-Cas 参数
|
||||
crispr_fusion = Column(Integer, default=0) # 0 = False, 1 = True
|
||||
crispr_weight = Column(Integer, default=0) # 存储为百分比 (0-100)
|
||||
|
||||
result_url = Column(String, nullable=True)
|
||||
logs = Column(Text, nullable=True)
|
||||
error_message = Column(Text, nullable=True)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class SequenceType(str, Enum):
|
||||
@@ -18,136 +19,61 @@ class PlatformType(str, Enum):
|
||||
HYBRID = "hybrid"
|
||||
|
||||
|
||||
class JobCreateRequest(BaseModel):
    """Request payload for creating a job, with parameters for every sequence type.

    Which optional fields are required depends on ``sequence_type`` (and, for
    reads, on ``platform``); ``validate_by_type`` enforces those rules and
    fills in a default file suffix for nucl / orfs / prot inputs.
    """

    # Basic information
    name: str = Field(..., min_length=1, max_length=255, description="任务名称")
    description: Optional[str] = Field(None, max_length=1000, description="任务描述")

    # Sequence type
    sequence_type: SequenceType = Field(default=SequenceType.NUCL, description="输入序列类型")

    # nucl
    scaf_suffix: Optional[str] = Field(
        None, pattern=r"^\.\w+$", description="基因组文件后缀(nucl)", examples=[".fna", ".fasta", ".fa"]
    )

    # orfs
    orfs_suffix: Optional[str] = Field(None, pattern=r"^\.\w+$", description="ORF 文件后缀(orfs)")

    # prot
    prot_suffix: Optional[str] = Field(None, pattern=r"^\.\w+$", description="蛋白文件后缀(prot)")

    # reads
    platform: Optional[PlatformType] = Field(None, description="测序平台(reads)")
    reads1_suffix: Optional[str] = Field(None, description="Reads1 后缀(illumina/hybrid)")
    reads2_suffix: Optional[str] = Field(None, description="Reads2 后缀(illumina/hybrid)")
    genome_size: Optional[str] = Field(
        None, pattern=r"^\d+(\.\d+)?[mMgG]?$", description="基因组大小估计(pacbio/oxford)"
    )
    suffix_len: Optional[int] = Field(None, ge=0, description="reads 文件后缀长度")

    # hybrid needs complete file names rather than suffixes
    short1: Optional[str] = Field(None, description="短 reads 1 文件名(完整文件名)")
    short2: Optional[str] = Field(None, description="短 reads 2 文件名(完整文件名)")
    long: Optional[str] = Field(None, description="长 reads 文件名(完整文件名)")

    # Execution parameters
    threads: int = Field(default=4, ge=1, le=32, description="线程数")
    update_db: bool = Field(default=False, description="是否更新数据库")
    assemble_only: bool = Field(default=False, description="仅执行组装")

    @field_validator("scaf_suffix", "orfs_suffix", "prot_suffix")
    @classmethod
    def validate_suffix(cls, v: Optional[str]) -> Optional[str]:
        """Reject a suffix that does not start with a dot; ``None`` passes through."""
        if v is not None and not v.startswith("."):
            raise ValueError("文件后缀必须以 . 开头")
        return v

    @model_validator(mode="after")
    def validate_by_type(self):
        """Apply the per-sequence-type rules once all fields are parsed.

        * nucl / orfs / prot: a missing suffix defaults to ``.fna`` / ``.ffn`` /
          ``.faa`` respectively.
        * reads: ``platform`` is mandatory, and each platform requires its own
          set of fields.

        Returns:
            The validated model instance (possibly with a default suffix set).

        Raises:
            ValueError: when a field required by the chosen type/platform is
                missing or empty.
        """
        if self.sequence_type == SequenceType.NUCL:
            if not self.scaf_suffix:
                self.scaf_suffix = ".fna"
        elif self.sequence_type == SequenceType.ORFS:
            if not self.orfs_suffix:
                self.orfs_suffix = ".ffn"
        elif self.sequence_type == SequenceType.PROT:
            if not self.prot_suffix:
                self.prot_suffix = ".faa"
        elif self.sequence_type == SequenceType.READS:
            if not self.platform:
                raise ValueError("reads 类型必须指定 platform")
            if self.platform == PlatformType.ILLUMINA:
                if not self.reads1_suffix or not self.reads2_suffix:
                    raise ValueError("illumina 平台必须指定 reads1_suffix 和 reads2_suffix")
            elif self.platform in [PlatformType.PACBIO, PlatformType.OXFORD]:
                # Interpolate the enum *value*: since Python 3.11 formatting a
                # (str, Enum) member yields "PlatformType.PACBIO", not "pacbio".
                platform_name = self.platform.value
                if not self.reads1_suffix:
                    raise ValueError(f"{platform_name} 平台必须指定 reads1_suffix")
                if not self.genome_size:
                    raise ValueError(f"{platform_name} 平台必须指定 genome_size")
            elif self.platform == PlatformType.HYBRID:
                if not all([self.short1, self.short2, self.long]):
                    raise ValueError("hybrid 平台必须指定 short1, short2, long")
        return self
|
||||
|
||||
|
||||
class FileUploadInfo(BaseModel):
    """Metadata describing one uploaded file."""

    # File name of the upload.
    filename: str
    # Size of the upload (unit not shown here; presumably bytes — TODO confirm).
    size: int
    # Content type of the upload, when one is available.
    content_type: Optional[str] = None
    # Path associated with the upload.
    path: str
|
||||
|
||||
|
||||
class JobCreateResponse(BaseModel):
    """Response body returned after a job has been created."""

    # Identifier of the created job.
    job_id: str
    # Human-readable status message.
    message: str
    # One entry per uploaded file.
    uploaded_files: List[FileUploadInfo]
    # Workspace path of the job.
    workspace_path: str
    # Celery task id, when one has been assigned.
    celery_task_id: Optional[str] = None
    # Optional list of warning messages.
    warnings: Optional[List[str]] = None
|
||||
|
||||
|
||||
class JobStatusResponse(BaseModel):
    """Status snapshot of a job; all timestamps are carried as strings."""

    # Identity
    job_id: str
    name: str

    # State and progress
    status: str
    progress: int
    current_step: Optional[str] = None
    error_message: Optional[str] = None

    # Timestamps (string-typed; format not shown here)
    created_at: str
    started_at: Optional[str] = None
    completed_at: Optional[str] = None
|
||||
|
||||
"""任务 Schema"""
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
class JobStatus(str, Enum):
    """Lifecycle states of a job.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    PENDING = "pending"
    QUEUED = "queued"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
class JobCreate(BaseModel):
    """Minimal job-creation model: input files plus basic run options."""

    # Input files for the job (required).
    input_files: List[str]
    # Input sequence type; defaults to "nucl".
    sequence_type: str = "nucl"
    # Genome file suffix; defaults to ".fna".
    scaf_suffix: str = ".fna"
    # Thread count; defaults to 4.
    threads: int = 4
|
||||
|
||||
class JobCreateRequest(BaseModel):
    """Request parameters for creating a job.

    NOTE(review): a class with this same name is defined earlier in this
    source; if both live in one module, this later definition is the one
    bound to the name.
    """

    # Sequence type
    sequence_type: SequenceType = Field(
        default=SequenceType.NUCL,
        description="输入序列类型",
    )

    # Genome parameters
    scaf_suffix: Optional[str] = Field(default=".fna", description="基因组文件后缀")

    # Execution parameters
    threads: int = Field(default=4, ge=1, le=32, description="线程数")

    # Analysis parameters (identity and coverage are fractions in [0.0, 1.0])
    min_identity: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        description="最小相似度",
    )
    min_coverage: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="最小覆盖度",
    )
    allow_unknown_families: bool = Field(default=False, description="允许未知家族")
    require_index_hit: bool = Field(default=True, description="要求索引命中")

    # CRISPR-Cas parameters (weight is a fraction in [0.0, 1.0])
    crispr_fusion: bool = Field(default=False, description="启用 CRISPR-Cas 融合分析")
    crispr_weight: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="CRISPR-Cas 评分权重",
    )
|
||||
|
||||
|
||||
class JobResponse(BaseModel):
    """Response model describing a job, its progress, result and timing."""

    # Pydantic v2 configuration. This replaces the class-based ``Config``,
    # which v2 still accepts but reports as deprecated. ``from_attributes``
    # lets the model be validated from an object's attributes.
    model_config = {"from_attributes": True}

    id: str
    status: JobStatus
    input_files: List[str]
    sequence_type: str
    threads: int

    # Progress information
    current_stage: Optional[str] = None
    progress_percent: int = 0
    queue_position: Optional[int] = None

    # Result information
    result_url: Optional[str] = None
    error_message: Optional[str] = None

    # Timing information
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    # Echo of the CRISPR parameters.
    # NOTE(review): the Job ORM columns shown in this change store
    # crispr_fusion as 0/1 and crispr_weight as an integer percentage (0-100).
    # If this model is populated straight from such a row, crispr_weight will
    # not be on the 0.0-1.0 scale used by the request — confirm that the
    # conversion happens before this model is built.
    crispr_fusion: Optional[bool] = False
    crispr_weight: Optional[float] = 0.0
|
||||
|
||||
@@ -30,6 +30,8 @@ def run_bttoxin_analysis(
|
||||
min_coverage: float = 0.6,
|
||||
allow_unknown_families: bool = False,
|
||||
require_index_hit: bool = True,
|
||||
crispr_fusion: bool = False,
|
||||
crispr_weight: float = 0.0,
|
||||
):
|
||||
"""
|
||||
执行分析任务 - 完整的 4 阶段 pipeline
|
||||
@@ -138,6 +140,8 @@ def run_bttoxin_analysis(
|
||||
"min_coverage": min_coverage,
|
||||
"allow_unknown_families": allow_unknown_families,
|
||||
"require_index_hit": require_index_hit,
|
||||
"crispr_fusion": crispr_fusion,
|
||||
"crispr_weight": crispr_weight,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user