From 7090676f464e35bac2aaccd8ab1cf0bfc00ce6d4 Mon Sep 17 00:00:00 2001 From: zly <644706215@qq.com> Date: Wed, 14 Jan 2026 15:47:35 +0800 Subject: [PATCH] feat(backend): implement CRISPR-Cas API parameters and database schema --- @fix_plan.md | 2 +- backend/app/api/v1/jobs.py | 6 ++ backend/app/models/job.py | 4 + backend/app/schemas/job.py | 152 +++++++++-------------------------- backend/app/workers/tasks.py | 4 + 5 files changed, 54 insertions(+), 114 deletions(-) diff --git a/@fix_plan.md b/@fix_plan.md index 0830b25..00920de 100644 --- a/@fix_plan.md +++ b/@fix_plan.md @@ -16,7 +16,7 @@ ### Phase 3: 整合与可视化 - [x] **C3.1**: 修改 `bttoxin_shoter.py` 集成 CRISPR 评分参数 - [x] **C3.2**: 更新 `plot_shotter.py` 添加 CRISPR 可视化面板 -- [ ] **C3.3**: 更新 API 支持 CRISPR 参数输入 (Backend pending) +- [x] **C3.3**: 更新 API 支持 CRISPR 参数输入 (Backend updated) ## 已完成 (上一阶段) diff --git a/backend/app/api/v1/jobs.py b/backend/app/api/v1/jobs.py index dce77dc..586fd3e 100644 --- a/backend/app/api/v1/jobs.py +++ b/backend/app/api/v1/jobs.py @@ -31,6 +31,8 @@ async def create_job( min_coverage: float = Form(0.6), allow_unknown_families: bool = Form(False), require_index_hit: bool = Form(True), + crispr_fusion: bool = Form(False), + crispr_weight: float = Form(0.0), db: Session = Depends(get_db), i18n: I18n = Depends(get_i18n) ): @@ -89,6 +91,8 @@ async def create_job( min_coverage=int(min_coverage * 100), allow_unknown_families=int(allow_unknown_families), require_index_hit=int(require_index_hit), + crispr_fusion=int(crispr_fusion), + crispr_weight=int(crispr_weight * 100), ) db.add(job) @@ -107,6 +111,8 @@ async def create_job( min_coverage=min_coverage, allow_unknown_families=allow_unknown_families, require_index_hit=require_index_hit, + crispr_fusion=crispr_fusion, + crispr_weight=crispr_weight, ) job.celery_task_id = task.id diff --git a/backend/app/models/job.py b/backend/app/models/job.py index cbebfba..5cc1c5c 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -42,6 +42,10 @@ class Job(Base): allow_unknown_families = Column(Integer, default=0) # 0 = False, 1 = True require_index_hit = Column(Integer, default=1) + # CRISPR-Cas 参数 + crispr_fusion = Column(Integer, default=0) # 0 = False, 1 = True + crispr_weight = Column(Integer, default=0) # 存储为百分比 (0-100) + result_url = Column(String, nullable=True) logs = Column(Text, nullable=True) error_message = Column(Text, nullable=True) diff --git a/backend/app/schemas/job.py b/backend/app/schemas/job.py index c59faee..4626634 100644 --- a/backend/app/schemas/job.py +++ b/backend/app/schemas/job.py @@ -2,6 +2,7 @@ from typing import Optional, List from pydantic import BaseModel, Field, field_validator, model_validator from enum import Enum +from datetime import datetime class SequenceType(str, Enum): @@ -18,136 +19,61 @@ class PlatformType(str, Enum): HYBRID = "hybrid" -class JobCreateRequest(BaseModel): - """创建任务请求(包含各序列类型的参数)""" - - # 基本信息 - name: str = Field(..., min_length=1, max_length=255, description="任务名称") - description: Optional[str] = Field(None, max_length=1000, description="任务描述") - - # 序列类型 - sequence_type: SequenceType = Field(default=SequenceType.NUCL, description="输入序列类型") - - # nucl - scaf_suffix: Optional[str] = Field( - None, pattern=r"^\.\w+$", description="基因组文件后缀(nucl)", examples=[".fna", ".fasta", ".fa"] - ) - - # orfs - orfs_suffix: Optional[str] = Field(None, pattern=r"^\.\w+$", description="ORF 文件后缀(orfs)") - - # prot - prot_suffix: Optional[str] = Field(None, pattern=r"^\.\w+$", description="蛋白文件后缀(prot)") - - # reads - platform: Optional[PlatformType] = Field(None, description="测序平台(reads)") - reads1_suffix: Optional[str] = Field(None, description="Reads1 后缀(illumina/hybrid)") - reads2_suffix: Optional[str] = Field(None, description="Reads2 后缀(illumina/hybrid)") - genome_size: Optional[str] = Field( - None, pattern=r"^\d+(\.\d+)?[mMgG]?$", description="基因组大小估计(pacbio/oxford)" - ) - suffix_len: Optional[int] = Field(None, ge=0, description="reads 文件后缀长度") - - # hybrid 需要完整文件名 - short1: Optional[str] = Field(None, description="短 reads 1 文件名(完整文件名)") - short2: Optional[str] = Field(None, description="短 reads 2 文件名(完整文件名)") - long: Optional[str] = Field(None, description="长 reads 文件名(完整文件名)") - - # 执行参数 - threads: int = Field(default=4, ge=1, le=32, description="线程数") - update_db: bool = Field(default=False, description="是否更新数据库") - assemble_only: bool = Field(default=False, description="仅执行组装") - - @field_validator("scaf_suffix", "orfs_suffix", "prot_suffix") - @classmethod - def validate_suffix(cls, v: Optional[str]) -> Optional[str]: - if v is not None and not v.startswith("."): - raise ValueError("文件后缀必须以 . 开头") - return v - - @model_validator(mode="after") - def validate_by_type(self): - if self.sequence_type == SequenceType.NUCL: - if not self.scaf_suffix: - self.scaf_suffix = ".fna" - elif self.sequence_type == SequenceType.ORFS: - if not self.orfs_suffix: - self.orfs_suffix = ".ffn" - elif self.sequence_type == SequenceType.PROT: - if not self.prot_suffix: - self.prot_suffix = ".faa" - elif self.sequence_type == SequenceType.READS: - if not self.platform: - raise ValueError("reads 类型必须指定 platform") - if self.platform == PlatformType.ILLUMINA: - if not self.reads1_suffix or not self.reads2_suffix: - raise ValueError("illumina 平台必须指定 reads1_suffix 和 reads2_suffix") - elif self.platform in [PlatformType.PACBIO, PlatformType.OXFORD]: - if not self.reads1_suffix: - raise ValueError(f"{self.platform} 平台必须指定 reads1_suffix") - if not self.genome_size: - raise ValueError(f"{self.platform} 平台必须指定 genome_size") - elif self.platform == PlatformType.HYBRID: - if not all([self.short1, self.short2, self.long]): - raise ValueError("hybrid 平台必须指定 short1, short2, long") - return self - - -class FileUploadInfo(BaseModel): - filename: str - size: int - content_type: Optional[str] = None - path: str - - -class JobCreateResponse(BaseModel): - job_id: str - message: str - uploaded_files: List[FileUploadInfo] - workspace_path: str - celery_task_id: Optional[str] = None - warnings: Optional[List[str]] = None - - -class JobStatusResponse(BaseModel): - job_id: str - name: str - status: str - progress: int - current_step: Optional[str] = None - error_message: Optional[str] = None - created_at: str - started_at: Optional[str] = None - completed_at: Optional[str] = None - -"""任务 Schema""" -from pydantic import BaseModel -from typing import Optional, List -from datetime import datetime -from enum import Enum - class JobStatus(str, Enum): PENDING = "pending" + QUEUED = "queued" RUNNING = "running" COMPLETED = "completed" FAILED = "failed" -class JobCreate(BaseModel): - input_files: List[str] - sequence_type: str = "nucl" - scaf_suffix: str = ".fna" - threads: int = 4 + +class JobCreateRequest(BaseModel): + """创建任务请求参数""" + # 序列类型 + sequence_type: SequenceType = Field(default=SequenceType.NUCL, description="输入序列类型") + + # 基因组参数 + scaf_suffix: Optional[str] = Field(".fna", description="基因组文件后缀") + + # 执行参数 + threads: int = Field(default=4, ge=1, le=32, description="线程数") + + # 分析参数 + min_identity: float = Field(default=0.8, ge=0.0, le=1.0, description="最小相似度") + min_coverage: float = Field(default=0.6, ge=0.0, le=1.0, description="最小覆盖度") + allow_unknown_families: bool = Field(default=False, description="允许未知家族") + require_index_hit: bool = Field(default=True, description="要求索引命中") + + # CRISPR-Cas 参数 + crispr_fusion: bool = Field(default=False, description="启用 CRISPR-Cas 融合分析") + crispr_weight: float = Field(default=0.0, ge=0.0, le=1.0, description="CRISPR-Cas 评分权重") + class JobResponse(BaseModel): + """任务响应模型""" id: str status: JobStatus input_files: List[str] sequence_type: str threads: int + + # 进度信息 + current_stage: Optional[str] = None + progress_percent: int = 0 + queue_position: Optional[int] = None + + # 结果信息 result_url: Optional[str] = None error_message: Optional[str] = None + + # 时间信息 created_at: datetime + started_at: Optional[datetime] = None completed_at: Optional[datetime] = None + # CRISPR 参数回显 + crispr_fusion: Optional[bool] = False + crispr_weight: Optional[float] = 0.0 + class Config: from_attributes = True diff --git a/backend/app/workers/tasks.py b/backend/app/workers/tasks.py index d376afb..9ea10b4 100644 --- a/backend/app/workers/tasks.py +++ b/backend/app/workers/tasks.py @@ -30,6 +30,8 @@ def run_bttoxin_analysis( min_coverage: float = 0.6, allow_unknown_families: bool = False, require_index_hit: bool = True, + crispr_fusion: bool = False, + crispr_weight: float = 0.0, ): """ 执行分析任务 - 完整的 4 阶段 pipeline @@ -138,6 +140,8 @@ def run_bttoxin_analysis( "min_coverage": min_coverage, "allow_unknown_families": allow_unknown_families, "require_index_hit": require_index_hit, + "crispr_fusion": crispr_fusion, + "crispr_weight": crispr_weight, } }