import os
import time
from pathlib import Path

from modeller import *
from modeller.automodel import *  # load the AutoModel classes

# Environment setup:
#   micromamba create -n modeller modeller biopython pymol-open-source biopandas -y -c conda-forge -c salilab
# License key: MODELIRANJE
# Example:
#   python /opt/software/docking_pipeline/scripts/protein_structure/modeller/build_modellel.py -s /mnt/AppData/task-27/ProteinCompletion/protein.pdb -f /mnt/AppData/task-27/ProteinCompletion/seq.fasta -o ./ -n 1 -m refine.fast


def make_model(structure_file,
               sequence_file,
               outdir: str,
               chain: str,
               num_loop: int = 2,
               md_level: str = 'refine.fast') -> None:
    """Align a PDB chain with a target sequence and build loop models.

    The function first writes ``alignment.ali`` (PIR) and ``alignment.pap``
    (PAP) into *outdir*, then runs a Modeller ``LoopModel`` on that
    alignment and prints the elapsed wall-clock time.

    Args:
        structure_file: Path of the template PDB file. Its stem is used as
            the template's alignment code.
        sequence_file: Path of the PIR file holding the target sequence.
            Its stem is used as the target's alignment code, so it must
            match the code written inside the file.
        outdir: Directory receiving the alignment files; created if missing.
        chain: Chain identifier selecting the segment ``FIRST:<chain>`` to
            ``LAST:<chain>`` of the template.
        num_loop: Index of the last loop model; the first index is 1, so
            the number of loop models is ``(end - start) + 1``.
        md_level: One of ``'refine.slow'``, ``'refine.fast'`` or
            ``'refine.very_fast'`` (surrounding whitespace is ignored).
            Any other value leaves Modeller's own setting untouched and a
            warning is printed.
    """
    print("***************************************************")
    print("md_level ====", md_level)
    print("***************************************************")

    # The alignment files are written into outdir, so it has to exist.
    os.makedirs(outdir, exist_ok=True)

    # Alignment codes are derived from the file names.
    structure = Path(structure_file).stem
    sequence = Path(sequence_file).stem
    alignment_pir = f'{outdir}/alignment.ali'
    alignment_pap = f'{outdir}/alignment.pap'

    # Start of the timed section.
    time_start = time.time()

    # --- Step 1: align the protein structure with the sequence ----------
    env1 = Environ()
    # Read the selected chain from the PDB file and add it to the alignment.
    mdl = Model(env1, file=structure_file,
                model_segment=(f'FIRST:{chain}', f'LAST:{chain}'))
    aln = Alignment(env1)
    aln.append_model(mdl, align_codes=structure, atom_files=structure_file)
    # Add the target sequence to the alignment.
    aln.append(file=sequence_file, align_codes=sequence)
    # Run the 2D alignment.
    aln.align2d()
    # Write the alignment in both formats.
    aln.write(file=alignment_pir, alignment_format='PIR')
    aln.write(file=alignment_pap, alignment_format='PAP')

    log.verbose()

    # --- Step 2: rebuild the protein structure --------------------------
    env2 = Environ()
    # Directory list searched for input atom files.
    env2.io.atom_files_directory = ['.']

    loop_model = LoopModel(env2,
                           alnfile=alignment_pir,
                           knowns=structure,
                           sequence=sequence,
                           loop_assess_methods=(assess.DOPE, assess.GA341))

    # The base-model range (loop_model.starting_model / ending_model) is
    # intentionally not set here; only the loop-model range is configured.
    # Count rule: (end - start) + 1
    loop_model.loop.starting_model = 1
    loop_model.loop.ending_model = int(num_loop)

    # Map the textual option onto the matching refinement function.
    md_levels = {
        'refine.slow': refine.slow,
        'refine.very_fast': refine.very_fast,
        'refine.fast': refine.fast,
    }
    level_name = md_level.strip()
    if level_name in md_levels:
        loop_model.loop.md_level = md_levels[level_name]
    else:
        # Previously an unknown value was ignored without any notice.
        print(f"Warning: unknown md_level {md_level!r}; expected one of "
              f"{sorted(md_levels)}. Keeping Modeller's own setting.")

    # Build the models.
    # NOTE(review): outdir is only used for the alignment files above;
    # where Modeller writes the model files is not controlled here - confirm.
    loop_model.make()

    end_time = time.time()
    print(f"Time cost: {end_time - time_start}s")


def fasta_to_ali(fasta_file, outdir):
    """Convert the first record of a FASTA file into a PIR (.ali) file.

    The output is written to ``<outdir>/<stem>_full.ali`` where ``<stem>``
    is the FASTA file name without its extension; the PIR code inside the
    file is ``<stem>_full``.

    Args:
        fasta_file: Path of the input FASTA file. Only the first record is
            used; its sequence may span several lines.
        outdir: Output directory; created if it does not exist.

    Returns:
        The path of the written ``.ali`` file as a string.

    Raises:
        ValueError: If no header line followed by sequence data is found.
    """
    os.makedirs(outdir, exist_ok=True)
    sequence = Path(fasta_file).stem

    # Collect every sequence line of the first record. FASTA sequences are
    # commonly wrapped over several lines, so reading only the line after
    # the header would truncate them.
    chunks = []
    in_record = False
    with open(fasta_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                if in_record:
                    break  # a second header ends the first record
                in_record = True
            elif in_record:
                chunks.append(line)
    seq = ''.join(chunks)
    if not seq:
        raise ValueError(f"No FASTA record with sequence data found in {fasta_file}")

    ali_file = f'{outdir}/{sequence}_full.ali'
    with open(ali_file, 'w') as f:
        f.write(f'>P1;{sequence}_full\n')
        f.write(f'sequence:{sequence}_full:::::::0.00: 0.00\n')
        f.write(f'{seq}*')
    return ali_file


def main() -> None:
    """Parse the command line, convert the FASTA file and build the models."""
    import argparse

    parser = argparse.ArgumentParser(description="Build model by Modeller")
    parser.add_argument("-s", "--structure", required=True, help="Structure file")
    parser.add_argument("-o", "--outdir", required=True, help="Output directory")
    parser.add_argument("-f", "--fasta", required=True, help="Fasta file")
    # Defaults mirror make_model's signature so omitting -n / -m no longer
    # passes None into int() / str.strip().
    parser.add_argument("-n", "--num_loop", type=int, default=2,
                        help="Number of loop model")
    parser.add_argument("-m", "--md_level", default='refine.fast', help="MD level")
    parser.add_argument("-c", "--chain", required=True, help="your fix chain ID")
    args = parser.parse_args()

    sequence_file = fasta_to_ali(args.fasta, args.outdir)
    make_model(args.structure, sequence_file, args.outdir, args.chain,
               args.num_loop, args.md_level)


if __name__ == "__main__":
    main()

# python build_modellel.py -s 5sws_fixer.pdb -o ./5swsmodellerfix -f rcsb_pdb_5SWS.fasta -n 1 -m refine.fast -c A
# python build_modellel.py -s ../5sws_fixer.pdb -o ./5swsmodellerfix -f ../rcsb_pdb_5SWS.fasta -n 1 -m refine.fast -c D