init
This commit is contained in:
111
build_modellel.py
Normal file
111
build_modellel.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from pathlib import Path
|
||||
from modeller import *
|
||||
from modeller.automodel import * # 加载 AutoModel 类
|
||||
import time
|
||||
import os
|
||||
# micromamba create -n modeller modeller biopython pymol-open-source biopandas -y -c conda-forge -c salilab
|
||||
# 注册码:MODELIRANJE
|
||||
# python /opt/software/docking_pipeline/scripts/protein_structure/modeller/build_modellel.py -s /mnt/AppData/task-27/ProteinCompletion/protein.pdb -f /mnt/AppData/task-27/ProteinCompletion/seq.fasta -o ./ -n 1 -m refine.fast
|
||||
|
||||
def make_model(structure_file,
               sequence_file,
               outdir: str,
               chain: str,
               num_loop: int = 2,
               md_level: str = 'refine.fast'
               ):
    """Align a PDB chain to a target sequence and run Modeller loop modelling.

    The function reads one chain from ``structure_file``, aligns it against
    the sequence in ``sequence_file`` with ``align2d``, writes the alignment
    into ``outdir`` (``alignment.ali`` and ``alignment.pap``) and then builds
    models with ``LoopModel``.

    Args:
        structure_file: Path to the template PDB file. Its stem is used as
            the alignment code of the template.
        sequence_file: Path to the PIR file holding the target sequence. Its
            stem is used as the alignment code of the target, so it has to
            match the code stored inside that file.
        outdir: Directory that receives the alignment files. It is created
            when missing.
        chain: Chain identifier used to build the ``FIRST:<chain>`` /
            ``LAST:<chain>`` model segment.
        num_loop: Number of loop models to build (models 1..num_loop).
            ``None`` falls back to 2; any other value is passed to ``int``.
        md_level: One of ``'refine.slow'``, ``'refine.fast'`` or
            ``'refine.very_fast'`` (surrounding whitespace is ignored).
            ``None`` falls back to ``'refine.fast'``. Any other value leaves
            the ``LoopModel`` refinement setting untouched.

    Raises:
        ValueError, TypeError: If ``num_loop`` cannot be converted to an int.
    """
    # Normalise the options first: a caller that forwards unset CLI values
    # passes None, and a bad value should fail before the expensive
    # alignment/modelling work rather than after it.
    if md_level is None:
        md_level = 'refine.fast'
    num_loop = 2 if num_loop is None else int(num_loop)

    print("***************************************************")
    print("md_level ====", md_level)
    print("***************************************************")

    # Alignment codes are derived from the file names (without extension).
    structure = Path(structure_file).stem
    sequence = Path(sequence_file).stem

    # The alignment files are written below, so the directory must exist.
    os.makedirs(outdir, exist_ok=True)

    # Start of the timed section.
    time_start = time.time()

    # --- Step 1: align the template structure with the target sequence ---
    env1 = Environ()
    # Read only the requested chain of the template from the PDB file.
    mdl = Model(env1, file=structure_file,
                model_segment=(f'FIRST:{chain}', f'LAST:{chain}'))
    aln = Alignment(env1)
    aln.append_model(mdl, align_codes=structure, atom_files=structure_file)
    # Add the target sequence to the same alignment object.
    aln.append(file=sequence_file, align_codes=sequence)
    # Align the sequence against the structure.
    aln.align2d()
    # Persist the alignment; the PIR file is the input of LoopModel below.
    aln.write(file=f'{outdir}/alignment.ali', alignment_format='PIR')
    aln.write(file=f'{outdir}/alignment.pap', alignment_format='PAP')

    log.verbose()

    # --- Step 2: rebuild the structure with loop modelling ---
    env2 = Environ()
    # Directories searched for the input atom files.
    env2.io.atom_files_directory = ['.']
    loop_model = LoopModel(env2,
                           alnfile=f'{outdir}/alignment.ali',
                           knowns=structure,
                           sequence=sequence,
                           loop_assess_methods=(assess.DOPE, assess.GA341))

    # Number of loop models built is (ending_model - starting_model) + 1.
    loop_model.loop.starting_model = 1
    loop_model.loop.ending_model = num_loop

    # Map the textual option onto the matching refinement schedule.
    md_schedules = {
        'refine.slow': refine.slow,
        'refine.very_fast': refine.very_fast,
        'refine.fast': refine.fast,
    }
    md_key = md_level.strip()
    if md_key in md_schedules:
        loop_model.loop.md_level = md_schedules[md_key]
    else:
        # Keep the historic fallback (setting left untouched) but make it
        # visible instead of silently ignoring the request.
        print(f"Warning: unknown md_level {md_level!r}; "
              "keeping the LoopModel default refinement level")

    # Build the models.
    loop_model.make()

    end_time = time.time()
    print(f"Time cost: {end_time - time_start}s")
|
||||
|
||||
|
||||
def fasta_to_ali(fasta_file, outdir):
    """Convert the first record of a FASTA file into a PIR (.ali) file.

    The record name is taken from the FASTA file name: for ``foo.fasta`` the
    output is ``<outdir>/foo_full.ali`` with the alignment code ``foo_full``.
    The FASTA header text itself is not used.

    Args:
        fasta_file: Path to the input FASTA file. Only the first record is
            converted; its sequence may span any number of lines.
        outdir: Output directory, created when it does not exist.

    Returns:
        The path of the written ``.ali`` file as a string.

    Raises:
        ValueError: If the file contains no sequence residues.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outdir, exist_ok=True)

    sequence = Path(fasta_file).stem

    # Collect every sequence line of the first record. Reading only the
    # second line of the file would truncate wrapped (multi-line) sequences.
    residues = []
    header_seen = False
    with open(fasta_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith('>'):
                if header_seen or residues:
                    # A further header starts the next record: stop here.
                    break
                header_seen = True
                continue
            if line:
                residues.append(line)

    seq = ''.join(residues)
    if not seq:
        raise ValueError(f'No sequence found in FASTA file: {fasta_file}')

    ali_file = f'{outdir}/{sequence}_full.ali'
    with open(ali_file, 'w') as f:
        f.write(f'>P1;{sequence}_full\n')
        f.write(f'sequence:{sequence}_full:::::::0.00: 0.00\n')
        # '*' terminates the sequence in the PIR record.
        f.write(f'{seq}*')
    return ali_file
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: convert the FASTA file to PIR format, then
    # align it against the structure and run the loop modelling.
    import argparse

    parser = argparse.ArgumentParser(description="Build model by Modeller")
    # The three paths are mandatory; without them the helpers below would
    # fail with an obscure TypeError on None instead of a usage message.
    parser.add_argument("-s", "--structure", required=True,
                        help="Structure file")
    parser.add_argument("-o", "--outdir", required=True,
                        help="Output directory")
    parser.add_argument("-f", "--fasta", required=True, help="Fasta file")
    # Defaults mirror the defaults of make_model, so omitting the option no
    # longer overrides them with None.
    parser.add_argument("-n", "--num_loop", type=int, default=2,
                        help="Number of loop model")
    parser.add_argument("-m", "--md_level", default="refine.fast",
                        help="MD level")
    # NOTE(review): when -c is omitted, chain is None and make_model builds
    # the segment 'FIRST:None' -- confirm whether -c should be mandatory.
    parser.add_argument("-c", "--chain", help="your fix chain ID")
    args = parser.parse_args()

    # Modeller needs the target sequence in PIR format.
    sequence_file = fasta_to_ali(args.fasta, args.outdir)

    make_model(args.structure, sequence_file,
               args.outdir, args.chain, args.num_loop, args.md_level)
|
||||
|
||||
# python build_modellel.py -s 5sws_fixer.pdb -o ./5swsmodellerfix -f rcsb_pdb_5SWS.fasta -n 1 -m refine.fast -c A
|
||||
# python build_modellel.py -s ../5sws_fixer.pdb -o ./5swsmodellerfix -f ../rcsb_pdb_5SWS.fasta -n 1 -m refine.fast -c D
|
||||
Reference in New Issue
Block a user