Files
analysis_pdb/build_modellel.py
2024-01-03 11:17:37 +08:00

111 lines
4.3 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from pathlib import Path
from modeller import *
from modeller.automodel import * # 加载 AutoModel 类
import time
import os
# Environment setup:
#   micromamba create -n modeller modeller biopython pymol-open-source biopandas -y -c conda-forge -c salilab
# Modeller license key: MODELIRANJE
# Example (the chain ID passed with -c is needed to select the template chain):
#   python /opt/software/docking_pipeline/scripts/protein_structure/modeller/build_modellel.py -s /mnt/AppData/task-27/ProteinCompletion/protein.pdb -f /mnt/AppData/task-27/ProteinCompletion/seq.fasta -o ./ -n 1 -m refine.fast -c <CHAIN_ID>
def make_model(structure_file,
               sequence_file,
               outdir: str,
               chain: str,
               num_loop: int = 2,
               md_level: str = 'refine.fast'
               ):
    """Align one chain of a PDB structure to a target sequence and rebuild it.

    The function runs in two stages:

    1. Read ``chain`` from ``structure_file``, append the target sequence
       from ``sequence_file``, run ``align2d`` and write the result to
       ``{outdir}/alignment.ali`` (PIR) and ``{outdir}/alignment.pap`` (PAP).
    2. Feed that alignment to a Modeller ``LoopModel`` and call ``make()``.

    Args:
        structure_file: Path to the template PDB file. Its stem is used as
            the alignment code of the template.
        sequence_file: Path to the alignment file holding the target
            sequence. Its stem is used as the alignment code of the target,
            so it must match the code stored inside the file.
        outdir: Directory that receives the alignment files. It is created
            if it does not exist yet.
        chain: Chain ID selecting the segment ``FIRST:<chain>`` to
            ``LAST:<chain>`` of the template.
        num_loop: Index of the last loop model (the first is 1), i.e. the
            number of loop models requested. ``None`` means the default (2).
        md_level: One of ``'refine.slow'``, ``'refine.fast'`` or
            ``'refine.very_fast'``. ``None`` means the default
            (``'refine.fast'``). Any other value leaves the ``LoopModel``
            setting untouched and prints a warning.

    NOTE(review): the files produced by ``loop_model.make()`` are not
    redirected to ``outdir`` here; presumably Modeller writes them to the
    current working directory -- confirm.
    """
    # The command line hands over None for options the user omitted; fall
    # back to the signature defaults instead of failing later in
    # int(None) / None.strip().
    if num_loop is None:
        num_loop = 2
    if md_level is None:
        md_level = 'refine.fast'

    print("***************************************************")
    print("md_level ====", md_level)
    print("***************************************************")

    # The alignment codes are derived from the file names.
    structure = Path(structure_file).stem
    sequence = Path(sequence_file).stem

    # The alignment files are written below, so the directory must exist.
    os.makedirs(outdir, exist_ok=True)
    ali_path = f'{outdir}/alignment.ali'
    pap_path = f'{outdir}/alignment.pap'

    time_start = time.time()

    # --- Stage 1: align the template structure with the target sequence ---
    env1 = Environ()
    # Read only the requested chain of the template.
    mdl = Model(env1, file=structure_file,
                model_segment=(f'FIRST:{chain}', f'LAST:{chain}'))
    aln = Alignment(env1)
    aln.append_model(mdl, align_codes=structure, atom_files=structure_file)
    # Add the target sequence to the alignment.
    aln.append(file=sequence_file, align_codes=sequence)
    aln.align2d()
    aln.write(file=ali_path, alignment_format='PIR')
    aln.write(file=pap_path, alignment_format='PAP')

    log.verbose()

    # --- Stage 2: rebuild the structure with loop refinement ---
    env2 = Environ()
    # Directories searched for the input atom files.
    env2.io.atom_files_directory = ['.']
    loop_model = LoopModel(env2,
                           alnfile=ali_path,
                           knowns=structure,
                           sequence=sequence,
                           loop_assess_methods=(assess.DOPE, assess.GA341))

    # Number of loop models = (ending_model - starting_model) + 1.
    # The base-model range (loop_model.starting_model / ending_model) is
    # deliberately not set here.
    loop_model.loop.starting_model = 1
    loop_model.loop.ending_model = int(num_loop)

    # Map the textual level onto Modeller's refinement schedule.
    md_levels = {
        'refine.slow': refine.slow,
        'refine.very_fast': refine.very_fast,
        'refine.fast': refine.fast,
    }
    level_key = str(md_level).strip()
    if level_key in md_levels:
        loop_model.loop.md_level = md_levels[level_key]
    else:
        # Keep running with whatever LoopModel uses by default, but tell the
        # user that the requested level was not applied.
        print(f"Warning: unknown md_level {md_level!r}; expected one of "
              f"{sorted(md_levels)}. Keeping the LoopModel default.")

    loop_model.make()

    end_time = time.time()
    print(f"Time cost: {end_time - time_start}s")
def fasta_to_ali(fasta_file, outdir):
    """Write the first record of a FASTA file as a PIR-style ``.ali`` file.

    The sequence may be wrapped over several lines; all lines up to the next
    ``>`` header (or the end of the file) are concatenated. Blank lines are
    ignored. Only the first record is used.

    Args:
        fasta_file: Path to the input FASTA file. Its stem ``<name>`` is used
            to build the alignment code ``<name>_full``.
        outdir: Output directory; created if it does not exist.

    Returns:
        The path of the written file, ``{outdir}/<name>_full.ali``.

    Raises:
        ValueError: If the file contains no sequence data.
    """
    os.makedirs(outdir, exist_ok=True)
    sequence = Path(fasta_file).stem

    chunks = []
    seen_header = False
    with open(fasta_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                # A second header marks the start of the next record.
                if seen_header or chunks:
                    break
                seen_header = True
                continue
            chunks.append(line)

    seq = ''.join(chunks)
    if not seq:
        raise ValueError(f"No sequence found in FASTA file: {fasta_file}")

    ali_file = f'{outdir}/{sequence}_full.ali'
    with open(ali_file, 'w') as f:
        f.write(f'>P1;{sequence}_full\n')
        f.write(f'sequence:{sequence}_full:::::::0.00: 0.00\n')
        # '*' terminates the sequence in this format.
        f.write(f'{seq}*')
    return ali_file
if __name__ == "__main__":
    import argparse

    # Command-line entry point: convert the FASTA file to an .ali file, then
    # align it with the chosen chain of the structure and build the models.
    parser = argparse.ArgumentParser(description="Build model by Modeller")
    # Structure, output directory, FASTA file and chain have no sensible
    # default, so a missing one is reported by argparse up front instead of
    # failing later inside the pipeline.
    parser.add_argument("-s", "--structure", required=True,
                        help="Structure file")
    parser.add_argument("-o", "--outdir", required=True,
                        help="Output directory")
    parser.add_argument("-f", "--fasta", required=True,
                        help="Fasta file")
    # The defaults mirror the defaults of make_model().
    parser.add_argument("-n", "--num_loop", type=int, default=2,
                        help="Number of loop model (default: 2)")
    parser.add_argument("-m", "--md_level", default="refine.fast",
                        help="MD level: refine.slow, refine.fast or "
                             "refine.very_fast (default: refine.fast)")
    parser.add_argument("-c", "--chain", required=True,
                        help="your fix chain ID")
    args = parser.parse_args()

    sequence_file = fasta_to_ali(args.fasta, args.outdir)
    make_model(args.structure, sequence_file,
               args.outdir, args.chain, args.num_loop, args.md_level)
# python build_modellel.py -s 5sws_fixer.pdb -o ./5swsmodellerfix -f rcsb_pdb_5SWS.fasta -n 1 -m refine.fast -c A
# python build_modellel.py -s ../5sws_fixer.pdb -o ./5swsmodellerfix -f ../rcsb_pdb_5SWS.fasta -n 1 -m refine.fast -c D