From 6be6fcc9ce2cce07bd55ae2502c944eb55e6312f Mon Sep 17 00:00:00 2001
From: lingyuzeng
Date: Mon, 18 Mar 2024 13:50:17 +0800
Subject: [PATCH] update last

---
Review note: fixsequence.py and manualfix/relax.py were revised in review;
the blob ids on their `index` lines still refer to the original contents.

 diff.ipynb          | 31 +++++++++++++++++++++
 fixsequence.py      | 23 ++++++++++++++++
 manualfix/README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++
 manualfix/relax.py  | 66 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 186 insertions(+)
 create mode 100644 fixsequence.py
 create mode 100644 manualfix/README.md
 create mode 100644 manualfix/relax.py

diff --git a/diff.ipynb b/diff.ipynb
index 7403d56..7963582 100644
--- a/diff.ipynb
+++ b/diff.ipynb
@@ -591,6 +591,37 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "2024-03-07 \n",
+    "['5ksa', '1g6r', '6bga', '6u3n', '1zgl', '7l1d', '3qiu', '2ypl', '7z50', '6uln', '1mwa', '4z7v', '4ozi', '7rrg', '6uk4', '4p2o', '6vm8', '5ksb'] 重新建模\n",
+    "1g6r: Align 对齐过程中,序列编号出错导致残基缺失,进而导致MD模拟后续失败\n",
+    "6u3n: A开头地方与B链的碰撞\n",
+    "6uk4: 结构分散(不适合模拟)\n",
+    "6uln: 结构分散(不适合模拟)\n",
+    "6vm8: 结构分散(不适合模拟)\n",
+    "7l1d: 无碰撞,结构分散(不适合模拟)\n",
+    "7rrg: 无碰撞,结构分散(不适合模拟)\n",
+    "7z50: A与B链的碰撞\n",
+    "1mwa: B链116缺失\n",
+    "1zgl: M链122,123缺失\n",
+    "2ypl: D链124E链121碰撞\n",
+    "3qiu: A与B链的碰撞\n",
+    "4ozi: A链72位与B链5位(β折叠)碰撞\n",
+    "4p2o: A链67位与B链4位碰撞\n",
+    "4z7v: C链96位与D链105碰撞\n",
+    "5ksa: 缺失过多导致修复后loop过多(不适合模拟)\n",
+    "5ksb: C链83位与D链5位碰撞\n",
+    "6bga: B链137,138虚线缺失\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
   }
  ],
  "metadata": {
diff --git a/fixsequence.py b/fixsequence.py
new file mode 100644
index 0000000..77d4a37
--- /dev/null
+++ b/fixsequence.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@file :fixsequence.py
+@Description: : fix sequence of pdb file
+@Date :2024/03/07 09:44:06
+@Author :lyzeng
+@Email :pylyzeng@gmail.com
+@version :1.0
+'''
+from analysis_pdb import PDBAnalyzer
+from pathlib import Path
+# Specify the path to your PDB file
+pdb_files = [
+    Path('./pdb_test9/1zgl.pdb'),
+    Path('./pdb_test9/1g6r.pdb'),
+    Path('./pdb_test9/1mwa.pdb'),
+    Path('./pdb_test9/6bga.pdb'),
+]
+for pdb_file in pdb_files:
+    # Renumber the residues of every chain
+    pdb_analyzer_instance = PDBAnalyzer.renumber_residues_based_on_issues_and_clean(pdb_file)
+    print(pdb_file.stem, pdb_analyzer_instance.renumber_errors)
\ No newline at end of file
diff --git a/manualfix/README.md b/manualfix/README.md
new file mode 100644
index 0000000..1d7cff6
--- /dev/null
+++ b/manualfix/README.md
@@ -0,0 +1,66 @@
+2024-03-07 
+['5ksa', '1g6r', '6bga', '6u3n', '1zgl', '7l1d', '3qiu', '2ypl', '7z50', '6uln', '1mwa', '4z7v', '4ozi', '7rrg', '6uk4', '4p2o', '6vm8', '5ksb'] 重新建模
+# 1g6r: Align 对齐过程中,序列编号出错导致残基缺失,进而导致MD模拟后续失败
+# 6u3n: A开头地方与B链的碰撞
+6uk4: 结构分散(不适合模拟)
+6uln: 结构分散(不适合模拟)
+6vm8: 结构分散(不适合模拟)
+7l1d: 无碰撞,结构分散(不适合模拟)
+7rrg: 无碰撞,结构分散(不适合模拟)
+# 7z50: A与B链的碰撞
+# 1mwa: B链116缺失
+# 1zgl: M链122,123缺失
+# 2ypl: D链124E链121碰撞
+# 3qiu: A与B链的碰撞
+# 4ozi: A链72位与B链5位(β折叠)碰撞
+# 4p2o: A链67位与B链4位碰撞
+# 4z7v: C链96位与D链105碰撞
+5ksa: 缺失过多导致修复后loop过多(不适合模拟)
+# 5ksb: C链83位与D链5位碰撞
+# 6bga: B链137,138虚线缺失
+
+最终确定:['6bga', '5ksb', '4z7v', '4p2o', '4ozi', '3qiu', '2ypl', '1zgl', '1mwa', '7z50', '6u3n', '1g6r']
+
+____
+1zgl [{'chain_id': 'P', 'start_residue': 63, 'end_residue': 104, 'estimated_missing': 0}] B链M链缺失太多,修复之后loop过多,不适合模拟
+1g6r [{'chain_id': 'B', 'start_residue': 109, 'end_residue': 237, 'estimated_missing': 0}] 修复
+1mwa [{'chain_id': 'B', 'start_residue': 116, 'end_residue': 316, 'estimated_missing': 0}] 多处编号错误B链
+6bga [{'chain_id': 'B', 'start_residue': 112, 'end_residue': 198, 'estimated_missing': 0}] B链两处缺失虚线
+____
+放手动修复的pdb文件,再提取单聚体后手动修复残缺编号。
+
+1g6r.manualfix.pdb
+1mwa.manualfix.pdb
+6bga.modellerfix.pdb B链碰撞导致缺失
+cp ../pdb_test7/runner_5ksb/5ksb.modellerfix.pdb ./
+
+## pyrosetta fastrelax
+
+```
+# 使用pyrosetta快速修复蛋白缺失的侧链
+from pathlib import Path
+import pyrosetta
+from pyrosetta import rosetta
+from multiprocessing import Pool
+
+
+def fix_optimize(file: Path, out_file: Path):
+    '''
+    FastRelax使用快速梯度下降算法,可以在较短时间内对蛋白质进行优化,并且对于结构中的非构象缺陷,例如氢键、离子对、芳香性相互作用和溶剂-蛋白质相互作用等进行优化。
+    ref2015是Rosetta程序包中的一个分数函数,它是Rosetta2015中引入的一个新的蛋白质力场,用于蛋白质结构预测和设计。这个力场是从先前的Rosetta力场中提炼出来的,经过了一系列的校正和优化,可以更好地预测蛋白质的折叠构象。在FastRelax中,ref2015可以作为一个可选参数来指定使用哪个力场来进行优化。
+    :param file:
+    :param out_file:
+    :return:
+    '''
+    # 使用pyrosetta修复蛋白结构
+    # 初始化PyRosetta
+    pyrosetta.init()
+    # 读入蛋白质结构
+    pose = pyrosetta.pose_from_pdb(file.as_posix())
+    # fix residue side chain
+    scorefxn = pyrosetta.create_score_function('ref2015')
+    relax = pyrosetta.rosetta.protocols.relax.FastRelax(scorefxn)
+    relax.apply(pose)
+    # 输出修复后的结构
+    pose.dump_pdb(out_file.as_posix())
+```
\ No newline at end of file
diff --git a/manualfix/relax.py b/manualfix/relax.py
new file mode 100644
index 0000000..6245fe2
--- /dev/null
+++ b/manualfix/relax.py
@@ -0,0 +1,66 @@
+from pathlib import Path
+import pyrosetta
+from multiprocessing import Pool
+from shutil import copyfile
+import logging
+
+# Directory that receives the copied inputs and the relaxed outputs.
+MANUALFIX_DIR = Path('/mnt/mydrive/analysis_pdb-dev/manualfix')
+
+# fix = ['6bga', '5ksb', '4z7v', '4p2o', '4ozi', '3qiu', '2ypl', '1zgl', '7z50', '6u3n']
+fix = ['1zgl']
+
+
+def fix_optimize(file: Path, out_file: Path):
+    """Relax one PDB structure with PyRosetta FastRelax (ref2015).
+
+    Progress is logged to a file named like ``out_file`` but with a
+    ``.log`` suffix.
+
+    :param file: input PDB file.
+    :param out_file: path the relaxed structure is dumped to.
+    """
+    log_file = out_file.with_suffix('.log')
+    logger = logging.getLogger(log_file.name)
+    logger.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setFormatter(formatter)
+    logger.addHandler(file_handler)
+
+    try:
+        logger.info(f'Processing {file.name}')
+
+        pyrosetta.init()
+        pose = pyrosetta.pose_from_pdb(file.as_posix())
+        scorefxn = pyrosetta.create_score_function('ref2015')
+        relax = pyrosetta.rosetta.protocols.relax.FastRelax(scorefxn)
+        relax.apply(pose)
+        pose.dump_pdb(out_file.as_posix())
+
+        logger.info(f'Finished processing {file.name}')
+    except Exception:
+        # Keep the traceback in the per-file log, then let the pool see it.
+        logger.exception(f'Failed processing {file.name}')
+        raise
+    finally:
+        # Pool workers are reused: detach and close the handler so open
+        # log files do not accumulate and later calls do not log twice.
+        logger.removeHandler(file_handler)
+        file_handler.close()
+
+
+if __name__ == '__main__':
+    search_dirs = [Path('../pdb_test8'), Path('../pdb_test7')]
+    files = []
+    for search_dir in search_dirs:
+        files.extend(search_dir.rglob('*.modellerfix.pdb'))
+    for file in files:
+        if file.name.split('.')[0] in fix:
+            print(file)
+            copyfile(file, MANUALFIX_DIR / file.name)
+    pyrosetta_fix = list(MANUALFIX_DIR.rglob('*.modellerfix.pdb'))
+    jobs = [(file, file.with_stem(file.stem + '.fastrelax')) for file in pyrosetta_fix]
+    with Pool(16) as p:
+        p.starmap(fix_optimize, jobs)
+    print('fastrelax done')