update last
This commit is contained in:
31
diff.ipynb
31
diff.ipynb
@@ -591,6 +591,37 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"2024-03-07 \n",
|
||||||
|
"['5ksa', '1g6r', '6bga', '6u3n', '1zgl', '7l1d', '3qiu', '2ypl', '7z50', '6uln', '1mwa', '4z7v', '4ozi', '7rrg', '6uk4', '4p2o', '6vm8', '5ksb'] 重新建模\n",
|
||||||
|
"1g6r: Align 对齐过程中,序列编号出错导致残基缺失,进而导致MD模拟后续失败\n",
|
||||||
|
"6u3n: A开头地方与B链的碰撞\n",
|
||||||
|
"6uk4: 结构分散(不适合模拟)\n",
|
||||||
|
"6uln: 结构分散(不适合模拟)\n",
|
||||||
|
"6vm8: 结构分散(不适合模拟)\n",
|
||||||
|
"7l1d: 无碰撞,结构分散(不适合模拟)\n",
|
||||||
|
"7rrg: 无碰撞,结构分散(不适合模拟)\n",
|
||||||
|
"7z50: A与B链的碰撞\n",
|
||||||
|
"1mwa: B链116缺失\n",
|
||||||
|
"1zgl: M链122,123缺失\n",
|
||||||
|
"2ypl: D链124E链121碰撞\n",
|
||||||
|
"3qiu: A与B链的碰撞\n",
|
||||||
|
"4ozi: A链72位与B链5位(β折叠)碰撞\n",
|
||||||
|
"4p2o: A链67位与B链4位碰撞\n",
|
||||||
|
"4z7v: C链96位与D链105碰撞\n",
|
||||||
|
"5ksa: 缺失过多导致修复后loop过多(不适合模拟)\n",
|
||||||
|
"5ksb: C链83位与D链5位碰撞\n",
|
||||||
|
"6bga: B链137,138虚线缺失\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
23
fixsequence.py
Normal file
23
fixsequence.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
'''
|
||||||
|
@file :fixsequence.py
|
||||||
|
@Description: : fix sequence of pdb file
|
||||||
|
@Date :2024/03/07 09:44:06
|
||||||
|
@Author :lyzeng
|
||||||
|
@Email :pylyzeng@gmail.com
|
||||||
|
@version :1.0
|
||||||
|
'''
|
||||||
|
from analysis_pdb import PDBAnalyzer
|
||||||
|
from pathlib import Path
|
||||||
|
# PDB files whose residue numbering is to be repaired
pdb_files = list(map(Path, (
    './pdb_test9/1zgl.pdb',
    './pdb_test9/1g6r.pdb',
    './pdb_test9/1mwa.pdb',
    './pdb_test9/6bga.pdb',
)))
for pdb_file in pdb_files:
    # Fix the numbering of all chains, then print the file stem together
    # with the renumbering errors recorded on the returned object.
    analyzer = PDBAnalyzer.renumber_residues_based_on_issues_and_clean(pdb_file)
    print(pdb_file.stem, analyzer.renumber_errors)
|
||||||
66
manualfix/README.md
Normal file
66
manualfix/README.md
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
2024-03-07
|
||||||
|
['5ksa', '1g6r', '6bga', '6u3n', '1zgl', '7l1d', '3qiu', '2ypl', '7z50', '6uln', '1mwa', '4z7v', '4ozi', '7rrg', '6uk4', '4p2o', '6vm8', '5ksb'] 重新建模
|
||||||
|
# 1g6r: Align 对齐过程中,序列编号出错导致残基缺失,进而导致MD模拟后续失败
|
||||||
|
# 6u3n: A开头地方与B链的碰撞
|
||||||
|
6uk4: 结构分散(不适合模拟)
|
||||||
|
6uln: 结构分散(不适合模拟)
|
||||||
|
6vm8: 结构分散(不适合模拟)
|
||||||
|
7l1d: 无碰撞,结构分散(不适合模拟)
|
||||||
|
7rrg: 无碰撞,结构分散(不适合模拟)
|
||||||
|
# 7z50: A与B链的碰撞
|
||||||
|
# 1mwa: B链116缺失
|
||||||
|
# 1zgl: M链122,123缺失
|
||||||
|
# 2ypl: D链124E链121碰撞
|
||||||
|
# 3qiu: A与B链的碰撞
|
||||||
|
# 4ozi: A链72位与B链5位(β折叠)碰撞
|
||||||
|
# 4p2o: A链67位与B链4位碰撞
|
||||||
|
# 4z7v: C链96位与D链105碰撞
|
||||||
|
5ksa: 缺失过多导致修复后loop过多(不适合模拟)
|
||||||
|
# 5ksb: C链83位与D链5位碰撞
|
||||||
|
# 6bga: B链137,138虚线缺失
|
||||||
|
|
||||||
|
最终确定:['6bga', '5ksb', '4z7v', '4p2o', '4ozi', '3qiu', '2ypl', '1zgl', '1mwa', '7z50', '6u3n', '1g6r']
|
||||||
|
|
||||||
|
____
|
||||||
|
1zgl [{'chain_id': 'P', 'start_residue': 63, 'end_residue': 104, 'estimated_missing': 0}] B链M链缺失太多,修复之后loop过多,不适合模拟
|
||||||
|
1g6r [{'chain_id': 'B', 'start_residue': 109, 'end_residue': 237, 'estimated_missing': 0}] 修复
|
||||||
|
1mwa [{'chain_id': 'B', 'start_residue': 116, 'end_residue': 316, 'estimated_missing': 0}] 多处编号错误B链
|
||||||
|
6bga [{'chain_id': 'B', 'start_residue': 112, 'end_residue': 198, 'estimated_missing': 0}] B链两处缺失虚线
|
||||||
|
____
|
||||||
|
此处存放手动修复的pdb文件;提取单聚体后,再手动修复残缺的编号。
|
||||||
|
|
||||||
|
1g6r.manualfix.pdb
|
||||||
|
1mwa.manualfix.pdb
|
||||||
|
6bga.modellerfix.pdb B链碰撞导致缺失
|
||||||
|
cp ../pdb_test7/runner_5ksb/5ksb.modellerfix.pdb ./
|
||||||
|
|
||||||
|
## pyrosetta fastrelax
|
||||||
|
|
||||||
|
```
|
||||||
|
# 使用pyrosetta快速修复蛋白缺失的侧链
|
||||||
|
from pathlib import Path
|
||||||
|
import pyrosetta
|
||||||
|
from pyrosetta import rosetta
|
||||||
|
from multiprocessing import Pool
|
||||||
|
|
||||||
|
|
||||||
|
def fix_optimize(file: Path, out_file: Path):
    """Relax a PDB structure with PyRosetta FastRelax and save the result.

    The structure in ``file`` is loaded as a pose, FastRelax is applied to
    it with the ``ref2015`` score function, and the resulting pose is
    dumped to ``out_file``.

    Background carried over from the original notes (not verified here):
    FastRelax is described as a fast gradient-descent-based optimization
    that also improves non-conformational defects such as hydrogen bonds,
    ion pairs, aromatic interactions and solvent-protein interactions;
    ref2015 is described as a Rosetta score function / force field,
    refined from earlier Rosetta force fields, that FastRelax can be told
    to use.

    :param file: path of the input PDB file
    :param out_file: path the relaxed structure is written to
    :return: None
    """
    # Initialize PyRosetta
    pyrosetta.init()
    # Read the protein structure
    structure = pyrosetta.pose_from_pdb(file.as_posix())
    # Fix residue side chains: FastRelax driven by the ref2015 score function
    pyrosetta.rosetta.protocols.relax.FastRelax(
        pyrosetta.create_score_function('ref2015')
    ).apply(structure)
    # Write out the repaired structure
    structure.dump_pdb(out_file.as_posix())
|
||||||
|
```
|
||||||
46
manualfix/relax.py
Normal file
46
manualfix/relax.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import pyrosetta
|
||||||
|
from multiprocessing import Pool
|
||||||
|
from shutil import copyfile
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# fix = ['6bga', '5ksb', '4z7v', '4p2o', '4ozi', '3qiu', '2ypl', '1zgl', '7z50', '6u3n']
|
||||||
|
fix = ['1zgl']
|
||||||
|
|
||||||
|
def fix_optimize(file: Path, out_file: Path):
    """Run PyRosetta FastRelax on one PDB file and log progress to a file.

    The structure in ``file`` is loaded as a pose, FastRelax is applied with
    the ``ref2015`` score function, and the pose is dumped to ``out_file``.
    Start/finish messages go to a log file next to ``out_file`` (same path
    with the suffix replaced by ``.log``).

    The log handler is detached and closed when the call ends, so repeated
    calls in the same (pool worker) process neither leak open log files nor
    write duplicate lines when the same log name is used again.

    :param file: path of the input PDB file
    :param out_file: path the relaxed structure is written to
    :return: None
    :raises Exception: anything raised by PyRosetta or file I/O is logged
        to the per-file log and then re-raised unchanged
    """
    # Set up the per-file log
    log_file = out_file.with_suffix('.log')
    logger = logging.getLogger(log_file.name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    try:
        logger.info(f'Processing {file.name}')

        pyrosetta.init()
        pose = pyrosetta.pose_from_pdb(file.as_posix())
        scorefxn = pyrosetta.create_score_function('ref2015')
        relax = pyrosetta.rosetta.protocols.relax.FastRelax(scorefxn)
        relax.apply(pose)
        pose.dump_pdb(out_file.as_posix())

        logger.info(f'Finished processing {file.name}')
    except Exception:
        # Record the failure in the per-file log before propagating it.
        logger.exception(f'Failed processing {file.name}')
        raise
    finally:
        # Loggers are process-global: detach and close the handler so the
        # log file is released and later calls do not duplicate output.
        logger.removeHandler(file_handler)
        file_handler.close()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Working directory for the relax step: selected structures are copied
    # here and every '*.modellerfix.pdb' found here is relaxed.
    manualfix_dir = Path('/mnt/mydrive/analysis_pdb-dev/manualfix')
    source_dirs = [Path('../pdb_test8'), Path('../pdb_test7')]

    # Collect all '*.modellerfix.pdb' files below the source directories.
    files = []
    for source_dir in source_dirs:  # avoid shadowing the builtin ``dir``
        files.extend(source_dir.rglob('*.modellerfix.pdb'))

    # Copy only the structures whose id (file name up to the first '.')
    # is listed in ``fix``.
    for file in files:
        if file.name.split('.')[0] in fix:
            print(file)
            target = manualfix_dir / file.name
            copyfile(file.as_posix(), target.as_posix())

    # Relax every '*.modellerfix.pdb' in the working directory in parallel;
    # the output name gets '.fastrelax' inserted before the final suffix.
    pyrosetta_fix = list(manualfix_dir.rglob('*.modellerfix.pdb'))
    with Pool(16) as p:
        p.starmap(fix_optimize, [(file, file.with_stem(file.stem + '.fastrelax')) for file in pyrosetta_fix])
    print('fastrelax done')
|
||||||
Reference in New Issue
Block a user