first version

This commit is contained in:
2024-09-07 09:12:14 +08:00
commit bc8cc22a9c
27 changed files with 2530 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
__pycache__
*.pse
*.log
*.pdb
*.mol2
*.sdf
*.gif
*.pyc
*.cif

163
README.md Normal file
View File

@@ -0,0 +1,163 @@
# 目录结构说明
asset放一些学习资料github仓库放不下就放到网盘中去。
plugin为自建的pymol的插件开发的一些代码
scripts为一些脚本文件主要是测试代码
## pymol 资源学习绘图
[各种作用力绘图](https://proteintools.uni-bayreuth.de./)
## 环境安装
```shell
conda install -c conda-forge ffmpeg pymol-open-source biopython loguru
```
## 最高质量渲染
```shell
util.performance(0)
rebuild
```
PyMOL可以用多种方式进行录制
1. **命令行录制**: 使用`log_open``log_close`命令可以将PyMOL的命令行操作记录到文本文件。
```
cmd.log_open('filename.log')
... # Your operations
cmd.log_close()
```
2. **Ray Tracing**: 使用`ray`命令渲染高质量的图片,然后用其他软件将图片合成为视频。
3. **使用`mset`和`mview`创建动画**: 如你之前所述,通过这两个命令创建动画,然后使用`mpng`命令将动画导出为PNG帧最后用其他软件将其合成为视频。
```
cmd.mpng('frame_prefix_', 1, 60)
```
4. **屏幕录制软件**: 使用外部屏幕录制软件如OBS、Camtasia等进行录制。
5. **PyMOL API和脚本**: 使用PyMOL的Python API和PyMOL脚本可以更精细地控制录制内容。
## PyMOL工作目录
使用`cd`命令改变PyMOL的工作目录。例如
```
cmd.cd("/path/to/new/directory")
```
## 图片渲染
设置600DPI
```shell
width = int(1920 * (600 / 72))
height = int(1080 * (600 / 72))
cmd.ray(width, height)
```
得到帧1至帧60的PNG图片
```shell
cmd.mset("1 x60 61 x60")
cmd.scene('frame1', 'store')
cmd.mview('store', state=1, scene='frame1')
cmd.scene('frame2', 'store')
cmd.mview('store', state=60, scene='frame2')
cmd.mview('interpolate')
for i in range(1, 61):
cmd.frame(i)
cmd.ray(1920, 1080, 300)
cmd.mpng(f'frame_prefix_{i}.png')
```
```shell
from dataclasses import dataclass, field
from typing import Tuple
from pathlib import Path
@dataclass
class PyMOLAnimator:
resolution: Tuple[int, int] = (1920, 1080)
dpi: int = 300
frame_count: int = 120
output_dir: Path = field(default_factory=lambda: Path('./'))
def save_frames(self, scene_1: str, scene_2: str):
import pymol.cmd as cmd
# 设置动画帧数
cmd.mset(f"1 x{self.frame_count} {self.frame_count+1} x{self.frame_count}")
# 存储第一帧和最后一帧
cmd.scene(scene_1, 'store')
cmd.mview('store', state=1, scene=scene_1)
cmd.scene(scene_2, 'store')
cmd.mview('store', state=self.frame_count, scene=scene_2)
# 插值以创建动画
cmd.mview('interpolate')
output_path = self.output_dir / 'frame_prefix_'
# 保存每一帧为PNG图片
for i in range(1, self.frame_count+1):
cmd.frame(i)
cmd.ray(self.resolution[0], self.resolution[1], self.dpi)
cmd.mpng(str(output_path) + f'{i}.png')
# 使用
animator = PyMOLAnimator((1920, 1080), 300, 120, Path('/path/to/save'))
animator.save_frames('frame1', 'frame2')
```
## 场景设置
`cmd.mview`和`cmd.scene`两者都用于存储视图状态,但用途和范围有所不同:
### `cmd.mview`
- **主要用途**: 用于电影插值,在电影播放中创建平滑过渡。
- **存储内容**: 主要存储相机和对象矩阵,用于电影帧之间的插值。
- **特定于帧**: 可以为电影中的特定帧或帧范围存储视图。
- **动画专用**: 主要用于创建复杂的电影动画。
```
cmd.mview('store', state=1, scene='frame1')
```
这会在状态1下用场景名`frame1`存储当前相机和对象矩阵。
### `cmd.scene`
- **主要用途**: 存储和恢复场景,方便用户随时回到特定的视图和设置。
- **存储内容**: 存储相机视图、所有对象的活动信息、所有原子的可见性和颜色、所有的表示形式和全局帧索引。
- **不特定于帧**: 主要用于一般目的的视图保存和恢复。
```
cmd.scene('MyScene', 'store', 'This is my special scene')
```
这会存储一个名为"MyScene"的场景,并附带一条消息。

BIN
asset/ChimeraX Tutorial.pdf Normal file

Binary file not shown.

2
plugin/README.md Normal file
View File

@@ -0,0 +1,2 @@
结构生物学研究中硒代蛋氨酸MSE通常用作甲硫氨酸MET的代理因为硒元素在X射线晶体学中有更强的散射能力。但在某些情况下研究人员可能想要从他们对“配体ligands”的定义中排除MSE。
MSE作为蛋白质的一部分有时可能与真正的小分子配体或其他非天然出现的分子混淆。

0
plugin/__init__.py Normal file
View File

52
plugin/cmd_singleton.py Normal file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :base.py
@Description: :pymol cmd 全局单例
@Date :2023/09/24 12:34:33
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
# pymol_singleton.py
import sys
from pathlib import Path
from pymol import cmd as pymol_cmd, launch as pymol_launch
from typing import Any
from dataclasses import dataclass, field, InitVar
here = Path(__file__).absolute().parent
sys.path.append(here.as_posix())
from plugin.config import colorp, font, mole, atom
@dataclass
class PyMOLSingleton:
_instance: InitVar[Any] = None # InitVar类型表示这个字段仅用于__post_init__
colorp: type(colorp) = field(default_factory=colorp) # 使用default_factory设置默认值
def __new__(cls, *args, **kwargs):
if cls._instance is None:
cls._instance = super(PyMOLSingleton, cls).__new__(cls)
return cls._instance
def __post_init__(self, _instance):
if _instance is None:
# 绑定pymol.cmd的所有方法到这个单例对象
for attr in dir(pymol_cmd):
if callable(getattr(pymol_cmd, attr)) and not attr.startswith("__"):
setattr(self, attr, getattr(pymol_cmd, attr))
# 初始化配色
self.defcolor()
# 启动PyMOL界面
self.launch()
def defcolor(self):
# 根据self.colorp生成颜色
color_gennerate = map(lambda x: x[0], self.colorp.colors)
list(map(lambda x: self.set_color(*x), color_gennerate)) # 使用self.set_color而不是cmd.set_color
self.set_color(*self.colorp.grey1) # 使用self.set_color而不是cmd.set_color
def launch(self):
pymol_launch() # 使用pymol的launch函数来启动PyMOL界面
# 创建单例对象并暴露给其他模块
cmd = PyMOLSingleton()

42
plugin/config.py Normal file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :myconfig.py
@Description: :定义一些常用的配置
@Date :2023/09/24 13:09:19
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
from dataclasses import dataclass, field, fields
from typing import Tuple
@dataclass
class colorp():
color1: Tuple[Tuple[str, str], str] = field(default=(('color1', '[186,182,217]'), 'purple'))
color2: Tuple[Tuple[str, str], str] = field(default=(('color2', '[233,195,153]'), 'yellow'))
color3: Tuple[Tuple[str, str], str] = field(default=(('color3', '[43,113,216]'), 'blue_N'))
color4: Tuple[Tuple[str, str], str] = field(default=(('color4', '[206,155,198]'), 'purple'))
color5: Tuple[Tuple[str, str], str] = field(default=(('color5', '[251,187,62]'), 'orange'))
color6: Tuple[Tuple[str, str], str] = field(default=(('color6', '[245,157,158]'), 'red'))
color7: Tuple[Tuple[str, str], str] = field(default=(('color7', '[133,188,135]'), 'green'))
color8: Tuple[Tuple[str, str], str] = field(default=(('color8', '[30,230,30]'),'green_CL')) # Cl卤素配色
color9: Tuple[Tuple[str, str], str] = field(default=(('color9', '[141,215,247]'),'blue_C')) # C配色
color10:Tuple[Tuple[str, str], str] = field(default=(('color10', '[0,132,55]'),'green_F')) # F卤素配色
grey1: Tuple[str, str] = field(default=('grey1','[224,224,224]'))
colors: list = field(default_factory=list, init=False) # init=False 表示不在 __init__ 方法中初始化
def __post_init__(self):
self.colors = [getattr(self, f.name) for f in fields(self) if f.name.startswith('color') and getattr(self, f.name)]
@dataclass
class font():
font_size:int = field(default=28) # 单位就是正常的px。你也可以用负值则单位是Å
@dataclass
class mole():
stick_radius: float = field(default=0.10)
@dataclass
class atom():
size: float = field(default=0.28)

44
plugin/method.py Normal file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :method.py
@Description: :pymol中一些常用到的一些方法
@Date :2023/09/24 13:28:30
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
import sys
from pathlib import Path
here = Path(__file__).absolute().parent
sys.path.append(here.as_posix())
from plugin.cmd_singleton import cmd
from pymol.preset import ion_sele,lig_sele,solv_sele
def color_mole(selection='hetatm'):
cmd.color('color9',f'{selection} in name c*')
cmd.color('color6',f'{selection} in name o*')
cmd.color('color5',f'{selection} in name s*')
cmd.color('color3',f'{selection} in name n*')
cmd.color('color8',f'{selection} in name cl*')
cmd.color('color10',f'{selection} in name f*')
def focus_ligand(selection: str = 'sele'):
'''
watch ligand and protein structure
'''
cmd.remove(solv_sele) # 移除金属离子和溶剂
cmd.remove("resn SO4 or resn PO4 or resn CL")
cmd.pretty(selection)
def grey(selection='sele',opt=True):
if opt: optimisation(selection) # 优化
cmd.color('grey1',f'{selection}')# 对某个对象进行灰化
def optimisation(selection, grey=True):
if grey: cmd.set('cartoon_color','grey1', selection=selection)
cmd.set('stick_transparency','0.1', selection=selection)
cmd.set("ray_shadows","off", selection=selection)
cmd.set('cartoon_highlight_color', selection=selection)
cmd.set('cartoon_fancy_helices', selection=selection)
cmd.set('cartoon_transparency','0.5', selection=selection)

97
plugin/singleton.py Normal file
View File

@@ -0,0 +1,97 @@
import pymol
from pymol import _cmd
import threading
import sys
pymol2_lock = threading.RLock()
##
## FIXME: The PyMOL and SingletonPyMOL classes are partly redundant with the
## instance tracking of the "cmd" module (and the pymol2.cmd2.Cmd class),
## which also holds the _COb pointer.
##
class SingletonPyMOL:
'''
Start an exclusive PyMOL instance, only one instance allowed
'''
def __init__(self, options=None):
if options is None:
self.options = pymol.invocation.options
else:
self.options = options
def idle(self):
return _cmd._idle(self._COb)
def getRedisplay(self, reset=True):
return _cmd._getRedisplay(self._COb, reset)
def reshape(self, width, height, force=0):
_cmd._reshape(self._COb, width, height, force)
def draw(self):
_cmd._draw(self._COb)
def button(self, button, state, x, y, modifiers):
_cmd._button(self._COb, button, state, x, y, modifiers)
def drag(self, x, y, modifiers):
_cmd._drag(self._COb, x, y, modifiers)
def start(self):
cmd = pymol.cmd
if cmd._COb is not None:
raise RuntimeError('can only start SingletonPyMOL once')
with pymol2_lock:
cmd._COb = _cmd._new(pymol, self.options, True) #! 此处对pymol-open-source源码进行修改方便启动的时候修改参数具体参考options
_cmd._start(cmd._COb, cmd)
self._COb = cmd._COb
self.cmd = cmd
def stop(self):
with pymol2_lock:
_cmd._stop(self._COb)
pymol.cmd._COb = None
def __enter__(self):
self.start()
return self
def __exit__(self, exc_type, exc_value, tb):
self.stop()
@classmethod
def start_with_options(cls, options): # new add
instance = cls(options)
instance.start()
return instance
'''
# 使用示例
if __name__ == '__main__':
from pymol.invocation import options
options.rpcServer = 1
# 使用类方法创建并启动实例
pymol_instance1 = SingletonPyMOL.start_with_options(options)
# 使用上下文管理器
with SingletonPyMOL() as pymol_instance2:
# pymol_instance2 使用默认的 options
pass
# 先创建实例,然后单独启动
pymol_instance3 = SingletonPyMOL()
pymol_instance3.start() # 使用默认的 options
# 启动pymol GUI可以在一个新的进程中启动通过rpcServer 默认端口是9123 localhost
# 或者在一个新的线程中启动界面,不堵塞主线程
from pmg_qt import pymol_qt_gui
pymol_qt_gui.execapp()
'''

112
pymol_anime.py Normal file
View File

@@ -0,0 +1,112 @@
from loguru import logger
from pymol import cmd
import subprocess
from pathlib import Path
# 删除当前目录下的*.mp4和*.png文件
for file_path in Path('.').glob('*.mp4'):
file_path.unlink()
for file_path in Path('.').glob('*.png'):
file_path.unlink()
# 设置日志文件
logger.add("pymol_animation.log")
cmd.log_open('pymol_animation_cmd.log')
# 最高渲染质量
cmd.util.performance(0)
cmd.rebuild()
# 初始化并加载4i24结构
cmd.reinitialize('everything')
cmd.fetch('4i24')
cmd.copy('4i24_mutated', '4i24') # 复制4i24到4i24_mutated
# 假设protgrey和colorp.color_mole是你自定义的函数
protgrey('4i24')
protgrey('4i24_mutated')
colorp.color_mole(obj='hetatm')
cmd.hide('everything', '4i24_mutated')
i = '4i24_mutated' # object
j = 'A' # chain
k = '797' # site
mutate_site = f"/{i}//{j}/{k}"
mutation_type = 'GLY'
# 对4i24_mutated进行突变
cmd.select('mut_res', mutate_site)
cmd.wizard("mutagenesis")
cmd.refresh_wizard()
cmd.get_wizard().set_mode(mutation_type)
cmd.get_wizard().do_select(mutate_site)
cmd.get_wizard().apply()
cmd.set_wizard("done")
cmd.show('sticks', mutate_site)
cmd.hide('everything', '4i24_mutated')
# 先打关键帧然后创建视频
# 创建第一个关键帧
selection1 = "/4i24//A/797"
cmd.scene('scene1', 'store')
cmd.orient(selection1)
cmd.show('sticks', selection1)
# 创建第二个关键帧
selection2 = "/4i24//A/1C9"
cmd.select("both_residues", f"{selection1} or {selection2}")
cmd.orient("both_residues")
cmd.scene('scene2', 'store')
# 设置动画和mview
cmd.mset("1 x900") # 创建一个总共1200帧的动画框架每秒120帧共计10秒
# 第一个关键帧: 1帧
cmd.mview(action='store', first=1, scene='scene1')
# 第二个关键帧: 300帧
cmd.mview(action='store', first=300, scene='scene2')
# 插值以平滑过渡
cmd.mview('interpolate', first=1, last=300)
residue_to_show = "/4i24//A/797" # 用你想显示的残基替换
cmd.mdo(360, f"show lines, {residue_to_show}; label {residue_to_show}/n, f'res {residue_to_show}(CYS)'")
# 隐藏4i24对象: 600帧
cmd.mdo(600, "hide everything, 4i24")
# 显示4i24_mutated对象: 610帧
cmd.mdo(610, "show cartoon, 4i24_mutated")
# 显示突变残基: 620帧
cmd.mdo(620, "show sticks, /4i24_mutated//A/797")
# 为残基添加标签: 630帧
residue_to_show = "/4i24_mutated//A/797"
cmd.mdo(630, f"show lines, {residue_to_show}; label {residue_to_show}/n, f'res {residue_to_show}(GLY)'")
# 插值以平滑过渡
cmd.mview('interpolate', first=300, last=900)
# 设置viewport尺寸为1080P1920x1080像素
cmd.viewport(1920, 1080)
cmd.mplay() # 播放动画
# 光线追踪和保存每一帧
# for frame in range(1, 901):
# cmd.frame(frame)
# logger.info(f"Rendering frame {frame}")
# cmd.refresh()
# cmd.ray(width=1920, height=1080)
# cmd.png(f"frame_transition_{frame:04d}.png")
# 使用ffmpeg合成视频
# subprocess.run([
# 'ffmpeg',
# '-framerate', '120', # 120帧/秒
# '-i', 'frame_transition_%04d.png',
# '-c:v', 'libx264',
# '-pix_fmt', 'yuv420p',
# 'transition_video.mp4'
# ])
# ffmpeg -framerate 120 -i frame_transition_%04d.png -c:v libx264 -pix_fmt yuv420p transition_video.mp4

264
pymol_color_plugin.py Normal file
View File

@@ -0,0 +1,264 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :pymol_color_plugin.py
@Description: :pymol配色方案
@Date :2023/8/29 10:52:25
@Author :hotwa
@version :1.0
'''
from pymol import cmd
from pathlib import Path
from dataclasses import dataclass
from json import dumps
from Bio.Data import IUPACData
from Bio.SeqUtils import seq3
from collections import defaultdict
STANDARD_AMINO_ACIDS_1 = set(IUPACData.protein_letters)
STANDARD_AMINO_ACIDS_3 = set([seq3(i).upper() for i in STANDARD_AMINO_ACIDS_1])
def bejson(d:dict(help='need to beauty json')) -> dict:
return dumps(d,indent=4,ensure_ascii=False)
@dataclass
class pymolbase():
file: Path
def __post_init__(self):
cmd.reinitialize('everything')
cmd.load(filename=self.file.as_posix())
PDBs = cmd.get_names()
if len(PDBs) == 1:
PDB = PDBs[0]
else:
raise ValueError(f'this pdb have more than one object! PDBs:{PDBs}')
self.CAindex = cmd.identify(f"{PDB} and name CA") # get CA index
self.pdbstrList = [cmd.get_pdbstr("%s and id %s" % (PDB, CAid)).splitlines() for CAid in CAindex]
self.ProtChainResiList = [[PDB, i[0][21], i[0][22:26].strip()] for i in pdbstrList] # get pdb chain line string
resn_hetatm = self.get_resn('hetatm')
def mutate(self, site:str, chain:str, mutation_type: str):
'''
mutation_type:ALA,ARG,ASN,ASP,CYS,GLN,GLU,GLY,HIS,ILE,LEU,LYS,MET,PHE,PRO,SER,THR,TRP,TYR,VAL
'''
for i, j, k in self.ProtChainResiList:
if int(k) == int(site) and j == str(chain):
cmd.wizard("mutagenesis")
cmd.refresh_wizard()
cmd.get_wizard().set_mode(mutation_type)
selection = f"/{i}//{j}/{k}"
cmd.get_wizard().do_select(selection)
cmd.get_wizard().apply()
cmd.set_wizard("done")
def add_polar_hydrogen(self, pymol_obj='sele'):
'''
add polar hydrogen
'''
cmd.h_add(pymol_obj) # add all hydrogens in this molecular
cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen
def save(self, file_name: Path, selection: str):
cmd.save(file_name.as_posix(), f"{selection}")
# def get_resn(self, name:str = 'all'):
# resn_list = []
# if name == 'protein':
# cmd.iterate(f"all and not hetatm", expression=lambda atom: resn_list.append(atom.resn))
# return tuple(set(resn_list))
# else:
# cmd.iterate(selection=name, expression=lambda atom: resn_list.append(atom.resn))
# return tuple(set(resn_list))
def get_resn(self, name:str = 'all'): # ! not test
resn_dict = defaultdict(set)
def collect_resn(atom):
category = 'protein' if atom.chain and not atom.hetatm else 'small_molecule'
resn_dict[category].add(atom.resn)
cmd.iterate(name, expression=collect_resn)
protein_resns = resn_dict.get('protein', set())
small_mol_resns = resn_dict.get('small_molecule', set())
standard_aa = protein_resns.intersection(STANDARD_AMINO_ACIDS_3)
non_standard_aa = protein_resns - STANDARD_AMINO_ACIDS_3
return {
'small_molecules': list(small_mol_resns),
'protein_resns': list(protein_resns),
'standard_aa': list(standard_aa),
'non_standard_aa': list(non_standard_aa)
}
class colorp(pymolbase):
color1 = (('color1', '[186,182,217]'), 'purple')
color2 = (('color2', '[233,195,153]'), 'yellow')
color3 = (('color3', '[43,113,216]'), 'blue-N')
color4 = (('color4', '[206,155,198]'), 'purple')
color5 = (('color5', '[251,187,62]'), 'orange')
color6 = (('color6', '[245,157,158]'), 'red')
color7 = (('color7', '[133,188,135]'), 'green')
color8 = (('color8', '[30,230,30]'),'green-CL') # Cl卤素配色
color9 = (('color9', '[141,215,247]'),'blue-C') # C配色
color10 = (('color10', '[0,132,55]'),'green-F') # F卤素配色
grey1 = ('grey1','[224,224,224]')
colors = (color1, color2, color3, color4, color5, color6, color7, color8, color9, color10)
def __init__(self, path, id):
self.id = id.lower()
cmd.reinitialize()
p = Path(path)
if p.is_dir():
file = p.joinpath(f"{id}.pdb")
else:
raise ValueError('path params error')
cmd.load(file,id)
def pretty(self):
cmd.remove('solvent metals') # 移除金属离子和溶剂
cmd.remove("resn SO4 or resn PO4 or resn CL")
cmd.pretty(selection=self.id)
@staticmethod
def defcolor():
# 定义常用配色
color_gennerate = map(lambda x:x[0],colorp.colors)
list(map(lambda x:cmd.set_color(*x),color_gennerate))
# 定义灰度配色
cmd.set_color(*colorp.grey1)
@staticmethod
def grey(obj='sele',opt=True):
colorp.defcolor()
if opt: method.optimisation() # 优化
# 对某个对象进行灰化
cmd.color('grey1',f'{obj}')
@staticmethod
def color_mole(obj='hetatm'):
# color blue,sele in name c*
colorp.defcolor()
cmd.color('color9',f'{obj} in name c*')
cmd.color('color6',f'{obj} in name o*')
cmd.color('color5',f'{obj} in name s*')
cmd.color('color3',f'{obj} in name n*')
cmd.color('color8',f'{obj} in name cl*')
cmd.color('color10',f'{obj} in name f*')
@staticmethod
def pretty(obj = None):
if not obj: obj = cmd.get_names()[0]
cmd.remove('solvent metals') # 移除金属离子和溶剂
cmd.pretty(selection=obj)
class font():
font_size = 28 # 单位就是正常的px。你也可以用负值则单位是Å
class mole():
stick_radius=0.10
class atom():
size = 0.28
class method():
@staticmethod
def optimisation(selection, grey=True):
colorp.defcolor()
if grey: cmd.set('cartoon_color','grey1', selection=selection)
cmd.set('stick_transparency','0.1', selection=selection)
cmd.set("ray_shadows","off", selection=selection)
cmd.set('cartoon_highlight_color', selection=selection)
cmd.set('cartoon_fancy_helices', selection=selection)
cmd.set('cartoon_transparency','0.5', selection=selection)
@staticmethod
def remove():
cmd.remove('resn SO4') # SO4 remove
cmd.remove('solvent metals') # 移除金属离子和溶剂
def get_resn(name:str = 'all'):
resn_list = []
cmd.iterate(selection=name, expression=lambda atom: resn_list.append(atom.resn))
return tuple(set(resn_list))
def protgrey(selection='all'):
method.remove()
method.optimisation(selection)
# 观察小分子
# protgrey()
# colorp.color_mole('sele')
# add label
# label sele, "your label"
# black background
# set bg_rgb, [0, 0, 0]
# define label
# cmd.get_position("sele")
# set label_position, [x,y,z]
# cmd.h_add(pymol_obj) # add all hydrogens in this molecular
# cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen
# cmd.remove(f"sele & hydro") # 移除所有的H
# cmd.set('cartoon_color','color8', 'obj')
# cmd.hide("everything", "chain A and resn UHT and alt B") # 小分子隐藏B构象
# cmd.label("chain A and resn UHT and alt A", "resn") # 添加标签
#cmd.select('sele', f'resn {resn} and chain {chain}')
#cmd.center('sele')
#cmd.orient('sele')
#obj_name = 'around_and_self'
# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance}') # 不扩展残基
# cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance}') # 扩展残基
# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子
#cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子扩展至残基的原子
#cmd.hide('everything', 'all')
#cmd.show('lines', obj_name)
cmd.extend('colorp',colorp)
cmd.extend('method',method)
if __name__ == '__main__':
PDB = '4i24'
cmd.reinitialize('everything')
cmd.fetch('4i24')
protgrey('4i24')
colorp.color_mole(obj='hetatm')
# 设置动画帧数
# cmd.mset("1 x60 61 x60") # 60帧展示初始结构第61帧用于突变再60帧展示突变后结构
# 保存第1帧
cmd.scene('frame1', 'store')
cmd.mview('store', state=1, scene='frame1')
# 执行残基突变。这里以将位于链A和编号为797的残基突变为ALA为例。
selection1 = "/4i24//A/797"
selection2 = "/4i24//A/1C9"
cmd.select("both_residues", f"{selection1} or {selection2}")
cmd.orient("both_residues")
cmd.show('sticks', selection1)
# 保存第2帧
cmd.scene('frame60', 'store')
cmd.mview('store', state=60, scene='frame60')
# 关闭电影模式
cmd.mstop() # 停止电影播放
cmd.mdelete() # 删除所有电影帧
cmd.wizard("mutagenesis")
cmd.refresh_wizard()
cmd.get_wizard().set_mode("GLY")
cmd.get_wizard().do_select(selection1)
# 保存第3帧
cmd.scene('frame120', 'store')
cmd.mview('store', state=120, scene='frame120')
cmd.get_wizard().apply()
cmd.set_wizard()
# 保存第4帧
cmd.scene('frame180', 'store')
cmd.mview('store', state=180, scene='frame180')
cmd.mpng('frame_prefix_', 1, 4)

1
pymolrc Normal file
View File

@@ -0,0 +1 @@
run pymol_color_plugin.py

4
script/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
__pycache__
*.pdb
*.pse
*.cif

6
script/README.md Normal file
View File

@@ -0,0 +1,6 @@
# pymol
## Getting started

0
script/__init__.py Normal file
View File

48
script/add_hydrogen.py Normal file
View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :add_hydrogen.py
@Description: : add hydrogen in pymol
@Date :2022/10/24 11:17:11
@Author :hotwa
@version :1.0
'''
from pymol import cmd
from openbabel import pybel
"""
conda install -c conda-forge pymol-open-source openbabel --yes
"""
def add_polar_hydrogen(file, out_file,read_fmt = 'mol2', pymol_obj='sele',
save_format='mol2',tool = 'pymol'):
"""add_polar_hydrogen _summary_
_extended_summary_
Arguments:
file {pathlike} -- input file path
out_file {pathlike} -- output file
Keyword Arguments:
read_fmt {string} -- the format of read file (default: {'mol2'})
pymol_obj {string } -- add polar hydrogen object name, ligand ID (default: {'sele'})
save_format {string} -- the format of save file (default: {'mol2'})
tool {string} -- the tool of add polar hydrogens (default: {'pymol'})
Returns:
_type_ -- path or None
"""
if tool == 'pymol':
cmd.reinitialize("everything")
cmd.load(filename=file, format=read_fmt)
cmd.h_add(pymol_obj) # add all hydrogens in this molecular
cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen
cmd.save(filename=out_file,selection=pymol_obj, format=save_format)
return out_file
elif tool == 'pybel':
omol = list(pybel.readfile(format = 'mol2', filename = file))[0]
omol.OBMol.AddPolarHydrogens()
omol.write('mol2',out_file,overwrite=True)
return out_file
else:
return None

465
script/biotools.py Normal file
View File

@@ -0,0 +1,465 @@
from email import header
from unittest.mock import Base
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :biotools.py
@Description: :
@Date :2021/06/22 16:36:34
@Author :hotwa
@version :1.0
'''
import os,re,time
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB import PDBIO,Select,parse_pdb_header,PDBList
from tools import mdcheck,get_path_contents
import requests
import random,string
from pathlib import Path
file_dir = os.path.dirname(os.path.realpath(__file__)) # 当前python文件的绝对路径
residue = [i.upper() for i in 'Gly,Ala,Val,Leu,Ile,Pro,Phe,Tyr,Trp,Ser,Thr,Cys,Met,Asn,Gln,Asp,Glu,Lys,Arg,His'.split(',')]
""" getpdbfile function"""
agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 -- '+''.join(random.sample(string.ascii_letters+string.digits, 32))
HEADER = {'User-Agent': agent}
def getpdbfile(pdbid,dir): # 下载备用,不是很稳定
pdbid = pdbid.upper()
res = os.path.exists(f'{pdbid}.pdb')
if not res:
r = requests.get(url = f'https://files.rcsb.org/download/{pdbid}.pdb1',verify=False,headers=HEADER)
_file_path = os.path.join(dir,f'{pdbid}.pdb')
with open(_file_path,'wb+') as f:
f.write(r.content)
""" getpdbfile function"""
class PDBLengthError(BaseException):
def __init__(self,arg):
self.arg = arg
def downloadpdbfile(pdbid,pdir,overwrite=True,changename=True):
"""downloadpdbfile [summary]
[extended_summary]
:param pdbid: like code 5xv7
:type pdbid: string
:param pdir: save pdb file path
:type pdir: string
:param overwrite: overwrite pdb file, defaults to True
:type overwrite: bool, optional
:param changename: change .ent to .pdb, defaults to True
:type changename: bool, optional
"""
if isinstance(pdbid,list):
for i in pdbid:
downloadpdbfile_sub(pdbid=i,dir = pdir,overwrite=overwrite,changename=changename)
elif isinstance(pdbid,str):
if len(pdbid) == 4:
downloadpdbfile_sub(pdbid=pdbid,dir = pdir,overwrite=overwrite,changename=changename)
else:
raise PDBLengthError('pdbid length error!')
def downloadpdbfile_sub(pdbid,dir,overwrite,changename):
_pdbname = f'pdb{pdbid}.ent'
_pdbname_new = f'{pdbid}.pdb'
if not overwrite:
_path = os.path.join(dir,_pdbname_new)
if os.path.exists(_path):
...
else:
pdbl = PDBList()
try:
pdbl.retrieve_pdb_file(pdbid,file_format='pdb',pdir = dir,overwrite=overwrite)
except FileNotFoundError: # 调用biopython下载接口失败使用自己的接口
getpdbfile(pdbid,dir)
if changename:
srcFile = os.path.join(dir,_pdbname)
dstFile = os.path.join(dir,_pdbname_new)
try:
os.rename(srcFile,dstFile)
except FileExistsError as e: # 如果重名名的文件存在,则表示该文件已经下载
...
except Exception as e:
raise e
elif overwrite: # 覆盖之前已经下载的pdb文件
pdbl = PDBList()
try:
pdbl.retrieve_pdb_file(pdbid,file_format='pdb',pdir = dir,overwrite=overwrite)
except FileNotFoundError: # 调用biopython下载接口失败使用自己的接口
getpdbfile(pdbid,dir)
if changename:
srcFile = os.path.join(dir,_pdbname)
dstFile = os.path.join(dir,_pdbname_new)
try:
os.rename(srcFile,dstFile)
except Exception as e:
raise e
else:
print('error overwrite params!')
class ChainSelect(Select):
def __init__(self,chain_string='A'):
super(Select,self).__init__()
self.chain_string = chain_string
def accept_chain(self, chain):
if str(chain.__repr__()) == '<Chain id={}>'.format(self.chain_string): # judge chain name
return 1
else:
return 0
class ResidueSelect(Select):
"""ResidueSelect ues by select
[extended_summary]
:param Select: [description]
:type Select: [type]
"""
def __init__(self,residue):
super(Select,self).__init__()
self.residue = residue
def accept_residue(self, residue):
if residue.get_resname()==self.residue:
return 1
else:
return 0
def return_path(path):
# 判断是否为绝对路径
path_split_string = os.path.split(path)
if os.path.isdir(path_split_string[0]) and os.path.isfile(path):
return path # 输入绝对路径,返回绝对路径
elif bool(path_split_string[0]) == False:
return os.path.join(file_dir,path) # 尝试在当前目录下自动补全路径
elif not (path_split_string[1][-4:] == '.pdb' or '.ent'):
raise ValueError(f'this file, {path} is not a valid pdb file')
else:
raise ValueError(f'valid path: {path}')
# ! error class for Bdp
class DoiError(BaseException):
def __init__(self,arg):
self.arg = arg
class ChainError(BaseException):
def __init__(self,arg):
self.arg = arg
class Bdp(object):
__slots__ = ['path','sid','mole_id','mole_struct','create_path']
def __init__(self,path,sid):
self.path = path
self.create_path = os.path.dirname(os.path.dirname(path))
self.sid = sid
self.mole_id = []
self.mole_struct = []
# 初始化数据
self.read_formula()
self.init_path()
@property
def chain_num(self):
s = self.read_struct()
first_model = s[0]
return len(first_model)
@property
def header_infos(self):
return parse_pdb_header(self.path)
@property
def doi(self):
_journal = self.header_infos['journal']
_doi = _journal.split()[-1]
if _doi.startswith('10.'):
return _doi
else:
raise DoiError(f'current pdb does not doi, {_journal}')
def mkdir(self,pname,md = False):
"""mkdir [summary]
[extended_summary]
:param pname: [description]
:type pname: [type]
:param md: [dict], defaults to False
:type md: [dict], optional
"""
def wrapper(func):
def deco(self,*arg,**kwargs):
create_path = os.path.join(self.create_path,pname)
if os.path.exists(create_path):
...
else:
os.mkdir(create_path)
# create markdown
mdc = mdcheck(md)
header_info = f'''---
title: {mdc['title']}
date: {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}
type: {mdc['typeinfo']}
---
[TOC]
# Description
{mdc['description']}
'''
create_path_md = os.path.join(create_path,'README.md')
with open(create_path_md,'w+') as f:
f.write(header_info)
func(self,*arg,**kwargs)
return deco
return wrapper
def value_check(self):
try:
if len(self.mole_id) == len(self.mole_struct):
print('check pass!')
else:
raise ValueError('molecule identifier do not match formula identifier! manual check')
except :
raise ValueError
@staticmethod
def read_line_list(first_column,path):
"""read_line_list 读取第一列为first_column字符串的行存为列表返回
[extended_summary]
Arguments:
first_column {[string]} -- [pdb文件内容第一列常常为大写]
path {[string]} -- [路径]
Returns:
[iter] -- [含有first_column字符串的生成器]
"""
stringiter = Bdp.stringlinesiter(file = path)
stringiter = map(lambda x:x.strip(), stringiter)
return filter(lambda x:x.split()[0]==first_column,stringiter)
@staticmethod
def stringlinesiter(file):
with open(file, 'r+', encoding='utf-8') as f:
yield from f.readlines()
@mkdir(self = 'self',pname = 'split_molecule')
def split_molecule(self,residue):
"""split_molecule [split molecule or residue line save to file]
[extended_summary]
:param residue: [select residue]
:type residue: [string]
:extract_chain: extract molecule corresponding chain
:type extract_chain: bool
"""
remove_list,residue = [],str(residue)
s = self.read_struct()
chainlist = self.getchain_str()
# 对每条链上都与这个小分子结合的链上的结合信息都提取出来
for i in chainlist:
io = PDBIO()
io.set_structure(s[0][i])
sidc = self.sid.replace('.pdb', '') if '.pdb' in self.sid else self.sid
savename = f'{sidc}_{i}_{residue}.pdb'
path = os.path.join(self.create_path,'split_molecule',savename)
io.save(path,ResidueSelect(residue))
if os.path.exists(path):
with open(path,'r+') as f:
content = f.readline()
if content.strip() == 'END':
# 该链没有小分子,删除文件
# print(f'{savename} this chain have not molecule remove it')
remove_list.append(path)
for i in remove_list:
os.remove(i) # remove the empty molecule file
def getchain_str(self):
"""getchain_str retrun chain symbol name
[extended_summary]
:return: [description]
:rtype: list
"""
_l = []
p = PDBParser(PERMISSIVE=1)
s = p.get_structure(self.sid, self.path)
chain_gennerate = s[0].copy().get_chains()
for i in chain_gennerate:
_l.append(i.id)
_l.sort()
return _l
def site_exists(self,residue,num):
"""site_exists 判断改蛋白的某个位点是否存在,并返回该位点所在的链
用于判断结合位点的链在哪里
:param residue: 残基名称
:type residue: string
:param num: 残基编号
:type num: int
:return: chain
:rtype: list
"""
_l = []
num = int(num)
residue = residue.strip().upper()
chainlist = self.getchain_str()
s = self.read_struct()
for i in chainlist:
try:
resname_from_pdb = s[0][i][num].get_resname()
if residue == resname_from_pdb:
_l.append(i)
except KeyError as e:
print(e,f'chain {i} not exist {residue}-{num}')
return _l
def _split_chain_save_struct(self,obj,sid):
io = PDBIO()
io.set_structure(obj)
sidc = sid.replace('.pdb', '') if '.pdb' in sid else sid
name,path = f'{sidc}_{obj.id}.pdb', self.create_path
io.save(os.path.join(path,'split_chain',name),ChainSelect(obj.id))
@mkdir(self = 'self',pname ='split_chain')
def split_chain(self,extract='auto'):
"""split_chain [summary]
[extended_summary]
:param extract: [extract the chain], defaults to 'auto' extract all chains
:type extract: [string], optional
:raises ValueError: [save to pdb in current path in ./splitchain]
"""
# 将传入的蛋白文件按照蛋白链分割并按照 pdbid_A_m.pdb 重新命名 A代表链m表示该链含有小分子
p = PDBParser(PERMISSIVE=1)
s = p.get_structure(self.sid, self.path)
if extract == 'auto':
chainlist = self.getchain_str()
for j in chainlist:
self._split_chain_save_struct(obj=s[0][j],sid=self.sid)
else:
chainlist = self.getchain_str()
if extract not in chainlist:
raise ChainError(f'The {extract} chain not exists !')
if extract in chainlist:
extract = extract.upper()
self._split_chain_save_struct(obj=s[0][extract],sid=self.sid)
else:
raise ValueError
def read_struct(self):
"""read_struct read pdb structure
https://biopython-cn.readthedocs.io/zh_CN/latest/cn/chr11.html#id3
一个 Structure 对象的整体布局遵循称为SMCRAStructure/Model/Chain/Residue/Atom结构/模型/链/残基/原子)的体系架构:
结构由模型组成
模型由多条链组成
链由残基组成
多个原子构成残基
Returns:
[obj] -- [Model]
"""
p = PDBParser(PERMISSIVE=1)
s = p.get_structure(self.sid, self.path)
return s
def get_chain(self,chain_str):
s = self.read_struct()
return s[0][chain_str]
def init_path(self):
_p = return_path(self.path)
self.path = _p
if os.path.isfile(_p) == False:
raise FileNotFoundError(f'file {_p} not found')
def read_formula(self):
"""read_formula read pdb file molecule information and identifier in this pdb file(just like residue)
[extended_summary]
:raises OSError: Do not find this file
"""
_path = self.path
try:
with open(_path,'r') as f:
text_line = f.readlines()
_l = []
for i in text_line:
li = i.split()
if li[0] == 'FORMUL':
_l.append(li)
self.mole_id.append(li[2])
for ii in _l:
self.mole_struct.append(''.join(ii[3:]))
except Exception as e:
raise e
def clean_pdb(self,outfile=None,path=None):
__lst = Bdp.read_line_list('ATOM',self.path)
_p = Path(path) if path else Path(self.path).absolute().parent
_f = outfile if outfile else f"{self.sid}.clean.pdb"
write_path = _p.joinpath(_f)
if write_path.exists(): write_path.unlink()
with open(write_path,'w+') as f:
for i in __lst:
f.write(f"{i}\n")
def renamepdbfile(path):
"""renamepdbfile 自动对path路径中批量下载的pdb中文件的文件名进行重命名
[extended_summary]
Arguments:
path {[type]} -- [description]
"""
_flist = get_path_contents(path)[0]
for i in _flist:
i = i.strip()
basetuple = os.path.split(i)
basedir = basetuple[0]
_fn = basetuple[1]
res = re.match(pattern='pdb([A-Za-z0-9]*).ent',string=_fn)
# nfn = res.group(1)
# newname =os.path.join(basedir,str(nfn)+'.pbd')
# os.rename(src = i,dst = newname)
try:
nfn = res.group(1)
newname =os.path.join(basedir,str(nfn)+'.pdb')
os.rename(src = i,dst = newname)
except IndexError:
raise IndexError(f'Could not find pdbid in this filename: {i}')
except AttributeError:
continue
except FileNotFoundError as e:
if os.path.exists(i):
raise e
elif os.path.exists(newname):
continue
else:
raise e
def pdbupdate(path):
pl = PDBList(pdb=path)
pl.update_pdb()

66
script/mutagenesis.py Normal file
View File

@@ -0,0 +1,66 @@
# 定义残基突变函数
from pymol import cmd
from pathlib import Path
def Mutagenesis_site(filename: Path, mutation_type: str, site: int, outfile: Path = None) -> Path:
"""Mutagenesis_site Mutagenesis residue in site
residue site have mutil conformations, need to select one conformations, some error accured.
pass
_extended_summary_
Arguments:
filename {str} -- PDB file format
mutation_type {str} -- 'VAL' for ALA TO VAL; 'ALA' for any/not ALA to ALA; 'GLY' for ALA to GLY
site {int} -- residue site in pdbfile
Keyword Arguments:
outfile {str} -- _description_ (default: {None})
Raises:
ValueError: not one object in PDBs,need to fix
Returns:
str -- save mutagenesis file path
"""
p = Path(filename)
savename = p.stem + f"_{site}_mutation.pdb"
_out_file = Path(outfile) if outfile else p.absolute().parent.joinpath(savename)
if not _out_file.absolute().parent.exists(): _out_file.absolute().parent.mkdir(parents=True)
cmd.reinitialize('everything') # ! clean up
cmd.load(filename.as_posix())
PDBs = cmd.get_names()
# Get the ID numbers of c-alpha (CA) atoms of all residues
if len(PDBs) == 1:
PDB = PDBs[0]
else:
raise ValueError(f'this pdb have more than one object!PDBs:{PDBs}')
CAindex = cmd.identify(f"{PDB} and name CA")
pdbstrList = [cmd.get_pdbstr("%s and id %s" % (PDB, CAid)).splitlines() for CAid in CAindex]
ProtChainResiList = [[PDB, i[0][21], i[0][22:26].strip()] for i in pdbstrList]
for i, j, k in ProtChainResiList:
if int(k) == int(site):
cmd.wizard("mutagenesis")
# print(i,j,k)
cmd.refresh_wizard()
cmd.get_wizard().set_mode(mutation_type)
##Possible mutation_type could be:
##'VAL' for ALA TO VAL
##'ALA' for any/not ALA to ALA
##'GLY' for ALA to GLY
# 'selection' will select each residue that you have selected
# on ProtChainResiList above using the PDBid,chain,and residue
# present on your pdb file.If you didn't select a range on
# ProteinChainResiList, it will do the mutation on all the residues
# present in your protein.
selection = f"/{i}//{j}/{k}"
# Print selection to check the output
# print(selection)
# Selects where to place the mutation
cmd.get_wizard().do_select(selection)
##Applies mutation
cmd.get_wizard().apply()
# Save each mutation and reinitialize the session before the next mutation
##to have pdb files only with the residue-specific single-point mutation you were interested.
cmd.set_wizard("done")
cmd.save(_out_file.as_posix(), f"{PDB}")
cmd.reinitialize('everything') # Reinitialize PyMOL to default settings.
return _out_file

102
script/ppi.py Normal file
View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :ppi.py
@Description: :
@Date :2023/09/01 16:56:22
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
import sys
from dataclasses import dataclass, field, InitVar
from functools import partial
from pymol import cmd
from pymol.cmd import util
from pathlib import Path
from collections import defaultdict
# local import
from .pymolbase import PyMOLBase
@dataclass
class PyMOLConcrete(PyMOLBase):
file: Path
ligand_positions: defaultdict = field(default_factory=lambda: defaultdict(list), init=False)
def __post_init__(self, file):
self.load_file(file)
self.global_obj_name = self.cleanpdb(file.stem)
def load_file(self, file):
# 加载文件到PyMOL这里是一个示例您可能需要使用实际的PyMOL命令
cmd.load(file)
pass
def collect_ligand_positions(self, model, chain, resn, resi):
position = f'/{model}//{chain}/{resi}'
self.ligand_positions[resn].append(position)
def gather_ligand_info(self):
cmd.iterate('organ', 'collect_ligand_positions(model, chain, resn, resi)', space={'collect_ligand_positions': partial(self.collect_ligand_positions)})
@staticmethod
def cleanpdb(before_string):
"""
处理传入参数pdb 例如: pdb1b12.ent 1b12.pdb 转化成 1b12
"""
# print(f'try to format {before_string}')
if '.ent' in before_string:
before_string = before_string[3:-4].lower()
elif '.pdb' in before_string:
before_string = before_string[:4].lower()
elif len(before_string) == 4:
before_string = before_string.lower()
else:
if len(before_string) != 4:
raise ValueError(f'length out of 4 {before_string}')
return before_string
@dataclass
class PDBAnalyzer:
pdb: Path # 使用Path对象代替字符串
ChA: str
ChB: str
dist: str
# 主运行函数
def run(self):
cmd.delete('all')
cmd.load(str(self.pdb)) # 将Path对象转为字符串以兼容PyMOL命令
pdb_name = self.pdb.stem # 获取不包括扩展名的文件名
# ...(省略了其他设置和选择)
# 调用封装好的函数
self.HBond()
self.Pi_Pi_interaction()
self.Cation_Pi_interaction()
self.Salt_bridge_positive()
self.Salt_bridge_negative()
# ...(省略了其他设置和保存)
cmd.save(f'{pdb_name}_interaction.pse')
class
# 主程序
def main():
pdb_file = Path(sys.argv[1]) # 使用Path对象
CHA = sys.argv[2]
CHB = sys.argv[3]
dist = sys.argv[4]
cmd.set('bg_rgb', 'white')
analyzer = PDBAnalyzer(pdb_file, CHA, CHB, dist)
analyzer.run()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :pymol_color_plugin.py
@Description: :pymol配色方案
@Date :2023/6/1 10:52:25
@Author :hotwa
@version :1.0
'''
from pymol import cmd
from pathlib import Path
class colorp():
color1 = (('color1', '[186,182,217]'), 'purple')
color2 = (('color2', '[233,195,153]'), 'yellow')
color3 = (('color3', '[43,113,216]'), 'blue-N')
color4 = (('color4', '[206,155,198]'), 'purple')
color5 = (('color5', '[251,187,62]'), 'orange')
color6 = (('color6', '[245,157,158]'), 'red')
color7 = (('color7', '[133,188,135]'), 'green')
color8 = (('color8', '[30,230,30]'),'green-CL') # Cl卤素配色
color9 = (('color9', '[141,215,247]'),'blue-C') # C配色
color10 = (('color10', '[0,132,55]'),'green-F') # F卤素配色
grey1 = ('grey1','[224,224,224]')
colors = (color1, color2, color3, color4, color5, color6, color7, color8, color9, color10)
def __init__(self, path, id):
self.id = id.lower()
cmd.reinitialize()
p = Path(path)
if p.is_dir():
file = p.joinpath(f"{id}.pdb")
else:
raise ValueError('path params error')
cmd.load(file,id)
def pretty(self):
cmd.remove('solvent metals') # 移除金属离子和溶剂
cmd.remove("resn SO4 or resn PO4 or resn NA,CL,GOL")
cmd.remove('not polymer') # 删除所有不是多聚物(通常指蛋白质或核酸链)的原子或分子。
cmd.pretty(selection=self.id)
@staticmethod
def defcolor():
# 定义常用配色
color_gennerate = map(lambda x:x[0],colorp.colors)
list(map(lambda x:cmd.set_color(*x),color_gennerate))
# 定义灰度配色
cmd.set_color(*colorp.grey1)
@staticmethod
def grey(obj='sele',opt=True):
colorp.defcolor()
if opt: method.optimisation() # 优化
# 对某个对象进行灰化
cmd.color('grey1',f'{obj}')
@staticmethod
def color_mole(obj='hetatm'):
# color blue,sele in name c*
colorp.defcolor()
cmd.color('color9',f'{obj} in name c*')
cmd.color('color6',f'{obj} in name o*')
cmd.color('color5',f'{obj} in name s*')
cmd.color('color3',f'{obj} in name n*')
cmd.color('color8',f'{obj} in name cl*')
cmd.color('color10',f'{obj} in name f*')
@staticmethod
def pretty(obj = None):
if not obj: obj = cmd.get_names()[0]
cmd.remove('solvent metals') # 移除金属离子和溶剂
cmd.pretty(selection=obj)
class font():
font_size = 28 # 单位就是正常的px。你也可以用负值则单位是Å
class mole():
stick_radius=0.10
class atom():
size = 0.28
class method():
@staticmethod
def optimisation(grey=True, selection='sele'):
colorp.defcolor()
if grey: cmd.set('cartoon_color','grey1', selection=selection)
cmd.set('stick_transparency','0.1', selection=selection)
cmd.set("ray_shadows","off", selection=selection)
cmd.set('cartoon_highlight_color', selection=selection)
cmd.set('cartoon_fancy_helices', selection=selection)
cmd.set('cartoon_transparency','0.5', selection=selection)
@staticmethod
def remove():
cmd.remove('resn SO4') # SO4 remove
cmd.remove('solvent metals') # 移除金属离子和溶剂
def get_resn(name:str = 'all'):
resn_list = []
cmd.iterate(selection=name, expression=lambda atom: resn_list.append(atom.resn))
return tuple(set(resn_list))
def protgrey():
method.remove()
method.optimisation()
# 观察小分子
# https://chatmol.org/qa/
# protgrey()
# colorp.color_mole('sele')
# add label
# label sele, "your label"
# black background
# set bg_rgb, [0, 0, 0]
# define label
# cmd.get_position("sele")
# set label_position, [x,y,z]
# cmd.h_add(pymol_obj) # add all hydrogens in this molecular
# cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen
# cmd.remove(f"sele & hydro") # 移除所有的H
# cmd.set('cartoon_color','color8', 'obj')
# cmd.hide("everything", "chain A and resn UHT and alt B") # 小分子隐藏B构象
# cmd.label("chain A and resn UHT and alt A", "resn") # 添加标签
#cmd.select('sele', f'resn {resn} and chain {chain}')
#cmd.center('sele')
#cmd.orient('sele')
#obj_name = 'around_and_self'
# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance}') # 不扩展残基
# cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance}') # 扩展残基
# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子
#cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子扩展至残基的原子
#cmd.hide('everything', 'all')
#cmd.show('lines', obj_name)
# cmd.remove('not polymer') #除非聚合物部分
cmd.extend('colorp',colorp)
cmd.extend('method',method)

498
script/pymol_plugin.py Normal file
View File

@@ -0,0 +1,498 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :auto.py
@Description: :自动处理脚本
@Date :2021/09/02 15:19:15
@Author :hotwa
@version :1.0
https://zhuanlan.zhihu.com/p/121215784 # PyMOL 选择器的语法参考
https://blog.csdn.net/u011450367/article/details/51815130
resource: https://www.cnblogs.com/wq242424/p/13073222.html
http://blog.sina.com.cn/s/blog_7188922f0100wbz1.html
https://www.jianshu.com/p/3396e94315cb
https://blog.csdn.net/dengximo9047/article/details/101221495 # 疏水表面
http://www.mdbbs.org/thread-4064-1-1.html # 用来寻找配体口袋的残基
'''
from functools import partial
from pymol import cmd
from pathlib import Path
import pandas as pd
from json import dumps
import numpy as np
from biotools import Bdp
from loguru import logger
# from types import FunctionType,CodeType
logger.add('pymol_plugin_{time}.log')
# select ligand, resn x
# cmd.select('ligand','byres 5UEH within 6 of resn GOL resn 85P')
def autoshow(i,path,distance = 6,ionshow = False):
"""autoshow 自动展示所有配体6A以内的lines方便查找共价键
[extended_summary] # cmd.create('pocket',f'byres {i} within {distance} of {rawstring}') # 创建一个在当前pdb对象中配体残基周围距离为6A的口袋对象
Arguments:
i {[string]} -- [pdbid 例如 5EA9]
Keyword Arguments:
path {[string]} -- [pdb文件存放的目录目前支持后缀为.pdb的文件也可以在全局变量中设置好文件目录] (default: {path})
distance {[int]} -- [显示周围原子的距离参数] (default: {6})
ionshow {[bool]} -- [离子和有机小分子周围共价结合观察使用,默认不展示] (default: {False})
Returns:
[list] -- [返回ligid标识符除去了部分离子]
"""
cmd.reinitialize()
p = Path(path)
file = p.joinpath(f"{i}.pdb")
cmd.load(file,i)
cmd.remove('solvent metals') # 移除金属离子和溶剂
mole = moleculeidentity(f"{i}",path)
rawstring = 'resn ' + ' resn '.join(mole.ligIdNoion)
print(f'{rawstring} around {distance}')
cmd.select('ligand',f'{rawstring}')
cmd.select('ligand_around',f'{rawstring} around {distance}') # 选择一个在当前pdb对象中配体残基周围距离为6A的口袋对象
if ionshow: # 是否显示所有记录小分子HET条记录中的信息对于离子与有机物显示相关共价键有效
cmd.show('lines','ligand_part') # 显示所有HET侧链
cmd.create('ligand_part',f'{rawstring} expand {distance}') # 单独显示小分子扩展6A周围的lines对象
cmd.create('organ',f'organic expand {distance}')
cmd.show('lines','organ')
return mole.ligId
def bejson(d:dict(help='need to beauty json')) -> dict:
return dumps(d,indent=4,ensure_ascii=False)
class covalentidentity():
"""covalentidentity 使用pymol进行识别共价键在输出有机小分子6A以内的原子时可能出现分子的构象发生改变导致共价键距离计算有问题还可能是与金属离子形成共价键
"""
# __slots__ = []
def __init__(self,pdbfilename,pdbid,path):
"""__init__ [summary]
[extended_summary]
Arguments:
pdbfilename {[string]} -- pdb文件名称
pdbid {[string]} -- 4 length id
path {[string or pathlib obj]} -- pdb文件路径
"""
self.pdbfilename = pdbfilename
self.pdbid = pdbid
self.path = Path(path)
self.init()
@staticmethod
def cleanpdb(before_string):
"""
处理传入参数pdb 例如: pdb1b12.ent 1b12.pdb 转化成 1b12
"""
# print(f'try to format {before_string}')
if '.ent' in before_string:
before_string = before_string[3:-4].lower()
elif '.pdb' in before_string:
before_string = before_string[:4].lower()
elif len(before_string) == 4:
before_string = before_string.lower()
else:
if len(before_string) != 4:
raise ValueError(f'length out of 4 {before_string}')
return before_string
@classmethod
def export_organ(cls,pdbfile,path,distance = 6,remove_water_metals = True):
# 输出小分子结合部分的共价结合信息对象pdb格式
file = path.joinpath(pdbfile)
_pdbid = covalentidentity.cleanpdb(pdbfile)
savepath = path.joinpath(f'{_pdbid}_organ_around_{distance}_lines.pdb')
cmd.reinitialize()
cmd.load(filename = file,object = pdbfile)
if remove_water_metals: cmd.remove('solvent metals') # 移除金属离子和溶剂
cmd.create('organ',f'organic expand {distance}')
cmd.show('lines','organ')
cmd.save(filename=savepath,selection='organ')
cls.connect_info_pdb_path = savepath
return savepath
def init(self):
self.__create_dataframe()
def __read_organ_pdb(self):
exportfile = covalentidentity.export_organ(self.pdbfilename,path = Path(self.path))
with open(file=exportfile,mode = 'r') as f:
read_list = [i.replace('\n', '') for i in f.readlines()]
self.read_connect = [i for i in read_list if i[:6].strip()=='CONECT']
self.read_hetatm = [i for i in read_list if i[:6].strip()=='HETATM']
self.read_atom = [i for i in read_list if i[:6].strip()=='ATOM']
def __create_dataframe(self):
self.__read_organ_pdb() # 读取初始化数据
connect_infos = []
for i in self.read_connect: #清洗数据
if len(i.split()) == 3: # 策略:查找只有两个原子键的连接信息
connect_infos.append(i)
self.df = self.transformtodataframe(self.read_hetatm).append(self.transformtodataframe(self.read_atom))
target_search_connect = []
for i in connect_infos:
l = [i for i in i.split() if i != 'CONECT']
target_search_connect.append(l)
self.target_search_connect = target_search_connect # 两个连接信息的列表,不一定都是共价键
def __search_convenlent(self,distance_accuracy=1):
true_covlent = [] # 共价键连接信息记录 有几个列表就有几个共价键链接信息
infos = []
locateinfos = {}
for item in self.target_search_connect: # 对两个连接原子进行判断
df = self.df
# molecule chain search
res0 = df[df['AtomNum']==item[0]]
res1 = df[df['AtomNum']==item[1]]
if (res0['Identifier'].all() == res1['Identifier'].all()):
continue
else:
true_covlent.append(item)
# 定位信息那个是小分子的行信息和那个是蛋白行的信息
if res0['Identifier'].all() == 'ATOM': locateinfos['ATOM'] = res0
if res0['Identifier'].all() == 'HETATM': locateinfos['HETATM'] = res0
if res1['Identifier'].all() == 'ATOM': locateinfos['ATOM'] = res1
if res1['Identifier'].all() == 'HETATM': locateinfos['HETATM'] = res1
assert (not res0.empty and not res1.empty),(
'this code occur a bug'
'try to connet to author fix it'
'maybe is pymol output connect infos error'
'never be happen'
)
partinfos = self.__fill_infos(i=item,pro=locateinfos['ATOM'],mole=locateinfos['HETATM'],distance_accuracy=distance_accuracy)
infos.append(partinfos)
return infos
def convenlent_infos(self,distance_accuracy=1):
return self.__search_convenlent(distance_accuracy=distance_accuracy)
@property
def cov_infos(self):
d = self.__search_convenlent()
return d
@staticmethod
def __fill_infos(i,pro,mole,distance_accuracy=1):
distance = np.sqrt(np.square(float(pro['X'].all())-float(mole['X'].all())) + np.square(float(pro['Y'].all())-float(mole['Y'].all())) + np.square(float(pro['Z'].all())-float(mole['Z'].all())))
infostmplate = {
'ConnectInfos': i,
'ResidueName':pro['ResidueName'].all(),
'ResidueSequence':pro['ResidueSequence'].all(),
'ChainIndentifier':pro['ChainIndentifier'].all(),
'CovenlentAtom':{
'protein': {
'AtomName':pro['AtomName'].all(),
'ElementSymbol':pro['ElementSymbol'].all(),
},
'molecule':{
'AtomName':mole['AtomName'].all(),
'ElementSymbol':mole['ElementSymbol'].all(),
},
},
'CovenlentDistance':round(distance,distance_accuracy),
'LigandName':mole['ResidueName'].all(),
}
return infostmplate
@staticmethod
def transformtodataframe(readlinecontent=None,first_label = 'ATOM',path = False):
path_info = (Path(path).name.__str__(),Path(path).parent.__str__()) if path else ('unknown file name','unknown path')
if path: readlinecontent = Bdp.read_line_list(first_column=first_label,path=path)
if readlinecontent == None: raise ValueError('readlinecontent need')
if first_label == ('ATOM' or 'HETATM'): # ! 注意python中的懒惰运算及运算符特性
# 将pdb每行读取的信息转化为dataframe 针对ATOM HETAM开头的行适用
Identifier,AtomNum,AtomName,AtomLocationIndicator,ResidueName,ChainIndentifier,ResidueSequence,InsertionsResidue,X,Y,Z,Occupancy,Tfactor,SegmentIdentifier,ElementSymbol = [],[],[],[],[],[],[],[],[],[],[],[],[],[],[]
for i in readlinecontent:
Identifier.append(i[:7].strip())
AtomNum.append(i[6:11].strip())
AtomName.append(i[12:16].strip())
AtomLocationIndicator.append(i[16].strip())
ResidueName.append(i[17:20].strip())
ChainIndentifier.append(i[21].strip())
ResidueSequence.append(i[22:26].strip())
InsertionsResidue.append(i[26].strip())
X.append(i[30:38].strip())
Y.append(i[38:46].strip())
Z.append(i[46:54].strip())
Occupancy.append(i[54:60].strip())
Tfactor.append(i[60:66].strip())
SegmentIdentifier.append(i[72:76].strip())
ElementSymbol.append(i[76:78].strip())
df = pd.DataFrame(data={
'Identifier':Identifier,
'AtomNum':AtomNum,
'AtomName':AtomName,
'AtomLocationIndicator':AtomLocationIndicator,
'ResidueName':ResidueName,
'ChainIndentifier':ChainIndentifier,
'ResidueSequence':ResidueSequence,
'InsertionsResidue':InsertionsResidue,
'X':X,
'Y':Y,
'Z':Z,
'Occupancy':Occupancy,
'Tfactor':Tfactor,
'SegmentIdentifier':SegmentIdentifier,
'ElementSymbol':ElementSymbol
})
elif first_label == 'LINK':
# https://www.wwpdb.org/documentation/file-format-content/format33/sect6.html
Record_name,Atom_name1,Alternate_location_indicator1,Residue_name1,Chain_identifier1,Residue_sequence_number1,Insertion_code1,Atom_name2,Alternate_location_indicator2,Residue_name2,Chain_identifier2,Residue_sequence_number2,Insertion_code2,Symmetry_operator_atom_1,Symmetry_operator_atom_2,Link_distance = [],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]
for i in readlinecontent:
if len(i) != 78:
logger.exception(
f"[LINK label length error] not match the LINK line length\n"
f"[input string]:{i}\n"
f"check your input file:{path_info[0]} from {path_info[1]}\n"
"if this line not have link distance, try to caculate by covalent.bypymol method")
pass
else:
Record_name.append(i[:6].strip())
Atom_name1.append(i[12:16].strip())
Alternate_location_indicator1.append(i[16].strip())
Residue_name1.append(i[17:20].strip())
Chain_identifier1.append(i[21].strip())
Residue_sequence_number1.append(i[22:26].strip())
Insertion_code1.append(i[26].strip())
Atom_name2.append(i[42:46].strip())
Alternate_location_indicator2.append(i[46].strip())
Residue_name2.append(i[47:50].strip())
Chain_identifier2.append(i[51].strip())
Residue_sequence_number2.append(i[52:56].strip())
Insertion_code2.append(i[56].strip())
Symmetry_operator_atom_1.append(i[59:65].strip())
Symmetry_operator_atom_2.append(i[66:72].strip())
Link_distance.append(i[73:78].strip())
df = pd.DataFrame(data={
'Record_name':Record_name,
'Atom_name1':Atom_name1,
'Alternate_location_indicator1':Alternate_location_indicator1,
'Residue_name1':pd.Series(Residue_name1,dtype='object'),
'Chain_identifier1':Chain_identifier1,
'Residue_sequence_number1':Residue_sequence_number1,
'Insertion_code1':Insertion_code1,
'Atom_name2':Atom_name2,
'Alternate_location_indicator2':Alternate_location_indicator2,
'Residue_name2':pd.Series(Residue_name2,dtype='object'),
'Chain_identifier2':Chain_identifier2,
'Residue_sequence_number2':Residue_sequence_number2,
'Insertion_code2':Insertion_code2,
'Symmetry_operator_atom_1':Symmetry_operator_atom_1,
'Symmetry_operator_atom_2':Symmetry_operator_atom_2,
'Link_distance':pd.Series(Link_distance,dtype='float32'),
})
else:
assert False,(
'No return',
'a error occurred'
)
return df
class color_plan():
color1 = (('color1', '[186,182,217]'), 'purple')
color2 = (('color2', '[233,195,153]'), 'yellow')
color3 = (('color3', '[141,215,247]'), 'blue')
color4 = (('color4', '[206,155,198]'), 'purple')
color5 = (('color5', '[251,187,62]'), 'orange')
color6 = (('color6', '[245,157,158]'), 'red')
color7 = (('color7', '[133,188,135]'), 'green')
colors = (color1, color2, color3, color4, color5, color6, color7)
@staticmethod
def defcolor():
color_gennerate = map(lambda x:x[0],color_plan.colors)
list(map(lambda x:cmd.set_color(*x),color_gennerate))
# error class
class PathError(BaseException):
def __init__(self,arg):
self.arg = arg
class moleculeidentity():
"""moleculeidentity [summary]
[识别pdb蛋白文件中的小分子标识符]
"""
def __init__(self,pdbfile,path):
self.pathstr = path
self.path = Path(path)
self.pdbfile = pdbfile
self._init()
def _init(self):
if not self.path.exists():
raise PathError('path not exist')
if not isinstance(self.pdbfile,str):
raise TypeError('Pdbid must be a string!')
if ('.pdb' not in self.pdbfile) and (len(self.pdbfile) != 4):
raise TypeError('Pdbid must be 4 letters')
if ('.pdb' or '.PDB') in self.pdbfile:
raise TypeError(f'{self.pdbfile} Remove ".pdb" from input arg, add automatically')
file_list = list(self.path.glob('*.pdb'))
self.path_parent = file_list[0].parent
self.pdbfilelist = [i.name[:4].upper() for i in file_list]
def __parse_pdb_ligid(self,ion=True):
if self.pdbfile.upper() not in self.pdbfilelist:
raise FileNotFoundError(f'not found {self.pdbfile} in {self.path}')
infos_line = []
ligId = []
for i in self.__generate_pdb_lines():
if self.check_line_header(i) == 'HET':
infos_line.append(i)
ligId = [i.split()[1] for i in infos_line]
ligId = list(set(ligId))
if not ion: ligId = [i for i in ligId if len(i) == 3 ] # remove ion from list
return ligId
@property
def ligId(self):
# return ligId include ion
return self.__parse_pdb_ligid()
@property
def ligIdNoion(self):
return self.__parse_pdb_ligid(ion=False)
@staticmethod
def check_line_header(line_text):
return line_text[0:6].strip()
def __generate_pdb_lines(self):
openpdbfile = self.pdbfile + '.pdb' if '.pdb' not in self.pdbfile else self.pdbfile
for row in open(self.path_parent.joinpath(openpdbfile),'r+'):
yield row.strip()
class covalent(object):
def __init__(self,pdbfilename,path,pdbid):
self.pdbfilename = pdbfilename
self.pdbid = covalentidentity.cleanpdb(pdbid)
self.path_parent = Path(path)
self.path = self.path_parent.joinpath(pdbfilename)
self._init()
def _init(self):
self.link_df = covalentidentity.transformtodataframe(path = self.path,first_label='LINK')
@classmethod
def bypdb(cls,pdbfilename,path,pdbid):
return cls(pdbfilename=pdbfilename,path=path,pdbid=pdbid)
@staticmethod
def bypymol(pdbfilename,pdbid,path):
return covalentidentity(pdbfilename,pdbid,path)
@property
def mole_id(self)->list:
_instance=Bdp(path=self.path,sid=self.pdbid)
return [i for i in _instance.mole_id if len(i)==3 and i != 'HOH' and i != 'SO4' and i != 'PO4'] # ! 在这里手动剔除掉硫酸根和磷酸根
def modresdata(self)->list:
"""modresdata 标准残疾的修饰信息
"""
modres_lst = Bdp.read_line_list(first_column='MODRES',path=self.path)
lst = [i[12:15] for i in modres_lst] # 切片出修饰残基的小分子
return list(set(lst))
def get_covalent(self):
# 从原始的pdb文件中可获取共价键信息 # ? 对其中的信息正确率为100% 来自原始的pdb文件中的信息
res1 = self.link_df.query(f'Residue_name1 in {self.remove_modres_moleid}')
res2 = self.link_df.query(f'Residue_name2 in {self.remove_modres_moleid}')
df = pd.merge(res1, res2,how='outer')
df.astype('object')
# remove ions link with ligand
df = df[df['Residue_name1'].str.len()==3]
df = df[df['Residue_name2'].str.len()==3]
return df
@property
def remove_modres_moleid(self)->list:
# 清除link信息中的modres即修饰残基的ligid
# lst = Bdp.read_line_list(first_column='LINK',path=self.path)
# lst = list(set(map(lambda s:s[17:20],lst)))
clean_func = partial(self.func_dynamic_data_template,dynamic_data=self.ModifiedResidues)
res = clean_func(origin_data=self.mole_id)
return list(filter(lambda x:len(x.strip())==3 and x != 'HOH',res))
@property
def ModifiedResidues(self):
ModifiedResiduesId = []
for i in self.mole_id:
if i in self.modresdata():
ModifiedResiduesId.append(i)
# 从SEQRES序列中获取MODRES的修饰残基的小分子仅仅从SEQRES有时获取不全面
seqres_lst = [i[19:].strip() for i in Bdp.read_line_list(first_column='SEQRES',path = self.path)]
seqres_lst_string = '\r\n'.join(seqres_lst)
seqres_lst_set = set(list(i for line in seqres_lst_string.splitlines() for i in line.split()))
seqres_list = seqres_lst_set.intersection(set(self.mole_id))
ModifiedResiduesId.extend(seqres_list)
return list(set(ModifiedResiduesId))
@staticmethod
def func_dynamic_data_template(origin_data:list,dynamic_data:list)->list:
# 取origin_data和dynamic_data补集
origin_data_set = set(origin_data)
dynamic_data_set = set(dynamic_data)
_data = origin_data_set.difference(dynamic_data_set)
return _data
cmd.extend('autoshow', autoshow) # pymol中自定义autoshow函数请使用cmd.autoshow()命令
#! 下面为具体使用查找共价键的代码
# def read_link_lines(item):
# ins1 = covalent.bypdb(pdbfilename = item,path='M:\program\\autotask\search_res1_pdb',pdbid = item[3:7])
# res = ins1.get_covalent()
# res['Pdb_id'] = [item[3:7] for _ in range(len(res))]
# return res
def return_mole(series):
# LINK 字段信息进行分析解读
residue = [i.upper() for i in 'Gly,Ala,Val,Leu,Ile,Pro,Phe,Tyr,Trp,Ser,Thr,Cys,Met,Asn,Gln,Asp,Glu,Lys,Arg,His'.split(',')]
if str(series[1]['Residue_name1']) in residue:
if str(series[1]['Residue_name2']) in residue:
# 除去两个氨基酸连接的情况
return pd.DataFrame()
else:
_data = series[1]['Residue_name2'],series[1]['Link_distance'],series[1]['Pdb_id'],series[1]['Chain_identifier2']
_df = pd.DataFrame(data={
'LigandName': pd.Series([_data[0]],dtype='string'),
'Link_distance': pd.Series([_data[1]],dtype='float32'),
'Pdb_id': pd.Series([_data[2]],dtype='string'),
'Chain_identifier': pd.Series([_data[3]],dtype='string')
})
return _df
elif str(series[1]['Residue_name2']) in residue:
if str(series[1]['Residue_name1']) in residue:
# 除去两个氨基酸连接的情况
return pd.DataFrame()
else:
_data = series[1]['Residue_name1'],series[1]['Link_distance'],series[1]['Pdb_id'],series[1]['Chain_identifier1']
_df = pd.DataFrame(data={
'LigandName': pd.Series([_data[0]],dtype='string'),
'Link_distance': pd.Series([_data[1]],dtype='float32'),
'Pdb_id': pd.Series([_data[2]],dtype='string'),
'Chain_identifier': pd.Series([_data[3]],dtype='string')
})
return _df
else:
logger.exception(
f'[MoleculeMatchError] Pdbid {series[1]["Pdb_id"]} occur a error\n'
f'出现了两个残基名称都不属于常见的20中\n{residue}\n'
f'source: {series[1]}'
)
return pd.DataFrame() # 两个小分子连接的情况,去除
if __name__ == '__main__':
...

23
script/pymolbase.py Normal file
View File

@@ -0,0 +1,23 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :base.py
@Description: :PyMOLBase的抽象基类
@Date :2023/09/01 16:54:02
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
from abc import ABC, abstractmethod
class PyMOLBase(ABC):
@abstractmethod
def clean_pdb_name(self, pdb_name):
"""
This abstract method is meant to clean or process the given PDB name.
Implement this method in the subclass.
"""
pass

90
script/pymolbond.py Normal file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :pymolbond.py
@Description: :
@Date :2023/09/01 16:57:02
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
'''
class Bond():
"""
This class contains static methods for identifying various types of molecular interactions.
"""
@staticmethod
def HBond(ChA, ChB):
"""Identifies hydrogen bonds between two chains."""
name = 'Hbond'
cmd.distance(name, ChA, ChB, '3.5', '2')
return name
@staticmethod
def Pi_Pi_interaction(ChA, ChB):
"""Identifies pi-pi interactions between two chains."""
name = 'PI_PI'
cmd.distance(name, ChA, ChB, '5.5', '6')
return name
@staticmethod
def Cation_Pi_interaction(ChA, ChB):
"""Identifies cation-pi interactions between two chains."""
name = 'PI_Cation'
cmd.distance(name, ChA, ChB, '5.5', '7')
return name
@staticmethod
def Salt_bridge_positive(ChA, ChB):
"""Identifies salt bridges from positive residues in one chain to negative residues in another chain."""
name = 'SBP'
selection_1 = f'{ChB} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))'
selection_2 = f'{ChA} and resn ASP+GLU and name OD*+OE*'
cmd.distance(name, selection_1, selection_2, '5.0', '0')
return name
@staticmethod
def Salt_bridge_negative(ChA, ChB):
"""Identifies salt bridges from negative residues in one chain to positive residues in another chain."""
name = 'SBN'
selection_1 = f'{ChA} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))'
selection_2 = f'{ChB} and resn ASP+GLU and name OD*+OE*'
cmd.distance(name, selection_1, selection_2, '5.0', '0')
return name
@staticmethod
def Van_der_Waals(ChA, ChB): # not tested
"""Identifies Van der Waals interactions between two chains."""
name = 'VDW'
cmd.distance(name, ChA, ChB, '0.6', '0')
return name
@staticmethod
def Hydrophobic_interaction(ChA, ChB): # not tested
"""Identifies hydrophobic interactions between two chains."""
name = 'Hydrophobic'
cmd.distance(name, ChA, ChB, '0.6', '0')
return name
@staticmethod
def Electrostatic_interaction(ChA, ChB): # not tested
"""Identifies electrostatic interactions between two chains."""
name = 'Electrostatic'
cmd.distance(name, ChA, ChB, '0.6', '0')
return name
@staticmethod
def Metal_coordination(ChA, ChB): # not tested
"""Identifies metal coordination bonds between two chains."""
name = 'MetalCoord'
cmd.distance(name, ChA, ChB, '0.2', '0')
return name
@staticmethod
def Disulfide_bond(ChA, ChB): # not tested
"""Identifies disulfide bonds between two chains."""
name = 'Disulfide'
cmd.distance(name, ChA, ChB, '0.2', '0')
return name

View File

@@ -0,0 +1,92 @@
from pymol import cmd
from pymol.cmd import util
import sys
from pymol_color_plugin import colorp
def HBond(ChA,ChB):
name = f'Hbond'
dist = cmd.distance(name, ChA,ChB, '3.5', '2')
return name
def Pi_Pi_interaction(ChA,ChB):
name = f'PI_PI'
cmd.distance(name,ChA, ChB,'5.5','6')
return name
def Cation_Pi_interaction(ChA,ChB):
name = f'PI_Cation'
cmd.distance(name, ChA, ChB, '5.5', '7')
return name
def Salt_bridge_positive(ChA,ChB):
name = f'SBP'
selection_1 = f'{ChB} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))'
selection_2 = f'{ChA} and resn ASP+GLU and name OD*+OE*'
cmd.distance(name,selection_1,selection_2,'5.0','0')
return name
def Salt_bridge_negative(ChA,ChB):
name = f'SBN'
selection_1 = f'{ChA} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))'
selection_2 = f'{ChB} and resn ASP+GLU and name OD*+OE*'
cmd.distance(name,selection_1,selection_2,'5.0','0')
return name
# def T_stacking(ChA, ChB):
# name = 'T_stacking'
# # 添加适当的距离和选择标准
# cmd.distance(name, ChA, ChB, '5.5', '2')
# return name
# def Hydrophobic_interaction(ChA, ChB):
# name = 'Hydrophobic'
# # 添加适当的距离和选择标准
# cmd.distance(name, ChA, ChB, '7.0', '2')
# return name
# def VDW_interaction(ChA, ChB):
# name = 'VDW'
# # 添加适当的距离和选择标准
# cmd.distance(name, ChA, ChB, '4.0', '2')
# return name
def show_label_name(obj:str):
cmd.label(f'name CA and {obj}', 'oneletter+resi')
cmd.set('label_bg_color', -7, '', 0)
cmd.set('label_size', 18, '', 0)
cmd.set('label_font_id', '7')
def interact(obj:str='epitope', distance:int=5):
cmd.show('stick',f'{obj}')
cmd.set('stick_radius', '0.3', f'{obj}')
colorp.color_mole(obj)
#show_label_name(obj)
cmd.select('nearby',f'byres {obj} around {distance}')
cmd.show('stick','nearby')
cmd.set('stick_radius', '0.1', 'nearby')
cmd.set('cartoon_transparency', 0.7, selection='nearby')
Hbond = HBond(obj, 'nearby')
PI = Pi_Pi_interaction(obj, 'nearby')
C_PI = Cation_Pi_interaction(obj, 'nearby')
SBP = Salt_bridge_positive(obj, 'nearby')
SBN = Salt_bridge_negative(obj, 'nearby')
cmd.set('dash_color', 'yellow', Hbond)
cmd.set('dash_color', 'blue', PI)
cmd.set('dash_color', 'blue', C_PI)
cmd.set('dash_color', 'orange', SBP)
cmd.set('dash_color', 'orange', SBN)
cmd.set('dash_radius', '0.07')
cmd.set('dash_gap', '0.3')
#cmd.remove('e. h and neighbor(name C*)')
cmd.zoom('Hbond')
def main():
cmd.reinitialize()
cmd.fetch('5sws',type='pdb')
colorp.pretty()
cmd.select('epitope','chain C and resi 1-9')
cmd.zoom('nearby')
interact()
cmd.save(f'5sws_interaction.pse')

7
script/test.py Normal file
View File

@@ -0,0 +1,7 @@
if __name__ == '__main__':
from plugin.cmd_singleton import PyMOLSingleton
cmd = PyMOLSingleton
print(
help(cmd))

View File

@@ -0,0 +1,198 @@
from pymol import cmd
from pymol.cmd import util
import sys
import os
import math
def pi_pi_angle(x1,x2,x3,y1,y2,y3):
import numpy as np
#print(x1,x2,x3,y1,y2,y3)
B1, B2, B3 = [x1[0] - x2[0], x1[1] - x2[1], x1[2] - x2[2]]
C1, C2, C3 = [x1[0] - x3[0], x1[1] - x3[1], x1[2] - x3[2]]
#print([x1[0] - x2[0], x1[1] - x2[1], x1[2] - x2[2]])
n1 = [B2 * C3 - C2 * B3, B3 * C1 - C3 * B1, B1 * C2 - C1 * B2]
D1, D2, D3 = [y1[0] - y2[0], y1[1] - y2[1], y1[2] - y2[2]]
E1, E2, E3 = [y1[0] - y3[0], y1[1] - y3[1], y1[2] - y3[2]]
n2 = [D2 * E3 - E2 * D3, D3 * E1 - E3 * D1, D1 * E2 - E1 * D2]
dot_product = np.dot(n1, n2)
magnitude1 = np.linalg.norm(n1) # ????????????????
magnitude2 = np.linalg.norm(n2)
#print(dot_product,magnitude1,magnitude2,dot_product / (magnitude1 * magnitude2))
xx = math.acos(dot_product / (magnitude1 * magnitude2))
degree = math.degrees(xx)
if degree > 90:
degree = 180 - degree
return round(degree,2)
def pi_pi(aro_ring_info,cutoff=5.0):
aro_resn =['HIS','PHE','TRP','TYR']
aro_resn_list = '+'.join(aro_resn)
chA_aro_res = f'inter_1 and resn {aro_resn_list}'
chB_aro_res = f'inter_2 and resn {aro_resn_list}'
chA_aro_res_list = list(set([f'resi {a.resi} and resn {a.resn}' for a in cmd.get_model(chA_aro_res).atom]))
chB_aro_res_list = list(set([f'resi {a.resi} and resn {a.resn}' for a in cmd.get_model(chB_aro_res).atom]))
for A in chA_aro_res_list:
for B in chB_aro_res_list:
A_type = A.split()[-1]
B_type = B.split()[-1]
#print(f'{A} and name {aro_ring_info[A_type]}')
x = cmd.centerofmass(f'inter_1 and {A} and name {aro_ring_info[A_type]}')
y = cmd.centerofmass(f'inter_2 and {B} and name {aro_ring_info[B_type]}')
dist = math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 + (x[2] - y[2]) ** 2)
if dist < cutoff:
cmd.show('sticks',f'inter_1 and {A}')
cmd.show('sticks', f'inter_2 and {B}')
cmd.label(f'name CA and inter_1 and {A}', 'oneletter+resi')
cmd.label(f'name CA and inter_2 and {B}', 'oneletter+resi')
x1 = cmd.get_coords(f"{A} and name {aro_ring_info[A_type].split('+')[0]}")[0]
x2 = cmd.get_coords(f"{A} and name {aro_ring_info[A_type].split('+')[1]}")[0]
x3 = cmd.get_coords(f"{A} and name {aro_ring_info[A_type].split('+')[2]}")[0]
y1 = cmd.get_coords(f"{B} and name {aro_ring_info[B_type].split('+')[0]}")[0]
y2 = cmd.get_coords(f"{B} and name {aro_ring_info[B_type].split('+')[1]}")[0]
y3 = cmd.get_coords(f"{B} and name {aro_ring_info[B_type].split('+')[2]}")[0]
print(A,B)
pi_angle = pi_pi_angle(x1, x2, x3, y1, y2, y3)
pi_name = f'pi_{A.split()[1]}_{B.split()[1]}_{str(pi_angle)}'
print(A, B,pi_name)
cmd.distance(pi_name, f"inter_1 and resi {A.split()[1]} and name {aro_ring_info[A_type]}", f'inter_2 and resi {B.split()[1]} and name {aro_ring_info[B_type]}', mode=4)
def Salt_bridge(ChA,ChB,cutoff=5.0):
selection_1 = f'{ChB} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))'
a_charge_res = set([f'chain {a.chain} and resi {a.resi} and resn {a.resn}' for a in cmd.get_model(f'inter_1 and resn LYS+ARG+ASP+GLU').atom])
b_charge_res = set([f'chain {a.chain} and resi {a.resi} and resn {a.resn}' for a in cmd.get_model(f'inter_2 and resn LYS+ARG+ASP+GLU').atom])
#print(a_charge_res)
p_info = {'LYS':'NZ','ARG':'NE+NH*'}
n_info = {'ASP':'OD*+OE*','GLU':'OD*+OE*'}
for a in a_charge_res:
a_resn = a.split()[-1]
for b in b_charge_res:
b_resn = b.split()[-1]
name = f'SB_{a.split()[4]}_{b.split()[4]}'
if (a_resn in ['LYS','ARG']) and (b_resn in ['ASP','GLU']):
a_resn_atom = p_info[a_resn]
b_resn_atom = n_info[b_resn]
#print(a,b)
#print(a_resn_atom,b_resn_atom)
selection_1 = f'{a} and name {a_resn_atom}'
selection_2 = f'{b} and name {b_resn_atom}'
cal_saltbrige(selection_1, selection_2, name, a,b,cutoff)
elif (a_resn in ['ASP','GLU'] ) and (b_resn in ['LYS','ARG']):
a_resn_atom = n_info[a_resn]
b_resn_atom = p_info[b_resn]
selection_1 = f'{a} and name {a_resn_atom}'
selection_2 = f'{b} and name {b_resn_atom}'
cal_saltbrige(selection_1,selection_2,name, a,b,cutoff)
def cal_saltbrige(selection1,selection2,name,a,b,cutoff):
x = cmd.centerofmass(selection1)
y = cmd.centerofmass(selection2)
dist = math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 + (x[2] - y[2]) ** 2)
if dist < cutoff:
cmd.show('stick', a)
cmd.show('stick', b)
print(a,b)
cmd.label(f'name CA and {a}', 'oneletter+resi')
cmd.label(f'name CA and {b}', 'oneletter+resi')
cmd.distance(name, selection1, selection2, mode=4)
return name
def DA_Hbond(DA_type,select1,select2,cutoff='3.5',angle='150'):
hbond_pairs = []
pairs = cmd.find_pairs(select1, select2, mode=0, cutoff=cutoff)
for pair in pairs:
D_atom = f'index {pair[0][1]}'
A_atom = f'index {pair[1][1]}'
HD_atom = f'e. h and neighbor({D_atom})'
index_1 = [a.index for a in cmd.get_model(A_atom).atom]
index_2 = [a.index for a in cmd.get_model(HD_atom).atom]
index_3 = [a.index for a in cmd.get_model(D_atom).atom]
for h in index_2:
h_angle = cmd.angle('angle', A_atom, f'index {h}', D_atom)
#print(type, pair, h_angle)
if h_angle > float(angle):
D_atom_name = [f'{a.resi}-{a.name}' for a in cmd.get_model(f'index {pair[0][1]} ').atom][0]
A_atom_name = [f'{a.resi}-{a.name}' for a in cmd.get_model(f'index {pair[1][1]} ').atom][0]
#name = [f'HB-{a.resi}{a.resn}{a.chain}-{base_atom_name}-lig-{lig_atom_name}' for a in cmd.get_model(f'index {pair[0][1]} and polymer').atom][0]
name = f'{DA_type}_{D_atom_name}_{A_atom_name}'
cmd.show('stick', f'byres(index {pair[0][1]})')
cmd.show('stick', f'byres(index {pair[1][1]})')
cmd.label(f'name CA and byres(index {pair[0][1]})', 'oneletter+resi')
cmd.label(f'name CA and byres(index {pair[1][1]})', 'oneletter+resi')
cmd.distance(name, f'index {index_1[0]} ', f'index {h} ')
break
return hbond_pairs
def run(pdb,ChA,ChB,dist):
cmd.delete('all')
cmd.load(pdb)
pdb_name = pdb[:-4]
#cmd.h_add('all')
#cmd.remove('solvent and resn SO4, NA,CL,GOL')
cmd.remove('not polymer')
#util.cbc('all', first_color=7, quiet=1, legacy=0, _self=cmd)
cmd.select('inter_1',f'{pdb_name} and chain {ChA} and byres(chain {ChB}) around {dist}')
cmd.select('inter_2', f'{pdb_name} and chain {ChB} and byres(chain {ChA}) around {dist}')
util.cba('lightblue',f'chain {ChA}')
util.cba('gray90', f'chain {ChB}')
else_part = cmd.select('else',f'{pdb_name} and not (chain {ChA}+{ChB})')
if else_part > 0:
util.cba('lime', 'else')
chA_D = f'inter_1 and e. o+n and (neighbor e. h)'
chB_A = f'inter_2 and e. o+n '
DA_Hbond('HDA',chA_D,chB_A)
chB_D = f'inter_2 and e. o+n and (neighbor e. h)'
chA_A = f'inter_1 and e. o+n '
DA_Hbond('HAD',chB_D,chA_A)
aro_ring_name = {'HIS':'CG+ND1+CE1+NE2+CD2',
'TRP':'CH2+CZ3+CE3+CZ2+CE2+CD2',
'TYR':'CG+CD1+CE1+CZ+CE2+CD2',
'PHE':'CG+CD1+CE1+CZ+CE2+CD2'}
pi_pi(aro_ring_name)
Salt_bridge(ChA,ChB)
cmd.do('set cartoon_transparency, 0.4')
cmd.do('set dash_gap, 0.2')
cmd.do('set dash_round_ends, 0')
cmd.do('set label_size, 24')
cmd.do('set dash_color, blue,pi*')
cmd.do('set dash_color, violet,SB*')
cmd.do('set dash_radius, 0.1')
cmd.do('set stick_radius, 0.15')
cmd.delete('angle')
cmd.disable('angle')
cmd.set('bg_rgb', 'white')
cmd.save(f'{pdb_name}_interaction.pse')
def main():
pdb_file = sys.argv[1]
CHA = sys.argv[2]
CHB = sys.argv[3]
dist = sys.argv[4]
run(pdb_file,CHA,CHB,dist)
main()

2
test.py Normal file
View File

@@ -0,0 +1,2 @@
from plugin.cmd_singleton import cmd