commit bc8cc22a9cb2f4bd0efc69035a5d8049e19c4f95 Author: lingyuzeng Date: Sat Sep 7 09:12:14 2024 +0800 first version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7fa4f50 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +__pycache__ +*.pse +*.log +*.pdb +*.mol2 +*.sdf +*.gif +*.pyc +*.cif \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e73ae37 --- /dev/null +++ b/README.md @@ -0,0 +1,163 @@ +# 目录结构说明 + +asset放一些学习资料,github仓库放不下就放到网盘中去。 + +plugin为自建的pymol的插件开发的一些代码 + +scripts为一些脚本文件,主要是测试代码 + + +## pymol 资源学习绘图 + +[各种作用力绘图](https://proteintools.uni-bayreuth.de./) + + +## 环境安装 + +```shell +conda install -c conda-forge ffmpeg pymol-open-source biopython loguru +``` + +## 最高质量渲染 + +```shell +util.performance(0) +rebuild +``` + + +PyMOL可以用多种方式进行录制: + +1. **命令行录制**: 使用`log_open`和`log_close`命令,可以将PyMOL的命令行操作记录到文本文件。 + + ``` + cmd.log_open('filename.log') + ... # Your operations + cmd.log_close() + ``` + +2. **Ray Tracing**: 使用`ray`命令渲染高质量的图片,然后用其他软件将图片合成为视频。 + +3. **使用`mset`和`mview`创建动画**: 如你之前所述,通过这两个命令创建动画,然后使用`mpng`命令将动画导出为PNG帧,最后用其他软件将其合成为视频。 + + ``` + cmd.mpng('frame_prefix_', 1, 60) + ``` + +4. **屏幕录制软件**: 使用外部屏幕录制软件(如OBS、Camtasia等)进行录制。 + +5. **PyMOL API和脚本**: 使用PyMOL的Python API和PyMOL脚本,可以更精细地控制录制内容。 + +## PyMOL工作目录 + +使用`cd`命令改变PyMOL的工作目录。例如: + +``` +cmd.cd("/path/to/new/directory") +``` + +## 图片渲染 + +设置600DPI: + +```shell +width = int(1920 * (600 / 72)) +height = int(1080 * (600 / 72)) +cmd.ray(width, height) +``` + + + +得到帧1至帧60的PNG图片 + +```shell +cmd.mset("1 x60 61 x60") +cmd.scene('frame1', 'store') +cmd.mview('store', state=1, scene='frame1') +cmd.scene('frame2', 'store') +cmd.mview('store', state=60, scene='frame2') +cmd.mview('interpolate') + +for i in range(1, 61): + cmd.frame(i) + cmd.ray(1920, 1080, 300) + cmd.mpng(f'frame_prefix_{i}.png') +``` + + + +```shell +from dataclasses import dataclass, field +from typing import Tuple +from pathlib import Path + +@dataclass +class PyMOLAnimator: + resolution: Tuple[int, int] = (1920, 1080) + dpi: int = 300 + frame_count: int = 120 + output_dir: Path = field(default_factory=lambda: Path('./')) + + def save_frames(self, scene_1: str, scene_2: str): + import pymol.cmd as cmd + + # 设置动画帧数 + cmd.mset(f"1 x{self.frame_count} {self.frame_count+1} x{self.frame_count}") + + # 存储第一帧和最后一帧 + cmd.scene(scene_1, 'store') + cmd.mview('store', state=1, scene=scene_1) + cmd.scene(scene_2, 'store') + cmd.mview('store', state=self.frame_count, scene=scene_2) + + # 插值以创建动画 + cmd.mview('interpolate') + + output_path = self.output_dir / 'frame_prefix_' + + # 保存每一帧为PNG图片 + for i in range(1, self.frame_count+1): + cmd.frame(i) + cmd.ray(self.resolution[0], self.resolution[1], self.dpi) + cmd.mpng(str(output_path) + f'{i}.png') + +# 使用 +animator = PyMOLAnimator((1920, 1080), 300, 120, Path('/path/to/save')) +animator.save_frames('frame1', 'frame2') + +``` + +## 场景设置 + +`cmd.mview`和`cmd.scene`两者都用于存储视图状态,但用途和范围有所不同: + +### `cmd.mview` + +- **主要用途**: 用于电影插值,在电影播放中创建平滑过渡。 + +- **存储内容**: 主要存储相机和对象矩阵,用于电影帧之间的插值。 + +- **特定于帧**: 可以为电影中的特定帧或帧范围存储视图。 + +- **动画专用**: 主要用于创建复杂的电影动画。 + + ``` + cmd.mview('store', state=1, scene='frame1') + ``` + + 这会在状态1下,用场景名`frame1`存储当前相机和对象矩阵。 + +### `cmd.scene` + +- **主要用途**: 存储和恢复场景,方便用户随时回到特定的视图和设置。 + +- **存储内容**: 存储相机视图、所有对象的活动信息、所有原子的可见性和颜色、所有的表示形式和全局帧索引。 + +- **不特定于帧**: 主要用于一般目的的视图保存和恢复。 + + ``` + cmd.scene('MyScene', 'store', 'This is my special scene') + ``` + + 这会存储一个名为"MyScene"的场景,并附带一条消息。 + diff --git a/asset/ChimeraX Tutorial.pdf b/asset/ChimeraX Tutorial.pdf new file mode 100644 index 0000000..2350062 Binary files /dev/null and b/asset/ChimeraX Tutorial.pdf differ diff --git a/plugin/README.md b/plugin/README.md new file mode 100644 index 0000000..b90d9e5 --- /dev/null +++ b/plugin/README.md @@ -0,0 +1,2 @@ +结构生物学研究中,硒代蛋氨酸(MSE)通常用作甲硫氨酸(MET)的代理,因为硒元素在X射线晶体学中有更强的散射能力。但在某些情况下,研究人员可能想要从他们对“配体(ligands)”的定义中排除MSE。 +MSE作为蛋白质的一部分,有时可能与真正的小分子配体或其他非天然出现的分子混淆。 diff --git a/plugin/__init__.py b/plugin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugin/cmd_singleton.py b/plugin/cmd_singleton.py new file mode 100644 index 0000000..0a128bc --- /dev/null +++ b/plugin/cmd_singleton.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :base.py +@Description: :pymol cmd 全局单例 +@Date :2023/09/24 12:34:33 +@Author :lyzeng +@Email :pylyzeng@gmail.com +@version :1.0 +''' +# pymol_singleton.py +import sys +from pathlib import Path +from pymol import cmd as pymol_cmd, launch as pymol_launch +from typing import Any +from dataclasses import dataclass, field, InitVar +here = Path(__file__).absolute().parent +sys.path.append(here.as_posix()) +from plugin.config import colorp, font, mole, atom + +@dataclass +class PyMOLSingleton: + _instance: InitVar[Any] = None # InitVar类型表示这个字段仅用于__post_init__ + colorp: type(colorp) = field(default_factory=colorp) # 使用default_factory设置默认值 + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super(PyMOLSingleton, cls).__new__(cls) + return cls._instance + + def __post_init__(self, _instance): + if _instance is None: + # 绑定pymol.cmd的所有方法到这个单例对象 + for attr in dir(pymol_cmd): + if callable(getattr(pymol_cmd, attr)) and not attr.startswith("__"): + setattr(self, attr, getattr(pymol_cmd, attr)) + # 初始化配色 + self.defcolor() + # 启动PyMOL界面 + self.launch() + + def defcolor(self): + # 根据self.colorp生成颜色 + color_gennerate = map(lambda x: x[0], self.colorp.colors) + list(map(lambda x: self.set_color(*x), color_gennerate)) # 使用self.set_color而不是cmd.set_color + self.set_color(*self.colorp.grey1) # 使用self.set_color而不是cmd.set_color + + def launch(self): + pymol_launch() # 使用pymol的launch函数来启动PyMOL界面 + +# 创建单例对象并暴露给其他模块 +cmd = PyMOLSingleton() \ No newline at end of file diff --git a/plugin/config.py b/plugin/config.py new file mode 100644 index 0000000..638e534 --- /dev/null +++ b/plugin/config.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :myconfig.py +@Description: :定义一些常用的配置 +@Date :2023/09/24 13:09:19 +@Author :lyzeng +@Email :pylyzeng@gmail.com +@version :1.0 +''' +from dataclasses import dataclass, field, fields +from typing import Tuple + +@dataclass +class colorp(): + color1: Tuple[Tuple[str, str], str] = field(default=(('color1', '[186,182,217]'), 'purple')) + color2: Tuple[Tuple[str, str], str] = field(default=(('color2', '[233,195,153]'), 'yellow')) + color3: Tuple[Tuple[str, str], str] = field(default=(('color3', '[43,113,216]'), 'blue_N')) + color4: Tuple[Tuple[str, str], str] = field(default=(('color4', '[206,155,198]'), 'purple')) + color5: Tuple[Tuple[str, str], str] = field(default=(('color5', '[251,187,62]'), 'orange')) + color6: Tuple[Tuple[str, str], str] = field(default=(('color6', '[245,157,158]'), 'red')) + color7: Tuple[Tuple[str, str], str] = field(default=(('color7', '[133,188,135]'), 'green')) + color8: Tuple[Tuple[str, str], str] = field(default=(('color8', '[30,230,30]'),'green_CL')) # Cl卤素配色 + color9: Tuple[Tuple[str, str], str] = field(default=(('color9', '[141,215,247]'),'blue_C')) # C配色 + color10:Tuple[Tuple[str, str], str] = field(default=(('color10', '[0,132,55]'),'green_F')) # F卤素配色 + grey1: Tuple[str, str] = field(default=('grey1','[224,224,224]')) + colors: list = field(default_factory=list, init=False) # init=False 表示不在 __init__ 方法中初始化 + + def __post_init__(self): + self.colors = [getattr(self, f.name) for f in fields(self) if f.name.startswith('color') and getattr(self, f.name)] + +@dataclass +class font(): + font_size:int = field(default=28) # 单位就是正常的px。你也可以用负值,则单位是Å + +@dataclass +class mole(): + stick_radius: float = field(default=0.10) + +@dataclass +class atom(): + size: float = field(default=0.28) \ No newline at end of file diff --git a/plugin/method.py b/plugin/method.py new file mode 100644 index 0000000..bcfc347 --- /dev/null +++ b/plugin/method.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :method.py +@Description: :pymol中一些常用到的一些方法 +@Date :2023/09/24 13:28:30 +@Author :lyzeng +@Email :pylyzeng@gmail.com +@version :1.0 +''' +import sys +from pathlib import Path +here = Path(__file__).absolute().parent +sys.path.append(here.as_posix()) +from plugin.cmd_singleton import cmd +from pymol.preset import ion_sele,lig_sele,solv_sele + +def color_mole(selection='hetatm'): + cmd.color('color9',f'{selection} in name c*') + cmd.color('color6',f'{selection} in name o*') + cmd.color('color5',f'{selection} in name s*') + cmd.color('color3',f'{selection} in name n*') + cmd.color('color8',f'{selection} in name cl*') + cmd.color('color10',f'{selection} in name f*') + +def focus_ligand(selection: str = 'sele'): + ''' + watch ligand and protein structure + ''' + cmd.remove(solv_sele) # 移除金属离子和溶剂 + cmd.remove("resn SO4 or resn PO4 or resn CL") + cmd.pretty(selection) + +def grey(selection='sele',opt=True): + if opt: optimisation(selection) # 优化 + cmd.color('grey1',f'{selection}')# 对某个对象进行灰化 + +def optimisation(selection, grey=True): + if grey: cmd.set('cartoon_color','grey1', selection=selection) + cmd.set('stick_transparency','0.1', selection=selection) + cmd.set("ray_shadows","off", selection=selection) + cmd.set('cartoon_highlight_color', selection=selection) + cmd.set('cartoon_fancy_helices', selection=selection) + cmd.set('cartoon_transparency','0.5', selection=selection) \ No newline at end of file diff --git a/plugin/singleton.py b/plugin/singleton.py new file mode 100644 index 0000000..b112ce8 --- /dev/null +++ b/plugin/singleton.py @@ -0,0 +1,97 @@ +import pymol + +from pymol import _cmd + +import threading +import sys + +pymol2_lock = threading.RLock() + +## +## FIXME: The PyMOL and SingletonPyMOL classes are partly redundant with the +## instance tracking of the "cmd" module (and the pymol2.cmd2.Cmd class), +## which also holds the _COb pointer. +## + +class SingletonPyMOL: + ''' + Start an exclusive PyMOL instance, only one instance allowed + ''' + def __init__(self, options=None): + if options is None: + self.options = pymol.invocation.options + else: + self.options = options + + def idle(self): + return _cmd._idle(self._COb) + + def getRedisplay(self, reset=True): + return _cmd._getRedisplay(self._COb, reset) + + def reshape(self, width, height, force=0): + _cmd._reshape(self._COb, width, height, force) + + def draw(self): + _cmd._draw(self._COb) + + def button(self, button, state, x, y, modifiers): + _cmd._button(self._COb, button, state, x, y, modifiers) + + def drag(self, x, y, modifiers): + _cmd._drag(self._COb, x, y, modifiers) + + def start(self): + cmd = pymol.cmd + if cmd._COb is not None: + raise RuntimeError('can only start SingletonPyMOL once') + + with pymol2_lock: + cmd._COb = _cmd._new(pymol, self.options, True) #! 此处对pymol-open-source源码进行修改,方便启动的时候修改参数,具体参考options + _cmd._start(cmd._COb, cmd) + + self._COb = cmd._COb + self.cmd = cmd + + def stop(self): + with pymol2_lock: + _cmd._stop(self._COb) + + pymol.cmd._COb = None + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_value, tb): + self.stop() + + @classmethod + def start_with_options(cls, options): # new add + instance = cls(options) + instance.start() + return instance + +''' +# 使用示例 +if __name__ == '__main__': + from pymol.invocation import options + options.rpcServer = 1 + + # 使用类方法创建并启动实例 + pymol_instance1 = SingletonPyMOL.start_with_options(options) + + # 使用上下文管理器 + with SingletonPyMOL() as pymol_instance2: + # pymol_instance2 使用默认的 options + pass + + # 先创建实例,然后单独启动 + pymol_instance3 = SingletonPyMOL() + pymol_instance3.start() # 使用默认的 options + +# 启动pymol GUI,可以在一个新的进程中启动,通过rpcServer 默认端口是:9123 localhost +# 或者在一个新的线程中启动界面,不堵塞主线程 +from pmg_qt import pymol_qt_gui +pymol_qt_gui.execapp() +''' \ No newline at end of file diff --git a/pymol_anime.py b/pymol_anime.py new file mode 100644 index 0000000..d6ac9e7 --- /dev/null +++ b/pymol_anime.py @@ -0,0 +1,112 @@ +from loguru import logger +from pymol import cmd +import subprocess +from pathlib import Path + +# 删除当前目录下的*.mp4和*.png文件 +for file_path in Path('.').glob('*.mp4'): + file_path.unlink() + +for file_path in Path('.').glob('*.png'): + file_path.unlink() + +# 设置日志文件 +logger.add("pymol_animation.log") +cmd.log_open('pymol_animation_cmd.log') +# 最高渲染质量 +cmd.util.performance(0) +cmd.rebuild() +# 初始化并加载4i24结构 +cmd.reinitialize('everything') +cmd.fetch('4i24') +cmd.copy('4i24_mutated', '4i24') # 复制4i24到4i24_mutated +# 假设protgrey和colorp.color_mole是你自定义的函数 +protgrey('4i24') +protgrey('4i24_mutated') +colorp.color_mole(obj='hetatm') +cmd.hide('everything', '4i24_mutated') +i = '4i24_mutated' # object +j = 'A' # chain +k = '797' # site +mutate_site = f"/{i}//{j}/{k}" +mutation_type = 'GLY' +# 对4i24_mutated进行突变 +cmd.select('mut_res', mutate_site) +cmd.wizard("mutagenesis") +cmd.refresh_wizard() +cmd.get_wizard().set_mode(mutation_type) +cmd.get_wizard().do_select(mutate_site) +cmd.get_wizard().apply() +cmd.set_wizard("done") +cmd.show('sticks', mutate_site) +cmd.hide('everything', '4i24_mutated') + +# 先打关键帧然后创建视频 + +# 创建第一个关键帧 +selection1 = "/4i24//A/797" +cmd.scene('scene1', 'store') +cmd.orient(selection1) +cmd.show('sticks', selection1) + + +# 创建第二个关键帧 +selection2 = "/4i24//A/1C9" +cmd.select("both_residues", f"{selection1} or {selection2}") +cmd.orient("both_residues") +cmd.scene('scene2', 'store') + +# 设置动画和mview +cmd.mset("1 x900") # 创建一个总共1200帧的动画框架,每秒120帧共计10秒 +# 第一个关键帧: 1帧 +cmd.mview(action='store', first=1, scene='scene1') + +# 第二个关键帧: 300帧 +cmd.mview(action='store', first=300, scene='scene2') + +# 插值以平滑过渡 +cmd.mview('interpolate', first=1, last=300) + +residue_to_show = "/4i24//A/797" # 用你想显示的残基替换 +cmd.mdo(360, f"show lines, {residue_to_show}; label {residue_to_show}/n, f'res {residue_to_show}(CYS)'") + +# 隐藏4i24对象: 600帧 +cmd.mdo(600, "hide everything, 4i24") + +# 显示4i24_mutated对象: 610帧 +cmd.mdo(610, "show cartoon, 4i24_mutated") + +# 显示突变残基: 620帧 +cmd.mdo(620, "show sticks, /4i24_mutated//A/797") + +# 为残基添加标签: 630帧 +residue_to_show = "/4i24_mutated//A/797" +cmd.mdo(630, f"show lines, {residue_to_show}; label {residue_to_show}/n, f'res {residue_to_show}(GLY)'") + +# 插值以平滑过渡 +cmd.mview('interpolate', first=300, last=900) + +# 设置viewport尺寸为1080P(1920x1080像素) +cmd.viewport(1920, 1080) +cmd.mplay() # 播放动画 + +# 光线追踪和保存每一帧 +# for frame in range(1, 901): +# cmd.frame(frame) +# logger.info(f"Rendering frame {frame}") +# cmd.refresh() +# cmd.ray(width=1920, height=1080) +# cmd.png(f"frame_transition_{frame:04d}.png") + +# 使用ffmpeg合成视频 +# subprocess.run([ +# 'ffmpeg', +# '-framerate', '120', # 120帧/秒 +# '-i', 'frame_transition_%04d.png', +# '-c:v', 'libx264', +# '-pix_fmt', 'yuv420p', +# 'transition_video.mp4' +# ]) + + +# ffmpeg -framerate 120 -i frame_transition_%04d.png -c:v libx264 -pix_fmt yuv420p transition_video.mp4 diff --git a/pymol_color_plugin.py b/pymol_color_plugin.py new file mode 100644 index 0000000..3f83ef2 --- /dev/null +++ b/pymol_color_plugin.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :pymol_color_plugin.py +@Description: :pymol配色方案 +@Date :2023/8/29 10:52:25 +@Author :hotwa +@version :1.0 +''' + + +from pymol import cmd +from pathlib import Path +from dataclasses import dataclass +from json import dumps +from Bio.Data import IUPACData +from Bio.SeqUtils import seq3 +from collections import defaultdict + +STANDARD_AMINO_ACIDS_1 = set(IUPACData.protein_letters) +STANDARD_AMINO_ACIDS_3 = set([seq3(i).upper() for i in STANDARD_AMINO_ACIDS_1]) + + +def bejson(d:dict(help='need to beauty json')) -> dict: + return dumps(d,indent=4,ensure_ascii=False) + +@dataclass +class pymolbase(): + file: Path + + def __post_init__(self): + cmd.reinitialize('everything') + cmd.load(filename=self.file.as_posix()) + PDBs = cmd.get_names() + if len(PDBs) == 1: + PDB = PDBs[0] + else: + raise ValueError(f'this pdb have more than one object! PDBs:{PDBs}') + self.CAindex = cmd.identify(f"{PDB} and name CA") # get CA index + self.pdbstrList = [cmd.get_pdbstr("%s and id %s" % (PDB, CAid)).splitlines() for CAid in CAindex] + self.ProtChainResiList = [[PDB, i[0][21], i[0][22:26].strip()] for i in pdbstrList] # get pdb chain line string + resn_hetatm = self.get_resn('hetatm') + + def mutate(self, site:str, chain:str, mutation_type: str): + ''' + mutation_type:ALA,ARG,ASN,ASP,CYS,GLN,GLU,GLY,HIS,ILE,LEU,LYS,MET,PHE,PRO,SER,THR,TRP,TYR,VAL + ''' + for i, j, k in self.ProtChainResiList: + if int(k) == int(site) and j == str(chain): + cmd.wizard("mutagenesis") + cmd.refresh_wizard() + cmd.get_wizard().set_mode(mutation_type) + selection = f"/{i}//{j}/{k}" + cmd.get_wizard().do_select(selection) + cmd.get_wizard().apply() + cmd.set_wizard("done") + + def add_polar_hydrogen(self, pymol_obj='sele'): + ''' + add polar hydrogen + ''' + cmd.h_add(pymol_obj) # add all hydrogens in this molecular + cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen + + def save(self, file_name: Path, selection: str): + cmd.save(file_name.as_posix(), f"{selection}") + + # def get_resn(self, name:str = 'all'): + # resn_list = [] + # if name == 'protein': + # cmd.iterate(f"all and not hetatm", expression=lambda atom: resn_list.append(atom.resn)) + # return tuple(set(resn_list)) + # else: + # cmd.iterate(selection=name, expression=lambda atom: resn_list.append(atom.resn)) + # return tuple(set(resn_list)) + + def get_resn(self, name:str = 'all'): # ! not test + resn_dict = defaultdict(set) + + def collect_resn(atom): + category = 'protein' if atom.chain and not atom.hetatm else 'small_molecule' + resn_dict[category].add(atom.resn) + + cmd.iterate(name, expression=collect_resn) + + protein_resns = resn_dict.get('protein', set()) + small_mol_resns = resn_dict.get('small_molecule', set()) + + standard_aa = protein_resns.intersection(STANDARD_AMINO_ACIDS_3) + non_standard_aa = protein_resns - STANDARD_AMINO_ACIDS_3 + + return { + 'small_molecules': list(small_mol_resns), + 'protein_resns': list(protein_resns), + 'standard_aa': list(standard_aa), + 'non_standard_aa': list(non_standard_aa) + } + +class colorp(pymolbase): + color1 = (('color1', '[186,182,217]'), 'purple') + color2 = (('color2', '[233,195,153]'), 'yellow') + color3 = (('color3', '[43,113,216]'), 'blue-N') + color4 = (('color4', '[206,155,198]'), 'purple') + color5 = (('color5', '[251,187,62]'), 'orange') + color6 = (('color6', '[245,157,158]'), 'red') + color7 = (('color7', '[133,188,135]'), 'green') + color8 = (('color8', '[30,230,30]'),'green-CL') # Cl卤素配色 + color9 = (('color9', '[141,215,247]'),'blue-C') # C配色 + color10 = (('color10', '[0,132,55]'),'green-F') # F卤素配色 + grey1 = ('grey1','[224,224,224]') + colors = (color1, color2, color3, color4, color5, color6, color7, color8, color9, color10) + + def __init__(self, path, id): + self.id = id.lower() + cmd.reinitialize() + p = Path(path) + if p.is_dir(): + file = p.joinpath(f"{id}.pdb") + else: + raise ValueError('path params error') + cmd.load(file,id) + + def pretty(self): + cmd.remove('solvent metals') # 移除金属离子和溶剂 + cmd.remove("resn SO4 or resn PO4 or resn CL") + cmd.pretty(selection=self.id) + + @staticmethod + def defcolor(): + # 定义常用配色 + color_gennerate = map(lambda x:x[0],colorp.colors) + list(map(lambda x:cmd.set_color(*x),color_gennerate)) + # 定义灰度配色 + cmd.set_color(*colorp.grey1) + + @staticmethod + def grey(obj='sele',opt=True): + colorp.defcolor() + if opt: method.optimisation() # 优化 + # 对某个对象进行灰化 + cmd.color('grey1',f'{obj}') + + @staticmethod + def color_mole(obj='hetatm'): + # color blue,sele in name c* + colorp.defcolor() + cmd.color('color9',f'{obj} in name c*') + cmd.color('color6',f'{obj} in name o*') + cmd.color('color5',f'{obj} in name s*') + cmd.color('color3',f'{obj} in name n*') + cmd.color('color8',f'{obj} in name cl*') + cmd.color('color10',f'{obj} in name f*') + + @staticmethod + def pretty(obj = None): + if not obj: obj = cmd.get_names()[0] + cmd.remove('solvent metals') # 移除金属离子和溶剂 + cmd.pretty(selection=obj) + + +class font(): + font_size = 28 # 单位就是正常的px。你也可以用负值,则单位是Å + +class mole(): + stick_radius=0.10 + +class atom(): + size = 0.28 + +class method(): + + @staticmethod + def optimisation(selection, grey=True): + colorp.defcolor() + if grey: cmd.set('cartoon_color','grey1', selection=selection) + cmd.set('stick_transparency','0.1', selection=selection) + cmd.set("ray_shadows","off", selection=selection) + cmd.set('cartoon_highlight_color', selection=selection) + cmd.set('cartoon_fancy_helices', selection=selection) + cmd.set('cartoon_transparency','0.5', selection=selection) + + + @staticmethod + def remove(): + cmd.remove('resn SO4') # SO4 remove + cmd.remove('solvent metals') # 移除金属离子和溶剂 + +def get_resn(name:str = 'all'): + resn_list = [] + cmd.iterate(selection=name, expression=lambda atom: resn_list.append(atom.resn)) + return tuple(set(resn_list)) + +def protgrey(selection='all'): + method.remove() + method.optimisation(selection) + +# 观察小分子 +# protgrey() +# colorp.color_mole('sele') +# add label +# label sele, "your label" +# black background +# set bg_rgb, [0, 0, 0] +# define label +# cmd.get_position("sele") +# set label_position, [x,y,z] +# cmd.h_add(pymol_obj) # add all hydrogens in this molecular +# cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen +# cmd.remove(f"sele & hydro") # 移除所有的H +# cmd.set('cartoon_color','color8', 'obj') +# cmd.hide("everything", "chain A and resn UHT and alt B") # 小分子隐藏B构象 +# cmd.label("chain A and resn UHT and alt A", "resn") # 添加标签 +#cmd.select('sele', f'resn {resn} and chain {chain}') +#cmd.center('sele') +#cmd.orient('sele') +#obj_name = 'around_and_self' +# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance}') # 不扩展残基 +# cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance}') # 扩展残基 +# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子 +#cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子扩展至残基的原子 +#cmd.hide('everything', 'all') +#cmd.show('lines', obj_name) +cmd.extend('colorp',colorp) +cmd.extend('method',method) + +if __name__ == '__main__': + PDB = '4i24' + cmd.reinitialize('everything') + cmd.fetch('4i24') + protgrey('4i24') + colorp.color_mole(obj='hetatm') + # 设置动画帧数 + # cmd.mset("1 x60 61 x60") # 60帧展示初始结构,第61帧用于突变,再60帧展示突变后结构 + + # 保存第1帧 + cmd.scene('frame1', 'store') + cmd.mview('store', state=1, scene='frame1') + + # 执行残基突变。这里以将位于链A和编号为797的残基突变为ALA为例。 + selection1 = "/4i24//A/797" + selection2 = "/4i24//A/1C9" + cmd.select("both_residues", f"{selection1} or {selection2}") + cmd.orient("both_residues") + cmd.show('sticks', selection1) + # 保存第2帧 + cmd.scene('frame60', 'store') + cmd.mview('store', state=60, scene='frame60') + # 关闭电影模式 + cmd.mstop() # 停止电影播放 + cmd.mdelete() # 删除所有电影帧 + cmd.wizard("mutagenesis") + cmd.refresh_wizard() + cmd.get_wizard().set_mode("GLY") + cmd.get_wizard().do_select(selection1) + # 保存第3帧 + cmd.scene('frame120', 'store') + cmd.mview('store', state=120, scene='frame120') + cmd.get_wizard().apply() + cmd.set_wizard() + # 保存第4帧 + cmd.scene('frame180', 'store') + cmd.mview('store', state=180, scene='frame180') + + cmd.mpng('frame_prefix_', 1, 4) \ No newline at end of file diff --git a/pymolrc b/pymolrc new file mode 100644 index 0000000..6929bca --- /dev/null +++ b/pymolrc @@ -0,0 +1 @@ +run pymol_color_plugin.py \ No newline at end of file diff --git a/script/.gitignore b/script/.gitignore new file mode 100644 index 0000000..615daf8 --- /dev/null +++ b/script/.gitignore @@ -0,0 +1,4 @@ +__pycache__ +*.pdb +*.pse +*.cif \ No newline at end of file diff --git a/script/README.md b/script/README.md new file mode 100644 index 0000000..4534fbd --- /dev/null +++ b/script/README.md @@ -0,0 +1,6 @@ +# pymol + + + +## Getting started + diff --git a/script/__init__.py b/script/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/script/add_hydrogen.py b/script/add_hydrogen.py new file mode 100644 index 0000000..909d3fa --- /dev/null +++ b/script/add_hydrogen.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :add_hydrogen.py +@Description: : add hydrogen in pymol +@Date :2022/10/24 11:17:11 +@Author :hotwa +@version :1.0 +''' +from pymol import cmd +from openbabel import pybel +""" +conda install -c conda-forge pymol-open-source openbabel --yes +""" + +def add_polar_hydrogen(file, out_file,read_fmt = 'mol2', pymol_obj='sele', + save_format='mol2',tool = 'pymol'): + """add_polar_hydrogen _summary_ + + _extended_summary_ + + Arguments: + file {pathlike} -- input file path + out_file {pathlike} -- output file + + Keyword Arguments: + read_fmt {string} -- the format of read file (default: {'mol2'}) + pymol_obj {string } -- add polar hydrogen object name, ligand ID (default: {'sele'}) + save_format {string} -- the format of save file (default: {'mol2'}) + tool {string} -- the tool of add polar hydrogens (default: {'pymol'}) + + Returns: + _type_ -- path or None + """ + if tool == 'pymol': + cmd.reinitialize("everything") + cmd.load(filename=file, format=read_fmt) + cmd.h_add(pymol_obj) # add all hydrogens in this molecular + cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen + cmd.save(filename=out_file,selection=pymol_obj, format=save_format) + return out_file + elif tool == 'pybel': + omol = list(pybel.readfile(format = 'mol2', filename = file))[0] + omol.OBMol.AddPolarHydrogens() + omol.write('mol2',out_file,overwrite=True) + return out_file + else: + return None diff --git a/script/biotools.py b/script/biotools.py new file mode 100644 index 0000000..3fd96c7 --- /dev/null +++ b/script/biotools.py @@ -0,0 +1,465 @@ +from email import header +from unittest.mock import Base + + +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :biotools.py +@Description: : +@Date :2021/06/22 16:36:34 +@Author :hotwa +@version :1.0 +''' +import os,re,time +from Bio.PDB.PDBParser import PDBParser +from Bio.PDB import PDBIO,Select,parse_pdb_header,PDBList +from tools import mdcheck,get_path_contents +import requests +import random,string +from pathlib import Path + +file_dir = os.path.dirname(os.path.realpath(__file__)) # 当前python文件的绝对路径 +residue = [i.upper() for i in 'Gly,Ala,Val,Leu,Ile,Pro,Phe,Tyr,Trp,Ser,Thr,Cys,Met,Asn,Gln,Asp,Glu,Lys,Arg,His'.split(',')] + +""" getpdbfile function""" +agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 -- '+''.join(random.sample(string.ascii_letters+string.digits, 32)) +HEADER = {'User-Agent': agent} + +def getpdbfile(pdbid,dir): # 下载备用,不是很稳定 + pdbid = pdbid.upper() + res = os.path.exists(f'{pdbid}.pdb') + if not res: + r = requests.get(url = f'https://files.rcsb.org/download/{pdbid}.pdb1',verify=False,headers=HEADER) + _file_path = os.path.join(dir,f'{pdbid}.pdb') + with open(_file_path,'wb+') as f: + f.write(r.content) + +""" getpdbfile function""" + + +class PDBLengthError(BaseException): + def __init__(self,arg): + self.arg = arg + +def downloadpdbfile(pdbid,pdir,overwrite=True,changename=True): + """downloadpdbfile [summary] + + [extended_summary] + + :param pdbid: like code 5xv7 + :type pdbid: string + :param pdir: save pdb file path + :type pdir: string + :param overwrite: overwrite pdb file, defaults to True + :type overwrite: bool, optional + :param changename: change .ent to .pdb, defaults to True + :type changename: bool, optional + """ + if isinstance(pdbid,list): + for i in pdbid: + downloadpdbfile_sub(pdbid=i,dir = pdir,overwrite=overwrite,changename=changename) + elif isinstance(pdbid,str): + if len(pdbid) == 4: + downloadpdbfile_sub(pdbid=pdbid,dir = pdir,overwrite=overwrite,changename=changename) + else: + raise PDBLengthError('pdbid length error!') + + +def downloadpdbfile_sub(pdbid,dir,overwrite,changename): + _pdbname = f'pdb{pdbid}.ent' + _pdbname_new = f'{pdbid}.pdb' + if not overwrite: + _path = os.path.join(dir,_pdbname_new) + if os.path.exists(_path): + ... + else: + pdbl = PDBList() + try: + pdbl.retrieve_pdb_file(pdbid,file_format='pdb',pdir = dir,overwrite=overwrite) + except FileNotFoundError: # 调用biopython下载接口失败,使用自己的接口 + getpdbfile(pdbid,dir) + if changename: + srcFile = os.path.join(dir,_pdbname) + dstFile = os.path.join(dir,_pdbname_new) + try: + os.rename(srcFile,dstFile) + except FileExistsError as e: # 如果重名名的文件存在,则表示该文件已经下载 + ... + except Exception as e: + raise e + elif overwrite: # 覆盖之前已经下载的pdb文件 + pdbl = PDBList() + try: + pdbl.retrieve_pdb_file(pdbid,file_format='pdb',pdir = dir,overwrite=overwrite) + except FileNotFoundError: # 调用biopython下载接口失败,使用自己的接口 + getpdbfile(pdbid,dir) + if changename: + srcFile = os.path.join(dir,_pdbname) + dstFile = os.path.join(dir,_pdbname_new) + try: + os.rename(srcFile,dstFile) + except Exception as e: + raise e + else: + print('error overwrite params!') + +class ChainSelect(Select): + def __init__(self,chain_string='A'): + super(Select,self).__init__() + self.chain_string = chain_string + + def accept_chain(self, chain): + if str(chain.__repr__()) == ''.format(self.chain_string): # judge chain name + return 1 + else: + return 0 + +class ResidueSelect(Select): + """ResidueSelect ues by select + + [extended_summary] + + :param Select: [description] + :type Select: [type] + """ + def __init__(self,residue): + super(Select,self).__init__() + self.residue = residue + + def accept_residue(self, residue): + if residue.get_resname()==self.residue: + return 1 + else: + return 0 + +def return_path(path): + # 判断是否为绝对路径 + path_split_string = os.path.split(path) + if os.path.isdir(path_split_string[0]) and os.path.isfile(path): + return path # 输入绝对路径,返回绝对路径 + elif bool(path_split_string[0]) == False: + return os.path.join(file_dir,path) # 尝试在当前目录下自动补全路径 + elif not (path_split_string[1][-4:] == '.pdb' or '.ent'): + raise ValueError(f'this file, {path} is not a valid pdb file') + else: + raise ValueError(f'valid path: {path}') + +# ! error class for Bdp +class DoiError(BaseException): + def __init__(self,arg): + self.arg = arg + +class ChainError(BaseException): + def __init__(self,arg): + self.arg = arg + + +class Bdp(object): + __slots__ = ['path','sid','mole_id','mole_struct','create_path'] + + def __init__(self,path,sid): + self.path = path + self.create_path = os.path.dirname(os.path.dirname(path)) + self.sid = sid + self.mole_id = [] + self.mole_struct = [] + # 初始化数据 + self.read_formula() + self.init_path() + + @property + def chain_num(self): + s = self.read_struct() + first_model = s[0] + return len(first_model) + + @property + def header_infos(self): + return parse_pdb_header(self.path) + + @property + def doi(self): + _journal = self.header_infos['journal'] + _doi = _journal.split()[-1] + if _doi.startswith('10.'): + return _doi + else: + raise DoiError(f'current pdb does not doi, {_journal}') + + def mkdir(self,pname,md = False): + """mkdir [summary] + + [extended_summary] + + :param pname: [description] + :type pname: [type] + :param md: [dict], defaults to False + :type md: [dict], optional + """ + def wrapper(func): + def deco(self,*arg,**kwargs): + create_path = os.path.join(self.create_path,pname) + if os.path.exists(create_path): + ... + else: + os.mkdir(create_path) + # create markdown + mdc = mdcheck(md) + header_info = f'''--- + title: {mdc['title']} + date: {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} + type: {mdc['typeinfo']} + --- + [TOC] + # Description + {mdc['description']} + ''' + create_path_md = os.path.join(create_path,'README.md') + with open(create_path_md,'w+') as f: + f.write(header_info) + func(self,*arg,**kwargs) + return deco + return wrapper + + def value_check(self): + try: + if len(self.mole_id) == len(self.mole_struct): + print('check pass!') + else: + raise ValueError('molecule identifier do not match formula identifier! manual check') + except : + raise ValueError + + @staticmethod + def read_line_list(first_column,path): + """read_line_list 读取第一列为first_column字符串的行,存为列表返回 + + [extended_summary] + + Arguments: + first_column {[string]} -- [pdb文件内容第一列常常为大写] + path {[string]} -- [路径] + + Returns: + [iter] -- [含有first_column字符串的生成器] + """ + stringiter = Bdp.stringlinesiter(file = path) + stringiter = map(lambda x:x.strip(), stringiter) + return filter(lambda x:x.split()[0]==first_column,stringiter) + + @staticmethod + def stringlinesiter(file): + with open(file, 'r+', encoding='utf-8') as f: + yield from f.readlines() + + @mkdir(self = 'self',pname = 'split_molecule') + def split_molecule(self,residue): + """split_molecule [split molecule or residue line save to file] + + [extended_summary] + + :param residue: [select residue] + :type residue: [string] + :extract_chain: extract molecule corresponding chain + :type extract_chain: bool + """ + remove_list,residue = [],str(residue) + s = self.read_struct() + chainlist = self.getchain_str() + # 对每条链上都与这个小分子结合的链上的结合信息都提取出来 + for i in chainlist: + io = PDBIO() + io.set_structure(s[0][i]) + sidc = self.sid.replace('.pdb', '') if '.pdb' in self.sid else self.sid + savename = f'{sidc}_{i}_{residue}.pdb' + path = os.path.join(self.create_path,'split_molecule',savename) + io.save(path,ResidueSelect(residue)) + if os.path.exists(path): + with open(path,'r+') as f: + content = f.readline() + if content.strip() == 'END': + # 该链没有小分子,删除文件 + # print(f'{savename} this chain have not molecule remove it') + remove_list.append(path) + for i in remove_list: + os.remove(i) # remove the empty molecule file + + + def getchain_str(self): + """getchain_str retrun chain symbol name + + [extended_summary] + + :return: [description] + :rtype: list + """ + _l = [] + p = PDBParser(PERMISSIVE=1) + s = p.get_structure(self.sid, self.path) + chain_gennerate = s[0].copy().get_chains() + for i in chain_gennerate: + _l.append(i.id) + _l.sort() + return _l + + def site_exists(self,residue,num): + """site_exists 判断改蛋白的某个位点是否存在,并返回该位点所在的链 + + 用于判断结合位点的链在哪里 + + :param residue: 残基名称 + :type residue: string + :param num: 残基编号 + :type num: int + :return: chain + :rtype: list + """ + _l = [] + num = int(num) + residue = residue.strip().upper() + chainlist = self.getchain_str() + s = self.read_struct() + for i in chainlist: + try: + resname_from_pdb = s[0][i][num].get_resname() + if residue == resname_from_pdb: + _l.append(i) + except KeyError as e: + print(e,f'chain {i} not exist {residue}-{num}') + return _l + + def _split_chain_save_struct(self,obj,sid): + io = PDBIO() + io.set_structure(obj) + sidc = sid.replace('.pdb', '') if '.pdb' in sid else sid + name,path = f'{sidc}_{obj.id}.pdb', self.create_path + io.save(os.path.join(path,'split_chain',name),ChainSelect(obj.id)) + + @mkdir(self = 'self',pname ='split_chain') + def split_chain(self,extract='auto'): + """split_chain [summary] + + [extended_summary] + + :param extract: [extract the chain], defaults to 'auto' extract all chains + :type extract: [string], optional + :raises ValueError: [save to pdb in current path in ./splitchain] + """ + # 将传入的蛋白文件按照蛋白链分割并按照 pdbid_A_m.pdb 重新命名 A代表链,m表示该链含有小分子 + p = PDBParser(PERMISSIVE=1) + s = p.get_structure(self.sid, self.path) + if extract == 'auto': + chainlist = self.getchain_str() + for j in chainlist: + self._split_chain_save_struct(obj=s[0][j],sid=self.sid) + else: + chainlist = self.getchain_str() + if extract not in chainlist: + raise ChainError(f'The {extract} chain not exists !') + if extract in chainlist: + extract = extract.upper() + self._split_chain_save_struct(obj=s[0][extract],sid=self.sid) + else: + raise ValueError + + def read_struct(self): + """read_struct read pdb structure + + https://biopython-cn.readthedocs.io/zh_CN/latest/cn/chr11.html#id3 + 一个 Structure 对象的整体布局遵循称为SMCRA(Structure/Model/Chain/Residue/Atom,结构/模型/链/残基/原子)的体系架构: + + 结构由模型组成 + 模型由多条链组成 + 链由残基组成 + 多个原子构成残基 + Returns: + [obj] -- [Model] + """ + p = PDBParser(PERMISSIVE=1) + s = p.get_structure(self.sid, self.path) + return s + + def get_chain(self,chain_str): + s = self.read_struct() + return s[0][chain_str] + + def init_path(self): + _p = return_path(self.path) + self.path = _p + if os.path.isfile(_p) == False: + raise FileNotFoundError(f'file {_p} not found') + + def read_formula(self): + """read_formula read pdb file molecule information and identifier in this pdb file(just like residue) + + [extended_summary] + + :raises OSError: Do not find this file + """ + _path = self.path + try: + with open(_path,'r') as f: + text_line = f.readlines() + _l = [] + for i in text_line: + li = i.split() + if li[0] == 'FORMUL': + _l.append(li) + self.mole_id.append(li[2]) + for ii in _l: + self.mole_struct.append(''.join(ii[3:])) + except Exception as e: + raise e + + def clean_pdb(self,outfile=None,path=None): + __lst = Bdp.read_line_list('ATOM',self.path) + _p = Path(path) if path else Path(self.path).absolute().parent + _f = outfile if outfile else f"{self.sid}.clean.pdb" + write_path = _p.joinpath(_f) + if write_path.exists(): write_path.unlink() + with open(write_path,'w+') as f: + for i in __lst: + f.write(f"{i}\n") + +def renamepdbfile(path): + """renamepdbfile 自动对path路径中批量下载的pdb中文件的文件名进行重命名 + + [extended_summary] + + Arguments: + path {[type]} -- [description] + """ + _flist = get_path_contents(path)[0] + for i in _flist: + i = i.strip() + basetuple = os.path.split(i) + basedir = basetuple[0] + _fn = basetuple[1] + res = re.match(pattern='pdb([A-Za-z0-9]*).ent',string=_fn) + # nfn = res.group(1) + # newname =os.path.join(basedir,str(nfn)+'.pbd') + # os.rename(src = i,dst = newname) + try: + nfn = res.group(1) + newname =os.path.join(basedir,str(nfn)+'.pdb') + os.rename(src = i,dst = newname) + except IndexError: + raise IndexError(f'Could not find pdbid in this filename: {i}') + except AttributeError: + continue + except FileNotFoundError as e: + if os.path.exists(i): + raise e + elif os.path.exists(newname): + continue + else: + raise e + + + +def pdbupdate(path): + pl = PDBList(pdb=path) + pl.update_pdb() + + diff --git a/script/mutagenesis.py b/script/mutagenesis.py new file mode 100644 index 0000000..c125b97 --- /dev/null +++ b/script/mutagenesis.py @@ -0,0 +1,66 @@ +# 定义残基突变函数 +from pymol import cmd +from pathlib import Path +def Mutagenesis_site(filename: Path, mutation_type: str, site: int, outfile: Path = None) -> Path: + """Mutagenesis_site Mutagenesis residue in site + residue site have mutil conformations, need to select one conformations, some error accured. + pass + _extended_summary_ + + Arguments: + filename {str} -- PDB file format + mutation_type {str} -- 'VAL' for ALA TO VAL; 'ALA' for any/not ALA to ALA; 'GLY' for ALA to GLY + site {int} -- residue site in pdbfile + + Keyword Arguments: + outfile {str} -- _description_ (default: {None}) + + Raises: + ValueError: not one object in PDBs,need to fix + + Returns: + str -- save mutagenesis file path + """ + p = Path(filename) + savename = p.stem + f"_{site}_mutation.pdb" + _out_file = Path(outfile) if outfile else p.absolute().parent.joinpath(savename) + if not _out_file.absolute().parent.exists(): _out_file.absolute().parent.mkdir(parents=True) + cmd.reinitialize('everything') # ! clean up + cmd.load(filename.as_posix()) + PDBs = cmd.get_names() + # Get the ID numbers of c-alpha (CA) atoms of all residues + if len(PDBs) == 1: + PDB = PDBs[0] + else: + raise ValueError(f'this pdb have more than one object!PDBs:{PDBs}') + CAindex = cmd.identify(f"{PDB} and name CA") + pdbstrList = [cmd.get_pdbstr("%s and id %s" % (PDB, CAid)).splitlines() for CAid in CAindex] + ProtChainResiList = [[PDB, i[0][21], i[0][22:26].strip()] for i in pdbstrList] + for i, j, k in ProtChainResiList: + if int(k) == int(site): + cmd.wizard("mutagenesis") + # print(i,j,k) + cmd.refresh_wizard() + cmd.get_wizard().set_mode(mutation_type) + ##Possible mutation_type could be: + ##'VAL' for ALA TO VAL + ##'ALA' for any/not ALA to ALA + ##'GLY' for ALA to GLY + # 'selection' will select each residue that you have selected + # on ProtChainResiList above using the PDBid,chain,and residue + # present on your pdb file.If you didn't select a range on + # ProteinChainResiList, it will do the mutation on all the residues + # present in your protein. + selection = f"/{i}//{j}/{k}" + # Print selection to check the output + # print(selection) + # Selects where to place the mutation + cmd.get_wizard().do_select(selection) + ##Applies mutation + cmd.get_wizard().apply() + # Save each mutation and reinitialize the session before the next mutation + ##to have pdb files only with the residue-specific single-point mutation you were interested. + cmd.set_wizard("done") + cmd.save(_out_file.as_posix(), f"{PDB}") + cmd.reinitialize('everything') # Reinitialize PyMOL to default settings. + return _out_file \ No newline at end of file diff --git a/script/ppi.py b/script/ppi.py new file mode 100644 index 0000000..3ca59c9 --- /dev/null +++ b/script/ppi.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :ppi.py +@Description: : +@Date :2023/09/01 16:56:22 +@Author :lyzeng +@Email :pylyzeng@gmail.com +@version :1.0 +''' +import sys +from dataclasses import dataclass, field, InitVar +from functools import partial +from pymol import cmd +from pymol.cmd import util +from pathlib import Path +from collections import defaultdict + +# local import +from .pymolbase import PyMOLBase + +@dataclass +class PyMOLConcrete(PyMOLBase): + file: Path + ligand_positions: defaultdict = field(default_factory=lambda: defaultdict(list), init=False) + + def __post_init__(self, file): + self.load_file(file) + self.global_obj_name = self.cleanpdb(file.stem) + + def load_file(self, file): + # 加载文件到PyMOL(这里是一个示例,您可能需要使用实际的PyMOL命令) + cmd.load(file) + pass + + def collect_ligand_positions(self, model, chain, resn, resi): + position = f'/{model}//{chain}/{resi}' + self.ligand_positions[resn].append(position) + + def gather_ligand_info(self): + cmd.iterate('organ', 'collect_ligand_positions(model, chain, resn, resi)', space={'collect_ligand_positions': partial(self.collect_ligand_positions)}) + + + @staticmethod + def cleanpdb(before_string): + """ + 处理传入参数pdb 例如: pdb1b12.ent 1b12.pdb 转化成 1b12 + """ + # print(f'try to format {before_string}') + if '.ent' in before_string: + before_string = before_string[3:-4].lower() + elif '.pdb' in before_string: + before_string = before_string[:4].lower() + elif len(before_string) == 4: + before_string = before_string.lower() + else: + if len(before_string) != 4: + raise ValueError(f'length out of 4 {before_string}') + return before_string + +@dataclass +class PDBAnalyzer: + pdb: Path # 使用Path对象代替字符串 + ChA: str + ChB: str + dist: str + + + # 主运行函数 + def run(self): + cmd.delete('all') + cmd.load(str(self.pdb)) # 将Path对象转为字符串以兼容PyMOL命令 + pdb_name = self.pdb.stem # 获取不包括扩展名的文件名 + + # ...(省略了其他设置和选择) + + # 调用封装好的函数 + self.HBond() + self.Pi_Pi_interaction() + self.Cation_Pi_interaction() + self.Salt_bridge_positive() + self.Salt_bridge_negative() + + # ...(省略了其他设置和保存) + + cmd.save(f'{pdb_name}_interaction.pse') + +class + +# 主程序 +def main(): + pdb_file = Path(sys.argv[1]) # 使用Path对象 + CHA = sys.argv[2] + CHB = sys.argv[3] + dist = sys.argv[4] + cmd.set('bg_rgb', 'white') + + analyzer = PDBAnalyzer(pdb_file, CHA, CHB, dist) + analyzer.run() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/script/pymol_color_plugin.py b/script/pymol_color_plugin.py new file mode 100644 index 0000000..27d0cff --- /dev/null +++ b/script/pymol_color_plugin.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :pymol_color_plugin.py +@Description: :pymol配色方案 +@Date :2023/6/1 10:52:25 +@Author :hotwa +@version :1.0 +''' + + +from pymol import cmd +from pathlib import Path + +class colorp(): + color1 = (('color1', '[186,182,217]'), 'purple') + color2 = (('color2', '[233,195,153]'), 'yellow') + color3 = (('color3', '[43,113,216]'), 'blue-N') + color4 = (('color4', '[206,155,198]'), 'purple') + color5 = (('color5', '[251,187,62]'), 'orange') + color6 = (('color6', '[245,157,158]'), 'red') + color7 = (('color7', '[133,188,135]'), 'green') + color8 = (('color8', '[30,230,30]'),'green-CL') # Cl卤素配色 + color9 = (('color9', '[141,215,247]'),'blue-C') # C配色 + color10 = (('color10', '[0,132,55]'),'green-F') # F卤素配色 + grey1 = ('grey1','[224,224,224]') + colors = (color1, color2, color3, color4, color5, color6, color7, color8, color9, color10) + + def __init__(self, path, id): + self.id = id.lower() + cmd.reinitialize() + p = Path(path) + if p.is_dir(): + file = p.joinpath(f"{id}.pdb") + else: + raise ValueError('path params error') + cmd.load(file,id) + + def pretty(self): + cmd.remove('solvent metals') # 移除金属离子和溶剂 + cmd.remove("resn SO4 or resn PO4 or resn NA,CL,GOL") + cmd.remove('not polymer') # 删除所有不是多聚物(通常指蛋白质或核酸链)的原子或分子。 + cmd.pretty(selection=self.id) + + @staticmethod + def defcolor(): + # 定义常用配色 + color_gennerate = map(lambda x:x[0],colorp.colors) + list(map(lambda x:cmd.set_color(*x),color_gennerate)) + # 定义灰度配色 + cmd.set_color(*colorp.grey1) + + @staticmethod + def grey(obj='sele',opt=True): + colorp.defcolor() + if opt: method.optimisation() # 优化 + # 对某个对象进行灰化 + cmd.color('grey1',f'{obj}') + + @staticmethod + def color_mole(obj='hetatm'): + # color blue,sele in name c* + colorp.defcolor() + cmd.color('color9',f'{obj} in name c*') + cmd.color('color6',f'{obj} in name o*') + cmd.color('color5',f'{obj} in name s*') + cmd.color('color3',f'{obj} in name n*') + cmd.color('color8',f'{obj} in name cl*') + cmd.color('color10',f'{obj} in name f*') + + @staticmethod + def pretty(obj = None): + if not obj: obj = cmd.get_names()[0] + cmd.remove('solvent metals') # 移除金属离子和溶剂 + cmd.pretty(selection=obj) + + +class font(): + font_size = 28 # 单位就是正常的px。你也可以用负值,则单位是Å + +class mole(): + stick_radius=0.10 + +class atom(): + size = 0.28 + +class method(): + + @staticmethod + def optimisation(grey=True, selection='sele'): + colorp.defcolor() + if grey: cmd.set('cartoon_color','grey1', selection=selection) + cmd.set('stick_transparency','0.1', selection=selection) + cmd.set("ray_shadows","off", selection=selection) + cmd.set('cartoon_highlight_color', selection=selection) + cmd.set('cartoon_fancy_helices', selection=selection) + cmd.set('cartoon_transparency','0.5', selection=selection) + + @staticmethod + def remove(): + cmd.remove('resn SO4') # SO4 remove + cmd.remove('solvent metals') # 移除金属离子和溶剂 + +def get_resn(name:str = 'all'): + resn_list = [] + cmd.iterate(selection=name, expression=lambda atom: resn_list.append(atom.resn)) + return tuple(set(resn_list)) + + +def protgrey(): + method.remove() + method.optimisation() + +# 观察小分子 +# https://chatmol.org/qa/ +# protgrey() +# colorp.color_mole('sele') +# add label +# label sele, "your label" +# black background +# set bg_rgb, [0, 0, 0] +# define label +# cmd.get_position("sele") +# set label_position, [x,y,z] +# cmd.h_add(pymol_obj) # add all hydrogens in this molecular +# cmd.remove(f"{pymol_obj} & hydro & not nbr. (don.|acc.)") # remove no-polar-hydrogen +# cmd.remove(f"sele & hydro") # 移除所有的H +# cmd.set('cartoon_color','color8', 'obj') +# cmd.hide("everything", "chain A and resn UHT and alt B") # 小分子隐藏B构象 +# cmd.label("chain A and resn UHT and alt A", "resn") # 添加标签 +#cmd.select('sele', f'resn {resn} and chain {chain}') +#cmd.center('sele') +#cmd.orient('sele') +#obj_name = 'around_and_self' +# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance}') # 不扩展残基 +# cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance}') # 扩展残基 +# cmd.create(obj_name, f'(resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子 +#cmd.create(obj_name, f'byres (resn {resn} and chain {chain}) around {distance} or (resn {resn} and chain {chain})') # 选择resn的对象和resn对象周围3A的原子扩展至残基的原子 +#cmd.hide('everything', 'all') +#cmd.show('lines', obj_name) +# cmd.remove('not polymer') #除非聚合物部分 +cmd.extend('colorp',colorp) +cmd.extend('method',method) \ No newline at end of file diff --git a/script/pymol_plugin.py b/script/pymol_plugin.py new file mode 100644 index 0000000..d8da346 --- /dev/null +++ b/script/pymol_plugin.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :auto.py +@Description: :自动处理脚本 +@Date :2021/09/02 15:19:15 +@Author :hotwa +@version :1.0 +https://zhuanlan.zhihu.com/p/121215784 # PyMOL 选择器的语法参考 +https://blog.csdn.net/u011450367/article/details/51815130 +resource: https://www.cnblogs.com/wq242424/p/13073222.html +http://blog.sina.com.cn/s/blog_7188922f0100wbz1.html +https://www.jianshu.com/p/3396e94315cb +https://blog.csdn.net/dengximo9047/article/details/101221495 # 疏水表面 +http://www.mdbbs.org/thread-4064-1-1.html # 用来寻找配体口袋的残基 +''' + +from functools import partial +from pymol import cmd +from pathlib import Path +import pandas as pd +from json import dumps +import numpy as np +from biotools import Bdp +from loguru import logger +# from types import FunctionType,CodeType + +logger.add('pymol_plugin_{time}.log') + +# select ligand, resn x +# cmd.select('ligand','byres 5UEH within 6 of resn GOL resn 85P') +def autoshow(i,path,distance = 6,ionshow = False): + """autoshow 自动展示所有配体6A以内的lines,方便查找共价键 + + [extended_summary] # cmd.create('pocket',f'byres {i} within {distance} of {rawstring}') # 创建一个在当前pdb对象中配体残基周围距离为6A的口袋对象 + + Arguments: + i {[string]} -- [pdbid 例如 5EA9] + + Keyword Arguments: + path {[string]} -- [pdb文件存放的目录,目前支持后缀为.pdb的文件,也可以在全局变量中设置好文件目录] (default: {path}) + distance {[int]} -- [显示周围原子的距离参数] (default: {6}) + ionshow {[bool]} -- [离子和有机小分子周围共价结合观察使用,默认不展示] (default: {False}) + + Returns: + [list] -- [返回ligid标识符,除去了部分离子] + """ + cmd.reinitialize() + p = Path(path) + file = p.joinpath(f"{i}.pdb") + cmd.load(file,i) + cmd.remove('solvent metals') # 移除金属离子和溶剂 + mole = moleculeidentity(f"{i}",path) + rawstring = 'resn ' + ' resn '.join(mole.ligIdNoion) + print(f'{rawstring} around {distance}') + cmd.select('ligand',f'{rawstring}') + cmd.select('ligand_around',f'{rawstring} around {distance}') # 选择一个在当前pdb对象中配体残基周围距离为6A的口袋对象 + if ionshow: # 是否显示所有记录小分子HET条记录中的信息,对于离子与有机物显示相关共价键有效 + cmd.show('lines','ligand_part') # 显示所有HET侧链 + cmd.create('ligand_part',f'{rawstring} expand {distance}') # 单独显示小分子扩展6A周围的lines对象 + cmd.create('organ',f'organic expand {distance}') + cmd.show('lines','organ') + return mole.ligId + +def bejson(d:dict(help='need to beauty json')) -> dict: + return dumps(d,indent=4,ensure_ascii=False) + +class covalentidentity(): + """covalentidentity 使用pymol进行识别共价键,在输出有机小分子6A以内的原子时可能出现分子的构象发生改变导致共价键距离计算有问题,还可能是与金属离子形成共价键 + """ + # __slots__ = [] + + def __init__(self,pdbfilename,pdbid,path): + """__init__ [summary] + + [extended_summary] + + Arguments: + pdbfilename {[string]} -- pdb文件名称 + pdbid {[string]} -- 4 length id + path {[string or pathlib obj]} -- pdb文件路径 + """ + self.pdbfilename = pdbfilename + self.pdbid = pdbid + self.path = Path(path) + self.init() + + @staticmethod + def cleanpdb(before_string): + """ + 处理传入参数pdb 例如: pdb1b12.ent 1b12.pdb 转化成 1b12 + """ + # print(f'try to format {before_string}') + if '.ent' in before_string: + before_string = before_string[3:-4].lower() + elif '.pdb' in before_string: + before_string = before_string[:4].lower() + elif len(before_string) == 4: + before_string = before_string.lower() + else: + if len(before_string) != 4: + raise ValueError(f'length out of 4 {before_string}') + return before_string + + @classmethod + def export_organ(cls,pdbfile,path,distance = 6,remove_water_metals = True): + # 输出小分子结合部分的共价结合信息对象,pdb格式 + file = path.joinpath(pdbfile) + _pdbid = covalentidentity.cleanpdb(pdbfile) + savepath = path.joinpath(f'{_pdbid}_organ_around_{distance}_lines.pdb') + cmd.reinitialize() + cmd.load(filename = file,object = pdbfile) + if remove_water_metals: cmd.remove('solvent metals') # 移除金属离子和溶剂 + cmd.create('organ',f'organic expand {distance}') + cmd.show('lines','organ') + cmd.save(filename=savepath,selection='organ') + cls.connect_info_pdb_path = savepath + return savepath + + def init(self): + self.__create_dataframe() + + def __read_organ_pdb(self): + exportfile = covalentidentity.export_organ(self.pdbfilename,path = Path(self.path)) + with open(file=exportfile,mode = 'r') as f: + read_list = [i.replace('\n', '') for i in f.readlines()] + self.read_connect = [i for i in read_list if i[:6].strip()=='CONECT'] + self.read_hetatm = [i for i in read_list if i[:6].strip()=='HETATM'] + self.read_atom = [i for i in read_list if i[:6].strip()=='ATOM'] + + def __create_dataframe(self): + self.__read_organ_pdb() # 读取初始化数据 + connect_infos = [] + for i in self.read_connect: #清洗数据 + if len(i.split()) == 3: # 策略:查找只有两个原子键的连接信息 + connect_infos.append(i) + self.df = self.transformtodataframe(self.read_hetatm).append(self.transformtodataframe(self.read_atom)) + target_search_connect = [] + for i in connect_infos: + l = [i for i in i.split() if i != 'CONECT'] + target_search_connect.append(l) + self.target_search_connect = target_search_connect # 两个连接信息的列表,不一定都是共价键 + + def __search_convenlent(self,distance_accuracy=1): + true_covlent = [] # 共价键连接信息记录 有几个列表就有几个共价键链接信息 + infos = [] + locateinfos = {} + for item in self.target_search_connect: # 对两个连接原子进行判断 + df = self.df + # molecule chain search + res0 = df[df['AtomNum']==item[0]] + res1 = df[df['AtomNum']==item[1]] + if (res0['Identifier'].all() == res1['Identifier'].all()): + continue + else: + true_covlent.append(item) + # 定位信息那个是小分子的行信息和那个是蛋白行的信息 + if res0['Identifier'].all() == 'ATOM': locateinfos['ATOM'] = res0 + if res0['Identifier'].all() == 'HETATM': locateinfos['HETATM'] = res0 + if res1['Identifier'].all() == 'ATOM': locateinfos['ATOM'] = res1 + if res1['Identifier'].all() == 'HETATM': locateinfos['HETATM'] = res1 + assert (not res0.empty and not res1.empty),( + 'this code occur a bug' + 'try to connet to author fix it' + 'maybe is pymol output connect infos error' + 'never be happen' + ) + partinfos = self.__fill_infos(i=item,pro=locateinfos['ATOM'],mole=locateinfos['HETATM'],distance_accuracy=distance_accuracy) + infos.append(partinfos) + return infos + + def convenlent_infos(self,distance_accuracy=1): + return self.__search_convenlent(distance_accuracy=distance_accuracy) + + @property + def cov_infos(self): + d = self.__search_convenlent() + return d + + @staticmethod + def __fill_infos(i,pro,mole,distance_accuracy=1): + distance = np.sqrt(np.square(float(pro['X'].all())-float(mole['X'].all())) + np.square(float(pro['Y'].all())-float(mole['Y'].all())) + np.square(float(pro['Z'].all())-float(mole['Z'].all()))) + infostmplate = { + 'ConnectInfos': i, + 'ResidueName':pro['ResidueName'].all(), + 'ResidueSequence':pro['ResidueSequence'].all(), + 'ChainIndentifier':pro['ChainIndentifier'].all(), + 'CovenlentAtom':{ + 'protein': { + 'AtomName':pro['AtomName'].all(), + 'ElementSymbol':pro['ElementSymbol'].all(), + }, + 'molecule':{ + 'AtomName':mole['AtomName'].all(), + 'ElementSymbol':mole['ElementSymbol'].all(), + }, + }, + 'CovenlentDistance':round(distance,distance_accuracy), + 'LigandName':mole['ResidueName'].all(), + } + return infostmplate + + @staticmethod + def transformtodataframe(readlinecontent=None,first_label = 'ATOM',path = False): + path_info = (Path(path).name.__str__(),Path(path).parent.__str__()) if path else ('unknown file name','unknown path') + if path: readlinecontent = Bdp.read_line_list(first_column=first_label,path=path) + if readlinecontent == None: raise ValueError('readlinecontent need') + if first_label == ('ATOM' or 'HETATM'): # ! 注意python中的懒惰运算,及运算符特性 + # 将pdb每行读取的信息转化为dataframe 针对ATOM HETAM开头的行适用 + Identifier,AtomNum,AtomName,AtomLocationIndicator,ResidueName,ChainIndentifier,ResidueSequence,InsertionsResidue,X,Y,Z,Occupancy,Tfactor,SegmentIdentifier,ElementSymbol = [],[],[],[],[],[],[],[],[],[],[],[],[],[],[] + for i in readlinecontent: + Identifier.append(i[:7].strip()) + AtomNum.append(i[6:11].strip()) + AtomName.append(i[12:16].strip()) + AtomLocationIndicator.append(i[16].strip()) + ResidueName.append(i[17:20].strip()) + ChainIndentifier.append(i[21].strip()) + ResidueSequence.append(i[22:26].strip()) + InsertionsResidue.append(i[26].strip()) + X.append(i[30:38].strip()) + Y.append(i[38:46].strip()) + Z.append(i[46:54].strip()) + Occupancy.append(i[54:60].strip()) + Tfactor.append(i[60:66].strip()) + SegmentIdentifier.append(i[72:76].strip()) + ElementSymbol.append(i[76:78].strip()) + df = pd.DataFrame(data={ + 'Identifier':Identifier, + 'AtomNum':AtomNum, + 'AtomName':AtomName, + 'AtomLocationIndicator':AtomLocationIndicator, + 'ResidueName':ResidueName, + 'ChainIndentifier':ChainIndentifier, + 'ResidueSequence':ResidueSequence, + 'InsertionsResidue':InsertionsResidue, + 'X':X, + 'Y':Y, + 'Z':Z, + 'Occupancy':Occupancy, + 'Tfactor':Tfactor, + 'SegmentIdentifier':SegmentIdentifier, + 'ElementSymbol':ElementSymbol + }) + elif first_label == 'LINK': + # https://www.wwpdb.org/documentation/file-format-content/format33/sect6.html + Record_name,Atom_name1,Alternate_location_indicator1,Residue_name1,Chain_identifier1,Residue_sequence_number1,Insertion_code1,Atom_name2,Alternate_location_indicator2,Residue_name2,Chain_identifier2,Residue_sequence_number2,Insertion_code2,Symmetry_operator_atom_1,Symmetry_operator_atom_2,Link_distance = [],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[] + for i in readlinecontent: + if len(i) != 78: + logger.exception( + f"[LINK label length error] not match the LINK line length\n" + f"[input string]:{i}\n" + f"check your input file:{path_info[0]} from {path_info[1]}\n" + "if this line not have link distance, try to caculate by covalent.bypymol method") + pass + else: + Record_name.append(i[:6].strip()) + Atom_name1.append(i[12:16].strip()) + Alternate_location_indicator1.append(i[16].strip()) + Residue_name1.append(i[17:20].strip()) + Chain_identifier1.append(i[21].strip()) + Residue_sequence_number1.append(i[22:26].strip()) + Insertion_code1.append(i[26].strip()) + Atom_name2.append(i[42:46].strip()) + Alternate_location_indicator2.append(i[46].strip()) + Residue_name2.append(i[47:50].strip()) + Chain_identifier2.append(i[51].strip()) + Residue_sequence_number2.append(i[52:56].strip()) + Insertion_code2.append(i[56].strip()) + Symmetry_operator_atom_1.append(i[59:65].strip()) + Symmetry_operator_atom_2.append(i[66:72].strip()) + Link_distance.append(i[73:78].strip()) + df = pd.DataFrame(data={ + 'Record_name':Record_name, + 'Atom_name1':Atom_name1, + 'Alternate_location_indicator1':Alternate_location_indicator1, + 'Residue_name1':pd.Series(Residue_name1,dtype='object'), + 'Chain_identifier1':Chain_identifier1, + 'Residue_sequence_number1':Residue_sequence_number1, + 'Insertion_code1':Insertion_code1, + 'Atom_name2':Atom_name2, + 'Alternate_location_indicator2':Alternate_location_indicator2, + 'Residue_name2':pd.Series(Residue_name2,dtype='object'), + 'Chain_identifier2':Chain_identifier2, + 'Residue_sequence_number2':Residue_sequence_number2, + 'Insertion_code2':Insertion_code2, + 'Symmetry_operator_atom_1':Symmetry_operator_atom_1, + 'Symmetry_operator_atom_2':Symmetry_operator_atom_2, + 'Link_distance':pd.Series(Link_distance,dtype='float32'), + }) + else: + assert False,( + 'No return', + 'a error occurred' + ) + return df + +class color_plan(): + color1 = (('color1', '[186,182,217]'), 'purple') + color2 = (('color2', '[233,195,153]'), 'yellow') + color3 = (('color3', '[141,215,247]'), 'blue') + color4 = (('color4', '[206,155,198]'), 'purple') + color5 = (('color5', '[251,187,62]'), 'orange') + color6 = (('color6', '[245,157,158]'), 'red') + color7 = (('color7', '[133,188,135]'), 'green') + colors = (color1, color2, color3, color4, color5, color6, color7) + + @staticmethod + def defcolor(): + color_gennerate = map(lambda x:x[0],color_plan.colors) + list(map(lambda x:cmd.set_color(*x),color_gennerate)) + +# error class +class PathError(BaseException): + def __init__(self,arg): + self.arg = arg + +class moleculeidentity(): + """moleculeidentity [summary] + + [识别pdb蛋白文件中的小分子标识符] + """ + + def __init__(self,pdbfile,path): + self.pathstr = path + self.path = Path(path) + self.pdbfile = pdbfile + self._init() + + def _init(self): + if not self.path.exists(): + raise PathError('path not exist') + if not isinstance(self.pdbfile,str): + raise TypeError('Pdbid must be a string!') + if ('.pdb' not in self.pdbfile) and (len(self.pdbfile) != 4): + raise TypeError('Pdbid must be 4 letters') + if ('.pdb' or '.PDB') in self.pdbfile: + raise TypeError(f'{self.pdbfile} Remove ".pdb" from input arg, add automatically') + file_list = list(self.path.glob('*.pdb')) + self.path_parent = file_list[0].parent + self.pdbfilelist = [i.name[:4].upper() for i in file_list] + + def __parse_pdb_ligid(self,ion=True): + if self.pdbfile.upper() not in self.pdbfilelist: + raise FileNotFoundError(f'not found {self.pdbfile} in {self.path}') + infos_line = [] + ligId = [] + for i in self.__generate_pdb_lines(): + if self.check_line_header(i) == 'HET': + infos_line.append(i) + ligId = [i.split()[1] for i in infos_line] + ligId = list(set(ligId)) + if not ion: ligId = [i for i in ligId if len(i) == 3 ] # remove ion from list + return ligId + + @property + def ligId(self): + # return ligId include ion + return self.__parse_pdb_ligid() + + @property + def ligIdNoion(self): + return self.__parse_pdb_ligid(ion=False) + + @staticmethod + def check_line_header(line_text): + return line_text[0:6].strip() + + def __generate_pdb_lines(self): + openpdbfile = self.pdbfile + '.pdb' if '.pdb' not in self.pdbfile else self.pdbfile + for row in open(self.path_parent.joinpath(openpdbfile),'r+'): + yield row.strip() + +class covalent(object): + + def __init__(self,pdbfilename,path,pdbid): + self.pdbfilename = pdbfilename + self.pdbid = covalentidentity.cleanpdb(pdbid) + self.path_parent = Path(path) + self.path = self.path_parent.joinpath(pdbfilename) + self._init() + + def _init(self): + self.link_df = covalentidentity.transformtodataframe(path = self.path,first_label='LINK') + + @classmethod + def bypdb(cls,pdbfilename,path,pdbid): + return cls(pdbfilename=pdbfilename,path=path,pdbid=pdbid) + + @staticmethod + def bypymol(pdbfilename,pdbid,path): + return covalentidentity(pdbfilename,pdbid,path) + + @property + def mole_id(self)->list: + _instance=Bdp(path=self.path,sid=self.pdbid) + return [i for i in _instance.mole_id if len(i)==3 and i != 'HOH' and i != 'SO4' and i != 'PO4'] # ! 在这里手动剔除掉硫酸根和磷酸根 + + def modresdata(self)->list: + """modresdata 标准残疾的修饰信息 + """ + modres_lst = Bdp.read_line_list(first_column='MODRES',path=self.path) + lst = [i[12:15] for i in modres_lst] # 切片出修饰残基的小分子 + return list(set(lst)) + + def get_covalent(self): + # 从原始的pdb文件中可获取共价键信息 # ? 对其中的信息正确率为100% 来自原始的pdb文件中的信息 + res1 = self.link_df.query(f'Residue_name1 in {self.remove_modres_moleid}') + res2 = self.link_df.query(f'Residue_name2 in {self.remove_modres_moleid}') + df = pd.merge(res1, res2,how='outer') + df.astype('object') + # remove ions link with ligand + df = df[df['Residue_name1'].str.len()==3] + df = df[df['Residue_name2'].str.len()==3] + return df + + + @property + def remove_modres_moleid(self)->list: + # 清除link信息中的modres,即修饰残基的ligid + # lst = Bdp.read_line_list(first_column='LINK',path=self.path) + # lst = list(set(map(lambda s:s[17:20],lst))) + clean_func = partial(self.func_dynamic_data_template,dynamic_data=self.ModifiedResidues) + res = clean_func(origin_data=self.mole_id) + return list(filter(lambda x:len(x.strip())==3 and x != 'HOH',res)) + + @property + def ModifiedResidues(self): + ModifiedResiduesId = [] + for i in self.mole_id: + if i in self.modresdata(): + ModifiedResiduesId.append(i) + # 从SEQRES序列中获取MODRES的修饰残基的小分子,仅仅从SEQRES有时获取不全面 + seqres_lst = [i[19:].strip() for i in Bdp.read_line_list(first_column='SEQRES',path = self.path)] + seqres_lst_string = '\r\n'.join(seqres_lst) + seqres_lst_set = set(list(i for line in seqres_lst_string.splitlines() for i in line.split())) + seqres_list = seqres_lst_set.intersection(set(self.mole_id)) + ModifiedResiduesId.extend(seqres_list) + return list(set(ModifiedResiduesId)) + + @staticmethod + def func_dynamic_data_template(origin_data:list,dynamic_data:list)->list: + # 取origin_data和dynamic_data补集 + origin_data_set = set(origin_data) + dynamic_data_set = set(dynamic_data) + _data = origin_data_set.difference(dynamic_data_set) + return _data + + + + +cmd.extend('autoshow', autoshow) # pymol中自定义autoshow函数,请使用cmd.autoshow()命令 + +#! 下面为具体使用查找共价键的代码 +# def read_link_lines(item): +# ins1 = covalent.bypdb(pdbfilename = item,path='M:\program\\autotask\search_res1_pdb',pdbid = item[3:7]) +# res = ins1.get_covalent() +# res['Pdb_id'] = [item[3:7] for _ in range(len(res))] +# return res + +def return_mole(series): + # LINK 字段信息进行分析解读 + residue = [i.upper() for i in 'Gly,Ala,Val,Leu,Ile,Pro,Phe,Tyr,Trp,Ser,Thr,Cys,Met,Asn,Gln,Asp,Glu,Lys,Arg,His'.split(',')] + if str(series[1]['Residue_name1']) in residue: + if str(series[1]['Residue_name2']) in residue: + # 除去两个氨基酸连接的情况 + return pd.DataFrame() + else: + _data = series[1]['Residue_name2'],series[1]['Link_distance'],series[1]['Pdb_id'],series[1]['Chain_identifier2'] + _df = pd.DataFrame(data={ + 'LigandName': pd.Series([_data[0]],dtype='string'), + 'Link_distance': pd.Series([_data[1]],dtype='float32'), + 'Pdb_id': pd.Series([_data[2]],dtype='string'), + 'Chain_identifier': pd.Series([_data[3]],dtype='string') + }) + return _df + elif str(series[1]['Residue_name2']) in residue: + if str(series[1]['Residue_name1']) in residue: + # 除去两个氨基酸连接的情况 + return pd.DataFrame() + else: + _data = series[1]['Residue_name1'],series[1]['Link_distance'],series[1]['Pdb_id'],series[1]['Chain_identifier1'] + _df = pd.DataFrame(data={ + 'LigandName': pd.Series([_data[0]],dtype='string'), + 'Link_distance': pd.Series([_data[1]],dtype='float32'), + 'Pdb_id': pd.Series([_data[2]],dtype='string'), + 'Chain_identifier': pd.Series([_data[3]],dtype='string') + }) + return _df + else: + logger.exception( + f'[MoleculeMatchError] Pdbid {series[1]["Pdb_id"]} occur a error\n' + f'出现了两个残基名称都不属于常见的20中\n{residue}\n' + f'source: {series[1]}' + ) + return pd.DataFrame() # 两个小分子连接的情况,去除 + +if __name__ == '__main__': + ... \ No newline at end of file diff --git a/script/pymolbase.py b/script/pymolbase.py new file mode 100644 index 0000000..3095c5d --- /dev/null +++ b/script/pymolbase.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :base.py +@Description: :PyMOLBase的抽象基类 +@Date :2023/09/01 16:54:02 +@Author :lyzeng +@Email :pylyzeng@gmail.com +@version :1.0 +''' + +from abc import ABC, abstractmethod + +class PyMOLBase(ABC): + + @abstractmethod + def clean_pdb_name(self, pdb_name): + """ + This abstract method is meant to clean or process the given PDB name. + Implement this method in the subclass. + """ + pass + diff --git a/script/pymolbond.py b/script/pymolbond.py new file mode 100644 index 0000000..dd0eff0 --- /dev/null +++ b/script/pymolbond.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +''' +@file :pymolbond.py +@Description: : +@Date :2023/09/01 16:57:02 +@Author :lyzeng +@Email :pylyzeng@gmail.com +@version :1.0 +''' + +class Bond(): + """ + This class contains static methods for identifying various types of molecular interactions. + """ + + @staticmethod + def HBond(ChA, ChB): + """Identifies hydrogen bonds between two chains.""" + name = 'Hbond' + cmd.distance(name, ChA, ChB, '3.5', '2') + return name + + @staticmethod + def Pi_Pi_interaction(ChA, ChB): + """Identifies pi-pi interactions between two chains.""" + name = 'PI_PI' + cmd.distance(name, ChA, ChB, '5.5', '6') + return name + + @staticmethod + def Cation_Pi_interaction(ChA, ChB): + """Identifies cation-pi interactions between two chains.""" + name = 'PI_Cation' + cmd.distance(name, ChA, ChB, '5.5', '7') + return name + + @staticmethod + def Salt_bridge_positive(ChA, ChB): + """Identifies salt bridges from positive residues in one chain to negative residues in another chain.""" + name = 'SBP' + selection_1 = f'{ChB} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))' + selection_2 = f'{ChA} and resn ASP+GLU and name OD*+OE*' + cmd.distance(name, selection_1, selection_2, '5.0', '0') + return name + + @staticmethod + def Salt_bridge_negative(ChA, ChB): + """Identifies salt bridges from negative residues in one chain to positive residues in another chain.""" + + name = 'SBN' + selection_1 = f'{ChA} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))' + selection_2 = f'{ChB} and resn ASP+GLU and name OD*+OE*' + cmd.distance(name, selection_1, selection_2, '5.0', '0') + return name + + @staticmethod + def Van_der_Waals(ChA, ChB): # not tested + """Identifies Van der Waals interactions between two chains.""" + name = 'VDW' + cmd.distance(name, ChA, ChB, '0.6', '0') + return name + + @staticmethod + def Hydrophobic_interaction(ChA, ChB): # not tested + """Identifies hydrophobic interactions between two chains.""" + name = 'Hydrophobic' + cmd.distance(name, ChA, ChB, '0.6', '0') + return name + + @staticmethod + def Electrostatic_interaction(ChA, ChB): # not tested + """Identifies electrostatic interactions between two chains.""" + name = 'Electrostatic' + cmd.distance(name, ChA, ChB, '0.6', '0') + return name + + @staticmethod + def Metal_coordination(ChA, ChB): # not tested + """Identifies metal coordination bonds between two chains.""" + name = 'MetalCoord' + cmd.distance(name, ChA, ChB, '0.2', '0') + return name + + @staticmethod + def Disulfide_bond(ChA, ChB): # not tested + """Identifies disulfide bonds between two chains.""" + name = 'Disulfide' + cmd.distance(name, ChA, ChB, '0.2', '0') + return name diff --git a/script/show_protein_protein_interaction.py b/script/show_protein_protein_interaction.py new file mode 100644 index 0000000..dd0cc2a --- /dev/null +++ b/script/show_protein_protein_interaction.py @@ -0,0 +1,92 @@ +from pymol import cmd +from pymol.cmd import util +import sys +from pymol_color_plugin import colorp + + +def HBond(ChA,ChB): + name = f'Hbond' + dist = cmd.distance(name, ChA,ChB, '3.5', '2') + return name + +def Pi_Pi_interaction(ChA,ChB): + name = f'PI_PI' + cmd.distance(name,ChA, ChB,'5.5','6') + return name + +def Cation_Pi_interaction(ChA,ChB): + name = f'PI_Cation' + cmd.distance(name, ChA, ChB, '5.5', '7') + return name + +def Salt_bridge_positive(ChA,ChB): + name = f'SBP' + + selection_1 = f'{ChB} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))' + selection_2 = f'{ChA} and resn ASP+GLU and name OD*+OE*' + cmd.distance(name,selection_1,selection_2,'5.0','0') + return name + +def Salt_bridge_negative(ChA,ChB): + name = f'SBN' + selection_1 = f'{ChA} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))' + selection_2 = f'{ChB} and resn ASP+GLU and name OD*+OE*' + cmd.distance(name,selection_1,selection_2,'5.0','0') + return name + +# def T_stacking(ChA, ChB): +# name = 'T_stacking' +# # 添加适当的距离和选择标准 +# cmd.distance(name, ChA, ChB, '5.5', '2') +# return name + +# def Hydrophobic_interaction(ChA, ChB): +# name = 'Hydrophobic' +# # 添加适当的距离和选择标准 +# cmd.distance(name, ChA, ChB, '7.0', '2') +# return name + +# def VDW_interaction(ChA, ChB): +# name = 'VDW' +# # 添加适当的距离和选择标准 +# cmd.distance(name, ChA, ChB, '4.0', '2') +# return name + +def show_label_name(obj:str): + cmd.label(f'name CA and {obj}', 'oneletter+resi') + cmd.set('label_bg_color', -7, '', 0) + cmd.set('label_size', 18, '', 0) + cmd.set('label_font_id', '7') + +def interact(obj:str='epitope', distance:int=5): + cmd.show('stick',f'{obj}') + cmd.set('stick_radius', '0.3', f'{obj}') + colorp.color_mole(obj) + #show_label_name(obj) + cmd.select('nearby',f'byres {obj} around {distance}') + cmd.show('stick','nearby') + cmd.set('stick_radius', '0.1', 'nearby') + cmd.set('cartoon_transparency', 0.7, selection='nearby') + Hbond = HBond(obj, 'nearby') + PI = Pi_Pi_interaction(obj, 'nearby') + C_PI = Cation_Pi_interaction(obj, 'nearby') + SBP = Salt_bridge_positive(obj, 'nearby') + SBN = Salt_bridge_negative(obj, 'nearby') + cmd.set('dash_color', 'yellow', Hbond) + cmd.set('dash_color', 'blue', PI) + cmd.set('dash_color', 'blue', C_PI) + cmd.set('dash_color', 'orange', SBP) + cmd.set('dash_color', 'orange', SBN) + cmd.set('dash_radius', '0.07') + cmd.set('dash_gap', '0.3') + #cmd.remove('e. h and neighbor(name C*)') + cmd.zoom('Hbond') + +def main(): + cmd.reinitialize() + cmd.fetch('5sws',type='pdb') + colorp.pretty() + cmd.select('epitope','chain C and resi 1-9') + cmd.zoom('nearby') + interact() + cmd.save(f'5sws_interaction.pse') \ No newline at end of file diff --git a/script/test.py b/script/test.py new file mode 100644 index 0000000..dbc2dbb --- /dev/null +++ b/script/test.py @@ -0,0 +1,7 @@ + + +if __name__ == '__main__': + from plugin.cmd_singleton import PyMOLSingleton + cmd = PyMOLSingleton + print( + help(cmd)) \ No newline at end of file diff --git a/show_protein_protein_interaction.py b/show_protein_protein_interaction.py new file mode 100644 index 0000000..6a5f4a4 --- /dev/null +++ b/show_protein_protein_interaction.py @@ -0,0 +1,198 @@ +from pymol import cmd +from pymol.cmd import util +import sys +import os +import math + +def pi_pi_angle(x1,x2,x3,y1,y2,y3): + import numpy as np + #print(x1,x2,x3,y1,y2,y3) + + B1, B2, B3 = [x1[0] - x2[0], x1[1] - x2[1], x1[2] - x2[2]] + C1, C2, C3 = [x1[0] - x3[0], x1[1] - x3[1], x1[2] - x3[2]] + #print([x1[0] - x2[0], x1[1] - x2[1], x1[2] - x2[2]]) + n1 = [B2 * C3 - C2 * B3, B3 * C1 - C3 * B1, B1 * C2 - C1 * B2] + D1, D2, D3 = [y1[0] - y2[0], y1[1] - y2[1], y1[2] - y2[2]] + E1, E2, E3 = [y1[0] - y3[0], y1[1] - y3[1], y1[2] - y3[2]] + n2 = [D2 * E3 - E2 * D3, D3 * E1 - E3 * D1, D1 * E2 - E1 * D2] + dot_product = np.dot(n1, n2) + magnitude1 = np.linalg.norm(n1) # ???????????????? + magnitude2 = np.linalg.norm(n2) + #print(dot_product,magnitude1,magnitude2,dot_product / (magnitude1 * magnitude2)) + + xx = math.acos(dot_product / (magnitude1 * magnitude2)) + degree = math.degrees(xx) + if degree > 90: + degree = 180 - degree + return round(degree,2) + + + +def pi_pi(aro_ring_info,cutoff=5.0): + aro_resn =['HIS','PHE','TRP','TYR'] + aro_resn_list = '+'.join(aro_resn) + chA_aro_res = f'inter_1 and resn {aro_resn_list}' + chB_aro_res = f'inter_2 and resn {aro_resn_list}' + chA_aro_res_list = list(set([f'resi {a.resi} and resn {a.resn}' for a in cmd.get_model(chA_aro_res).atom])) + chB_aro_res_list = list(set([f'resi {a.resi} and resn {a.resn}' for a in cmd.get_model(chB_aro_res).atom])) + for A in chA_aro_res_list: + for B in chB_aro_res_list: + A_type = A.split()[-1] + B_type = B.split()[-1] + #print(f'{A} and name {aro_ring_info[A_type]}') + x = cmd.centerofmass(f'inter_1 and {A} and name {aro_ring_info[A_type]}') + y = cmd.centerofmass(f'inter_2 and {B} and name {aro_ring_info[B_type]}') + dist = math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 + (x[2] - y[2]) ** 2) + if dist < cutoff: + cmd.show('sticks',f'inter_1 and {A}') + cmd.show('sticks', f'inter_2 and {B}') + cmd.label(f'name CA and inter_1 and {A}', 'oneletter+resi') + cmd.label(f'name CA and inter_2 and {B}', 'oneletter+resi') + x1 = cmd.get_coords(f"{A} and name {aro_ring_info[A_type].split('+')[0]}")[0] + x2 = cmd.get_coords(f"{A} and name {aro_ring_info[A_type].split('+')[1]}")[0] + x3 = cmd.get_coords(f"{A} and name {aro_ring_info[A_type].split('+')[2]}")[0] + y1 = cmd.get_coords(f"{B} and name {aro_ring_info[B_type].split('+')[0]}")[0] + y2 = cmd.get_coords(f"{B} and name {aro_ring_info[B_type].split('+')[1]}")[0] + y3 = cmd.get_coords(f"{B} and name {aro_ring_info[B_type].split('+')[2]}")[0] + print(A,B) + pi_angle = pi_pi_angle(x1, x2, x3, y1, y2, y3) + pi_name = f'pi_{A.split()[1]}_{B.split()[1]}_{str(pi_angle)}' + print(A, B,pi_name) + + + cmd.distance(pi_name, f"inter_1 and resi {A.split()[1]} and name {aro_ring_info[A_type]}", f'inter_2 and resi {B.split()[1]} and name {aro_ring_info[B_type]}', mode=4) + + +def Salt_bridge(ChA,ChB,cutoff=5.0): + selection_1 = f'{ChB} and ((resn LYS and name NZ) or (resn ARG and name NE+NH*))' + a_charge_res = set([f'chain {a.chain} and resi {a.resi} and resn {a.resn}' for a in cmd.get_model(f'inter_1 and resn LYS+ARG+ASP+GLU').atom]) + b_charge_res = set([f'chain {a.chain} and resi {a.resi} and resn {a.resn}' for a in cmd.get_model(f'inter_2 and resn LYS+ARG+ASP+GLU').atom]) + #print(a_charge_res) + p_info = {'LYS':'NZ','ARG':'NE+NH*'} + n_info = {'ASP':'OD*+OE*','GLU':'OD*+OE*'} + for a in a_charge_res: + a_resn = a.split()[-1] + for b in b_charge_res: + b_resn = b.split()[-1] + name = f'SB_{a.split()[4]}_{b.split()[4]}' + + if (a_resn in ['LYS','ARG']) and (b_resn in ['ASP','GLU']): + a_resn_atom = p_info[a_resn] + b_resn_atom = n_info[b_resn] + #print(a,b) + #print(a_resn_atom,b_resn_atom) + selection_1 = f'{a} and name {a_resn_atom}' + selection_2 = f'{b} and name {b_resn_atom}' + cal_saltbrige(selection_1, selection_2, name, a,b,cutoff) + + + elif (a_resn in ['ASP','GLU'] ) and (b_resn in ['LYS','ARG']): + a_resn_atom = n_info[a_resn] + b_resn_atom = p_info[b_resn] + + + selection_1 = f'{a} and name {a_resn_atom}' + selection_2 = f'{b} and name {b_resn_atom}' + cal_saltbrige(selection_1,selection_2,name, a,b,cutoff) + + + + + +def cal_saltbrige(selection1,selection2,name,a,b,cutoff): + x = cmd.centerofmass(selection1) + y = cmd.centerofmass(selection2) + dist = math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 + (x[2] - y[2]) ** 2) + + if dist < cutoff: + cmd.show('stick', a) + cmd.show('stick', b) + print(a,b) + cmd.label(f'name CA and {a}', 'oneletter+resi') + cmd.label(f'name CA and {b}', 'oneletter+resi') + + cmd.distance(name, selection1, selection2, mode=4) + return name + +def DA_Hbond(DA_type,select1,select2,cutoff='3.5',angle='150'): + hbond_pairs = [] + pairs = cmd.find_pairs(select1, select2, mode=0, cutoff=cutoff) + for pair in pairs: + + D_atom = f'index {pair[0][1]}' + A_atom = f'index {pair[1][1]}' + HD_atom = f'e. h and neighbor({D_atom})' + index_1 = [a.index for a in cmd.get_model(A_atom).atom] + index_2 = [a.index for a in cmd.get_model(HD_atom).atom] + index_3 = [a.index for a in cmd.get_model(D_atom).atom] + + for h in index_2: + h_angle = cmd.angle('angle', A_atom, f'index {h}', D_atom) + #print(type, pair, h_angle) + if h_angle > float(angle): + D_atom_name = [f'{a.resi}-{a.name}' for a in cmd.get_model(f'index {pair[0][1]} ').atom][0] + + A_atom_name = [f'{a.resi}-{a.name}' for a in cmd.get_model(f'index {pair[1][1]} ').atom][0] + #name = [f'HB-{a.resi}{a.resn}{a.chain}-{base_atom_name}-lig-{lig_atom_name}' for a in cmd.get_model(f'index {pair[0][1]} and polymer').atom][0] + name = f'{DA_type}_{D_atom_name}_{A_atom_name}' + cmd.show('stick', f'byres(index {pair[0][1]})') + cmd.show('stick', f'byres(index {pair[1][1]})') + cmd.label(f'name CA and byres(index {pair[0][1]})', 'oneletter+resi') + cmd.label(f'name CA and byres(index {pair[1][1]})', 'oneletter+resi') + + + cmd.distance(name, f'index {index_1[0]} ', f'index {h} ') + + break + return hbond_pairs +def run(pdb,ChA,ChB,dist): + cmd.delete('all') + cmd.load(pdb) + + pdb_name = pdb[:-4] + #cmd.h_add('all') + #cmd.remove('solvent and resn SO4, NA,CL,GOL') + cmd.remove('not polymer') + #util.cbc('all', first_color=7, quiet=1, legacy=0, _self=cmd) + cmd.select('inter_1',f'{pdb_name} and chain {ChA} and byres(chain {ChB}) around {dist}') + cmd.select('inter_2', f'{pdb_name} and chain {ChB} and byres(chain {ChA}) around {dist}') + util.cba('lightblue',f'chain {ChA}') + util.cba('gray90', f'chain {ChB}') + else_part = cmd.select('else',f'{pdb_name} and not (chain {ChA}+{ChB})') + if else_part > 0: + util.cba('lime', 'else') + + chA_D = f'inter_1 and e. o+n and (neighbor e. h)' + chB_A = f'inter_2 and e. o+n ' + DA_Hbond('HDA',chA_D,chB_A) + chB_D = f'inter_2 and e. o+n and (neighbor e. h)' + chA_A = f'inter_1 and e. o+n ' + DA_Hbond('HAD',chB_D,chA_A) + aro_ring_name = {'HIS':'CG+ND1+CE1+NE2+CD2', + 'TRP':'CH2+CZ3+CE3+CZ2+CE2+CD2', + 'TYR':'CG+CD1+CE1+CZ+CE2+CD2', + 'PHE':'CG+CD1+CE1+CZ+CE2+CD2'} + pi_pi(aro_ring_name) + Salt_bridge(ChA,ChB) + cmd.do('set cartoon_transparency, 0.4') + cmd.do('set dash_gap, 0.2') + cmd.do('set dash_round_ends, 0') + cmd.do('set label_size, 24') + cmd.do('set dash_color, blue,pi*') + cmd.do('set dash_color, violet,SB*') + cmd.do('set dash_radius, 0.1') + cmd.do('set stick_radius, 0.15') + cmd.delete('angle') + cmd.disable('angle') + cmd.set('bg_rgb', 'white') + cmd.save(f'{pdb_name}_interaction.pse') + + +def main(): + pdb_file = sys.argv[1] + CHA = sys.argv[2] + CHB = sys.argv[3] + dist = sys.argv[4] + + run(pdb_file,CHA,CHB,dist) +main() \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 0000000..8d05ead --- /dev/null +++ b/test.py @@ -0,0 +1,2 @@ +from plugin.cmd_singleton import cmd +