This commit is contained in:
2023-12-03 23:01:00 +08:00
parent 2aef1c9de6
commit bf03213828
13 changed files with 11691 additions and 0 deletions

18
.condarc Normal file
View File

@@ -0,0 +1,18 @@
# conda / micromamba client configuration used when building the images.
# Print the source channel URL of every package in listings.
show_channel_urls: true
# Channels consulted when resolving packages.
channels:
- conda-forge
- r
- defaults
- bioconda
# NOTE(review): the disabled entry below embeds a user name and password in the
# URL; keep it commented out and avoid committing live credentials.
# - https://levinthal:paradox@conda.graylab.jhu.edu
# Base URL used for each of the named channels (BFSU mirror).
custom_channels:
conda-forge: https://mirrors.bfsu.edu.cn/anaconda/cloud
msys2: https://mirrors.bfsu.edu.cn/anaconda/cloud
bioconda: https://mirrors.bfsu.edu.cn/anaconda/cloud
menpo: https://mirrors.bfsu.edu.cn/anaconda/cloud
pytorch: https://mirrors.bfsu.edu.cn/anaconda/cloud
simpleitk: https://mirrors.bfsu.edu.cn/anaconda/cloud
# URLs that the special "defaults" channel expands to.
default_channels:
- https://mirrors.bfsu.edu.cn/anaconda/pkgs/main
- https://mirrors.bfsu.edu.cn/anaconda/pkgs/r
- https://mirrors.bfsu.edu.cn/anaconda/pkgs/msys2

22
.gitignore vendored Normal file
View File

@@ -0,0 +1,22 @@
pyrosetta-2023.31+release.1799523-py311_0.tar.bz2
*.sif
test/*
foldx/*
*.log
EvoEF2-master/
bbDepRotLib.bin
Agreement.txt
rotabase.txt
Scwrl4
Scwrl4.ini
yasaraPlugin.zip
Readme.txt
QuickStart.txt
foldx_20231231
1jpz*.pdb
abcd_IN.pdb
*.fxout
output.pdb
individual_list.txt
mutation_clean_mole.py
4i24*

57
Dockerfile Normal file
View File

@@ -0,0 +1,57 @@
FROM ubuntu:22.04
LABEL maintainer="lingyu zeng <pylyzeng@gmail.com>"
# Run every RUN step under bash with pipefail. The default /bin/sh on Ubuntu is
# dash, whose echo has no -e option: it prints a literal "-e " in front of the
# Scwrl4 licence answer and in front of the first .condarc key. pipefail makes
# the build stop when the micromamba download fails instead of only checking
# tar's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
WORKDIR /root
# COPY ../../share_data/opus_mut.zip ./
# Test inputs, third-party tool archives/installers, the CLI script and the
# local PyRosetta conda package.
COPY 4i24.pdb test.list _foldxLinux64.tar_.gz EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux mutation.py pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 noarch/repodata.json ./
ENV PATH="/root/bin:/root/micromamba/bin:${PATH}"
ENV CONDA_PREFIX="/root/micromamba/envs/pyrosetta"
ENV TZ="Asia/Shanghai"
ENV DEBIAN_FRONTEND="noninteractive"
# Install OS packages, build EvoEF2, unpack FoldX, install Scwrl4 and create the
# "pyrosetta" micromamba environment in one layer, so the archives removed at
# the end never persist in the image.
# unzip is listed explicitly: the build calls it, and it was previously only
# pulled in as a recommended package of zip.
# NOTE(review): the generated .condarc embeds channel credentials in the image.
RUN apt-get update && \
    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
    apt-get install tzdata -y && \
    apt-get install git zip unzip wget bzip2 libgl1-mesa-glx g++ -y && \
    unzip EvoEF2-master.zip && \
    chmod +x EvoEF2-master/build.sh && \
    cd EvoEF2-master && \
    ./build.sh && \
    cd .. && \
    tar zxvf _foldxLinux64.tar_.gz && \
    chmod +x install_scwrl4.0.2_64bit_2020_linux && \
    echo -e "Y\nLicense Holder Name" | ./install_scwrl4.0.2_64bit_2020_linux ./ && \
    # Install Micromamba
    wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
    ./bin/micromamba shell init --shell bash --root-prefix=~/micromamba && \
    echo -e "show_channel_urls: true\n\
channels:\n\
  - conda-forge\n\
  - r\n\
  - defaults\n\
  - bioconda\n\
  - https://levinthal:paradox@conda.graylab.jhu.edu\n\
custom_channels:\n\
  conda-forge: https://mirrors.bfsu.edu.cn/anaconda/cloud\n\
  msys2: https://mirrors.bfsu.edu.cn/anaconda/cloud\n\
  bioconda: https://mirrors.bfsu.edu.cn/anaconda/cloud\n\
  menpo: https://mirrors.bfsu.edu.cn/anaconda/cloud\n\
  pytorch: https://mirrors.bfsu.edu.cn/anaconda/cloud\n\
  simpleitk: https://mirrors.bfsu.edu.cn/anaconda/cloud\n\
default_channels:\n\
  - https://mirrors.bfsu.edu.cn/anaconda/pkgs/main\n\
  - https://mirrors.bfsu.edu.cn/anaconda/pkgs/r\n\
  - https://mirrors.bfsu.edu.cn/anaconda/pkgs/msys2" >> ~/.condarc && \
    mkdir -p /root/noarch && \
    mv repodata.json /root/noarch && \
    mv pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 /root/noarch && \
    ./bin/micromamba create -n pyrosetta -c conda-forge -c bioconda -c defaults python=3.11 click loguru biopython pymol-open-source pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 -y && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux _foldxLinux64.tar_.gz /root/noarch
# /work is the directory users bind-mount their PDB and mutation list into.
WORKDIR /work
VOLUME ["/work"]
# mutation.py is the container's command; with no arguments it prints its help.
ENTRYPOINT ["/root/micromamba/envs/pyrosetta/bin/python", "/root/mutation.py"]
CMD ["--help"]

57
Dockerfile_developer Normal file
View File

@@ -0,0 +1,57 @@
# Use Ubuntu 22.04 as base image
FROM ubuntu:22.04
LABEL maintainer="lingyu zeng <pylyzeng@gmail.com>"
ENV TZ=Asia/Shanghai
ENV DEBIAN_FRONTEND=noninteractive
# Run RUN steps under bash with pipefail: dash (the default /bin/sh) has no
# `echo -e`, which would put a literal "-e " in front of the Scwrl4 licence
# answer, and without pipefail a failed micromamba download goes unnoticed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# $(curl https://ipapi.co/timezone)
# Configure timezone and install necessary packages
# (runs as root, so no sudo is needed; unzip is listed explicitly because the
# build step below calls it)
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    apt-get update && \
    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
    apt-get install -y tzdata && \
    apt-get install -y git zip unzip curl wget bzip2 libgl1-mesa-glx g++ sudo software-properties-common && \
    adduser --disabled-password --gecos "" developer && \
    echo "developer ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/developer && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Set environment variables
ENV PATH="/home/developer/bin:/home/developer/micromamba/bin:${PATH}"
ENV CONDA_PREFIX="/home/developer/micromamba/envs/pyrosetta"
# Switch to user developer
USER developer
# Set working directory
WORKDIR /home/developer
# Copy necessary files to the working directory (.condarc was listed twice)
COPY --chown=developer .condarc mutation.py 4i24.pdb test.list _foldxLinux64.tar_.gz EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 noarch/repodata.json /home/developer/
# Build EvoEF2, unpack FoldX, install Scwrl4 and create the micromamba env.
# ~/work is created by this user, so chmod needs no sudo; it is left
# world-writable as a workaround for bind-mount permission errors (see below).
RUN unzip EvoEF2-master.zip && \
    chmod +x EvoEF2-master/build.sh && \
    cd EvoEF2-master && \
    ./build.sh && \
    cd .. && \
    tar zxvf _foldxLinux64.tar_.gz && \
    chmod +x install_scwrl4.0.2_64bit_2020_linux && \
    echo -e "Y\nLicense Holder Name" | ./install_scwrl4.0.2_64bit_2020_linux ./ && \
    # Install Micromamba
    wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
    ./bin/micromamba shell init --shell bash --root-prefix=~/micromamba && \
    mkdir -p ~/noarch && mkdir ~/work && chmod -R 777 ~/work && \
    mv repodata.json ~/noarch && \
    mv pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 ~/noarch && \
    ./bin/micromamba create -n pyrosetta -c conda-forge -c bioconda -c defaults python=3.11 click loguru biopython pymol-open-source pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 -y && \
    rm -rf EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux _foldxLinux64.tar_.gz ~/noarch
WORKDIR /home/developer/work
VOLUME ["/home/developer/work"]
ENTRYPOINT ["/home/developer/micromamba/envs/pyrosetta/bin/python", "/home/developer/mutation.py"]
# The non-root image still has unresolved permission problems; the root-user
# image built from "Dockerfile" passed testing and can be used instead.
# PermissionError: [Errno 13] Permission denied: '/home/developer/work/4i24.clean.pdb'
# docker run --rm -it -v /home/zenglingyu/tools/dockertest:/home/developer/work hotwa/test1:latest rosetta -p /home/developer/work/4i24.pdb -m /home/developer/work/test.list
# The mounted host directory (/home/zenglingyu/tools/dockertest) must have mode 777, otherwise the error above is raised.

BIN
EvoEF2-master.zip Normal file

Binary file not shown.

27
base.Dockerfile Normal file
View File

@@ -0,0 +1,27 @@
FROM ubuntu:22.04
LABEL maintainer="lingyu zeng <pylyzeng@gmail.com>"
ENV TZ=Asia/Shanghai \
    DEBIAN_FRONTEND=noninteractive \
    PATH="/home/developer/bin:/home/developer/micromamba/bin:${PATH}" \
    CONDA_PREFIX="/home/developer/micromamba/envs/pyrosetta"
# Configure timezone and install necessary packages
RUN <<EOT
#!/bin/bash
# Heredoc lines are not chained with &&, so stop explicitly on the first failure.
set -e
sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list
sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list
apt-get update
# The heredoc body is handed to bash unchanged, so $TZ must not be
# backslash-escaped: "\$TZ" wrote the literal text "$TZ" instead of the zone.
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime
echo $TZ > /etc/timezone
apt-get install -y tzdata git zip curl wget bzip2 sudo
adduser --disabled-password --gecos "" developer
# NOTE(review): a fixed, well-known password combined with passwordless sudo;
# acceptable only for a local development image.
echo "developer:password" | chpasswd
echo "developer ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/developer
apt-get clean
rm -rf /var/lib/apt/lists/*
EOT
# Switch to user developer
USER developer
# Set working directory
WORKDIR /home/developer

54
developer.Dockerfile Normal file
View File

@@ -0,0 +1,54 @@
FROM ubuntu:22.04
LABEL maintainer="lingyu zeng <pylyzeng@gmail.com>"
ENV TZ=Asia/Shanghai \
    DEBIAN_FRONTEND=noninteractive \
    PATH="/home/developer/bin:/home/developer/micromamba/bin:${PATH}" \
    CONDA_PREFIX="/home/developer/micromamba/envs/pyrosetta"
# Configure timezone and install necessary packages
RUN <<EOT
#!/bin/bash
# Heredoc lines are not chained with &&, so stop explicitly on the first failure.
set -e
sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list
sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list
apt-get update
# The heredoc body is handed to bash unchanged, so $TZ must not be
# backslash-escaped: "\$TZ" wrote the literal text "$TZ" instead of the zone.
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime
echo $TZ > /etc/timezone
# unzip is listed explicitly because the build step below calls it.
apt-get install -y tzdata git zip unzip curl wget bzip2 libgl1-mesa-glx g++ sudo software-properties-common build-essential
adduser --disabled-password --gecos "" developer
# NOTE(review): a fixed, well-known password combined with passwordless sudo;
# acceptable only for a local development image.
echo "developer:password" | chpasswd
echo "developer ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/developer
apt-get clean
rm -rf /var/lib/apt/lists/*
EOT
# Switch to user developer
USER developer
# Set working directory
WORKDIR /home/developer
# Copy files (.condarc was listed twice)
COPY --chown=developer .condarc mutation.py 4i24.pdb test.list _foldxLinux64.tar_.gz EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 noarch/repodata.json /home/developer/
# Build EvoEF2, unpack FoldX, install Scwrl4 and create the micromamba env
RUN <<EOM
#!/bin/bash
# Abort on the first failing command, including a failed download in a pipe.
set -eo pipefail
unzip EvoEF2-master.zip
chmod +x EvoEF2-master/build.sh
cd EvoEF2-master
./build.sh
cd ..
tar zxvf _foldxLinux64.tar_.gz
chmod +x install_scwrl4.0.2_64bit_2020_linux
echo -e "Y\nLicense Holder Name" | ./install_scwrl4.0.2_64bit_2020_linux ./
wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
./bin/micromamba shell init --shell bash --root-prefix=~/micromamba
mkdir -p ~/noarch
mkdir ~/work
# ~/work belongs to this user, so no sudo is needed; it stays world-writable
# so that bind-mounted host directories can be written to.
chmod -R 777 ~/work
mv repodata.json ~/noarch
mv pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 ~/noarch
./bin/micromamba create -n pyrosetta -c conda-forge -c bioconda -c defaults python=3.11 click loguru biopython pymol-open-source pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 -y
rm -rf EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux _foldxLinux64.tar_.gz ~/noarch
EOM
WORKDIR /home/developer/work
VOLUME ["/home/developer/work"]
ENTRYPOINT ["/home/developer/micromamba/envs/pyrosetta/bin/python", "/home/developer/mutation.py"]

Binary file not shown.

48
mutation.def Normal file
View File

@@ -0,0 +1,48 @@
Bootstrap: docker
From: ubuntu:22.04

# Files copied from the build host into the image before %post runs.
%files
    .condarc /root/.condarc
    4i24.pdb /root/4i24.pdb
    test.list /root/test.list
    _foldxLinux64.tar_.gz /root/_foldxLinux64.tar_.gz
    EvoEF2-master.zip /root/EvoEF2-master.zip
    install_scwrl4.0.2_64bit_2020_linux /root/install_scwrl4.0.2_64bit_2020_linux
    mutation.py /root/mutation.py
    pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 /root/pyrosetta-2023.31+release.1799523-py311_0.tar.bz2
    noarch/repodata.json /root/repodata.json

%post
    # %environment only applies at run time, so the build needs its own copies:
    # without them $TZ is empty and the tzdata install may prompt.
    export TZ="Asia/Shanghai"
    export DEBIAN_FRONTEND="noninteractive"
    cd /root && \
    apt-get update && \
    apt-get install tzdata -y && \
    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
    apt-get install git zip unzip wget bzip2 libgl1-mesa-glx g++ -y && \
    unzip EvoEF2-master.zip && \
    chmod +x EvoEF2-master/build.sh && \
    cd EvoEF2-master && \
    ./build.sh && \
    cd .. && \
    tar zxvf _foldxLinux64.tar_.gz && \
    chmod +x install_scwrl4.0.2_64bit_2020_linux && \
    # %post runs under /bin/sh (dash on Ubuntu), whose echo has no -e option;
    # printf produces the two answer lines portably.
    printf 'Y\nLicense Holder Name\n' | ./install_scwrl4.0.2_64bit_2020_linux ./ && \
    wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
    ./bin/micromamba shell init --shell bash --root-prefix=~/micromamba && \
    mkdir -p /root/noarch && \
    mv repodata.json /root/noarch && \
    mv pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 /root/noarch && \
    # "defaults" (not "default") is the channel name used by the Dockerfiles and .condarc
    ./bin/micromamba create -n pyrosetta -c conda-forge -c defaults -c bioconda python=3.11 click loguru biopython pymol-open-source noarch/pyrosetta-2023.31+release.1799523-py311_0.tar.bz2 -y && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* EvoEF2-master.zip install_scwrl4.0.2_64bit_2020_linux _foldxLinux64.tar_.gz noarch/*

# Variables exported into every container started from the image.
%environment
    export author="lingyuzeng"
    export version="1.0.0"
    export PATH="/root/bin:/root/micromamba/bin:${PATH}"
    export CONDA_PREFIX="/root/micromamba/envs/pyrosetta"
    export DEBIAN_FRONTEND="noninteractive"
    export TZ="Asia/Shanghai"

# `singularity run` forwards its arguments to the mutation CLI.
%runscript
    exec /root/micromamba/envs/pyrosetta/bin/python /root/mutation.py "$@"

512
mutation.py Normal file
View File

@@ -0,0 +1,512 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@file :mutation.py
@Description: :利用各种工具pyrosetta, foldx, scwrl4, evoef2, pymol的突变脚本,注意不要重复调用,文件覆盖上可能出现问题
@Date :2023/9/8 11:26:21
@Author :lyzeng
@mail :pylyzeng@gmail.com
@version :1.0
'''
import asyncio
import click
from pathlib import Path
import os
import subprocess
from loguru import logger
from dataclasses import dataclass, field
import datetime
import shutil
from pyrosetta import init, pose_from_pdb, version
from pyrosetta.toolbox import mutate_residue, cleanATOM
from multiprocessing import Pool
from typing import List, Union
from Bio.SeqUtils import seq3
from Bio import PDB
import warnings
from Bio import BiopythonWarning
warnings.simplefilter('ignore', BiopythonWarning)
# ---- config ----
# Directory that contains this script; the external tool binaries are looked up next to it.
here = Path(__file__).absolute().parent
cwd = Path.cwd()
# Width of the zero-padded model counter in "<stem>_Model_<NNNN>.pdb".
zfill_number = 4
CONTEXT_SETTINGS = {'help_option_names': ['-h', '--help']}
evoef2_binary = here / "EvoEF2-master/EvoEF2"
scwrl4_binary = here / 'Scwrl4'
foldx_binary = here / 'foldx_20231231'
conda_prefix = os.environ.get('CONDA_PREFIX')
# Fall back to the root image's environment when CONDA_PREFIX is unset or empty.
pymol_binary = Path(conda_prefix or '/root/micromamba/envs/pyrosetta') / 'bin/pymol'
logger.add(cwd.joinpath('mutation.log'))
logger.info(f'\n cwd: {cwd}\n here: {here}\n conda_prefix: {conda_prefix}\n pymol_binary: {pymol_binary}\n evoef2_binary: {evoef2_binary}\n scwrl4_binary: {scwrl4_binary}\n foldx_binary: {foldx_binary}')
# ---- check ----
# Fail at import time if any external tool is missing (same order as before).
for _required_binary in (pymol_binary, evoef2_binary, scwrl4_binary, foldx_binary):
    if not _required_binary.exists():
        raise FileNotFoundError(f'{_required_binary.as_posix()} not exists!')
del _required_binary
# ---- function ----
@dataclass()
class pyrosetta_mutation:
    """Build mutant models of a PDB structure with PyRosetta.

    Every non-empty line of the mutation file describes one mutant (for
    example ``CA797G,CB797G;``) and yields one
    ``<stem>_Model_<NNNN>.pdb`` file next to the input structure.

    Attributes:
        pdb: Input structure. ``cleanATOM`` is run on it and the resulting
            ``<stem>.clean.pdb`` beside it is loaded as the working pose.
        mutation_file: Mutation list used when ``mutate_from_file`` is called
            without an explicit file.
    """
    pdb: Path
    mutation_file: Path

    def __post_init__(self):
        init()
        cleanATOM(self.pdb.as_posix())
        self.pose = pose_from_pdb(self.pdb.parent.joinpath(f"{self.pdb.stem}.clean.pdb").as_posix())

    @staticmethod
    def mutate_task(mutation_file):
        """Parse a mutation list file.

        Args:
            mutation_file: Path of a text file with one mutant per line; a
                line ends with ";" and separates single mutations with ",".

        Returns:
            One list of mutation tokens per non-empty line, e.g.
            ``[['CA797G', 'CB797G'], ['MA793A']]``. Blank lines and empty
            tokens (such as after a trailing comma) are skipped, because an
            empty token cannot be parsed into residue/chain/position.
        """
        with open(mutation_file, 'r', encoding='utf-8') as f:  # read mutation file
            raw_lines = f.readlines()
        mutation_lists = []
        for raw_line in raw_lines:
            # Drop surrounding whitespace and the trailing ";", then split on ",".
            tokens = [token.strip() for token in raw_line.strip().rstrip(';').split(',')]
            tokens = [token for token in tokens if token]
            if tokens:
                mutation_lists.append(tokens)
        return mutation_lists

    def mutate_from_file(self, file: Path = None) -> List[Path]:
        """Create one mutant model per line of a mutation list.

        Args:
            file: Mutation list to use; defaults to ``self.mutation_file``.

        Returns:
            The paths of the written model files, in list order.
        """
        if not file:
            file = self.mutation_file
        # Honour the explicit ``file`` argument (it used to be ignored).
        mutation_lists = self.mutate_task(mutation_file=file)
        # Process every mutant line in its own worker process.
        with Pool() as pool:
            all_results = pool.starmap(self.mutation, [(self.pdb, self.pose, i, n + 1, 2.0) for n, i in enumerate(mutation_lists)])
        logger.info(f'PyRosetta mutation {self.pdb} finished\n results:\n{all_results}')
        return all_results

    @staticmethod
    def mutation(pdb: Path, pose, line: List[str], line_number: Union[str, int], pack_radius: float) -> Path:
        """Apply all single mutations of one mutant line to ``pose`` and save it.

        Args:
            pdb: Input structure; only its name and directory are used for
                logging and for naming the output file.
            pose: Pose that is mutated in place.
            line: Mutation tokens such as ``'CA797G'`` (reference residue,
                chain, position in chain, target residue).
            line_number: 1-based mutant index used in the output file name.
            pack_radius: Repacking radius handed to ``mutate_residue``.

        Returns:
            Path of the saved ``<stem>_Model_<NNNN>.pdb`` file.
        """
        for mutation in line:  # parse one site
            ref_residue = mutation[0]
            chain = mutation[1]
            residue_num = int(mutation[2:-1])
            target_residue = mutation[-1]
            logger.info(f'single site: PyRosetta mutation {pdb.name} {ref_residue}{chain}{residue_num}{target_residue}')
            pyrosetta_mutation.mutate(pose, chain, residue_num, target_residue, pack_radius)
        out_file = pdb.parent.joinpath(f'{pdb.stem}_Model_{str(line_number).zfill(zfill_number)}.pdb')
        return pyrosetta_mutation.save(pose, name=out_file)  # save the result of this mutant line

    @staticmethod
    def mutate(pose, chain: str, residue_number_in_chain: int, target_residue: str, pack_radius: float = 2.0):
        """Mutate one residue of ``pose`` in place.

        Args:
            pose: Pose to modify.
            chain: Chain identifier of the residue.
            residue_number_in_chain: PDB numbering of the residue in the chain.
            target_residue: One-letter code of the new amino acid.
            pack_radius: Radius around the mutated residue within which
                neighbouring residues may be repacked.

        Raises:
            ValueError: If the chain/position cannot be mapped to a pose
                residue (assumes ``pdb2pose`` reports this as 0).
        """
        chain_ids = [pose.pdb_info().chain(i) for i in range(1, pose.total_residue() + 1)]
        logger.info("Chains:" + str(set(chain_ids)))
        logger.info("Residues in chain " + chain + ": " + str([pose.pdb_info().number(i) for i in range(1, pose.total_residue() + 1) if pose.pdb_info().chain(i) == chain]))
        pose_residue_number = pose.pdb_info().pdb2pose(res=residue_number_in_chain, chain=chain)
        logger.info("pose_residue_number: " + str(pose_residue_number))
        if pose_residue_number == 0:
            # Pose numbering starts at 1; report the bad site instead of
            # failing inside pose.residue(0).
            raise ValueError(f'residue {residue_number_in_chain} of chain {chain} not found in pose')
        logger.info("Original residue: " + pose.residue(pose_residue_number).name())
        mutate_residue(pose, pose_residue_number, target_residue, pack_radius=pack_radius)
        logger.info("Mutated residue: " + pose.residue(pose_residue_number).name())

    @staticmethod
    def save(pose, name: Path) -> Path:
        """Write ``pose`` to ``name`` and return the path.

        Raises:
            FileNotFoundError: If the file does not exist after dumping.
        """
        pose.dump_pdb(name.as_posix())
        if name.exists():
            return name
        else:
            raise FileNotFoundError(f'{name.as_posix()} mutation failed!')
@dataclass()
class pyrosetta_mutate_one:
    """Single-point mutation with PyRosetta, performed on construction.

    Creating an instance initialises PyRosetta, cleans the input PDB, loads
    ``<stem>.clean.pdb`` from the input's directory and mutates one residue.
    """
    pdb: Path
    chain: str
    residue_number_in_chain: int
    target_residue: str

    def __post_init__(self):
        init()
        cleanATOM(self.pdb.as_posix())
        cleaned_path = self.pdb.absolute().parent.joinpath(f"{self.pdb.stem}.clean.pdb")
        self.mutate(pose_from_pdb(cleaned_path.as_posix()))

    def mutate(self, pose):
        """Mutate the configured residue of ``pose`` and dump the result.

        Returns:
            Relative path ``<stem>_mutated.pdb`` of the written structure.
        """
        residue_indices = range(1, pose.total_residue() + 1)
        chain_ids = [pose.pdb_info().chain(index) for index in residue_indices]
        logger.info(f"Chains:{set(chain_ids)}")
        numbers_in_chain = [
            pose.pdb_info().number(index)
            for index in residue_indices
            if pose.pdb_info().chain(index) == self.chain
        ]
        logger.info(f"Residues in chain {self.chain}: {numbers_in_chain}")
        pose_residue_number = pose.pdb_info().pdb2pose(res=self.residue_number_in_chain, chain=self.chain)
        logger.info(f"pose_residue_number: {pose_residue_number}")
        logger.info(f"Original residue: {pose.residue(pose_residue_number).name()}")
        # Pack radius 0.0 is passed positionally, as before.
        mutate_residue(pose, pose_residue_number, self.target_residue, 0.0)
        logger.info(f"Mutated residue: {pose.residue(pose_residue_number).name()}")
        # Save the mutated pose to a new PDB file.
        output_name = f"{self.pdb.stem}_mutated.pdb"
        pose.dump_pdb(output_name)
        return Path(output_name)
@dataclass()
class evoEF2():
    """Build mutant models with the EvoEF2 ``BuildMutant`` command.

    Attributes:
        pdb: Input structure; EvoEF2 runs in its directory.
        mutationfile: Mutation list handed to ``--mutant_file``.
    """
    pdb: Path
    mutationfile: Path

    def __post_init__(self):
        self.file = evoef2_binary
        if not self.file.exists():
            raise FileNotFoundError(f'{self.file} not exists!')

    def evoEF2base(self):
        """Run EvoEF2 and collect the models it produced.

        Returns:
            Paths of the existing ``<stem>_Model_<NNNN>.pdb`` files, one
            candidate per line of the mutation file; missing ones are logged
            as errors and left out.
        """
        # Pass the arguments as a list without a shell, so paths containing
        # spaces or shell metacharacters reach EvoEF2 unchanged.
        command = [
            self.file.absolute().as_posix(),
            "--command=BuildMutant",
            f"--pdb={self.pdb.absolute().as_posix()}",
            f"--mutant_file={self.mutationfile.absolute().as_posix()}",
        ]
        completed = subprocess.run(command, stdout=subprocess.PIPE, cwd=self.pdb.absolute().parent.as_posix())
        tool_output = completed.stdout.decode('utf-8', errors='replace')
        logger.info(f'''Task record : {datetime.datetime.now()}:\n {tool_output}''')
        if completed.returncode != 0:
            logger.error(f'EvoEF2 exited with return code {completed.returncode}')
        with open(self.mutationfile.as_posix(), 'r', encoding='utf-8') as f:  # read mutation file
            mutation_list = f.readlines()
        mf_list = []
        for j, i in enumerate(mutation_list):  # check that each mutant was written
            mf = self.pdb.parent.joinpath(f'{self.pdb.stem}_Model_{str(j + 1).zfill(zfill_number)}.pdb')
            if not mf.exists():
                logger.error(f'{mf.as_posix()} mutation failed! mutation line: {i}')
            else:
                mf_list.append(mf)
        return mf_list
@dataclass()
class Scwrl4():
    '''
    Build mutants by letting Scwrl4 rebuild side chains on a mutated backbone.

    Scwrl4 predicts side-chain conformations for a fixed backbone; it does not
    introduce mutations itself. This class therefore first mutates the
    structure with PyRosetta (which changes the residue names), strips every
    atom except N, CA, C and O with Biopython, and then runs Scwrl4 on each
    backbone-only file so that it places the side chains of the mutant.

    Attributes:
        input_pdb: Input structure.
        mutationfile: Mutation list, one mutant per line.
    '''
    input_pdb: Path
    mutationfile: Path

    def __post_init__(self):
        self.file = scwrl4_binary
        if not self.file.exists():
            raise FileNotFoundError(f'{self.file} not exists!')

    def prepare_backbone(self) -> List[Path]:
        """Create one backbone-only PDB per mutant line.

        Returns:
            Paths of the ``*_backbone.pdb`` files. The intermediate PyRosetta
            models are deleted once their backbones have been written.
        """
        # PyRosetta is only used here to change the residue names.
        out_file = pyrosetta_mutation(pdb=Path(self.input_pdb), mutation_file=Path(self.mutationfile)).mutate_from_file()
        out_file_list = [self.prepare_backone_base(model) for model in out_file]
        # delete the intermediate PyRosetta models
        for model in out_file:
            model.unlink()
        return out_file_list

    async def async_scwrl4(self):
        """Run Scwrl4 concurrently on every prepared backbone.

        Returns:
            The expected output paths ``<stem>_Model_<NNNN>.pdb`` in mutant
            order. Runs that fail or leave no output file are logged as
            errors; the returned list is unchanged by that.
        """
        backbone_files = self.prepare_backbone()
        tasks = []
        output_files = []  # expected output files
        for n, input_pdb in enumerate(backbone_files):
            # Use the shared counter width so names match the other tools.
            output_pdb = input_pdb.parent / f"{self.input_pdb.stem}_Model_{str(n + 1).zfill(zfill_number)}.pdb"
            output_files.append(output_pdb)
            cmd = [
                scwrl4_binary.absolute().as_posix(),
                '-i', input_pdb.as_posix(),
                '-o', output_pdb.as_posix()
            ]
            tasks.append(self.run_command(cmd))
        return_codes = await asyncio.gather(*tasks)
        # remove backbone files
        for file in backbone_files:
            file.unlink()
        for output_pdb, return_code in zip(output_files, return_codes):
            if return_code != 0 or not output_pdb.exists():
                logger.error(f'Scwrl4 failed for {output_pdb.as_posix()} (return code {return_code})')
        return output_files

    async def run_command(self, cmd):
        """Run one command, echo its output and return its exit status."""
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()
        print(f"[stdout]\n{stdout.decode()}")
        print(f"[stderr]\n{stderr.decode()}")
        print(f"Return Code: {process.returncode}")
        return process.returncode

    @staticmethod
    def prepare_backone_base(file: Path) -> Path:
        """Write a copy of ``file`` that keeps only the N, CA, C and O atoms.

        Returns:
            Path of the new ``<stem>_backbone.pdb`` next to ``file``.
        """
        parser = PDB.PDBParser()
        structure = parser.get_structure(file.stem, file.as_posix())
        io = PDB.PDBIO()
        for model in structure:
            for chain in model:
                for residue in chain:
                    # Iterate over a copy because atoms are detached while looping.
                    for atom in list(residue):
                        if atom.id not in ["N", "CA", "C", "O"]:
                            residue.detach_child(atom.id)
        io.set_structure(structure)  # save the new PDB without side chains
        out_file = file.parent.joinpath(f"{file.stem}_backbone.pdb")
        io.save(out_file.as_posix())
        return out_file
@dataclass()
class foldX():
    '''
    Build mutant models with the FoldX ``BuildModel`` command.
    ./foldx_20231231 --command=BuildModel --pdb=4i24.pdb --mutant-file=individual_list.txt --pdb-dir=/home/zenglingyu/tools

    Attributes:
        pdb: Input structure.
        mutationfile: Mutation list. After construction this points to the
            ``individual_list.txt`` copy in the current working directory.
    '''
    pdb: Path
    mutationfile: Path

    def __post_init__(self):
        self.file = foldx_binary
        if not self.file.exists():
            raise FileNotFoundError(f'{self.file} not exists!')
        # FoldX requires the fixed file name individual_list.txt. Copy the
        # list instead of renaming it, so the caller's file is left in place.
        required_name = Path('individual_list.txt')
        source = Path(self.mutationfile)
        if source.resolve() != required_name.resolve():
            shutil.copy(source.as_posix(), required_name.as_posix())
        self.mutationfile = required_name

    def foldXbase(self):  # FoldX runs as a single process
        """Run FoldX and rename its models to ``<stem>_Model_<NNNN>.pdb``.

        Returns:
            Paths of the renamed model files; mutants whose FoldX output
            ``<stem>_<n>.pdb`` is missing are logged as errors and left out.
        """
        pdb_dir = self.pdb.absolute().parent.as_posix()
        # Pass the arguments as a list without a shell, so paths containing
        # spaces or shell metacharacters reach FoldX unchanged.
        command = [
            self.file.absolute().as_posix(),
            "--command=BuildModel",
            f"--pdb={self.pdb.name}",
            f"--mutant-file={self.mutationfile.absolute().as_posix()}",
            f"--pdb-dir={pdb_dir}",
            f"--output-dir={pdb_dir}",
        ]
        completed = subprocess.run(command, stdout=subprocess.PIPE, cwd=self.file.parent.as_posix())
        tool_output = completed.stdout.decode('utf-8', errors='replace')
        logger.info(f'''Task record : {datetime.datetime.now()}:\n {tool_output}''')
        if completed.returncode != 0:
            logger.error(f'FoldX exited with return code {completed.returncode}')
        with open(self.mutationfile.as_posix(), 'r', encoding='utf-8') as f:  # read mutation file
            mutation_list = f.readlines()
        renamed_models = []
        for j, i in enumerate(mutation_list):  # check each mutant
            mf = self.pdb.parent.joinpath(f'{self.pdb.stem}_{str(j + 1)}.pdb')
            out = f'{self.pdb.stem}_Model_{str(j + 1).zfill(zfill_number)}.pdb'
            if not mf.exists():
                logger.error(f'{out} mutation failed! mutation line: {j + 1} content: {i}')
            else:
                # Keep the model in the output directory it was found in; a
                # bare file name would move it into the current directory.
                renamed_models.append(mf.rename(self.pdb.parent.joinpath(out)))
                logger.info(f'foldX mutaion {out} success')
        return renamed_models
@dataclass()
class pymol_mutation(pyrosetta_mutation):
    """Build mutant models with the PyMOL mutagenesis wizard.

    Reuses the mutation-list parsing of ``pyrosetta_mutation`` but skips its
    PyRosetta initialisation.

    Attributes:
        pdb: Input structure.
        mutation_file: Mutation list used when ``mutate_from_file`` is called
            without an explicit file.
    """
    pdb: Path
    mutation_file: Path

    def __post_init__(self):
        # Deliberately empty: no PyRosetta init or pose is needed for PyMOL.
        ...

    def mutate_from_file(self, file: Path = None, cleanATOM: bool = False) -> List[Path]:
        """Create one mutant model per line of a mutation list.

        Args:
            file: Mutation list to use; defaults to ``self.mutation_file``.
            cleanATOM: When true, run PyRosetta's ``cleanATOM`` on the input
                first and mutate the resulting ``<stem>.clean.pdb``.

        Returns:
            The value returned by ``mutation`` for every mutant line.

        Raises:
            FileNotFoundError: If cleaning was requested but produced no file.
        """
        if not file:
            file = self.mutation_file
        if cleanATOM:
            # The flag has the same name as the pyrosetta helper and hides it
            # in this scope (calling it raised "bool is not callable"), so
            # import the helper under a different local name.
            from pyrosetta.toolbox import cleanATOM as run_clean_atom
            run_clean_atom(self.pdb.as_posix())
            cleanfilename = self.pdb.parent / f'{self.pdb.stem}.clean.pdb'
            if not cleanfilename.exists():
                raise FileNotFoundError(f'{cleanfilename.as_posix()} not exists! use pyrosetta cleanATOM faild.')
            self.pdb = cleanfilename
        # Honour the explicit ``file`` argument (it used to be ignored).
        mutation_lists = self.mutate_task(mutation_file=file)
        # Process every mutant line in its own worker process.
        with Pool() as pool:
            all_results = pool.starmap(self.mutation, [(self.pdb, i, n + 1) for n, i in enumerate(mutation_lists)])
        logger.info(f'Pymol mutation {self.pdb} finished\n results:\n{all_results}')
        return all_results

    @staticmethod
    def mutation(pdb_file: Path, mutate_list: List, mutate_number: int):
        """
        Apply one mutant line with the PyMOL mutagenesis wizard and save it.

        The input should contain only ATOM/HETATM records; header lines and
        the like should be removed beforehand.

        Args:
            pdb_file: Structure to load.
            mutate_list: Mutation tokens of one line of the list file, like
                [CA797G,CB797G,MA793G,MB793G].
            mutate_number: 1-based mutant index used in the output file name.

        Returns:
            File name of the saved ``<id>_Model_<NNNN>.pdb`` (written to the
            current working directory), or None if it was not created.

        Raises:
            ValueError: If loading the file yields more than one object.
        """
        from pymol import cmd
        cmd.load(pdb_file.as_posix())
        cmd.remove('solvent')
        loaded_objects = cmd.get_names()
        if len(loaded_objects) != 1:
            raise ValueError(f'this pdb have more than one object! PDBs:{loaded_objects}')
        # Named object_name rather than PDB so the Bio.PDB import is not shadowed.
        object_name = loaded_objects[0]
        for mutate_string in mutate_list:
            ref_residue = mutate_string[0]
            chain = mutate_string[1]
            site = mutate_string[2:-1]
            mutation_type = seq3(mutate_string[-1]).upper()
            logger.info(f'pymol mutation reference: {seq3(ref_residue).upper()} to {mutation_type} [chain {chain} site {site}]')
            # One PDB record per CA atom gives the chain/residue-number pairs.
            ca_ids = cmd.identify(f"{object_name} and name CA")
            records_per_ca = [cmd.get_pdbstr("%s and id %s" % (object_name, ca_id)).splitlines() for ca_id in ca_ids]
            # keep only ATOM and HETATM records
            atom_records = [[record for record in records if record.startswith(('ATOM', 'HETATM'))] for records in records_per_ca]
            chain_residue_pairs = [[records[0][21], records[0][22:26].strip()] for records in atom_records if records]
            matched = False
            for residue_chain, residue_number in chain_residue_pairs:
                if residue_chain == str(chain) and residue_number == str(site):
                    matched = True
                    cmd.wizard("mutagenesis")
                    cmd.refresh_wizard()
                    cmd.get_wizard().set_mode(mutation_type)
                    selection = f"/{object_name}//{residue_chain}/{residue_number}"
                    cmd.get_wizard().do_select(selection)
                    cmd.get_wizard().apply()
                    cmd.set_wizard("done")
            if not matched:
                # Previously a silent no-op; make the skipped site visible.
                logger.warning(f'pymol mutation {mutate_string}: chain {chain} site {site} not found in {object_name}')
        # save pdb; an object named "<id>.clean" is written under "<id>"
        pid = object_name.split('.')[0] if '.' in object_name else object_name
        outfile = Path(f'{pid}_Model_{str(mutate_number).zfill(zfill_number)}.pdb')
        cmd.save(outfile.as_posix(), f"{object_name}")
        cmd.reinitialize('everything')
        if outfile.exists():
            return outfile.name
def print_version(ctx, param, value):
    """Click callback for the eager ``-v/--version`` flag of each sub-command.

    ``value`` is the flag value of the sub-command (a tool name). When it is
    set, the version information of that tool is printed and the click
    context is exited; otherwise nothing happens.
    """
    if not (value and not ctx.resilient_parsing):
        return
    # Command line that reports the version (or help) of each external tool.
    version_commands = {
        'evoef2': [evoef2_binary.as_posix(), "--version"],
        'foldx': [foldx_binary.as_posix(), "--help"],
        'scwrl4': [scwrl4_binary.as_posix(), "--help"],
        'pymol': [pymol_binary.as_posix(), "--version"]
    }
    argv = version_commands.get(value)
    if argv is not None:
        # Run the tool and show stdout on success, stderr otherwise.
        completed = subprocess.run(argv, capture_output=True, text=True)
        print(completed.stdout if completed.returncode == 0 else completed.stderr)
    elif value == 'rosetta':
        print(version())
    else:
        print('Not match, please input a correct software name!')
    ctx.exit()
# Root command group of the tool. The sub-commands (rosetta, evoef2, foldx,
# pymol, scwrl4) are attached with cli.add_command at the end of the file.
# The docstring below is the help text click prints for the group, so its
# wording is runtime output and is left exactly as written.
@click.group(context_settings=CONTEXT_SETTINGS)
def cli():
    """
    author: zenglingyu
    email: pylyzeng@gmail.com
    data: 2023/8/17
    version: 1.0
    description: \n
    This is a tool for protein mutation using various methods.\n
    Here, the 'test.list' file shows the mutants that you want to build. It has the following format: \n
    CA171A,DB180E; \n
    Each mutant is written in one line ending with “;”, and multiple mutations in a mutant are divided by “,”. Note that there is no gap or space character between single mutations. For each single mutation, the first letter is the reference amino acid, the second letter is the chain identifier of the amino acid followed by its position in the chain, and the last letter is the amino acid after mutation.
    """
    pass
def mutate_line(line, protein_path):
    """Apply every single mutation of one mutant line with PyRosetta.

    Args:
        line: One line of a mutation list, e.g. ``"CA797G,CB797G;"``.
        protein_path: Path of the input PDB file.

    Returns:
        One ``"Processed mutation: <token>"`` string per mutation handled.
    """
    # Strip the trailing ";" and split on ","; ignore empty tokens.
    mutations = [token.strip() for token in line.strip().rstrip(';').split(',') if token.strip()]
    results = []
    for mutation in mutations:
        ref_residue = mutation[0]
        chain = mutation[1]
        residue_num = int(mutation[2:-1])
        target_residue = mutation[-1]
        # Single-site mutation takes (pdb, chain, position, target residue),
        # which is the signature of pyrosetta_mutate_one. The previous call to
        # pyrosetta_mutation (pdb, mutation_file) always raised TypeError.
        # Each call writes "<stem>_mutated.pdb" into the current directory.
        pyrosetta_mutate_one(Path(protein_path), chain, residue_num, target_residue)
        logger.info(f'PyRosetta mutation {protein_path} {ref_residue}{chain}{residue_num}{target_residue}')
        results.append(f"Processed mutation: {mutation}")
    return results
# Decorator factory that centralises the file shuffling around a tool run, so
# results end up beside the input protein (e.g. in a directory mounted into
# the container such as /work).
def handle_file_path(here):
    """Wrap ``func(protein, mutation)`` with copy-in / move-out handling.

    If the protein file is not located in ``here``, it is copied there before
    ``func`` runs; afterwards every ``*_Model_*.pdb`` and ``*.log`` file found
    in ``here`` is moved into the protein's directory, replacing files of the
    same name. ``func`` receives ``protein`` and ``mutation`` as ``Path``
    keyword arguments and its return value is passed through.
    """
    def decorator(func):
        def wrapper(protein, mutation, *args, **kwargs):
            protein_dir = Path(protein).parent

            def outside_here():
                # Re-evaluated on each use, before and after the wrapped call.
                return here.resolve() != protein_dir.resolve()

            if outside_here():
                shutil.copy(protein, here.as_posix())
            result = func(protein=Path(protein), mutation=Path(mutation), *args, **kwargs)
            if outside_here():
                # Models first, then logs.
                for pattern in ('*_Model_*.pdb', '*.log'):
                    for produced in here.glob(pattern):
                        target = protein_dir.joinpath(produced.name)
                        if target.exists():
                            target.unlink()
                        shutil.move(produced.as_posix(), target.as_posix())
            return result
        return wrapper
    return decorator
# Sub-command "rosetta": build the mutants of a list with PyRosetta.
# -p/-m are required, so a missing option yields a click usage error instead
# of a TypeError from Path(None); the eager -v flag still exits first.
@click.command(help='This tool is designed for mutating proteins using PyRosetta and analyzing the results. Version:\n PyRosetta-4 2023 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python311.Release 2023.31+release.1799523c1e5ce7129824215cddea0f15d3f087dd 2023-08-01T12:24:20] retrieved from: http://www.pyrosetta.org(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.', context_settings=CONTEXT_SETTINGS)
@click.option('-p', '--protein', type=click.Path(exists=True), required=True, help='Path to the input protein file in PDB format.(.pdb)')
@click.option('-m', '--mutation', type=click.Path(exists=True), required=True,
              help="Path to the mutation list file. ")
@click.option('-v', '--version', is_flag=True, flag_value='rosetta', callback=print_version, expose_value=False, is_eager=True, help='Print version information.')
def rosetta(protein, mutation):
    @handle_file_path(here)
    def execute_rosetta(protein, mutation):  # parameter names must match those of the wrapper
        pyrosetta_mutation(pdb=protein, mutation_file=mutation).mutate_from_file()
    execute_rosetta(protein, mutation)
# Sub-command "evoef2": build the mutants of a list with EvoEF2.
# -p/-m are required, so a missing option yields a click usage error instead
# of a TypeError from Path(None); the eager -v flag still exits first.
@click.command(help="This tool is designed for mutating proteins using EvoEF2 and analyzing the results.", context_settings=CONTEXT_SETTINGS)
@click.option('-p', '--protein', type=click.Path(exists=True), required=True, help='Path to the input protein file in PDB format.(.pdb)')
@click.option('-m', '--mutation', type=click.Path(exists=True), required=True,
              help="Path to the mutation list file. ")
@click.option('-v', '--version', is_flag=True, flag_value='evoef2', callback=print_version, expose_value=False, is_eager=True, help='Print version information.')
def evoef2(protein, mutation):
    @handle_file_path(here)
    def execute_evoef2(protein, mutation):
        # EvoEF2 specific code here
        ins = evoEF2(Path(protein), Path(mutation)).evoEF2base()
        logger.info(f'EvoEF2 mutation {protein} finished\n results:\n{ins}')
    execute_evoef2(protein, mutation)
# Sub-command "foldx": build the mutants of a list with FoldX.
# -p/-m are required, so a missing option yields a click usage error instead
# of a TypeError from Path(None); the eager -v flag still exits first.
@click.command(help="This tool is designed for mutating proteins using FoldX and analyzing the results. Version: foldx_20231231", context_settings=CONTEXT_SETTINGS)
@click.option('-p', '--protein', type=click.Path(exists=True), required=True, help='Path to the input protein file in PDB format.(.pdb)')
@click.option('-m', '--mutation', type=click.Path(exists=True), required=True,
              help="Path to the mutation list file. ")
@click.option('-v', '--version', is_flag=True, flag_value='foldx', callback=print_version, expose_value=False, is_eager=True, help='Print version information.')
def foldx(protein, mutation):
    @handle_file_path(here)
    def execute_foldx(protein, mutation):
        # FoldX specific code here
        ins = foldX(Path(protein), Path(mutation))
        ins.foldXbase()
    execute_foldx(protein, mutation)
# Sub-command "pymol": build the mutants of a list with the PyMOL wizard.
# -p/-m are required, so a missing option yields a click usage error instead
# of a TypeError from Path(None); the eager -v flag still exits first.
@click.command(help="This tool is designed for mutating proteins using PyMOL and analyzing the results. Version: PyMOL 2.5.0 Open-Source (04df6f86a0), 2023-05-23", context_settings=CONTEXT_SETTINGS)
@click.option('-p', '--protein', type=click.Path(exists=True), required=True, help='Path to the input protein file in PDB format.(.pdb)')
@click.option('-m', '--mutation', type=click.Path(exists=True), required=True,
              help="Path to the mutation list file. ")
@click.option('-v', '--version', is_flag=True, flag_value='pymol', callback=print_version, expose_value=False, is_eager=True, help='Print version information.')
def pymol(protein, mutation):
    @handle_file_path(here)
    def execute_pymol(protein, mutation):
        # PyMol specific code here
        ins = pymol_mutation(Path(protein), mutation).mutate_from_file()
        logger.info(f'PyMOL mutation {protein} finished\n results:\n{ins}')
    execute_pymol(protein, mutation)
# Sub-command "scwrl4": build the mutants of a list with Scwrl4.
# -p/-m are required, so a missing option yields a click usage error instead
# of a TypeError from Path(None); the eager -v flag still exits first.
@click.command(help="This tool is designed for mutating proteins using scwrl4 and analyzing the results. Version: 4.0 Copyright (c) 2009-2020 Georgii Krivov, Maxim Shapovalov and Roland Dunbrack Fox Chase Cancer Center, Philadelphia PA 19111, USA", context_settings=CONTEXT_SETTINGS)
@click.option('-p', '--protein', type=click.Path(exists=True), required=True, help='Path to the input protein file in PDB format.(.pdb)')
@click.option('-m', '--mutation', type=click.Path(exists=True), required=True,
              help="Path to the mutation list file. ")
@click.option('-v', '--version', is_flag=True, flag_value='scwrl4', callback=print_version, expose_value=False, is_eager=True, help='Print version information.')
def scwrl4(protein, mutation):
    @handle_file_path(here)
    def execute_scwrl4(protein, mutation):
        ins = Scwrl4(Path(protein), mutation)
        output_files = asyncio.run(ins.async_scwrl4())  # collect the output file names
        logger.info(f'Scwrl4 mutation {protein} finished\nOutput files:\n{output_files}\n')
    execute_scwrl4(protein, mutation)
# Attach every tool-specific sub-command to the root group, in the same order
# as before, then run the CLI when the file is executed as a script.
for _subcommand in (rosetta, evoef2, foldx, pymol, scwrl4):
    cli.add_command(_subcommand)
del _subcommand

if __name__ == '__main__':
    cli()

10791
noarch/repodata.json Normal file

File diff suppressed because it is too large Load Diff

99
sifbuild.md Normal file
View File

@@ -0,0 +1,99 @@
构建 Singularity（现在称为 Apptainer）的 SIF（Singularity Image Format）文件有几种不同的方法。以下是一些常见的构建方法：
1. **使用定义文件**:使用定义文件(例如您之前提供的内容)是最常见的构建方法。您可以通过定义文件详细描述镜像的内容和构建过程。
```
sudo singularity build output.sif definition.def
```
2. **从现有的Docker容器构建**您可以直接从Docker镜像构建SIF文件。
```
sudo singularity build output.sif docker-daemon://ubuntu:22.04
```
3. **从现有的Singularity容器构建**如果您有现有的Singularity容器您可以从那个容器构建新的SIF文件。
```
sudo singularity build new_output.sif old_output.sif
```
4. **使用沙盒sandbox模式**:沙盒模式允许您在一个可写的目录结构中构建和修改容器。这在迭代开发和测试中很有用。
```
sudo singularity build --sandbox sandbox/ definition.def
```
您可以随后将沙盒目录转换为SIF文件
```
sudo singularity build final_output.sif sandbox/
```
5. **使用远程构建服务**如果您不希望在本地机器上构建容器您可以使用Singularity的远程构建服务。
以上方法可以根据您的需求和限制进行混合和匹配。例如您可以先使用定义文件在沙盒模式下构建容器然后手动修改容器最后将其转换为SIF文件。
## 使用沙盒（sandbox）模式构建 SIF 文件
使用沙盒sandbox模式构建SIF文件允许您以更灵活的方式工作。沙盒是一个可写的目录结构您可以在其中修改和测试容器的内容。以下是使用沙盒模式构建SIF文件的步骤
1. **创建沙盒目录**首先您可以使用定义文件或从现有的Docker或Singularity镜像创建沙盒目录。
使用定义文件创建沙盒:
```
sudo singularity build --sandbox sandbox/ definition.def
```
或从Docker镜像创建沙盒
```
sudo singularity build --sandbox sandbox/ docker://ubuntu:22.04
```
这里“sandbox/”是沙盒目录的名称,您可以更改为所需的任何路径。
2. **修改沙盒内容**:一旦沙盒被创建,您可以直接在该目录结构中进行更改。您可以使用`singularity shell`命令进入沙盒,并进行所需的修改:
```
sudo singularity shell --writable sandbox/
```
您也可以直接在文件系统中编辑沙盒目录的内容。
3. **构建SIF文件**一旦您对沙盒的内容满意可以将其转换为SIF文件
```
sudo singularity build final_output.sif sandbox/
```
这里“final_output.sif”是最终SIF文件的名称。
4. **清理**:如果您不再需要沙盒,可以删除沙盒目录:
```
sudo rm -r sandbox/
```
沙盒模式特别适合需要迭代开发和测试的情况。通过直接在可写的目录结构中工作,您可以更容易地尝试不同的配置和设置。
## wsl2 沙盒模式
如果你想在WSL2的子系统中进入沙盒模式首先确保你的镜像文件如 `mutation.sif`)已经存在。然后,你可以使用以下命令进入沙盒模式:
```
singularity shell --writable-tmpfs mutation.sif
```
这个命令将允许你在容器内部进行更改，但更改不会保存到原始 SIF 文件中。如果你想保存更改，需要使用可写的沙盒目录：先用 `singularity build --sandbox mutation/ mutation.sif` 创建沙盒目录，然后通过以下命令进入容器：
```
singularity shell --writable mutation/
```
在沙盒模式中,你可以对容器进行更改,并且更改将会保存。当你完成时,可以使用 `exit` 命令退出沙盒模式。
在 WSL2 中使用 Apptainer 时，会在当前目录解压出 mutation_sandbox 目录，里面包含一整个 Linux 系统的文件，但交互环境仍然是 Windows 的 WSL2 子系统。可能的原因是 WSL2 子系统本身就运行在沙盒中，无法在沙盒中再创建一个沙盒，于是只是把文件解压出来。可能需要在真正运行 Linux 系统的物理机上才能正常进入沙盒模式。

6
test.list Normal file
View File

@@ -0,0 +1,6 @@
CA797G,CB797G,MA793G,MB793G;
CA797A,CB797A,MA793G,MB793G;
CA797M,CB797M,MA793A,MB793A;
CA797G,CB797G,MA793C,MB793C;
CA797A,CB797A,MA793A,MB793A;
CA797M,CB797M,MA793A,MB793A;