将cleanATOM保存为classmethod
This commit is contained in:
@@ -7,6 +7,9 @@
|
|||||||
@Author :lyzeng
|
@Author :lyzeng
|
||||||
@Email :pylyzeng@gmail.com
|
@Email :pylyzeng@gmail.com
|
||||||
@version :1.0
|
@version :1.0
|
||||||
|
# 清理杂原子并初始化PDBAnalyzer
|
||||||
|
analyzer = PDBAnalyzer.cleanATOM(pdb_file)
|
||||||
|
print(analyzer.pdb_file)
|
||||||
'''
|
'''
|
||||||
# micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab
|
# micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab
|
||||||
# modeller注册码:MODELIRANJE (<conda_env>//lib/modeller-10.4/modlib/modeller/config.py)
|
# modeller注册码:MODELIRANJE (<conda_env>//lib/modeller-10.4/modlib/modeller/config.py)
|
||||||
@@ -103,36 +106,46 @@ class PDBAnalyzer:
|
|||||||
"""
|
"""
|
||||||
Initialize the PDB structure after the object is created.
|
Initialize the PDB structure after the object is created.
|
||||||
"""
|
"""
|
||||||
self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
|
self.initialize_properties()
|
||||||
|
|
||||||
|
def initialize_properties(self):
|
||||||
|
"""Initialize properties based on the pdb_file."""
|
||||||
|
self.pdb_file_stem = self.pdb_file.stem.split('.')[0]
|
||||||
|
self.pid = self.pdb_file_stem.lower() if len(self.pdb_file_stem) == 4 else None
|
||||||
self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||||
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix())
|
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||||
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
|
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
|
||||||
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
|
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
|
||||||
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
|
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
|
||||||
|
|
||||||
def cleanATOM(self, out_file=None, ext="_clean.pdb") -> Path: # from pyrosetta.toolbox import cleanATOM
|
@classmethod
|
||||||
"""Extract all ATOM and TER records in a PDB file and write them to a new file.
|
def cleanATOM(cls, input_file: Path, out_file: Path = None, ext: str = ".clean.pdb") -> 'PDBAnalyzer':
|
||||||
|
"""
|
||||||
|
Class method to clean PDB file by extracting all ATOM and TER records and write them to a new file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
pdb_file (str): Path of the PDB file from which ATOM and TER records
|
input_file (Path): Path of the PDB file to be cleaned.
|
||||||
will be extracted
|
out_file (Path): Optional; output filename. Defaults to None, in which case the output path is input_file with its suffix replaced by ext (e.g. 1abc.pdb -> 1abc.clean.pdb).
|
||||||
out_file (str): Optional argument to specify a particular output filename.
|
ext (str): Suffix that replaces the input file's suffix when out_file is not specified; it is passed to Path.with_suffix, so it must start with ".". Defaults to ".clean.pdb".
|
||||||
Defaults to <pdb_file>.clean.pdb.
|
|
||||||
ext (str): File extension to use for output file. Defaults to ".clean.pdb"
|
Returns:
|
||||||
|
PDBAnalyzer: An instance of PDBAnalyzer pointing to the cleaned PDB file.
|
||||||
"""
|
"""
|
||||||
pdb_file = self.pdb_file.as_posix()
|
# Define the output file name if not provided
|
||||||
# find all ATOM and TER lines
|
|
||||||
with open(pdb_file, "r") as fid:
|
|
||||||
good = [l for l in fid if l.startswith(("ATOM", "TER"))]
|
|
||||||
|
|
||||||
# default output file to <pdb_file>_clean.pdb
|
|
||||||
if out_file is None:
|
if out_file is None:
|
||||||
out_file = os.path.splitext(pdb_file)[0] + ext
|
out_file = input_file.with_suffix(ext)
|
||||||
|
|
||||||
# write the selected records to a new file
|
# Extract ATOM and TER lines
|
||||||
|
with open(input_file, "r") as fid:
|
||||||
|
good_lines = [line for line in fid if line.startswith(("ATOM", "TER"))]
|
||||||
|
|
||||||
|
# Write the selected records to the new file
|
||||||
with open(out_file, "w") as fid:
|
with open(out_file, "w") as fid:
|
||||||
fid.writelines(good)
|
fid.writelines(good_lines)
|
||||||
return Path(out_file)
|
|
||||||
|
# Return a new instance of PDBAnalyzer pointing to the cleaned file
|
||||||
|
return cls(out_file)
|
||||||
|
|
||||||
|
|
||||||
def sequence_similarity(self, seq1: str, seq2: str) -> float:
|
def sequence_similarity(self, seq1: str, seq2: str) -> float:
|
||||||
"""
|
"""
|
||||||
@@ -203,7 +216,6 @@ class PDBAnalyzer:
|
|||||||
|
|
||||||
sequences = {}
|
sequences = {}
|
||||||
# Process each chain in the structure
|
# Process each chain in the structure
|
||||||
# use cleanATOM to remove HETATM
|
|
||||||
for model in self.protein_structure:
|
for model in self.protein_structure:
|
||||||
chains = model.get_list()
|
chains = model.get_list()
|
||||||
for chain in chains:
|
for chain in chains:
|
||||||
|
|||||||
Reference in New Issue
Block a user