将cleanATOM保存为classmethod

This commit is contained in:
2024-01-31 16:36:42 +08:00
parent 0612fa3e41
commit 95fed4983b

View File

@@ -7,6 +7,9 @@
@Author :lyzeng
@Email :pylyzeng@gmail.com
@version :1.0
# 清理杂原子并初始化PDBAnalyzer
analyzer = PDBAnalyzer.cleanATOM(pdb_file)
print(analyzer.pdb_file)
'''
# micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab
# modeller注册码MODELIRANJE (<conda_env>//lib/modeller-10.4/modlib/modeller/config.py)
@@ -103,36 +106,46 @@ class PDBAnalyzer:
"""
Initialize the PDB structure after the object is created.
"""
self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
self.initialize_properties()
def initialize_properties(self):
"""Initialize properties based on the pdb_file."""
self.pdb_file_stem = self.pdb_file.stem.split('.')[0]
self.pid = self.pdb_file_stem.lower() if len(self.pdb_file_stem) == 4 else None
self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix())
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
def cleanATOM(self, out_file=None, ext="_clean.pdb") -> Path: # from pyrosetta.toolbox import cleanATOM
"""Extract all ATOM and TER records in a PDB file and write them to a new file.
@classmethod
def cleanATOM(cls, input_file: Path, out_file: Path = None, ext: str = ".clean.pdb") -> 'PDBAnalyzer':
"""
Class method that cleans a PDB file by extracting all ATOM and TER records and writing them to a new file.
Args:
pdb_file (str): Path of the PDB file from which ATOM and TER records
will be extracted
out_file (str): Optional argument to specify a particular output filename.
Defaults to <pdb_file>.clean.pdb.
ext (str): File extension to use for output file. Defaults to ".clean.pdb"
input_file (Path): Path of the PDB file to be cleaned.
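out_file (Path): Optional; output filename. Defaults to None, in which case the last suffix of input_file is replaced by ext (e.g. 1abc.pdb -> 1abc.clean.pdb).
ext (str): Suffix for the output file if out_file is not specified; it must start with a dot because it is passed to Path.with_suffix. Defaults to ".clean.pdb".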
Returns:
PDBAnalyzer: An instance of PDBAnalyzer pointing to the cleaned PDB file.
"""
pdb_file = self.pdb_file.as_posix()
# find all ATOM and TER lines
with open(pdb_file, "r") as fid:
good = [l for l in fid if l.startswith(("ATOM", "TER"))]
# default output file to <pdb_file>_clean.pdb
# Define the output file name if not provided
if out_file is None:
out_file = os.path.splitext(pdb_file)[0] + ext
out_file = input_file.with_suffix(ext)
# write the selected records to a new file
# Extract ATOM and TER lines
with open(input_file, "r") as fid:
good_lines = [line for line in fid if line.startswith(("ATOM", "TER"))]
# Write the selected records to the new file
with open(out_file, "w") as fid:
fid.writelines(good)
return Path(out_file)
fid.writelines(good_lines)
# Return a new instance of PDBAnalyzer pointing to the cleaned file
return cls(out_file)
def sequence_similarity(self, seq1: str, seq2: str) -> float:
"""
@@ -203,7 +216,6 @@ class PDBAnalyzer:
sequences = {}
# Process each chain in the structure
# use cleanATOM to remove HETATM
for model in self.protein_structure:
chains = model.get_list()
for chain in chains: