将cleanATOM保存为classmethod
This commit is contained in:
@@ -7,6 +7,9 @@
|
||||
@Author :lyzeng
|
||||
@Email :pylyzeng@gmail.com
|
||||
@version :1.0
|
||||
# 清理杂原子并初始化PDBAnalyzer
|
||||
analyzer = PDBAnalyzer.cleanATOM(pdb_file)
|
||||
print(analyzer.pdb_file)
|
||||
'''
|
||||
# micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab
|
||||
# modeller注册码:MODELIRANJE (<conda_env>/lib/modeller-10.4/modlib/modeller/config.py)
|
||||
@@ -103,36 +106,46 @@ class PDBAnalyzer:
|
||||
"""
|
||||
Initialize the PDB structure after the object is created.
|
||||
"""
|
||||
self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
|
||||
self.initialize_properties()
|
||||
|
||||
def initialize_properties(self):
|
||||
"""Initialize properties based on the pdb_file."""
|
||||
self.pdb_file_stem = self.pdb_file.stem.split('.')[0]
|
||||
self.pid = self.pdb_file_stem.lower() if len(self.pdb_file_stem) == 4 else None
|
||||
self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix())
|
||||
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
|
||||
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
|
||||
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
|
||||
|
||||
def cleanATOM(self, out_file=None, ext="_clean.pdb") -> Path: # from pyrosetta.toolbox import cleanATOM
|
||||
"""Extract all ATOM and TER records in a PDB file and write them to a new file.
|
||||
@classmethod
|
||||
def cleanATOM(cls, input_file: Path, out_file: Path = None, ext: str = ".clean.pdb") -> 'PDBAnalyzer':
|
||||
"""
|
||||
Class method that cleans a PDB file by extracting all ATOM and TER records and writing them to a new file.
|
||||
|
||||
Args:
|
||||
pdb_file (str): Path of the PDB file from which ATOM and TER records
|
||||
will be extracted
|
||||
out_file (str): Optional argument to specify a particular output filename.
|
||||
Defaults to <pdb_file>.clean.pdb.
|
||||
ext (str): File extension to use for output file. Defaults to ".clean.pdb"
|
||||
input_file (Path): Path of the PDB file to be cleaned.
|
||||
out_file (Path): Optional; output filename. Defaults to None, in which case the suffix of input_file is replaced with ext (e.g. 1abc.pdb -> 1abc.clean.pdb).
|
||||
ext (str): Suffix that replaces the extension of input_file when out_file is not specified; it must start with a dot. Defaults to ".clean.pdb".
|
||||
|
||||
Returns:
|
||||
PDBAnalyzer: An instance of PDBAnalyzer pointing to the cleaned PDB file.
|
||||
"""
|
||||
pdb_file = self.pdb_file.as_posix()
|
||||
# find all ATOM and TER lines
|
||||
with open(pdb_file, "r") as fid:
|
||||
good = [l for l in fid if l.startswith(("ATOM", "TER"))]
|
||||
|
||||
# default output file to <pdb_file>_clean.pdb
|
||||
# Define the output file name if not provided
|
||||
if out_file is None:
|
||||
out_file = os.path.splitext(pdb_file)[0] + ext
|
||||
out_file = input_file.with_suffix(ext)
|
||||
|
||||
# write the selected records to a new file
|
||||
# Extract ATOM and TER lines
|
||||
with open(input_file, "r") as fid:
|
||||
good_lines = [line for line in fid if line.startswith(("ATOM", "TER"))]
|
||||
|
||||
# Write the selected records to the new file
|
||||
with open(out_file, "w") as fid:
|
||||
fid.writelines(good)
|
||||
return Path(out_file)
|
||||
fid.writelines(good_lines)
|
||||
|
||||
# Return a new instance of PDBAnalyzer pointing to the cleaned file
|
||||
return cls(out_file)
|
||||
|
||||
|
||||
def sequence_similarity(self, seq1: str, seq2: str) -> float:
|
||||
"""
|
||||
@@ -203,7 +216,6 @@ class PDBAnalyzer:
|
||||
|
||||
sequences = {}
|
||||
# Process each chain in the structure
|
||||
# use cleanATOM to remove HETATM
|
||||
for model in self.protein_structure:
|
||||
chains = model.get_list()
|
||||
for chain in chains:
|
||||
|
||||
Reference in New Issue
Block a user