将cleanATOM保存为classmethod

2024-01-31 16:36:42 +08:00
parent 0612fa3e41
commit 95fed4983b
1 changed files with 32 additions and 20 deletions
--- a/analysis_pdb.py
+++ b/analysis_pdb.py
@@ -7,6 +7,9 @@
@Author             :lyzeng
@Email              :pylyzeng@gmail.com
@version            :1.0
 # 清理杂原子并初始化PDBAnalyzer
 analyzer = PDBAnalyzer.cleanATOM(pdb_file)
 print(analyzer.pdb_file)
 '''
 # micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab
 # modeller注册码：MODELIRANJE (<conda_env>//lib/modeller-10.4/modlib/modeller/config.py)
@@ -103,36 +106,46 @@ class PDBAnalyzer:
        """ 
        Initialize the PDB structure after the object is created.
        """
-        self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
+        self.initialize_properties()
    def initialize_properties(self):
        """Initialize properties based on the pdb_file."""
        self.pdb_file_stem = self.pdb_file.stem.split('.')[0]
        self.pid = self.pdb_file_stem.lower() if len(self.pdb_file_stem) == 4 else None
        self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
-        self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix())
+        self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
        self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
        self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
        self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
-    def cleanATOM(self, out_file=None, ext="_clean.pdb") -> Path: # from pyrosetta.toolbox import cleanATOM
+    @classmethod
-        """Extract all ATOM and TER records in a PDB file and write them to a new file.
+    def cleanATOM(cls, input_file: Path, out_file: Path = None, ext: str = ".clean.pdb") -> 'PDBAnalyzer':
        """
        Class method to clean PDB file by extracting all ATOM and TER records and write them to a new file.
        Args:
-            pdb_file (str): Path of the PDB file from which ATOM and TER records
+            input_file (Path): Path of the PDB file to be cleaned.
-                will be extracted
+            out_file (Path): Optional; output filename. Defaults to None, in which case the output path is input_file.with_suffix(ext), e.g. <input_file stem>.clean.pdb.
-            out_file (str): Optional argument to specify a particular output filename.
+            ext (str): Suffix for the output file if out_file is not specified; it is passed to Path.with_suffix, so it must start with a dot. Defaults to ".clean.pdb".
-                Defaults to <pdb_file>.clean.pdb.
+
-            ext (str): File extension to use for output file. Defaults to ".clean.pdb"
+        Returns:
            PDBAnalyzer: An instance of PDBAnalyzer pointing to the cleaned PDB file.
        """
-        pdb_file = self.pdb_file.as_posix()
+        # Define the output file name if not provided
        # find all ATOM and TER lines
        with open(pdb_file, "r") as fid:
            good = [l for l in fid if l.startswith(("ATOM", "TER"))]
        # default output file to <pdb_file>_clean.pdb
        if out_file is None:
-            out_file = os.path.splitext(pdb_file)[0] + ext
+            out_file = input_file.with_suffix(ext)
-        # write the selected records to a new file
+        # Extract ATOM and TER lines
        with open(input_file, "r") as fid:
            good_lines = [line for line in fid if line.startswith(("ATOM", "TER"))]
        # Write the selected records to the new file
        with open(out_file, "w") as fid:
-            fid.writelines(good)
+            fid.writelines(good_lines)
-        return Path(out_file)
+
        # Return a new instance of PDBAnalyzer pointing to the cleaned file
        return cls(out_file)
    def sequence_similarity(self, seq1: str, seq2: str) -> float:
        """
@@ -203,7 +216,6 @@ class PDBAnalyzer:
        sequences = {}
        # Process each chain in the structure
        # use cleanATOM to remove HETATM
        for model in self.protein_structure:
            chains = model.get_list()
            for chain in chains: