Add extract_chains_to_new_pdb method

2024-01-31 16:39:09 +08:00
parent 95fed4983b
commit ed56355d55
1 changed file with 30 additions and 1 deletion
--- a/analysis_pdb.py
+++ b/analysis_pdb.py
@@ -146,7 +146,36 @@ class PDBAnalyzer:
        # Return a new instance of PDBAnalyzer pointing to the cleaned file
        return cls(out_file)

-    
+    def extract_chains_to_new_pdb(self, chains: List[str]) -> PandasPdb:
+        """
+        Extract specified chains into a new PandasPdb object.
+
+        Args:
+            chains (List[str]): List of chain IDs to be extracted.
+
+        Returns:
+            PandasPdb: A new PandasPdb object holding copies of the matching ATOM/HETATM records.
+
+        Raises:
+            ValueError: If any of the specified chains are not found in the PDB file.
+        """
+        # Collect unknown chain IDs once; the list doubles as the validity check.
+        missing_chains = [chain for chain in chains if chain not in self.chain_id_list]
+        if missing_chains:
+            raise ValueError(f"Chains {missing_chains} not found in the PDB file. {self.pdb_file.as_posix()}")
+
+        new_ppdb = PandasPdb()
+
+        # Store explicit copies: a bare boolean-mask selection stays linked to the source
+        # frame, so later in-place edits on it can trigger SettingWithCopyWarning.
+        atoms = self.biodata.df['ATOM']
+        new_ppdb.df['ATOM'] = atoms[atoms['chain_id'].isin(chains)].copy()
+        if 'HETATM' in self.biodata.df:  # HETATM records are carried over only when present
+            hetatms = self.biodata.df['HETATM']
+            new_ppdb.df['HETATM'] = hetatms[hetatms['chain_id'].isin(chains)].copy()
+
+        return new_ppdb
+
    def sequence_similarity(self, seq1: str, seq2: str) -> float:
        """
        Calculate the similarity between two sequences.