extract_chains_to_new_pdb method

2024-01-31 16:39:09 +08:00
parent 95fed4983b
commit ed56355d55
1 changed files with 30 additions and 1 deletions
--- a/analysis_pdb.py
+++ b/analysis_pdb.py
@@ -146,7 +146,36 @@ class PDBAnalyzer:
        # Return a new instance of PDBAnalyzer pointing to the cleaned file
        return cls(out_file)
-    
+    def extract_chains_to_new_pdb(self, chains: List[str]) -> PandasPdb:
        """
        Extract specified chains into a new PandasPdb object.
        Args:
            chains (List[str]): List of chain IDs to be extracted.
        Returns:
            PandasPdb: A new PandasPdb object containing only the specified chains.
        Raises:
            ValueError: If any of the specified chains are not found in the PDB file.
        """
        # Check if all specified chains exist in the PDB file
        if not all(chain in self.chain_id_list for chain in chains):
            missing_chains = [chain for chain in chains if chain not in self.chain_id_list]
            raise ValueError(f"Chains {missing_chains} not found in the PDB file. {self.pdb_file.as_posix()}")
        # Create a new PandasPdb object for the specified chains
        new_ppdb = PandasPdb()
        # Extract ATOM records for specified chains
        new_ppdb.df['ATOM'] = self.biodata.df['ATOM'][self.biodata.df['ATOM']['chain_id'].isin(chains)]
        # Extract HETATM records for specified chains, if needed
        if 'HETATM' in self.biodata.df:
            new_ppdb.df['HETATM'] = self.biodata.df['HETATM'][self.biodata.df['HETATM']['chain_id'].isin(chains)]
        return new_ppdb
    def sequence_similarity(self, seq1: str, seq2: str) -> float:
        """
        Calculate the similarity between two sequences.