extract_chains_to_new_pdb method

This commit is contained in:
2024-01-31 16:39:09 +08:00
parent 95fed4983b
commit ed56355d55

View File

@@ -146,7 +146,36 @@ class PDBAnalyzer:
# Return a new instance of PDBAnalyzer pointing to the cleaned file
return cls(out_file)
def extract_chains_to_new_pdb(self, chains: List[str]) -> PandasPdb:
    """
    Extract specified chains into a new PandasPdb object.

    Only the 'ATOM' records and, when the source has that key, the 'HETATM'
    records are carried over; no other record type is copied into the result.
    The returned frames are explicit copies, so editing them does not touch
    the data held by this analyzer.

    Args:
        chains (List[str]): List of chain IDs to be extracted.

    Returns:
        PandasPdb: A new PandasPdb object containing only the specified chains.

    Raises:
        ValueError: If any of the specified chains are not found in the PDB file.
    """
    # Read the attribute once and build the list of unknown IDs in a single
    # pass; an empty list means every requested chain is present.
    known_chains = self.chain_id_list
    missing_chains = [chain for chain in chains if chain not in known_chains]
    if missing_chains:
        raise ValueError(f"Chains {missing_chains} not found in the PDB file. {self.pdb_file.as_posix()}")

    # Mapping of record name -> DataFrame on the source structure.
    # NOTE(review): self.biodata is presumably a PandasPdb -- confirm.
    source_frames = self.biodata.df

    # Create a new PandasPdb object for the specified chains
    new_ppdb = PandasPdb()

    # Extract ATOM records for specified chains. `.copy()` detaches the
    # selection from the source frame so later in-place edits of the result
    # are unambiguous and do not trigger pandas' SettingWithCopyWarning.
    atom_df = source_frames['ATOM']
    new_ppdb.df['ATOM'] = atom_df[atom_df['chain_id'].isin(chains)].copy()

    # Extract HETATM records for specified chains, if the source has any
    if 'HETATM' in source_frames:
        hetatm_df = source_frames['HETATM']
        new_ppdb.df['HETATM'] = hetatm_df[hetatm_df['chain_id'].isin(chains)].copy()

    return new_ppdb
def sequence_similarity(self, seq1: str, seq2: str) -> float:
"""
Calculate the similarity between two sequences.