Add protein_structure attribute (structure parsed with HETATM records removed)
This commit is contained in:
@@ -37,6 +37,7 @@ class PDBAnalyzer:
|
||||
pdb_file: Path
|
||||
pid: Optional[str] = field(default=None, init=False)
|
||||
structure: object = field(init=False)
|
||||
protein_structure: object = field(init=False)
|
||||
biodf: PandasPdb = field(init=False)
|
||||
protein_state: str = field(init=False) # Apo or Holo
|
||||
chain_id_list: List[str] = field(init=False)
|
||||
@@ -45,9 +46,9 @@ class PDBAnalyzer:
|
||||
"""
|
||||
Initialize the PDB structure after the object is created.
|
||||
"""
|
||||
parser = PDBParser(QUIET=True)
|
||||
self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
|
||||
self.structure = parser.get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||
self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix())
|
||||
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
|
||||
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
|
||||
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
|
||||
@@ -62,7 +63,7 @@ class PDBAnalyzer:
|
||||
Defaults to <pdb_file>.clean.pdb.
|
||||
ext (str): File extension to use for output file. Defaults to ".clean.pdb"
|
||||
"""
|
||||
pdb_file = self.path.as_posix()
|
||||
pdb_file = self.pdb_file.as_posix()
|
||||
# find all ATOM and TER lines
|
||||
with open(pdb_file, "r") as fid:
|
||||
good = [l for l in fid if l.startswith(("ATOM", "TER"))]
|
||||
@@ -129,7 +130,8 @@ class PDBAnalyzer:
|
||||
|
||||
sequences = {}
|
||||
# Process each chain in the structure
|
||||
for model in self.structure:
|
||||
# iterate over protein_structure, which is parsed from the cleanATOM output (ATOM/TER lines only), so HETATM records are excluded
|
||||
for model in self.protein_structure:
|
||||
chains = model.get_list()
|
||||
for chain in chains:
|
||||
# Check continuity and get the sequence of residues
|
||||
@@ -155,7 +157,7 @@ class PDBAnalyzer:
|
||||
return chain.get_id(), sorted(set(full_range) - set(observed))
|
||||
return chain.get_id(), []
|
||||
|
||||
chains = [chain for model in self.structure for chain in model]
|
||||
chains = [chain for model in self.protein_structure for chain in model]
|
||||
missing_info = map(find_missing, chains)
|
||||
return dict(filter(lambda x: x[1], missing_info))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user