add protein_structure (remove HETATM)
This commit is contained in:
@@ -37,6 +37,7 @@ class PDBAnalyzer:
|
|||||||
pdb_file: Path
|
pdb_file: Path
|
||||||
pid: Optional[str] = field(default=None, init=False)
|
pid: Optional[str] = field(default=None, init=False)
|
||||||
structure: object = field(init=False)
|
structure: object = field(init=False)
|
||||||
|
protein_structure: object = field(init=False)
|
||||||
biodf: PandasPdb = field(init=False)
|
biodf: PandasPdb = field(init=False)
|
||||||
protein_state: str = field(init=False) # Apo or Holo
|
protein_state: str = field(init=False) # Apo or Holo
|
||||||
chain_id_list: List[str] = field(init=False)
|
chain_id_list: List[str] = field(init=False)
|
||||||
@@ -45,9 +46,9 @@ class PDBAnalyzer:
|
|||||||
"""
|
"""
|
||||||
Initialize the PDB structure after the object is created.
|
Initialize the PDB structure after the object is created.
|
||||||
"""
|
"""
|
||||||
parser = PDBParser(QUIET=True)
|
|
||||||
self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
|
self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None
|
||||||
self.structure = parser.get_structure('PDB_structure', self.pdb_file.as_posix())
|
self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix())
|
||||||
|
self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix())
|
||||||
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
|
self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix())
|
||||||
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
|
self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
|
||||||
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
|
self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()
|
||||||
@@ -62,7 +63,7 @@ class PDBAnalyzer:
|
|||||||
Defaults to <pdb_file>.clean.pdb.
|
Defaults to <pdb_file>.clean.pdb.
|
||||||
ext (str): File extension to use for output file. Defaults to ".clean.pdb"
|
ext (str): File extension to use for output file. Defaults to ".clean.pdb"
|
||||||
"""
|
"""
|
||||||
pdb_file = self.path.as_posix()
|
pdb_file = self.pdb_file.as_posix()
|
||||||
# find all ATOM and TER lines
|
# find all ATOM and TER lines
|
||||||
with open(pdb_file, "r") as fid:
|
with open(pdb_file, "r") as fid:
|
||||||
good = [l for l in fid if l.startswith(("ATOM", "TER"))]
|
good = [l for l in fid if l.startswith(("ATOM", "TER"))]
|
||||||
@@ -129,7 +130,8 @@ class PDBAnalyzer:
|
|||||||
|
|
||||||
sequences = {}
|
sequences = {}
|
||||||
# Process each chain in the structure
|
# Process each chain in the structure
|
||||||
for model in self.structure:
|
# use cleanATOM to remove HETATM
|
||||||
|
for model in self.protein_structure:
|
||||||
chains = model.get_list()
|
chains = model.get_list()
|
||||||
for chain in chains:
|
for chain in chains:
|
||||||
# Check continuity and get the sequence of residues
|
# Check continuity and get the sequence of residues
|
||||||
@@ -155,7 +157,7 @@ class PDBAnalyzer:
|
|||||||
return chain.get_id(), sorted(set(full_range) - set(observed))
|
return chain.get_id(), sorted(set(full_range) - set(observed))
|
||||||
return chain.get_id(), []
|
return chain.get_id(), []
|
||||||
|
|
||||||
chains = [chain for model in self.structure for chain in model]
|
chains = [chain for model in self.protein_structure for chain in model]
|
||||||
missing_info = map(find_missing, chains)
|
missing_info = map(find_missing, chains)
|
||||||
return dict(filter(lambda x: x[1], missing_info))
|
return dict(filter(lambda x: x[1], missing_info))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user