diff --git a/analysis_pdb.py b/analysis_pdb.py index 2383044..621c1f7 100755 --- a/analysis_pdb.py +++ b/analysis_pdb.py @@ -7,6 +7,9 @@ @Author :lyzeng @Email :pylyzeng@gmail.com @version :1.0 +# 清理杂原子并初始化PDBAnalyzer +analyzer = PDBAnalyzer.cleanATOM(pdb_file) +print(analyzer.pdb_file) ''' # micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab # modeller注册码:MODELIRANJE (//lib/modeller-10.4/modlib/modeller/config.py) @@ -103,36 +106,46 @@ class PDBAnalyzer: """ Initialize the PDB structure after the object is created. """ - self.pid = self.pdb_file.stem.lower() if len(self.pdb_file.stem) == 4 else None + self.initialize_properties() + + def initialize_properties(self): + """Initialize properties based on the pdb_file.""" + self.pdb_file_stem = self.pdb_file.stem.split('.')[0] + self.pid = self.pdb_file_stem.lower() if len(self.pdb_file_stem) == 4 else None self.structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix()) - self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.cleanATOM().as_posix()) + self.protein_structure = PDBParser(QUIET=True).get_structure('PDB_structure', self.pdb_file.as_posix()) self.biodata = PandasPdb().read_pdb(self.pdb_file.as_posix()) self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo' self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list() - def cleanATOM(self, out_file=None, ext="_clean.pdb") -> Path: # from pyrosetta.toolbox import cleanATOM - """Extract all ATOM and TER records in a PDB file and write them to a new file. + @classmethod + def cleanATOM(cls, input_file: Path, out_file: Path = None, ext: str = ".clean.pdb") -> 'PDBAnalyzer': + """ + Class method to clean PDB file by extracting all ATOM and TER records and write them to a new file. 
Args: - pdb_file (str): Path of the PDB file from which ATOM and TER records - will be extracted - out_file (str): Optional argument to specify a particular output filename. - Defaults to .clean.pdb. - ext (str): File extension to use for output file. Defaults to ".clean.pdb" + input_file (Path): Path of the PDB file to be cleaned. + out_file (Path): Optional; output filename. Defaults to None, in which case the suffix of input_file is replaced with ext (e.g. 1abc.pdb -> 1abc.clean.pdb). + ext (str): Extension for the output file if out_file is not specified. Defaults to ".clean.pdb". + + Returns: + PDBAnalyzer: An instance of PDBAnalyzer pointing to the cleaned PDB file. """ - pdb_file = self.pdb_file.as_posix() - # find all ATOM and TER lines - with open(pdb_file, "r") as fid: - good = [l for l in fid if l.startswith(("ATOM", "TER"))] - - # default output file to _clean.pdb + # Define the output file name if not provided if out_file is None: - out_file = os.path.splitext(pdb_file)[0] + ext + out_file = input_file.with_suffix(ext) - # write the selected records to a new file + # Extract ATOM and TER lines + with open(input_file, "r") as fid: + good_lines = [line for line in fid if line.startswith(("ATOM", "TER"))] + + # Write the selected records to the new file with open(out_file, "w") as fid: - fid.writelines(good) - return Path(out_file) + fid.writelines(good_lines) + + # Return a new instance of PDBAnalyzer pointing to the cleaned file + return cls(out_file) + def sequence_similarity(self, seq1: str, seq2: str) -> float: """ @@ -203,7 +216,6 @@ class PDBAnalyzer: sequences = {} # Process each chain in the structure - # use cleanATOM to remove HETATM for model in self.protein_structure: chains = model.get_list() for chain in chains: