add cleanATOM

2024-01-11 16:10:44 +08:00
parent 5fa9cae3f2
commit 751f3280c9
1 changed files with 115 additions and 67 deletions
--- a/analysis_pdb.py
+++ b/analysis_pdb.py
@@ -9,7 +9,7 @@
@version            :1.0
 '''
 # micromamba create -n modeller modeller biopython pymol-open-source biopandas requests -y -c conda-forge -c salilab
-# modeller注册码：MODELIRANJE
+# modeller注册码：MODELIRANJE (<conda_env>//lib/modeller-10.4/modlib/modeller/config.py)
 from dataclasses import dataclass, field
 from Bio.PDB import PDBParser
 from Bio.SeqUtils import seq1
@@ -23,6 +23,7 @@ from Bio import SeqIO
 import requests
 from copy import deepcopy
 from pymol import cmd
+import os

@dataclass
 class PDBAnalyzer:
@@ -51,6 +52,31 @@ class PDBAnalyzer:
        self.protein_state = 'Holo' if 'HETATM' in self.biodata.df.keys() else 'Apo'
        self.chain_id_list = self.biodata.df['ATOM']['chain_id'].drop_duplicates().to_list()

+    def cleanATOM(self, out_file=None, ext="_clean.pdb") -> Path: # from pyrosetta.toolbox import cleanATOM
+        """Extract all ATOM and TER records in a PDB file and write them to a new file.
+
+        Args:
+            self.path (Path): Path of the PDB file from which ATOM and TER records
+                will be extracted (instance attribute, not a call argument).
+            out_file (str): Optional argument to specify a particular output filename.
+                Defaults to the input path with its extension replaced by `ext`.
+            ext (str): Suffix replacing the input file's extension. Defaults to "_clean.pdb"
+        """
+        pdb_file = self.path.as_posix()
+        # find all ATOM and TER lines
+        with open(pdb_file, "r") as fid:
+            good = [l for l in fid if l.startswith(("ATOM", "TER"))]
+
+        # default output file to <pdb_file>_clean.pdb
+        if out_file is None:
+            out_file = os.path.splitext(pdb_file)[0] + ext
+
+        # write the selected records to a new file
+        with open(out_file, "w") as fid:
+            fid.writelines(good)
+        return Path(out_file)
+
+
    def check_continuity(self, chain, missing_char):
        """ 
        Check the continuity of residues in a protein chain.
@@ -370,17 +396,25 @@ def import_and_merge_pdb_strings(pdb_strings, merged_object_name, output_file):
    # 保存合并后的对象
    cmd.save(output_file, merged_object_name)

-if __name__ == "__main__":
-    # import argparse
-    # parser = argparse.ArgumentParser(description="Build model by Modeller")
-    # parser.add_argument("-s", "--structure", help="Structure file")
-    # parser.add_argument("-o", "--outdir", help="Output directory")
-    # parser.add_argument("-f", "--fasta", help="Fasta file")
-    # parser.add_argument("-n", "--num_loop", help="Number of loop model")
-    # parser.add_argument("-m", "--md_level", help="MD level")
-    # parser.add_argument("-c", "--chain", help="Chain ID")
-    # args = parser.parse_args()
-    pdbfiles = [i for i in Path('../PDBfile').glob('*.pdb')]
+'''
+# 示例: 使用biopython提取A链（将会保留HETATM）
+chain_extractor = analyzer.extract_chain('A')  # 假设要提取的链ID是 'A'
+chain_extractor.save('biopython_extracted_chain_A.pdb')  # 保存为PDB文件
+# 示例: 使用biopandas提取A链（将不会保留HETATM）
+chain_extractor = analyzer.split_chain('A')  # 假设要提取的链ID是 'A'
+chain_extractor.to_pdb('biopandas_extracted_chain_A.pdb')  # 保存为PDB文件
+# A链改B链, 并分割保存为单独文件
+analyzer.change_chain_identifier('A', 'B', split=True).to_pdb(f'{PDB_ID}_B.pdb')
+# 分割所有的链
+split_dict = {}
+for j in analyzer.chain_id_list:
+    fn = Path(f'{PDB_ID}_{j}.pdb')
+    analyzer.split_chain(j).to_pdb(fn.as_posix())
+    split_dict[j]=fn.read_text()
+'''
+
+def fix_all(path:Path):
+    pdbfiles = [i for i in path.glob('*.pdb')]
    for i in pdbfiles:
        PDB_file_path = i
        PDB_ID = i.stem
@@ -448,3 +482,17 @@ if __name__ == "__main__":
        split_dict.update(mc_dict)  # 更新 split_dict
        import_and_merge_pdb_strings(split_dict, "merged_object", f'{PDB_ID}.modellerfix.pdb')

+if __name__ == "__main__":
+    # import argparse
+    # parser = argparse.ArgumentParser(description="Build model by Modeller")
+    # parser.add_argument("-s", "--structure", help="Structure file")
+    # parser.add_argument("-o", "--outdir", help="Output directory")
+    # parser.add_argument("-f", "--fasta", help="Fasta file")
+    # parser.add_argument("-n", "--num_loop", help="Number of loop model")
+    # parser.add_argument("-m", "--md_level", help="MD level")
+    # parser.add_argument("-c", "--chain", help="Chain ID")
+    # args = parser.parse_args()
+    # fix_all(Path('./pdb_test1'))
+    pass
+
+