diff --git a/build_modeller.py b/build_modeller.py index e7c4c74..0cf7c56 100755 --- a/build_modeller.py +++ b/build_modeller.py @@ -86,15 +86,35 @@ class PDBModeler: aln.append_model(mdl, align_codes=self.structure, atom_files=self.structure_file.as_posix()) aln.append(file=self.ali_file.as_posix(), align_codes=self.sequence) aln.align2d() - aln.write(file=(self.outdir / 'alignment.ali').as_posix(), alignment_format='PIR') - aln.write(file=(self.outdir / 'alignment.pap').as_posix(), alignment_format='PAP') + aln.write(file=(self.outdir / 'alignment1.ali').as_posix(), alignment_format='PIR') + aln.write(file=(self.outdir / 'alignment1.pap').as_posix(), alignment_format='PAP') + + # save alignment in FASTA format + aln.write(file=(self.outdir / 'alignment1.fasta').as_posix(), alignment_format='FASTA') + slice_fasta = PDBModeler.align_sequences(self.outdir / 'alignment1.fasta') + slice_ali = self.outdir / 'alignment_slice.ali' + fx = pyfastx.Fasta(slice_fasta.as_posix(), build_index=True) + assert len(fx) == 1, "FASTA file should contain only one sequence" + PDBModeler.write_ali(slice_ali, fx[0].name, fx[0].seq) + + env2 = Environ() + mdl2 = Model(env2, file=self.structure_file.as_posix(), model_segment=(f'FIRST:{self.chain}', f'LAST:{self.chain}')) + aln2 = Alignment(env2) + aln2.append_model(mdl2, align_codes=self.structure, atom_files=self.structure_file.as_posix()) + aln2.append(file=slice_ali.as_posix(), align_codes=self.sequence) + aln2.align2d() + aln2.write(file=(self.outdir / 'alignment2.ali').as_posix(), alignment_format='PIR') + aln2.write(file=(self.outdir / 'alignment2.pap').as_posix(), alignment_format='PAP') log.verbose() + # choose ali file + fix_ali_file = self.outdir / 'alignment2.ali' if (self.outdir / 'alignment2.ali').exists() else self.outdir / 'alignment1.ali' + env3 = Environ() env3.io.atom_files_directory = ['.'] loop_model = LoopModel(env3, - alnfile=(self.outdir / 'alignment.ali').as_posix(), + alnfile=fix_ali_file.as_posix(), knowns=self.structure, sequence=self.sequence, loop_assess_methods=(assess.DOPE, assess.GA341)) @@ -151,6 +171,13 @@ class PDBModeler: if output.get('failure') is None: model_files.append(Path(output.get('name'))) return model_files + + @staticmethod + def write_ali(ali_file: Path, description: str, sequence: str): + with open(ali_file, 'w') as f: + f.write(f'>P1;{description}\n') + f.write(f'sequence:{description}:::::::0.00: 0.00\n') + f.write(f'{sequence}*') def fasta_to_ali(self) -> Path: if not self.outdir.exists(): @@ -162,12 +189,7 @@ class PDBModeler: fx = pyfastx.Fasta(self.fasta_file.as_posix(), build_index=True) assert len(fx) == 1, "FASTA file should contain only one sequence" - - with open(ali_file, 'w') as f: - f.write(f'>P1;{self.sequence}\n') - f.write(f'sequence:{self.sequence}:::::::0.00: 0.00\n') - f.write(f'{fx[0].seq}*') - + PDBModeler.write_ali(ali_file, self.sequence, fx[0].seq) return ali_file if __name__ == "__main__":