add align_sequences and find_non_dash_indices
This commit is contained in:
@@ -25,6 +25,54 @@ class PDBModeler:
|
|||||||
self.sequence = self.fasta_file.stem
|
self.sequence = self.fasta_file.stem
|
||||||
self.ali_file = self.fasta_to_ali()
|
self.ali_file = self.fasta_to_ali()
|
||||||
|
|
||||||
|
@staticmethod
def find_non_dash_indices(seq):
    """Locate the span of *seq* between its first and last non-'-' items.

    Args:
        seq: A sized, reversible sequence of characters (e.g. a ``str``)
            in which '-' marks a gap.

    Returns:
        A ``(start, end)`` pair such that ``seq[start:end]`` begins at the
        first non-'-' item and stops right after the last one (``end`` is
        exclusive). Gaps that sit between those two items are kept.
        ``(None, None)`` is returned when *seq* is empty or holds only '-'.
    """
    start = next((i for i, c in enumerate(seq) if c != '-'), None)
    if start is None:
        # No non-gap item at all, so there is no trailing boundary either.
        return None, None

    # Count the gap items at the tail by scanning from the right; a non-gap
    # item is guaranteed to exist here because ``start`` was found.
    trailing_gaps = next(i for i, c in enumerate(reversed(seq)) if c != '-')
    return start, len(seq) - trailing_gaps
|
||||||
|
|
||||||
|
@staticmethod
def align_sequences(file: Path) -> Path:
    """Trim a two-sequence FASTA file and save the gap-free sequence.

    The first sequence that starts or ends with '-' determines a window
    (via ``PDBModeler.find_non_dash_indices``); that same window is then
    cut out of both sequences. The first trimmed sequence containing no
    '-' at all is written to a new single-record FASTA file.

    Args:
        file: Path to a FASTA file that must contain exactly two sequences
            (presumably a pairwise alignment -- confirm against callers).

    Returns:
        ``file`` itself when neither sequence starts or ends with '-';
        otherwise the path of the newly written file, which is ``file``
        with its suffix replaced by ``.selected.fasta``.

    Raises:
        AssertionError: If the file does not hold exactly two sequences,
            or if neither trimmed sequence is free of '-'.
    """
    fx = pyfastx.Fasta(file.as_posix(), build_index=True)
    # Raised explicitly instead of using ``assert`` so the check is not
    # stripped under ``python -O``; the exception type is kept unchanged.
    if len(fx) != 2:
        raise AssertionError(
            f"expected exactly 2 sequences in {file}, found {len(fx)}"
        )

    records = list(fx)
    # Read each sequence string once and reuse it below.
    raw = [record.seq for record in records]

    # Decide which chain drives the trimming: the first one that has a
    # gap character at either end.
    trim_index = next(
        (
            i
            for i, text in enumerate(raw)
            if text.startswith('-') or text.endswith('-')
        ),
        None,
    )
    if trim_index is None:
        # Neither chain needs trimming, so hand back the original file.
        return file

    start, end = PDBModeler.find_non_dash_indices(raw[trim_index])

    # Cut the same window out of both chains.
    trimmed = [
        (record.name, text[start:end]) for record, text in zip(records, raw)
    ]

    # Pick the first chain that has no gap characters left. A sequence
    # without any '-' cannot start or end with one, so no further
    # boundary check is needed.
    selected = next(
        ((name, text) for name, text in trimmed if '-' not in text),
        None,
    )
    if selected is None:
        raise AssertionError("no sequence without '-' found")

    name, text = selected
    selected_seq = SeqRecord(Seq(text), id=name, description="")

    # Write the selected sequence to a new FASTA file using Biopython.
    new_fasta_file = file.with_suffix('.selected.fasta')
    with open(new_fasta_file, 'w') as output_handle:
        SeqIO.write([selected_seq], output_handle, "fasta")

    return new_fasta_file
|
||||||
|
|
||||||
def make_model(self): # 单模板建模
|
def make_model(self): # 单模板建模
|
||||||
print("***************************************************")
|
print("***************************************************")
|
||||||
print("md_level ====", self.md_level)
|
print("md_level ====", self.md_level)
|
||||||
|
|||||||
Reference in New Issue
Block a user