54 lines
2.0 KiB
Python
54 lines
2.0 KiB
Python
import numpy as np
|
|
from jaxtyping import Float, Array
|
|
|
|
def needleman_wunsch(seq1: str, seq2: str,
|
|
match_score: int = 1,
|
|
mismatch_penalty: int = -1,
|
|
gap_penalty: int = -2) -> tuple[str, str, Float[Array, "len_seq1+1 len_seq2+1"]]:
|
|
# 初始化矩阵
|
|
len_seq1, len_seq2 = len(seq1), len(seq2)
|
|
score_matrix: Float[Array, "len_seq1+1 len_seq2+1"] = np.zeros((len_seq1 + 1, len_seq2 + 1))
|
|
|
|
# 初始化第一行和第一列
|
|
for i in range(len_seq1 + 1):
|
|
score_matrix[i][0] = i * gap_penalty
|
|
for j in range(len_seq2 + 1):
|
|
score_matrix[0][j] = j * gap_penalty
|
|
|
|
# 填充得分矩阵
|
|
for i in range(1, len_seq1 + 1):
|
|
for j in range(1, len_seq2 + 1):
|
|
match = score_matrix[i - 1][j - 1] + (match_score if seq1[i - 1] == seq2[j - 1] else mismatch_penalty)
|
|
delete = score_matrix[i - 1][j] + gap_penalty
|
|
insert = score_matrix[i][j - 1] + gap_penalty
|
|
score_matrix[i][j] = max(match, delete, insert)
|
|
|
|
# 回溯得到比对结果
|
|
align1, align2 = '', ''
|
|
i, j = len_seq1, len_seq2
|
|
while i > 0 or j > 0:
|
|
current_score = score_matrix[i][j]
|
|
if i > 0 and j > 0 and (current_score == score_matrix[i - 1][j - 1] + (match_score if seq1[i - 1] == seq2[j - 1] else mismatch_penalty)):
|
|
align1 = seq1[i - 1] + align1
|
|
align2 = seq2[j - 1] + align2
|
|
i -= 1
|
|
j -= 1
|
|
elif i > 0 and (current_score == score_matrix[i - 1][j] + gap_penalty):
|
|
align1 = seq1[i - 1] + align1
|
|
align2 = '-' + align2
|
|
i -= 1
|
|
else:
|
|
align1 = '-' + align1
|
|
align2 = seq2[j - 1] + align2
|
|
j -= 1
|
|
|
|
return align1, align2, score_matrix
|
|
|
|
# 测试示例
|
|
seq1 = "GATTACA"
|
|
seq2 = "GCATGCU"
|
|
alignment = needleman_wunsch(seq1, seq2)
|
|
print("Aligned Sequences:")
|
|
print(alignment[0])
|
|
print(alignment[1])
|
|
print("Alignment Score:", alignment[2]) |