update
This commit is contained in:
@@ -288,5 +288,7 @@ RNA可以用Amber的Parmbsc1的力场,是amber中最新的适合模拟核酸
|
||||
单纯的常规MD就用水中溶菌酶那个教程就行。力场用我上面提到的就行。
|
||||
不过你模拟两个碱基的RNA有什么意义吗?是要模拟碱基配对过程(比如不同碱基之间的配对,这个貌似有人研究过了)吗还是怎么样。
|
||||
|
||||
你可以看下这篇文章,今晚才看见的,
|
||||
Assessment of AMBER Force Fields for Simulations of ssDNA(DOI: 10.1021/acs.jctc.0c0093)
|
||||
你可以看下这篇文章
|
||||
|
||||
Assessment of AMBER Force Fields for Simulations of ssDNA(DOI: 10.1021/acs.jctc.0c0093)
|
||||
|
||||
|
||||
1
README_QMMM.md
Normal file
1
README_QMMM.md
Normal file
@@ -0,0 +1 @@
|
||||
# [ASH - A QM/MM packages](https://mp.weixin.qq.com/s/jjVe81BwBjUWeNSEGymsAw)
|
||||
73
bioinfo/MHH.py
Normal file
73
bioinfo/MHH.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import numpy as np
|
||||
from hmmlearn import hmm
|
||||
|
||||
# 定义状态和观察集合
|
||||
# 状态集合代表不同的基因型: MM, MZ, ZZ
|
||||
states = ["MM", "MZ", "ZZ"]
|
||||
n_states = len(states)
|
||||
|
||||
# 观测集合为 0 和 1,其中 0 表示 ZZ,1 表示 MM
|
||||
observations = [0, 1]
|
||||
n_observations = len(observations)
|
||||
|
||||
# 初始概率分布 (initial state probabilities)
|
||||
# 定义每个状态的初始概率: MM: 0.4, MZ: 0.2, ZZ: 0.4
|
||||
start_probability = np.array([0.4, 0.2, 0.4])
|
||||
|
||||
# 转移概率矩阵 (state transition matrix)
|
||||
# 每个状态到其他状态的转移概率,基于重组率,假设重组率为 0.01
|
||||
# 例如,从 MM 状态保持不变的概率为 0.99,转移到 MZ 或 ZZ 的概率为 0.005
|
||||
transition_probability = np.array([
|
||||
[0.99, 0.005, 0.005], # MM -> [MM, MZ, ZZ]
|
||||
[0.005, 0.99, 0.005], # MZ -> [MM, MZ, ZZ]
|
||||
[0.005, 0.005, 0.99] # ZZ -> [MM, MZ, ZZ]
|
||||
])
|
||||
|
||||
# 输出概率矩阵 (emission probability matrix)
|
||||
# 定义每个隐藏状态 (MM, MZ, ZZ) 生成观测值 (0, 1) 的概率
|
||||
# 基于测序错误率 3%
|
||||
# 例如,MM 状态下观测到 1 (MM) 的概率为 0.97,观测到 0 (ZZ) 的概率为 0.03
|
||||
emission_probability = np.array([
|
||||
[0.97, 0.03], # MM -> [观测到 0 (ZZ), 观测到 1 (MM)]
|
||||
[0.5, 0.5], # MZ -> [观测到 0 (ZZ), 观测到 1 (MM)]
|
||||
[0.03, 0.97] # ZZ -> [观测到 0 (ZZ), 观测到 1 (MM)]
|
||||
])
|
||||
|
||||
# 建立 HMM 模型
|
||||
# 使用 hmmlearn 库的 CategoricalHMM 来表示隐马尔可夫模型
|
||||
# CategoricalHMM 用于离散观测值的建模,与之前的 MultinomialHMM 有所不同
|
||||
# MultinomialHMM 新版本中进行了重大变动,要求明确设置 n_trials,且现在遵循标准的多项式分布定义
|
||||
model = hmm.CategoricalHMM(n_components=n_states, n_iter=100, tol=0.01)
|
||||
model.startprob_ = start_probability # 设置初始状态概率
|
||||
model.transmat_ = transition_probability # 设置状态转移概率矩阵
|
||||
model.emissionprob_ = emission_probability # 设置输出概率矩阵
|
||||
|
||||
# 观测序列(给定的 109 个位点的测序结果)
|
||||
# 将观测结果调整为二维数组,其中每一行为一个观测值
|
||||
observation_sequence = np.array([[1], [1], [1], [1], [1], [1], [1], [0], [1], [1], [1], [1], [1], [1], [0], [0], [0], [0], [0], [0],
|
||||
[1], [0], [0], [0], [0], [0], [0], [0], [1], [0], [1], [0], [1], [0], [1], [1], [1], [1], [1], [1],
|
||||
[1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [0], [1], [0], [1], [0], [1],
|
||||
[1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1],
|
||||
[1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1],
|
||||
[1], [1], [1], [1], [1], [1], [1], [1], [1]]).reshape(-1, 1) # 调整为二维数组
|
||||
|
||||
# 使用维特比算法解码最可能的状态序列
|
||||
# 使用维特比算法计算最可能的隐藏状态序列(即最可能的基因型序列)
|
||||
logprob, hidden_states = model.decode(observation_sequence, algorithm="viterbi")
|
||||
|
||||
# 输出结果
|
||||
# 打印最可能的基因型序列
|
||||
print("最可能的基因型序列:")
|
||||
results = ",".join([states[state] for state in hidden_states])
|
||||
print(" ".join([states[state] for state in hidden_states]))
|
||||
new_list= []
|
||||
for i in [states[state] for state in hidden_states]:
|
||||
if i == 'ZZ':
|
||||
new_list.append('0')
|
||||
elif i == 'MM':
|
||||
new_list.append('1')
|
||||
elif i == 'MZ':
|
||||
new_list.append('2')
|
||||
else:
|
||||
pass
|
||||
print("".join(new_list))
|
||||
54
bioinfo/sum.py
Normal file
54
bioinfo/sum.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import numpy as np
|
||||
from jaxtyping import Float, Array
|
||||
|
||||
def needleman_wunsch(seq1: str, seq2: str,
|
||||
match_score: int = 1,
|
||||
mismatch_penalty: int = -1,
|
||||
gap_penalty: int = -2) -> tuple[str, str, Float[Array, "len_seq1+1 len_seq2+1"]]:
|
||||
# 初始化矩阵
|
||||
len_seq1, len_seq2 = len(seq1), len(seq2)
|
||||
score_matrix: Float[Array, "len_seq1+1 len_seq2+1"] = np.zeros((len_seq1 + 1, len_seq2 + 1))
|
||||
|
||||
# 初始化第一行和第一列
|
||||
for i in range(len_seq1 + 1):
|
||||
score_matrix[i][0] = i * gap_penalty
|
||||
for j in range(len_seq2 + 1):
|
||||
score_matrix[0][j] = j * gap_penalty
|
||||
|
||||
# 填充得分矩阵
|
||||
for i in range(1, len_seq1 + 1):
|
||||
for j in range(1, len_seq2 + 1):
|
||||
match = score_matrix[i - 1][j - 1] + (match_score if seq1[i - 1] == seq2[j - 1] else mismatch_penalty)
|
||||
delete = score_matrix[i - 1][j] + gap_penalty
|
||||
insert = score_matrix[i][j - 1] + gap_penalty
|
||||
score_matrix[i][j] = max(match, delete, insert)
|
||||
|
||||
# 回溯得到比对结果
|
||||
align1, align2 = '', ''
|
||||
i, j = len_seq1, len_seq2
|
||||
while i > 0 or j > 0:
|
||||
current_score = score_matrix[i][j]
|
||||
if i > 0 and j > 0 and (current_score == score_matrix[i - 1][j - 1] + (match_score if seq1[i - 1] == seq2[j - 1] else mismatch_penalty)):
|
||||
align1 = seq1[i - 1] + align1
|
||||
align2 = seq2[j - 1] + align2
|
||||
i -= 1
|
||||
j -= 1
|
||||
elif i > 0 and (current_score == score_matrix[i - 1][j] + gap_penalty):
|
||||
align1 = seq1[i - 1] + align1
|
||||
align2 = '-' + align2
|
||||
i -= 1
|
||||
else:
|
||||
align1 = '-' + align1
|
||||
align2 = seq2[j - 1] + align2
|
||||
j -= 1
|
||||
|
||||
return align1, align2, score_matrix
|
||||
|
||||
# 测试示例
|
||||
seq1 = "GATTACA"
|
||||
seq2 = "GCATGCU"
|
||||
alignment = needleman_wunsch(seq1, seq2)
|
||||
print("Aligned Sequences:")
|
||||
print(alignment[0])
|
||||
print(alignment[1])
|
||||
print("Alignment Score:", alignment[2])
|
||||
Reference in New Issue
Block a user