finsh merge all results
This commit is contained in:
@@ -1,151 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
分析AutoDock Vina对接结果脚本
|
|
||||||
用法: python analyze_results.py poses_directory output_csv
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import pandas as pd
|
|
||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
def parse_vina_output(pdbqt_file):
|
|
||||||
"""解析Vina输出文件,提取能量信息"""
|
|
||||||
results = []
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(pdbqt_file, 'r') as f:
|
|
||||||
lines = f.readlines()
|
|
||||||
|
|
||||||
ligand_name = Path(pdbqt_file).stem.replace('_out', '')
|
|
||||||
model_num = 0
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if line.startswith('REMARK VINA RESULT:'):
|
|
||||||
# 解析能量信息
|
|
||||||
# 格式: REMARK VINA RESULT: -8.5 0.000 0.000
|
|
||||||
parts = line.strip().split()
|
|
||||||
if len(parts) >= 4:
|
|
||||||
binding_energy = float(parts[3])
|
|
||||||
rmsd_lb = float(parts[4]) if len(parts) > 4 else 0.0
|
|
||||||
rmsd_ub = float(parts[5]) if len(parts) > 5 else 0.0
|
|
||||||
|
|
||||||
model_num += 1
|
|
||||||
results.append({
|
|
||||||
'ligand_name': ligand_name,
|
|
||||||
'model': model_num,
|
|
||||||
'binding_energy': binding_energy,
|
|
||||||
'rmsd_lb': rmsd_lb,
|
|
||||||
'rmsd_ub': rmsd_ub,
|
|
||||||
'file_path': pdbqt_file
|
|
||||||
})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"解析文件失败 {pdbqt_file}: {e}")
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
def analyze_poses_directory(poses_dir):
|
|
||||||
"""分析整个poses目录"""
|
|
||||||
all_results = []
|
|
||||||
|
|
||||||
poses_path = Path(poses_dir)
|
|
||||||
if not poses_path.exists():
|
|
||||||
print(f"错误: 目录不存在 {poses_dir}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# 查找所有_out.pdbqt文件
|
|
||||||
pdbqt_files = list(poses_path.glob("*_out.pdbqt"))
|
|
||||||
|
|
||||||
print(f"找到 {len(pdbqt_files)} 个结果文件")
|
|
||||||
|
|
||||||
for pdbqt_file in pdbqt_files:
|
|
||||||
results = parse_vina_output(str(pdbqt_file))
|
|
||||||
all_results.extend(results)
|
|
||||||
|
|
||||||
return all_results
|
|
||||||
|
|
||||||
def generate_summary_stats(df):
|
|
||||||
"""生成汇总统计信息"""
|
|
||||||
if df.empty:
|
|
||||||
return {}
|
|
||||||
|
|
||||||
# 获取每个配体的最佳结合能
|
|
||||||
best_poses = df.loc[df.groupby('ligand_name')['binding_energy'].idxmin()]
|
|
||||||
|
|
||||||
stats = {
|
|
||||||
'total_ligands': df['ligand_name'].nunique(),
|
|
||||||
'total_poses': len(df),
|
|
||||||
'best_binding_energy': df['binding_energy'].min(),
|
|
||||||
'worst_binding_energy': df['binding_energy'].max(),
|
|
||||||
'mean_binding_energy': df['binding_energy'].mean(),
|
|
||||||
'median_binding_energy': df['binding_energy'].median(),
|
|
||||||
'std_binding_energy': df['binding_energy'].std(),
|
|
||||||
'ligands_better_than_minus_7': (best_poses['binding_energy'] < -7.0).sum(),
|
|
||||||
'ligands_better_than_minus_8': (best_poses['binding_energy'] < -8.0).sum(),
|
|
||||||
'ligands_better_than_minus_9': (best_poses['binding_energy'] < -9.0).sum(),
|
|
||||||
}
|
|
||||||
|
|
||||||
return stats
|
|
||||||
|
|
||||||
def main():
|
|
||||||
if len(sys.argv) != 3:
|
|
||||||
print("用法: python analyze_results.py <poses目录> <输出CSV文件>")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
poses_dir = sys.argv[1]
|
|
||||||
output_csv = sys.argv[2]
|
|
||||||
|
|
||||||
print("开始分析对接结果...")
|
|
||||||
|
|
||||||
# 分析所有结果
|
|
||||||
results = analyze_poses_directory(poses_dir)
|
|
||||||
|
|
||||||
if not results:
|
|
||||||
print("没有找到有效的结果文件")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# 转换为DataFrame
|
|
||||||
df = pd.DataFrame(results)
|
|
||||||
|
|
||||||
# 保存详细结果
|
|
||||||
df.to_csv(output_csv, index=False)
|
|
||||||
print(f"详细结果已保存到: {output_csv}")
|
|
||||||
|
|
||||||
# 生成汇总统计
|
|
||||||
stats = generate_summary_stats(df)
|
|
||||||
|
|
||||||
# 打印汇总信息
|
|
||||||
print("\n=== 对接结果汇总 ===")
|
|
||||||
print(f"总配体数量: {stats['total_ligands']}")
|
|
||||||
print(f"总构象数量: {stats['total_poses']}")
|
|
||||||
print(f"最佳结合能: {stats['best_binding_energy']:.2f} kcal/mol")
|
|
||||||
print(f"最差结合能: {stats['worst_binding_energy']:.2f} kcal/mol")
|
|
||||||
print(f"平均结合能: {stats['mean_binding_energy']:.2f} kcal/mol")
|
|
||||||
print(f"中位结合能: {stats['median_binding_energy']:.2f} kcal/mol")
|
|
||||||
print(f"标准差: {stats['std_binding_energy']:.2f} kcal/mol")
|
|
||||||
print(f"结合能 < -7.0 的配体: {stats['ligands_better_than_minus_7']}")
|
|
||||||
print(f"结合能 < -8.0 的配体: {stats['ligands_better_than_minus_8']}")
|
|
||||||
print(f"结合能 < -9.0 的配体: {stats['ligands_better_than_minus_9']}")
|
|
||||||
|
|
||||||
# 显示前10个最佳结果
|
|
||||||
best_results = df.loc[df.groupby('ligand_name')['binding_energy'].idxmin()]
|
|
||||||
best_results = best_results.sort_values('binding_energy').head(10)
|
|
||||||
|
|
||||||
print("\n=== 前10个最佳结果 ===")
|
|
||||||
for _, row in best_results.iterrows():
|
|
||||||
print(f"{row['ligand_name']}: {row['binding_energy']:.2f} kcal/mol")
|
|
||||||
|
|
||||||
# 保存汇总统计
|
|
||||||
summary_file = output_csv.replace('.csv', '_summary.txt')
|
|
||||||
with open(summary_file, 'w') as f:
|
|
||||||
f.write("AutoDock Vina 对接结果汇总\n")
|
|
||||||
f.write("=" * 30 + "\n\n")
|
|
||||||
for key, value in stats.items():
|
|
||||||
f.write(f"{key}: {value}\n")
|
|
||||||
|
|
||||||
print(f"\n汇总统计已保存到: {summary_file}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
41167
scripts/finally_data/qed_values_poses_fgbar_all.csv
Normal file
41167
scripts/finally_data/qed_values_poses_fgbar_all.csv
Normal file
File diff suppressed because it is too large
Load Diff
41167
scripts/finally_data/qed_values_poses_trpe_all.csv
Normal file
41167
scripts/finally_data/qed_values_poses_trpe_all.csv
Normal file
File diff suppressed because it is too large
Load Diff
23
scripts/merge_csv.py
Normal file
23
scripts/merge_csv.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# 合并 COCOUNT 与 plant 的对接结果。
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"plant": ["qed_values_poses_trpe_converted.csv", "qed_values_poses_fgbar_converted.csv"],
|
||||||
|
"cocount": ["qed_values_trpe.csv", "qed_values_fgbar.csv"]
|
||||||
|
}
|
||||||
|
|
||||||
|
df_trpe_plant1 = pd.read_csv(data["plant"][0])
|
||||||
|
df_fgbar_plant2 = pd.read_csv(data["plant"][1])
|
||||||
|
df_trpe_plant1["source"] = "plant"
|
||||||
|
df_fgbar_plant2["source"] = "plant"
|
||||||
|
df_trpe_cocount1 = pd.read_csv(data["cocount"][0])
|
||||||
|
df_fgbar_cocount2 = pd.read_csv(data["cocount"][1])
|
||||||
|
df_trpe_cocount1["source"] = "cocount"
|
||||||
|
df_fgbar_cocount2["source"] = "cocount"
|
||||||
|
|
||||||
|
df_trpe = pd.concat([df_trpe_plant1, df_trpe_cocount1])
|
||||||
|
df_fgbar = pd.concat([df_fgbar_plant2, df_fgbar_cocount2])
|
||||||
|
|
||||||
|
df_trpe.to_csv("qed_values_poses_trpe_all.csv", index=False)
|
||||||
|
df_fgbar.to_csv("qed_values_poses_fgbar_all.csv", index=False) # 41166 条数据
|
||||||
5883
scripts/qed_values_poses_fgbar_converted.csv
Normal file
5883
scripts/qed_values_poses_fgbar_converted.csv
Normal file
File diff suppressed because it is too large
Load Diff
5883
scripts/qed_values_poses_trpe_converted.csv
Normal file
5883
scripts/qed_values_poses_trpe_converted.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user