聚类方法,聚类后选择打分最高那个分子,并对 karamadock 的结果求交集
This commit is contained in:
59
scripts/extract_and_intersect.py
Normal file
59
scripts/extract_and_intersect.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
def process_cluster_file(cluster_file, score_file, output_file):
    """Intersect a cluster-result CSV with a score CSV and save the matches.

    Each value in the cluster file's ``filename`` column is reduced to a join
    key: the path stem, cut at the first ``_out`` (for example
    ``dir/lig1_out.pdbqt`` becomes ``lig1``). The key is stored in a new
    ``filename_stem`` column and inner-joined against the score file's
    ``pdb_id`` column. The joined table is written to ``output_file`` without
    the index column.

    Args:
        cluster_file: Path to the cluster-result CSV (needs a ``filename``
            column).
        score_file: Path to the score CSV (needs a ``pdb_id`` column).
        output_file: Path the joined CSV is written to.

    Returns:
        The number of rows in the joined table.

    Raises:
        FileNotFoundError: If ``cluster_file`` or ``score_file`` does not
            exist.
        KeyError: If the ``filename`` or ``pdb_id`` column is missing.
    """
    # Fail early, with a message naming the missing input.
    if not os.path.exists(cluster_file):
        raise FileNotFoundError(f"聚类文件不存在: {cluster_file}")
    if not os.path.exists(score_file):
        raise FileNotFoundError(f"评分文件不存在: {score_file}")

    # Read both join columns as text. With default type inference an
    # all-numeric ``pdb_id`` column becomes int64, and merging it against the
    # text ``filename_stem`` key raises ValueError; inference would also drop
    # leading zeros from identifiers.
    cluster_df = pd.read_csv(cluster_file, dtype={'filename': str})
    score_df = pd.read_csv(score_file, dtype={'pdb_id': str})

    # Derive the join key: file stem, truncated at the first "_out".
    cluster_df['filename_stem'] = cluster_df['filename'].apply(
        lambda x: Path(x).stem.split('_out')[0]
    )

    # Keep only the rows whose key appears in both files.
    intersection = pd.merge(
        cluster_df,
        score_df,
        left_on='filename_stem',
        right_on='pdb_id',
        how='inner',
    )

    # Persist the result.
    intersection.to_csv(output_file, index=False)

    return len(intersection)
|
||||
|
||||
if __name__ == "__main__":
    # Hard-coded absolute root, so the inputs resolve regardless of the
    # current working directory.
    base_dir = "/Users/lingyuzeng/Downloads/211.69.141.180/202508021824/vina"
    cluster_dir = f"{base_dir}/scripts/finally_data/cluster_best"
    score_dir = f"{base_dir}/result/karamadock"

    # fgbar dataset: (cluster CSV, score CSV, output CSV).
    fgbar_paths = (
        f"{cluster_dir}/fgbar_cluster_best_vina_butina_butina.csv",
        f"{score_dir}/FgBar1_score.csv",
        f"{cluster_dir}/fgbar_intersection.csv",
    )
    fgbar_count = process_cluster_file(*fgbar_paths)

    # trpe dataset: (cluster CSV, score CSV, output CSV).
    trpe_paths = (
        f"{cluster_dir}/trpe_cluster_best_vina_butina_butina.csv",
        f"{score_dir}/TrpE_score.csv",
        f"{cluster_dir}/trpe_intersection.csv",
    )
    trpe_count = process_cluster_file(*trpe_paths)

    # Report how many rows each intersection produced.
    for label, count in (("fgbar", fgbar_count), ("trpe", trpe_count)):
        print(f"{label}交集数量: {count}")

    # The output files are not checked here; this only reports completion.
    print("脚本执行完成")
|
||||
Reference in New Issue
Block a user