# Commit message: update location
# New file in this commit (normal file, 36 lines, hunk @@ -0,0 +1,36 @@):
#   tools/bttoxin_digger/external_dbs/bt_toxin/seq/updateDB.py
def get_unique_headers(file_path):
    """Collect the distinct FASTA-style header lines found in a text file.

    A line counts as a header when, after stripping surrounding whitespace,
    it begins with ``>``. Everything after that first ``>`` is kept verbatim
    (inner spaces and other characters included).

    Args:
        file_path: Path of the file to scan.

    Returns:
        A set containing the text that follows ``>`` on every header line.
        The set is empty when the file has no header lines.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale encoding of the machine running the script.
    with open(file_path, 'r', encoding='utf-8') as f:
        stripped_lines = (raw.strip() for raw in f)
        # Drop only the leading '>' marker; the rest of the line is the header.
        return {text[1:] for text in stripped_lines if text.startswith('>')}


# Input file paths (relative to the current working directory).
file1 = 'bt_toxin20251104.fas'
file2 = 'all_app_cry_cyt_gpp_mcf_mpf_mpp_mtx_pra_prb_spp_tpp_txp_vip_vpa_vpb_xpp_fasta_sequences.txt'
output_file = 'unique_headers.txt'

# Collect the set of header lines found in each input file.
headers1 = get_unique_headers(file1)
headers2 = get_unique_headers(file2)

# Headers that appear in one file but not in the other.
unique_to_file1 = headers1 - headers2
unique_to_file2 = headers2 - headers1

# Write both difference lists to the report; each list is sorted so the
# output is stable from run to run.
with open(output_file, 'w', encoding='utf-8') as out_f:
    out_f.write(f"### Unique headers in {file1} ###\n")
    for header in sorted(unique_to_file1):
        out_f.write(f">{header}\n")

    out_f.write(f"\n### Unique headers in {file2} ###\n")
    for header in sorted(unique_to_file2):
        out_f.write(f">{header}\n")

print(f"处理完成,结果已保存至 {output_file}")
# (Web page footer links, not part of the script: "Reference in New Issue", "Block a user")