feat(digger): containerize BtToxin_Digger with v5 database integration

- Added Dockerfile and docker-compose.yml for BtToxin_Digger
- Integrated external v5 BLAST database into the container image
- Updated main docker-compose.yml to include the digger service
- Updated documentation with database update instructions
This commit is contained in:
zly
2026-01-17 12:14:39 +08:00
parent 6f2365981d
commit 700bdb8307
33 changed files with 232973 additions and 75716 deletions

View File

@@ -0,0 +1,22 @@
{
"version": "1.2",
"dbname": "bt_toxin",
"dbtype": "Protein",
"db-version": 5,
"description": "bt_toxin20251104.fas",
"number-of-letters": 996368,
"number-of-sequences": 1199,
"last-updated": "2025-11-04T15:35:00",
"number-of-volumes": 1,
"bytes-total": 1149077,
"bytes-to-cache": 1007264,
"files": [
"bt_toxin.pdb",
"bt_toxin.phr",
"bt_toxin.pin",
"bt_toxin.pot",
"bt_toxin.psq",
"bt_toxin.ptf",
"bt_toxin.pto"
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
def get_unique_headers(file_path):
    """Return the set of distinct header lines found in a FASTA-style file.

    A header is any line that starts with '>' once surrounding whitespace
    has been stripped. The text after the '>' is kept in full (internal
    spaces and any description included); duplicate headers collapse into
    a single set entry.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A set of header strings without the leading '>'.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain ASCII/UTF-8 and also discards a leading BOM.
    # Without it the BOM stays glued to the first line, that line no longer
    # starts with '>', and the first header is silently lost. Naming the
    # encoding also keeps the result independent of the platform locale.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return {line[1:] for line in stripped_lines if line.startswith('>')}
# Input files to compare and the report file produced from them
file1 = 'bt_toxin20251104.fas'
file2 = 'all_app_cry_cyt_gpp_mcf_mpf_mpp_mtx_pra_prb_spp_tpp_txp_vip_vpa_vpb_xpp_fasta_sequences.txt'
output_file = 'unique_headers.txt'

# Header sets of both inputs
headers1, headers2 = get_unique_headers(file1), get_unique_headers(file2)

# Headers that occur in exactly one of the two files
unique_to_file1 = headers1.difference(headers2)
unique_to_file2 = headers2.difference(headers1)

# Build the report: one titled section per file, headers sorted and
# written back with their leading '>'
report_lines = [f"### Unique headers in {file1} ###\n"]
report_lines.extend(f">{header}\n" for header in sorted(unique_to_file1))
report_lines.append(f"\n### Unique headers in {file2} ###\n")
report_lines.extend(f">{header}\n" for header in sorted(unique_to_file2))

# Write the report in one pass
with open(output_file, 'w') as out_f:
    out_f.writelines(report_lines)

print(f"处理完成,结果已保存至 {output_file}")