# Source listing metadata: 68 lines, 2.3 KiB, YAML
version: '3.8'

# DeepSpeed supports a number of C++/CUDA extensions (ops) intended to speed
# up deep-learning training and inference. The main DeepSpeed ops are:
#
# FusedAdam - fused Adam optimizer for GPUs.
# FusedLamb - like FusedAdam, but for the LAMB optimizer; aimed at
#   large-scale distributed training.
# SparseAttention - efficient computation of sparse attention.
# Transformer - efficient implementation of Transformer models.
# TransformerInference - inference-specific optimizations for Transformer
#   models.
# CPUAdam - Adam optimizer optimized for CPU.
# CPULion - Lion optimizer for CPU.
# Quantizer - quantization support to reduce model size and speed up
#   inference.
# RandomLTD - random layer-wise token dropping (Random-LTD) support.
# StochasticTransformer - training and inference support for stochastic
#   Transformer models.
#
# Detect total system memory (in GB); kept for reference, the value could be
# used to size shm_size for the service defined in this file:
# TOTAL_MEM=$(awk '/MemTotal/ {printf "%.0f\n", $2/1024/1024}' /proc/meminfo)
# echo "Docker Compose file generated; shm_size set to ${TOTAL_MEM}GB."

# Single-service stack: an AlphaFold 3 container with all NVIDIA GPUs
# reserved, host networking and host IPC.
services:
  alphafold3:
    build:
      context: .
      dockerfile: Dockerfile
      # Compatibility table for PyTorch, Python and pytorch_lightning versions:
      # https://blog.csdn.net/qq_41813454/article/details/137421822
      args:
        # Quoted so the value stays the string "3.10" instead of the float 3.1.
        PYTHON_VERSION: "3.10"
        # CUDA_VERSION: "12.1.0"
        TAG_VERSION: "12.4.1"
    # env_file:
    #   - .env
    volumes:
      # Host paths under /mnt/d are bind-mounted into /app/alphafold.
      - /mnt/d/alphafold3_database:/app/alphafold/alphafold3_database
      - /mnt/d/models:/app/alphafold/models
      - /mnt/d/alphafold_input.json:/app/alphafold/alphafold_input.json
    container_name: alphafold3
    # NOTE(review): `if_not_present` is the legacy spelling of `missing` in the
    # Compose specification; kept as-is for older Compose releases - confirm.
    pull_policy: if_not_present
    ulimits:
      # -1 removes the locked-memory limit (soft and hard).
      memlock:
        soft: -1
        hard: -1
    restart: unless-stopped
    image: cford38/alphafold3:latest
    # NOTE(review): privileged mode presumably already grants every
    # capability, which would make the cap_add list below redundant - confirm
    # before removing either setting.
    privileged: true
    cap_add:
      - ALL
      - CAP_SYS_PTRACE
    # NOTE(review): with `ipc: host` the container likely shares the host's
    # /dev/shm, so shm_size may have no effect - confirm.
    shm_size: '32gb'
    ipc: host
    # ports:
    #   - "3228:2222"
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    network_mode: host
    # networks:
    #   - network_finetune
    # Runs `tail -f /dev/null` as the main process, presumably to keep the
    # container alive so work can be started inside it manually.
    command: ["tail", "-f", "/dev/null"]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every GPU exposed by the nvidia driver.
            - driver: nvidia
              count: all
              capabilities: [gpu]

# networks:
#   network_finetune:
#     name: network_finetune