This commit is contained in:
2024-08-28 17:21:56 +08:00
parent f735605e9f
commit 9fcc584b5c

View File

@@ -20,7 +20,7 @@ services:
ubuntu-finetune:
build:
context: .
dockerfile: Dockerfile.update
dockerfile: Dockerfile.zly
args: # Compatibility table for PyTorch, Python and pytorch_lightning versions: https://blog.csdn.net/qq_41813454/article/details/137421822
PYTHON_VERSION: "3.10"
GO_VERSION: "1.21.13"
@@ -34,8 +34,8 @@ services:
USE_ROCM: 0
USE_XPU: 0
CUDA: "cu121"
CUDA_ARCH_LIST: "80;90" # for RTX 4090, all : "80;86;89;90"
TORCH_CUDA_ARCH_LIST: "8.0;9.0+PTX" # all "6.0;6.1;6.2;7.0;7.5;8.0;8.6;8.9;9.0"
CUDA_ARCH_LIST: "80;89;90" # 89 is for RTX 4090; all: "80;86;89;90"
TORCH_CUDA_ARCH_LIST: "8.0;8.9;9.0+PTX" # all "6.0;6.1;6.2;7.0;7.5;8.0;8.6;8.9;9.0"
SETUPTOOLS_VERSION: "69.5.1"
# Hopper (H100): 90a
# Ampere (A100): 80; Ampere (RTX 30 series): 86
@@ -43,7 +43,7 @@ services:
# Volta (V100): 70
# Pascal (P100, GTX 10 series): 60;61
# Maxwell (GTX 900 series): 50;53
DCUTLASS_NVCC_ARCHS: "80;90a" # 90a for H100 GPU 89:GeForce RTX 4090
DCUTLASS_NVCC_ARCHS: "80;89;90a" # semicolon-separated arch list; 89: GeForce RTX 4090, 90a: H100 GPU
DEEPSPEED_VERSION: "master"
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
# HTTP_PROXY: "http://127.0.0.1:15777"
@@ -69,18 +69,18 @@ services:
cap_add:
- ALL
- CAP_SYS_PTRACE
shm_size: '63gb'
devices:
- /dev/infiniband/rdma_cm
- /dev/infiniband/uverbs0
- /dev/infiniband/uverbs1
- /dev/infiniband/uverbs2
- /dev/infiniband/uverbs3
- /dev/infiniband/uverbs4
- /dev/infiniband/uverbs5
- /dev/infiniband/uverbs6
- /dev/infiniband/uverbs7
- /dev/infiniband/uverbs8
shm_size: '32gb'
# devices:
# - /dev/infiniband/rdma_cm
# - /dev/infiniband/uverbs0
# - /dev/infiniband/uverbs1
# - /dev/infiniband/uverbs2
# - /dev/infiniband/uverbs3
# - /dev/infiniband/uverbs4
# - /dev/infiniband/uverbs5
# - /dev/infiniband/uverbs6
# - /dev/infiniband/uverbs7
# - /dev/infiniband/uverbs8
ipc: host
# ports:
# - 3228:2222