This commit is contained in:
2024-06-21 18:17:28 +08:00
parent 2620524636
commit 8017a3e104
9 changed files with 53 additions and 530 deletions

View File

@@ -339,7 +339,7 @@ source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
python -m pip install setuptools==${SETUPTOOLS_VERSION}
# install oneapi for deepspeed
git clone https://ghproxy.dockless.eu.org/https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
git clone https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
cd ${STAGE_DIR}/oneCCL
git checkout .
git checkout master
@@ -353,7 +353,7 @@ RUN <<EOT
#!/bin/bash
source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
git clone https://ghproxy.dockless.eu.org/https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
cd ${STAGE_DIR}/DeepSpeed-Kernels
CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python setup.py bdist_wheel
pip install dist/deepspeed_kernels-*.whl
@@ -364,7 +364,7 @@ RUN <<EOT
#!/bin/bash
source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
git clone https://ghproxy.dockless.eu.org/https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
cd ${STAGE_DIR}/DeepSpeed
git checkout .
# git checkout v${DEEPSPEED_VERSION}

View File

@@ -19,11 +19,11 @@ services:
context: .
dockerfile: Dockerfile
args: # Compatibility table for PyTorch, Python, and pytorch_lightning versions: https://blog.csdn.net/qq_41813454/article/details/137421822
PYTHON_VERSION: 3.10
CUDA_VERSION: 12.1.0
PYTORCH_VERSION: 2.3.1
TORCHVISION_VERSION: 0.18.0
TORCHAUDIO_VERSION: 2.3.0
PYTHON_VERSION: "3.10"
CUDA_VERSION: "12.1.0"
PYTORCH_VERSION: "2.3.0"
TORCHVISION_VERSION: "0.18.0"
TORCHAUDIO_VERSION: "2.3.0"
DS_BUILD_OPS: 1
DS_BUILD_SPARSE_ATTN: 0
DS_BUILD_FUSED_ADAM: 1
@@ -32,7 +32,7 @@ services:
USE_ROCM: 0
USE_XPU: 0
CUDA: cu121
CUDA_ARCH_LIST: "80;86;89;90;90a" # for RTX 4090, all : "80;86;89;90"
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
SETUPTOOLS_VERSION: "69.5.1"
DCUTLASS_NVCC_ARCHS: "90a" # 90a: H100 GPU; 89: GeForce RTX 4090
volumes:
@@ -44,7 +44,7 @@ services:
image: hotwa/deepspeed:pt23
shm_size: '32gb'
ports:
- 3227:2222
- 3228:2222
environment:
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility