update
This commit is contained in:
@@ -3,13 +3,8 @@ FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
|
|||||||
ARG DEBIAN_FRONTEND="noninteractive"
|
ARG DEBIAN_FRONTEND="noninteractive"
|
||||||
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
|
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
|
||||||
ENV MAMBA_ROOT_PREFIX=~/micromamba
|
ENV MAMBA_ROOT_PREFIX=~/micromamba
|
||||||
ARG CONDA_ENV_NAME="deepspeed"
|
|
||||||
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
|
|
||||||
ARG PYTHON_VERSION=3.10
|
|
||||||
ENV PYTHON_VERSION=${PYTHON_VERSION}
|
|
||||||
ARG ROOT_PASSWD="root"
|
ARG ROOT_PASSWD="root"
|
||||||
ENV ROOT_PASSWD=${ROOT_PASSWD}
|
ENV ROOT_PASSWD=${ROOT_PASSWD}
|
||||||
ENV PATH /opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH
|
|
||||||
WORKDIR /root
|
WORKDIR /root
|
||||||
SHELL ["/bin/bash", "-c"]
|
SHELL ["/bin/bash", "-c"]
|
||||||
# base tools
|
# base tools
|
||||||
@@ -41,7 +36,6 @@ rm /tmp/miniconda.sh
|
|||||||
conda init bash
|
conda init bash
|
||||||
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
|
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
|
||||||
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
|
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
|
||||||
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
|
|
||||||
# 配置 .condarc 文件
|
# 配置 .condarc 文件
|
||||||
cat <<EOF > ~/.condarc
|
cat <<EOF > ~/.condarc
|
||||||
channels:
|
channels:
|
||||||
@@ -76,6 +70,11 @@ EOT
|
|||||||
|
|
||||||
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
|
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
|
||||||
# PyTorch
|
# PyTorch
|
||||||
|
ARG CONDA_ENV_NAME="deepspeed"
|
||||||
|
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
|
||||||
|
ARG PYTHON_VERSION=3.10
|
||||||
|
ENV PYTHON_VERSION=${PYTHON_VERSION}
|
||||||
|
ENV PATH /opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH
|
||||||
ENV REF='main'
|
ENV REF='main'
|
||||||
ENV STAGE_DIR=/tmp
|
ENV STAGE_DIR=/tmp
|
||||||
ENV NV_PEER_MEM_VERSION=1.2
|
ENV NV_PEER_MEM_VERSION=1.2
|
||||||
@@ -95,38 +94,47 @@ ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION}
|
|||||||
ENV MLNX_OFED_VERSION=4.9-7.1.0.0
|
ENV MLNX_OFED_VERSION=4.9-7.1.0.0
|
||||||
ARG SETUPTOOLS_VERSION=69.5.1
|
ARG SETUPTOOLS_VERSION=69.5.1
|
||||||
ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION}
|
ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION}
|
||||||
|
ARG USE_CUDA=1
|
||||||
|
ENV USE_CUDA=${USE_CUDA}
|
||||||
|
ARG USE_ROCM=0
|
||||||
|
ENV USE_ROCM=${USE_ROCM}
|
||||||
|
ARG USE_XPU=0
|
||||||
|
ENV USE_XPU=${USE_XPU}
|
||||||
|
ARG _GLIBCXX_USE_CXX11_ABI=1
|
||||||
|
ENV _GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI}
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
source /opt/conda/etc/profile.d/conda.sh
|
source /opt/conda/etc/profile.d/conda.sh
|
||||||
|
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} pyyaml ipython -c conda-forge -y
|
||||||
|
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
|
||||||
conda activate ${CONDA_ENV_NAME}
|
conda activate ${CONDA_ENV_NAME}
|
||||||
# 克隆 ninja 源码并编译
|
# 克隆 ninja 源码并编译
|
||||||
git clone https://github.com/ninja-build/ninja.git ${STAGE_DIR}/ninja
|
git clone https://github.com/ninja-build/ninja.git ${STAGE_DIR}/ninja
|
||||||
cd ${STAGE_DIR}/ninja
|
cd ${STAGE_DIR}/ninja
|
||||||
# 克隆 GoogleTest 源码
|
# 克隆 GoogleTest 源码
|
||||||
git clone https://github.com/google/googletest.git
|
git clone https://github.com/google/googletest.git
|
||||||
conda run -n ${CONDA_ENV_NAME} python ./configure.py --bootstrap
|
python ./configure.py --bootstrap
|
||||||
# 配置并构建 Ninja 测试,添加 pthread 链接选项
|
# 配置并构建 Ninja 测试,添加 pthread 链接选项
|
||||||
# CXXFLAGS="-pthread" LDFLAGS="-pthread" ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest
|
# CXXFLAGS="-pthread" LDFLAGS="-pthread" ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest
|
||||||
conda run -n ${CONDA_ENV_NAME} bash -c "CXXFLAGS='-pthread' LDFLAGS='-pthread' python ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest"
|
conda run -n ${CONDA_ENV_NAME} bash -c "CXXFLAGS='-pthread' LDFLAGS='-pthread' python ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest"
|
||||||
./ninja all
|
./ninja all
|
||||||
# 运行 Ninja 单元测试
|
# 运行 Ninja 单元测试
|
||||||
./ninja_test
|
./ninja_test
|
||||||
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} pyyaml ipython -c conda-forge -y
|
|
||||||
python3 -m pip install --no-cache-dir --upgrade pip
|
python3 -m pip install --no-cache-dir --upgrade pip
|
||||||
conda run -n ${CONDA_ENV_NAME} python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python
|
python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python
|
||||||
conda clean -afy
|
conda clean -afy
|
||||||
git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF && cd ..
|
git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF && cd ..
|
||||||
conda run -n ${CONDA_ENV_NAME} python -m pip install setuptools==${SETUPTOOLS_VERSION}
|
python -m pip install setuptools==${SETUPTOOLS_VERSION}
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||||
# # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
# # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||||
# # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
# # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip uninstall -y torch torchvision torchaudio
|
python3 -m pip uninstall -y torch torchvision torchaudio
|
||||||
# # install pytorch (the conda env already exists)
|
# # install pytorch (the conda env already exists)
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA}
|
python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA}
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip uninstall -y transformer-engine
|
python3 -m pip uninstall -y transformer-engine
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip uninstall -y torch-tensorrt
|
python3 -m pip uninstall -y torch-tensorrt
|
||||||
conda run -n ${CONDA_ENV_NAME} python3 -m pip uninstall -y apex
|
python3 -m pip uninstall -y apex
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# install apex
|
# install apex
|
||||||
@@ -305,11 +313,11 @@ EOT
|
|||||||
# CUDA_ARCH_LIST="80;86;89;90"
|
# CUDA_ARCH_LIST="80;86;89;90"
|
||||||
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
|
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
|
||||||
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
||||||
ARG CUDA_ARCH_LIST="80;86"
|
ARG CUDA_ARCH_LIST="80;86;89;90"
|
||||||
ENV CUDA_ARCH_LIST=${CUDA_ARCH_LIST}
|
ENV CUDA_ARCH_LIST=${CUDA_ARCH_LIST}
|
||||||
ARG DS_BUILD_SPARSE_ATTN=0
|
ARG DS_BUILD_SPARSE_ATTN=0
|
||||||
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
|
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
|
||||||
ARG DS_BUILD_FUSED_ADAM=0
|
ARG DS_BUILD_FUSED_ADAM=1
|
||||||
ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
|
ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
|
||||||
ARG DS_BUILD_CPU_ADAM=0
|
ARG DS_BUILD_CPU_ADAM=0
|
||||||
ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
|
ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
|
||||||
@@ -341,9 +349,9 @@ cd ${STAGE_DIR}/DeepSpeed
|
|||||||
git checkout .
|
git checkout .
|
||||||
git checkout master
|
git checkout master
|
||||||
python setup.py bdist_wheel
|
python setup.py bdist_wheel
|
||||||
pip install dist/deepspeed*.whl --force-reinstall
|
DS_BUILD_OPS=${DS_BUILD_OPS} pip install dist/deepspeed*.whl --force-reinstall
|
||||||
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt
|
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt
|
||||||
# DS_BUILD_OPS=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_CPU_ADAM=0 DS_BUILD_FUSED_ADAM=0 pip install -U --no-cache-dir .
|
# DS_BUILD_OPS=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_CPU_ADAM=0 DS_BUILD_FUSED_ADAM=1 pip install -U --no-cache-dir .
|
||||||
# ./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /job/hostfile # ./install.sh --allow_sudo --pip_sudo --no_clean --hostfile /path/to/your/hostfile
|
# ./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /job/hostfile # ./install.sh --allow_sudo --pip_sudo --no_clean --hostfile /path/to/your/hostfile
|
||||||
cd ..
|
cd ..
|
||||||
# rm -rf ${STAGE_DIR}/DeepSpeed
|
# rm -rf ${STAGE_DIR}/DeepSpeed
|
||||||
|
|||||||
@@ -13,7 +13,10 @@ services:
|
|||||||
DS_BUILD_OPS: 1
|
DS_BUILD_OPS: 1
|
||||||
DS_BUILD_SPARSE_ATTN: 0
|
DS_BUILD_SPARSE_ATTN: 0
|
||||||
DS_BUILD_FUSED_ADAM: 1
|
DS_BUILD_FUSED_ADAM: 1
|
||||||
DS_BUILD_CPU_ADAM: 0
|
DS_BUILD_CPU_ADAM: 1
|
||||||
|
USE_CUDA: 1
|
||||||
|
USE_ROCM: 0
|
||||||
|
USE_XPU: 0
|
||||||
CUDA: cu117
|
CUDA: cu117
|
||||||
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
||||||
SETUPTOOLS_VERSION: "69.5.1"
|
SETUPTOOLS_VERSION: "69.5.1"
|
||||||
|
|||||||
@@ -1,5 +1,18 @@
|
|||||||
version: '3.8'
|
version: '3.8'
|
||||||
|
|
||||||
|
# DeepSpeed支持多种C++/CUDA扩展(ops),这些ops旨在优化深度学习的训练和推理过程。以下是一些主要的DeepSpeed ops及其功能:
|
||||||
|
|
||||||
|
# FusedAdam - 提供融合优化的Adam优化器,适用于GPU。
|
||||||
|
# FusedLamb - 类似FusedAdam,针对LAMB优化器,适用于大规模分布式训练。
|
||||||
|
# SparseAttention - 用于高效计算稀疏注意力机制。
|
||||||
|
# Transformer - 提供Transformer模型的高效实现。
|
||||||
|
# TransformerInference - 专门用于Transformer模型的推理优化。
|
||||||
|
# CPUAdam - 针对CPU优化的Adam优化器。
|
||||||
|
# CPULion - 针对CPU的Lion优化器。
|
||||||
|
# Quantizer - 提供量化支持,以减少模型大小和提高推理速度。
|
||||||
|
# RandomLTD - 随机逐层 token 丢弃(Random Layerwise Token Dropping),用于减少训练计算量、加速训练。
|
||||||
|
# StochasticTransformer - 支持随机Transformer模型的训练和推理。
|
||||||
|
|
||||||
services:
|
services:
|
||||||
ubuntu-finetune:
|
ubuntu-finetune:
|
||||||
build:
|
build:
|
||||||
@@ -13,7 +26,10 @@ services:
|
|||||||
DS_BUILD_OPS: 1
|
DS_BUILD_OPS: 1
|
||||||
DS_BUILD_SPARSE_ATTN: 0
|
DS_BUILD_SPARSE_ATTN: 0
|
||||||
DS_BUILD_FUSED_ADAM: 1
|
DS_BUILD_FUSED_ADAM: 1
|
||||||
DS_BUILD_CPU_ADAM: 0
|
DS_BUILD_CPU_ADAM: 1
|
||||||
|
USE_CUDA: 1
|
||||||
|
USE_ROCM: 0
|
||||||
|
USE_XPU: 0
|
||||||
CUDA: cu121
|
CUDA: cu121
|
||||||
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
||||||
SETUPTOOLS_VERSION: "69.5.1"
|
SETUPTOOLS_VERSION: "69.5.1"
|
||||||
|
|||||||
Reference in New Issue
Block a user