# syntax=docker/dockerfile:1
# NOTE: Building this image requires docker version >= 23.0 (heredoc RUN support).
#
# For reference:
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
#
# NOTE(review): the original file was whitespace-mangled and every heredoc
# delimiter was corrupted (e.g. "RUN <" instead of "RUN <<EOT").  The bodies
# below restore the steps that survived; spans marked TODO(review) were lost
# entirely and must be confirmed against version control.
ARG CUDA_VERSION=12.1.0
FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04

# Build-time only: keep debconf quiet during apt installs.  Deliberately NOT
# exported with ENV — persisting DEBIAN_FRONTEND=noninteractive into the
# runtime image breaks interactive package installs for users later.
ARG DEBIAN_FRONTEND="noninteractive"

# "~" is not expanded by ENV; use the absolute path (user here is root).
ENV MAMBA_ROOT_PREFIX=/root/micromamba

# SECURITY(review): a default root password baked into ARG/ENV is visible in
# `docker history` and in the runtime environment — pass a real value at build
# time and rotate it, or better, use key-based SSH only.
ARG ROOT_PASSWD="root"
ENV ROOT_PASSWD=${ROOT_PASSWD}

WORKDIR /root

# -o pipefail so a failure upstream of a pipe (wget | tar below) fails the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# base tools: conda channel configuration (micromamba install left disabled,
# exactly as in the original).
RUN <<EOT
set -eux
# conda channel configuration for the root user
cat <<'EOF' > ~/.condarc
channels:
  - conda-forge
  - bioconda
  - pytorch
  - pytorch-nightly
  - nvidia
  - defaults
show_channel_urls: true
EOF
# Install micromamba (disabled in the original):
# echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
# micromamba shell init -s bash -p ~/micromamba
# cat <<'EOF' >> ~/.bashrc
# source ~/micromamba/etc/profile.d/micromamba.sh
# EOF
# .mambarc configuration (disabled in the original):
# cat <<'EOF' > ~/.mambarc
# channels:
#   - conda-forge
#   - bioconda
#   - pytorch
#   - pytorch-nightly
#   - nvidia
#   - defaults
# show_channel_urls: true
# EOF
EOT

# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
# PyTorch / toolchain version pins (ARG for build-time override, ENV so the
# values remain visible inside the running container).
ARG CONDA_ENV_NAME="deepspeed"
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
ARG PYTHON_VERSION=3.10
ENV PYTHON_VERSION=${PYTHON_VERSION}
ENV PATH=/opt/conda/envs/${CONDA_ENV_NAME}/bin:/usr/bin:/opt/conda/bin:$PATH
ENV DEEPSPEED_PYTHON="/opt/conda/envs/${CONDA_ENV_NAME}/bin/python3"
ENV REF='main'
ENV STAGE_DIR=/tmp
ENV NV_PEER_MEM_VERSION=1.2
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
ENV OPENMPI_BASEVERSION=4.1
ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6
ARG CUDA='cu121'
ENV CUDA=${CUDA}
ARG PYTORCH_VERSION=2.3.1
ENV PYTORCH_VERSION=${PYTORCH_VERSION}
ARG TORCHVISION_VERSION=0.18.1
ENV TORCHVISION_VERSION=${TORCHVISION_VERSION}
ARG TORCHAUDIO_VERSION=2.3.1
ENV TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION}
ARG PYTORCH_CUDA_VERSION=12.1
ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION}
ENV MLNX_OFED_VERSION=4.9-7.1.0.0
ARG SETUPTOOLS_VERSION=69.5.1
ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION}
ARG USE_CUDA=1
ENV USE_CUDA=${USE_CUDA}
ARG USE_ROCM=0
ENV USE_ROCM=${USE_ROCM}
ARG USE_XPU=0
ENV USE_XPU=${USE_XPU}
ARG _GLIBCXX_USE_CXX11_ABI=1
ENV _GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI}

# conda env + ninja (built from source, with GoogleTest) + transformers + PyTorch.
# TODO(review): the conda env creation step was lost in the corrupted source;
# the `conda create` line below is reconstructed — confirm against version control.
RUN <<EOT
set -eux
conda create -y -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION}
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
which python > ~/python_path.txt
# `conda activate` needs the shell hook sourced in non-interactive shells
source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
# clone the ninja sources and build
git clone https://github.com/ninja-build/ninja.git ${STAGE_DIR}/ninja
cd ${STAGE_DIR}/ninja
# GoogleTest sources for the ninja unit tests
git clone https://github.com/google/googletest.git
python ./configure.py --bootstrap
# configure + build the ninja tests, adding the pthread compile/link flags
conda run -n ${CONDA_ENV_NAME} bash -c "CXXFLAGS='-pthread' LDFLAGS='-pthread' python ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest"
./ninja all
# run the ninja unit tests
./ninja_test
python3 -m pip install --no-cache-dir --upgrade pip
python3 -m pip install --no-cache-dir open_clip_torch nvidia-ml-py3 opencv-contrib-python
conda clean -afy
git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF && cd ..
python -m pip install setuptools==${SETUPTOOLS_VERSION}
python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
python3 -m pip uninstall -y torch torchvision torchaudio
# install pytorch (the conda env already exists at this point)
python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} xformers --extra-index-url https://download.pytorch.org/whl/${CUDA}
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
python3 -m pip uninstall -y transformer-engine
python3 -m pip uninstall -y torch-tensorrt
python3 -m pip uninstall -y apex
EOT

# install apex
RUN <<EOT
set -eux
git clone https://github.com/NVIDIA/apex ${STAGE_DIR}/apex
cd ${STAGE_DIR}/apex
# needs pip >= 23.1 (ref: https://pip.pypa.io/en/stable/news/#v23-1) which
# supports multiple `--config-settings` with the same key
MAX_JOBS=1 python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
python -c "import apex.amp; print('Apex is installed and the amp module is available.')"
cd ..
rm -rf ${STAGE_DIR}/apex
EOT

# DeepSpeed prerequisites.
# (the transformers-repo Dockerfile's deepspeed install fails here, so we follow
#  https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile instead)
RUN <<EOT
set -eux
# install Mellanox OFED
mkdir -p ${STAGE_DIR}
cd ${STAGE_DIR}
# NOTE(review): base image is ubuntu22.04 but this OFED tarball targets
# ubuntu20.04 — confirm this OFED release installs cleanly on 22.04.
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf -
cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64
./mlnxofedinstall --user-space-only --without-fw-update --all -q
cd ${STAGE_DIR}
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64*
EOT

# install nv_peer_mem
RUN <<EOT
set -eux
# original did `rm -rf ${STAGE_DIR}` which wipes all of /tmp — remove only
# this component's staging directory instead
rm -rf ${STAGE_DIR}/nv_peer_memory
mkdir -p ${STAGE_DIR}
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
cd ${STAGE_DIR}/nv_peer_memory
./build_module.sh
cd ${STAGE_DIR}
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
# update + install in the same layer, then drop the apt lists (hadolint DL3009)
apt-get update
apt-get install -y --no-install-recommends dkms
dpkg-buildpackage -us -uc
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
rm -rf /var/lib/apt/lists/*
EOT

# install mpi
ENV PATH=/usr/local/mpi/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}
# TODO(review): the Open MPI download/configure/make steps were lost in the
# corrupted source (only the mpirun wrapper survived); reconstructed below from
# the upstream DeepSpeed Dockerfile pattern — confirm against version control.
RUN <<EOT
set -eux
cd ${STAGE_DIR}
wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf -
cd openmpi-${OPENMPI_VERSION}
./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION}
make -j"$(nproc)" install
ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi
# wrap mpirun so it is allowed to run as root inside the container
mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real
echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun
echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun
chmod a+x /usr/local/mpi/bin/mpirun
cd ${STAGE_DIR}
rm -rf ${STAGE_DIR}/openmpi-${OPENMPI_VERSION}*
EOT

# SSH daemon port inside container cannot conflict with host OS port
ENV SSH_PORT=2222
RUN <<EOT
set -eux
# rewrite sshd_config, replacing only the first "Port 22" occurrence
cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config
sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
EOT

# Captured `install.sh --help` output, kept for reference:
#   Usage: install.sh [options...]
#   By default will install deepspeed and all third party dependencies across
#   all machines listed in hostfile (hostfile: /job/hostfile). If no hostfile
#   exists, will only install locally
#   [optional]
#     -l, --local_only   Install only on local machine
#     -s, --pip_sudo     Run pip install with sudo (default: no sudo)
#     -r, --allow_sudo   Allow script to be run by root (probably don't want this, instead use --pip_sudo)
#     -n, --no_clean     Do not clean prior build state, by default prior build files are removed before building wheels
#     -m, --pip_mirror   Use the specified pip mirror (default: the default pip mirror)
#     -H, --hostfile     Path to MPI-style hostfile (default: /job/hostfile)
#     -e, --examples     Checkout deepspeed example submodule (no install)
#     -v, --verbose      Verbose logging
#     -h, --help         This help text
RUN <<EOT
set -eux
# TODO(review): the original body was corrupted — it appended a rule to
# /etc/sudoers (likely a NOPASSWD entry, per the upstream DeepSpeed
# Dockerfile); restore the exact line from version control.
echo "# NOTE: sudoers entry lost during file corruption; restore manually" >> /etc/sudoers
EOT

# install cutlass https://github.com/NVIDIA/cutlass
# H100: Hopper architecture (cutlass needs: cmake .. -DCUTLASS_NVCC_ARCHS="90a")
# A100: Ampere architecture
# V100: Volta architecture
# T4:   Turing architecture
# ENV CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc
# 70:  NVIDIA Volta  (e.g. Tesla V100)
# 75:  NVIDIA Turing (e.g. Tesla T4)
# 80:  NVIDIA Ampere (e.g. A100)
# 90a: NVIDIA Hopper (e.g. H100)
# 89:  GeForce RTX 4090
ARG DCUTLASS_NVCC_ARCHS="80;89;90a"
ENV DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS}

# install DeepSpeed (optionally across a hostfile) and pre-compile its ops.
# TODO(review): these two ARG declarations were lost in the corrupted source;
# empty defaults reconstructed so the heredoc below is self-contained.
ARG DEEPSPEED_INSTALL_FLAGS=""
ARG HOSTFILE_CONTENT=""
RUN <<EOT
set -eux
# TODO(review): the CUTLASS checkout/build and the DeepSpeed clone plus the
# sed that produces install_modified.sh were lost in the corrupted source;
# restore from version control.  The surviving steps follow.
chmod +x ./install_modified.sh
# if HOSTFILE_CONTENT was provided, write it out and install across hosts
if [ -n "${HOSTFILE_CONTENT}" ]; then
    echo "${HOSTFILE_CONTENT}" > /tmp/hostfile
    INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile"
else
    INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
fi
eval $INSTALL_CMD
# shell conveniences: source micromamba and define the aliases directly.
# (The original put literal `echo "alias ..." >> ~/.bashrc` commands INSIDE
# the quoted heredoc, so every login re-appended aliases to .bashrc — fixed.)
cat <<'EOF' >> ~/.bashrc
source ~/micromamba/etc/profile.d/micromamba.sh
alias mamba=micromamba
alias mba=mamba
EOF
# pre-compile the DeepSpeed c++/cuda ops
cat <<'EOF' > ~/compile_deepspeed_ops.py
import deepspeed

def compile_ops():
    builders = [
        deepspeed.ops.op_builder.AsyncIOBuilder,
        deepspeed.ops.op_builder.FusedAdamBuilder,
        deepspeed.ops.op_builder.CPUAdamBuilder,
        deepspeed.ops.op_builder.CPUAdagradBuilder,
        deepspeed.ops.op_builder.CPULionBuilder,
        deepspeed.ops.op_builder.EvoformerAttnBuilder,
        deepspeed.ops.op_builder.FPQuantizerBuilder,
        deepspeed.ops.op_builder.FusedLambBuilder,
        deepspeed.ops.op_builder.FusedLionBuilder,
        deepspeed.ops.op_builder.QuantizerBuilder,
        deepspeed.ops.op_builder.RaggedOpsBuilder,
        deepspeed.ops.op_builder.RandomLTDBuilder,
        deepspeed.ops.op_builder.SparseAttnBuilder,
        deepspeed.ops.op_builder.SpatialInferenceBuilder,
        deepspeed.ops.op_builder.TransformerBuilder,
        deepspeed.ops.op_builder.StochasticTransformerBuilder,
    ]
    for builder in builders:
        print(f"Compiling {builder.__name__}")
        builder().load()

if __name__ == "__main__":
    compile_ops()
EOF
python compile_deepspeed_ops.py
ds_report
# clean up (disabled in the original):
# rm -f deepspeed/git_version_info_installed.py
# rm -rf dist build deepspeed.egg-info
# python setup.py bdist_wheel
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
# pip install numpy==1.22.4
# ImportError: cannot import name 'BUFSIZE' from 'numpy'
# (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py)
# — wait for fix in numpy 2.0.0
EOT

# install transformers and flash-attn
# TODO(review): the body of this final RUN was truncated in the corrupted
# source; restore it from version control before building — omitted here
# rather than guessed (a bare "RUN <" is not valid Dockerfile syntax).