# syntax=docker/dockerfile:1

# The RUN steps in this file use here-documents, which require the BuildKit
# Dockerfile frontend (1.4 or newer); the directive above requests it.

# CUDA release used to select the base image tag.
# Override with: --build-arg CUDA_VERSION=<x.y.z>
ARG CUDA_VERSION=12.1.0
FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04

# Each ARG below is mirrored into an ENV of the same name so the value is
# visible both to later build steps and inside the running container.
ARG DEBIAN_FRONTEND="noninteractive"
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}

# Docker does not tilde-expand ENV values, so the path is written out in full.
# Assumes the build user's home directory is /root (the WORKDIR set below),
# which is what `~/micromamba` resolves to in the RUN step — confirm if the
# base image user ever changes.
ENV MAMBA_ROOT_PREFIX=/root/micromamba

ARG CONDA_ENV_NAME="deepspeed"
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}

ARG PYTHON_VERSION=3.10
ENV PYTHON_VERSION=${PYTHON_VERSION}

# NOTE(review): this default password ends up in the image environment and in
# the build history. Consider supplying it through a BuildKit secret mount.
ARG ROOT_PASSWD="root"
ENV ROOT_PASSWD=${ROOT_PASSWD}

ENV PATH=/opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH

WORKDIR /root
SHELL ["/bin/bash", "-c"]

# base tools
RUN <<EOT
# NOTE(review): the commands that originally sat between `RUN <<EOT` and this
# point (the last of them ended in a `>> ~/.bashrc` redirection) are missing
# from this copy of the file. Restore them from the upstream Dockerfile.
# NOTE(review): this script has no `set -e`, so a failing command does not
# abort the build step; consider adding it once the script is complete.
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc

# Configure the .condarc file
cat <<EOF > ~/.condarc
channels:
  - conda-forge
  - bioconda
  - pytorch
  - pytorch-nightly
  - nvidia
  - defaults
show_channel_urls: true
EOF

# Install micromamba
# `echo 1` is piped to the installer's stdin, presumably to answer a prompt.
# NOTE(review): the installer is fetched from a moving `@main` ref with no
# checksum, and a failed download is not detected (`curl -s` inside a process
# substitution). Pin a commit and verify the script before executing it.
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
micromamba shell init -s bash -p ~/micromamba
# Quoted delimiter: the lines are appended to ~/.bashrc literally.
cat <<'EOF' >> ~/.bashrc
source ~/micromamba/etc/profile.d/micromamba.sh
alias mamba=micromamba
alias mba=mamba
EOF

# Configure the .mambarc file
cat <<EOF > ~/.mambarc
channels:
  - conda-forge
  - bioconda
  - pytorch
  - pytorch-nightly
  - nvidia
  - defaults
show_channel_urls: true
EOF
EOT

# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
# PyTorch
ENV REF='main'
ENV STAGE_DIR=/tmp

# The *_TAG / *_VERSION values below are derived from a variable set by the
# preceding ENV instruction, so each pair must stay in separate instructions.
ENV NV_PEER_MEM_VERSION=1.2
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
ENV OPENMPI_BASEVERSION=4.1
ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6

ARG CUDA='cu121'
ENV CUDA=${CUDA}

ARG PYTORCH_VERSION=2.3.0
ENV PYTORCH_VERSION=${PYTORCH_VERSION}

ARG TORCHVISION_VERSION=0.18.0
ENV TORCHVISION_VERSION=${TORCHVISION_VERSION}

ARG TORCHAUDIO_VERSION=2.3.0
ENV TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION}

ARG PYTORCH_CUDA_VERSION=12.1
ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION}

ENV MLNX_OFED_VERSION=4.9-7.1.0.0

ARG SETUPTOOLS_VERSION=69.5.1
ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION}

# NOTE(review): the instruction below is cut off in this copy of the file and
# is kept verbatim; its remainder must be restored from the upstream source.
RUN <