This commit is contained in:
2024-06-21 14:31:53 +08:00
parent 6bd77c71ca
commit d351fe520a
6 changed files with 400 additions and 7 deletions

View File

@@ -1,3 +1,9 @@
# syntax=docker/dockerfile:1
# NOTE: Building this image requires Docker version >= 23.0.
#
# For reference:
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
# CUDA version used to pick the base image tag. Declared before FROM so it can
# be substituted into the FROM line; override with --build-arg CUDA_VERSION=...
ARG CUDA_VERSION=12.1.0
# Base image: NVIDIA CUDA "devel" variant with cuDNN 8 on Ubuntu 20.04.
FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
# Declared as ARG (not ENV), so the value is available to build steps only and
# is not persisted into the environment of containers run from this image.
ARG DEBIAN_FRONTEND="noninteractive"
@@ -81,6 +87,8 @@ ENV NV_PEER_MEM_VERSION=1.2
# Tag string derived from NV_PEER_MEM_VERSION with a "-0" suffix appended.
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
# OPENMPI_VERSION is composed from the base version plus ".6" (i.e. 4.1.6).
# These must stay as two separate ENV instructions: substitution only sees
# values set by earlier instructions.
ENV OPENMPI_BASEVERSION=4.1
ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6
# Each ARG/ENV pair below exposes a build argument (overridable with
# --build-arg) and then persists its value into the image environment.
# CUDA_NUM is used later as the suffix of the magma-cuda conda package name.
# NOTE(review): CUDA_NUM and CUDA are independent literals; presumably they must
# be kept in sync with CUDA_VERSION by hand — confirm.
ARG CUDA_NUM='121'
ENV CUDA_NUM=${CUDA_NUM}
ARG CUDA='cu121'
ENV CUDA=${CUDA}
# Git tag (prefixed with "v") checked out when PyTorch is built from source.
ARG PYTORCH_VERSION=2.3.0
@@ -91,6 +99,9 @@ ARG TORCHAUDIO_VERSION=2.3.0
# Persist the torchaudio / PyTorch-CUDA build arguments into the image environment.
ENV TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION}
ARG PYTORCH_CUDA_VERSION=12.1
ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION}
# GPU compute capabilities that CUDA extensions (including the PyTorch source
# build) are compiled for.
ENV TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0 9.0a"
# Extra nvcc flags: compress the embedded fatbinaries to reduce binary size.
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
# Dockerfile ENV performs variable substitution only; it never runs shell
# command substitution. The previous value "$(dirname $(which conda))/../" was
# therefore stored verbatim as a literal string. Point at the conda root
# directly instead (conda is installed under /opt/conda in this image).
ENV CMAKE_PREFIX_PATH="/opt/conda"
ENV MLNX_OFED_VERSION=4.9-7.1.0.0
ARG SETUPTOOLS_VERSION=69.5.1
ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION}
@@ -105,7 +116,7 @@ ENV _GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI}
# Create the conda environment, activate it, and upgrade pip inside it.
# The heredoc body starts with a shebang so it is executed as a bash script.
# NOTE(review): heredoc lines are not implicitly &&-chained and no `set -e` is
# visible in this part of the script, so a failing command would presumably not
# abort the build — confirm and consider adding `set -e`.
RUN <<EOT
#!/bin/bash
# Load conda's shell integration so `conda activate` works in this shell.
source /opt/conda/etc/profile.d/conda.sh
# NOTE(review): the environment is created twice with different package sets
# (with and without pyyaml/ipython). This looks like both sides of a diff —
# confirm which single line is intended.
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} pyyaml ipython cmake ninja -c conda-forge -y
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} cmake ninja -c conda-forge -y
# Make future bash sessions that read ~/.bashrc activate the environment.
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
conda activate ${CONDA_ENV_NAME}
python3 -m pip install --no-cache-dir --upgrade pip
@@ -120,11 +131,14 @@ python3 -m pip uninstall -y torch torchvision torchaudio
# Build and install PyTorch from source (the conda env already exists at this point).
git clone --recursive https://github.com/pytorch/pytorch ${STAGE_DIR}/pytorch
cd ${STAGE_DIR}/pytorch
# Pin the source tree to the release tag matching PYTORCH_VERSION.
git checkout v${PYTORCH_VERSION}
# Re-sync submodules after the checkout so they match the pinned tag.
git submodule sync
git submodule update --init --recursive
pip install -r requirements.txt
# MKL static libraries and headers from the intel channel.
conda install -y intel::mkl-static intel::mkl-include
# MAGMA build matching the CUDA version, from the pytorch channel.
conda install -y -c pytorch magma-cuda${CUDA_NUM}
# Prefer the active conda env prefix; fall back to the directory above the
# location of the `conda` executable when CONDA_PREFIX is unset or empty.
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
# NOTE(review): both `develop` and `install` are run back to back; this looks
# like both sides of a diff — confirm which single mode is intended.
python setup.py develop
python setup.py install
# python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA}
# NOTE(review): accelerate is installed from the moving `main` branch, so the
# result is not reproducible across builds — consider pinning a tag or commit.
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
python3 -m pip uninstall -y transformer-engine