From adc45bc432d6d0aab1323b1834a9075832adb348 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 17 Jul 2024 04:49:01 +0000 Subject: [PATCH] update to drive --- spawnerdockerfile/Dockerfile.ngc | 73 ++++++++++++++++++++++---------- spawnerdockerfile/README.md | 5 ++- 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/spawnerdockerfile/Dockerfile.ngc b/spawnerdockerfile/Dockerfile.ngc index 0818e32..7a4e3c5 100644 --- a/spawnerdockerfile/Dockerfile.ngc +++ b/spawnerdockerfile/Dockerfile.ngc @@ -15,6 +15,49 @@ ENV ROOT_PASSWD=${ROOT_PASSWD} WORKDIR /root SHELL ["/bin/bash", "-c"] +# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/ +ENV MLNX_OFED_VERSION=23.10-3.2.2.0 +RUN <&1 +# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail +# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile +# install deepspeed prepare +# install Mellanox OFED +mkdir -p ${STAGE_DIR} +wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf - +cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64 +./mlnxofedinstall --user-space-only --without-fw-update --all -q +cd ${STAGE_DIR} +rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64* +EOT + +ARG NV_PEER_MEM_VERSION="1.2" +ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION} +ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 +RUN <> ~/.bashrc -. /opt/conda/etc/profile.d/conda.sh -conda init bash -conda config --set show_channel_urls true -# 配置 .condarc 文件 -cat < ~/.condarc -channels: - - conda-forge - - bioconda - - pytorch - - pytorch-nightly - - nvidia - - defaults -show_channel_urls: true -EOF +eval "$(curl https://get.x-cmd.com)" # install pixi curl -fsSL https://pixi.sh/install.sh | bash EOT -ENV STAGE_DIR=/tmp RUN <=0.17.0 +python -m pip install --no-deps git+https://github.com/huggingface/optimum.git#egg=optimum[diffusers,quality] EOT RUN <