# Image build recipe: Dockerfile-style instructions (ARG / FROM / ENV / WORKDIR / SHELL / RUN / CMD).
#
# NOTE(review): every instruction below sits on ONE physical line, so the line breaks that
#   normally separate instructions are gone. Restore the line structure from the upstream
#   copy before building or editing this text.
# NOTE(review): "EOT" / "EOF" terminators appear without matching here-document openers, and
#   "RUN <&1", "RUN <=0.17.0" and "RUN < ~/.deepspeed_env" look like remnants of truncated
#   spans. Parts of the RUN bodies are presumably missing - confirm against upstream.
#
# What the visible text shows:
#   - The base image is $REGISTRY/$OWNER/$LABEL:$VERSION unless BASE_CONTAINER is overridden.
#     VERSION is declared without a default value.
#   - HTTP_PROXY / HTTPS_PROXY build args are copied into the lowercase http_proxy / https_proxy
#     ENV variables; a later shell step runs "unset https_proxy http_proxy".
#   - ROOT_PASSWD defaults to "root" and is copied into an ENV variable of the same name.
#     NOTE(review): confirm that keeping a password in ENV is intended.
#   - WORKDIR is /root and the RUN shell is switched to /bin/bash -c.
#   - The MLNX_OFED_LINUX tarball for MLNX_OFED_VERSION (ubuntu22.04-x86_64) is fetched with wget
#     over plain http://, unpacked, installed via
#     "./mlnxofedinstall --user-space-only --without-fw-update --all -q", then removed.
#   - STAGE_DIR is referenced by the mkdir / cd / rm commands but is not defined anywhere in the
#     visible text. TODO: confirm where it is set.
#   - NV_PEER_MEM_TAG is derived as "${NV_PEER_MEM_VERSION}-0".
#   - optimum is installed from its Git repository with "python -m pip install --no-deps".
#   - A list of VAR=value lines (TORCH_USE_CUDA_DSA, DEEPSPEED_VERBOSE, DEEPSPEED_LOG_LEVEL,
#     CUTLASS_PATH, TORCH_CUDA_ARCH_LIST, CUDA_HOME, LD_LIBRARY_PATH) follows "~/.deepspeed_env".
#     Presumably they are written to that file by a here-document - verify.
#   - CMD starts /usr/sbin/sshd with the -D flag.
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG LABEL=notebook ARG VERSION ARG BASE_CONTAINER=$REGISTRY/$OWNER/$LABEL:$VERSION FROM $BASE_CONTAINER ARG HTTP_PROXY ARG HTTPS_PROXY ENV http_proxy=${HTTP_PROXY} ENV https_proxy=${HTTPS_PROXY} ARG DEBIAN_FRONTEND="noninteractive" ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND} ARG ROOT_PASSWD="root" ENV ROOT_PASSWD=${ROOT_PASSWD} WORKDIR /root SHELL ["/bin/bash", "-c"] # https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/ ENV MLNX_OFED_VERSION=23.10-3.2.2.0 RUN <&1 # from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail # reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile # install deepspeed prepare # install Mellanox OFED mkdir -p ${STAGE_DIR} wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf - cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64 ./mlnxofedinstall --user-space-only --without-fw-update --all -q cd ${STAGE_DIR} rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64* EOT ARG NV_PEER_MEM_VERSION="1.2" ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION} ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 RUN <=0.17.0 python -m pip install --no-deps git+https://github.com/huggingface/optimum.git#egg=optimum[diffusers,quality] EOT RUN < ~/.deepspeed_env TORCH_USE_CUDA_DSA=1 DEEPSPEED_VERBOSE=1 DEEPSPEED_LOG_LEVEL=DEBUG CUTLASS_PATH=${CUTLASS_PATH} TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} CUDA_HOME=${CUDA_HOME} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} EOF unset https_proxy http_proxy EOT CMD ["/usr/sbin/sshd", "-D"]