update
This commit is contained in:
@@ -15,6 +15,7 @@ ENV ROOT_PASSWD=${ROOT_PASSWD}
|
||||
WORKDIR /root
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ENV STAGE_DIR=/tmp
|
||||
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
|
||||
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
|
||||
RUN <<EOT
|
||||
@@ -35,7 +36,7 @@ apt-get install -y flex tk ethtool libpci3 libltdl-dev bison lsof tcl libelf1 pc
|
||||
# install Mellanox OFED
|
||||
mkdir -p ${STAGE_DIR}
|
||||
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
|
||||
cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
||||
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
||||
./mlnxofedinstall --user-space-only --without-fw-update --all -q
|
||||
cd ${STAGE_DIR}
|
||||
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
|
||||
@@ -55,7 +56,7 @@ cd ${STAGE_DIR}
|
||||
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
|
||||
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
|
||||
apt-get update
|
||||
apt --fix-broken install -y
|
||||
apt-get --fix-broken install -y
|
||||
apt-get install -y dkms
|
||||
dpkg-buildpackage -us -uc
|
||||
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
|
||||
@@ -89,7 +90,6 @@ pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=
|
||||
pip install git+https://github.com/huggingface/transformers
|
||||
EOT
|
||||
|
||||
ENV STAGE_DIR=/tmp
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
|
||||
|
||||
@@ -27,6 +27,7 @@ services:
|
||||
OWNER: "nvidia" # nvcr.io/nvidia/pytorch:24.06-py3
|
||||
LABEL: "pytorch"
|
||||
VERSION: "24.06-py3"
|
||||
NV_PEER_MEM_VERSION: "1.2"
|
||||
DS_BUILD_OPS: 1
|
||||
DEEPSPEED_VERSION: "master"
|
||||
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
|
||||
|
||||
Reference in New Issue
Block a user