This commit is contained in:
Your Name
2024-07-17 07:09:16 +00:00
parent 7f7d490de6
commit edfdd28619
2 changed files with 31 additions and 17 deletions

View File

@@ -45,22 +45,37 @@ EOT
# nv_peer_mem release to build; defaults to 1.2 and can be overridden with
# --build-arg NV_PEER_MEM_VERSION=<version>.
ARG NV_PEER_MEM_VERSION="1.2"
# Persist the version in the image environment and derive the "<version>-0" tag
# that the install step uses for the git branch and the .deb file name.
ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION} \
    NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
RUN <<EOT
#!/bin/bash
# Install nv_peer_mem: clone the tagged sources, run the upstream build script,
# then build and install the Debian package.
#
# Heredoc lines are not implicitly chained with &&. Without "set -e" only the
# exit status of the last command would decide whether this layer succeeds, so
# a failed clone or build would be silently ignored.
set -e

mkdir -p ${STAGE_DIR}
# fetch the sources at the tag derived from NV_PEER_MEM_VERSION
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
cd ${STAGE_DIR}/nv_peer_memory
# NOTE(review): build_module.sh is expected to leave
# nvidia-peer-memory_<version>.orig.tar.gz in STAGE_DIR, which is unpacked
# below -- confirm against the upstream script.
./build_module.sh
cd ${STAGE_DIR}
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
# repair any broken package state, then install dkms before packaging
apt-get update
apt-get --fix-broken install -y
apt-get install -y dkms
# build the package unsigned (-us: source, -uc: changes) and install the result
dpkg-buildpackage -us -uc
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
EOT
# Install nv_peer_mem in a single layer: clone the tagged sources, run the
# upstream build script, unpack the source tarball, then build the Debian
# package (unsigned) and install it. Every step is &&-chained so the first
# failure aborts the build.
RUN mkdir -p ${STAGE_DIR} \
    && git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory \
    && cd ${STAGE_DIR}/nv_peer_memory \
    && ./build_module.sh \
    && cd ${STAGE_DIR} \
    && tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz \
    && cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} \
    && apt-get update \
    && apt-get --fix-broken install -y \
    && apt-get install -y dkms \
    && dpkg-buildpackage -us -uc \
    && dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
# Disabled copy of the heredoc-based nv_peer_mem install, kept for reference only.
# ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
# ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
# RUN <<EOT
# #!/bin/bash
# # install nv_peer_mem
# mkdir -p ${STAGE_DIR}
# git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
# cd ${STAGE_DIR}/nv_peer_memory
# ./build_module.sh
# cd ${STAGE_DIR}
# tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
# cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
# apt-get update
# apt-get --fix-broken install -y
# apt-get install -y dkms
# dpkg-buildpackage -us -uc
# dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
# EOT
# base tools
RUN <<EOT

View File

@@ -27,7 +27,6 @@ services:
OWNER: "nvidia" # nvcr.io/nvidia/pytorch:24.06-py3
LABEL: "pytorch"
VERSION: "24.06-py3"
NV_PEER_MEM_VERSION: "1.2"
DS_BUILD_OPS: 1
DEEPSPEED_VERSION: "master"
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"