update
This commit is contained in:
@@ -45,22 +45,37 @@ EOT
|
||||
# Version of the nv_peer_memory sources to build; defaults to 1.2 and can be
# overridden at build time (e.g. with --build-arg).
ARG NV_PEER_MEM_VERSION="1.2"
|
||||
# Re-export the build argument as an environment variable so the value is
# also available through the environment of later instructions.
ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
|
||||
# Version with a "-0" suffix: used below as the git --branch to clone and as
# the version part of the generated .deb file name.
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
|
||||
# Build and install the nv_peer_mem package from source.
#
# Steps: clone Mellanox/nv_peer_memory at ${NV_PEER_MEM_TAG}, run its
# build_module.sh, unpack the nvidia-peer-memory .orig tarball found in
# ${STAGE_DIR} (presumably produced by build_module.sh -- confirm), build the
# Debian package with dpkg-buildpackage and install the resulting .deb.
#
# STAGE_DIR is expected to be defined earlier in this Dockerfile (not visible
# in this section).
RUN <<EOT
#!/bin/bash
# Heredoc lines are not implicitly chained with `&&`: without `set -e` a
# failed intermediate step does not stop the script and only the exit status
# of the last command decides whether the layer fails.
set -e

# install nv_peer_mem
mkdir -p ${STAGE_DIR}
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
cd ${STAGE_DIR}/nv_peer_memory
./build_module.sh

# unpack the source tarball and switch into the extracted tree
cd ${STAGE_DIR}
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}

# refresh the package index, repair any broken dependencies, then add dkms
apt-get update
apt-get --fix-broken install -y
apt-get install -y dkms

# build the unsigned Debian package (-us -uc) and install it
dpkg-buildpackage -us -uc
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
EOT
|
||||
# Install nv_peer_mem from source in a single layer.
#
# Sequence: clone the repository at ${NV_PEER_MEM_TAG}, run build_module.sh,
# unpack the nvidia-peer-memory .orig tarball from ${STAGE_DIR}, install dkms,
# build the unsigned Debian package and install the resulting .deb.
# Every step is joined with `&&`, so the first failing command aborts the build.
RUN mkdir -p ${STAGE_DIR} \
    && git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory \
    && cd ${STAGE_DIR}/nv_peer_memory \
    && ./build_module.sh \
    && cd ${STAGE_DIR} \
    && tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz \
    && cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} \
    && apt-get update \
    && apt-get --fix-broken install -y \
    && apt-get install -y dkms \
    && dpkg-buildpackage -us -uc \
    && dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
|
||||
|
||||
# ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
|
||||
# ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
|
||||
# RUN <<EOT
|
||||
# #!/bin/bash
|
||||
# # install nv_peer_mem
|
||||
# mkdir -p ${STAGE_DIR}
|
||||
# git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
|
||||
# cd ${STAGE_DIR}/nv_peer_memory
|
||||
# ./build_module.sh
|
||||
# cd ${STAGE_DIR}
|
||||
# tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
|
||||
# cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
|
||||
# apt-get update
|
||||
# apt-get --fix-broken install -y
|
||||
# apt-get install -y dkms
|
||||
# dpkg-buildpackage -us -uc
|
||||
# dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
|
||||
# EOT
|
||||
|
||||
# base tools
|
||||
RUN <<EOT
|
||||
|
||||
@@ -27,7 +27,6 @@ services:
|
||||
OWNER: "nvidia" # nvcr.io/nvidia/pytorch:24.06-py3
|
||||
LABEL: "pytorch"
|
||||
VERSION: "24.06-py3"
|
||||
NV_PEER_MEM_VERSION: "1.2"
|
||||
DS_BUILD_OPS: 1
|
||||
DEEPSPEED_VERSION: "master"
|
||||
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
|
||||
|
||||
Reference in New Issue
Block a user