This commit is contained in:
Your Name
2024-07-17 13:57:30 +00:00
parent f0d71a9441
commit e57401677f
2 changed files with 9 additions and 3 deletions

View File

@@ -37,9 +37,9 @@ apt-get install -y flex tk ethtool libpci3 libltdl-dev bison lsof tcl libelf1 pc
mkdir -p ${STAGE_DIR}
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
./mlnxofedinstall --user-space-only --without-fw-update --all -q
./mlnxofedinstall --user-space-only --without-fw-update --all -q > /tmp/mlnxofedinstall.log 2>&1
cd ${STAGE_DIR}
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
# rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
EOT
# Build-time version selector, overridable with --build-arg.
# NOTE(review): presumably the nv_peer_memory release to fetch/build; the step
# that consumes it is outside this view — confirm before changing the default.
ARG NV_PEER_MEM_VERSION="1.2"
@@ -107,6 +107,7 @@ EOT
RUN <<EOT
#!/bin/bash
apt install -y cmake gcc g++ libssl-dev
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
cd ${STAGE_DIR}/DeepSpeed-Kernels
python -m pip install -v .
@@ -160,6 +161,9 @@ python -m pip install black~=23.1 ruff==0.1.5 diffusers>=0.17.0
python -m pip install --no-deps git+https://github.com/huggingface/optimum.git#egg=optimum[diffusers,quality]
EOT
# Port the SSH daemon inside the container listens on. It must differ from the
# port used by the host OS's own sshd so the two do not conflict.
# The value is substituted into /etc/ssh/sshd_config by a sed in a later RUN step.
# NOTE(review): that sed only rewrites a line beginning with "Port 22"; a
# commented-out "#Port 22" would be left untouched — confirm sshd_config has an
# active Port line at that point in the build.
ENV SSH_PORT=2222
RUN <<EOT
#!/bin/bash
# Definitions in the project directory usually override those in the user's home directory
@@ -174,6 +178,8 @@ CUDA_HOME=${CUDA_HOME}
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
EOF
unset https_proxy http_proxy
cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config && \
sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
EOT
# Default command: run the OpenSSH daemon in the foreground (-D keeps sshd from
# detaching), so it stays the container's main process.
CMD ["/usr/sbin/sshd", "-D"]

View File

@@ -45,7 +45,7 @@ services:
# tty: true
# stdin_open: true
restart: unless-stopped
image: hotwa/notebook:ngc
image: quay.io/hotwa/ngc:latest
privileged: true
ipc: host
network_mode: host