Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ab0838c738 | ||
|
|
82ddb287fe | ||
|
|
c885d79433 | ||
|
|
2d6bb7c2cc | ||
| f2b9b5033b | |||
| 96b87a8f82 | |||
| 00a7a5ee6a | |||
| 4da2a4c813 | |||
| 4a6697ef69 | |||
| 305fd85088 |
@@ -69,7 +69,7 @@ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sud
|
||||
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
|
||||
apt-get update
|
||||
apt install --no-install-recommends r-base
|
||||
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2023.06.1-524-amd64.deb" -O /tmp/rstudio-server.deb
|
||||
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2024.09.1-394-amd64.deb" -O /tmp/rstudio-server.deb
|
||||
sudo chmod +x /tmp/rstudio-server.deb
|
||||
sudo gdebi -n /tmp/rstudio-server.deb
|
||||
sudo rm -rf /tmp/rstudio-server.deb
|
||||
|
||||
@@ -110,7 +110,7 @@ python3 -m pip install aiohttp -i http://mirrors.aliyun.com/pypi/simple/ --trust
|
||||
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
||||
EOT
|
||||
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
|
||||
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
|
||||
@@ -17,23 +17,19 @@ SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
|
||||
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
|
||||
ENV STAGE_DIR=/tmp
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
# SYSTEM_NAME=$(lsb_release -cs) # 查看发行版本
|
||||
# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
# This has to be run (again) inside the GPU VMs running the tests.
|
||||
# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
|
||||
# TODO: Find out why test fail. install deepspeed
|
||||
# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail
|
||||
# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
|
||||
# install deepspeed prepare
|
||||
# install Mellanox OFED prepare
|
||||
apt-get update
|
||||
apt install -y libnvidia-compute-535
|
||||
apt-get install -y pciutils tk kmod libusb-1.0-0 tcl chrpath libpci3 bison lsof graphviz ethtool swig udev libltdl-dev libelf1 libmnl0 debhelper flex libfuse2
|
||||
# install Mellanox OFED
|
||||
mkdir -p ${STAGE_DIR}
|
||||
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
|
||||
cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
||||
./mlnxofedinstall --user-space-only --without-fw-update --all -q
|
||||
wget http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -O ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz
|
||||
tar xzf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -C ${STAGE_DIR}
|
||||
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
||||
./mlnxofedinstall --user-space-only --without-fw-update --all -q > ${STAGE_DIR}/mlnxofedinstall.log 2>&1
|
||||
cd ${STAGE_DIR}
|
||||
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
|
||||
EOT
|
||||
@@ -86,7 +82,6 @@ pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=
|
||||
pip install git+https://github.com/huggingface/transformers
|
||||
EOT
|
||||
|
||||
ENV STAGE_DIR=/tmp
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
|
||||
|
||||
@@ -104,7 +104,7 @@ npm install -g configurable-http-proxy
|
||||
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
||||
EOT
|
||||
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
|
||||
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
|
||||
@@ -29,7 +29,8 @@ NCCL_IB_DISABLE=0
|
||||
## test command
|
||||
|
||||
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
|
||||
docker run --rm -it --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||
docker run --rm -it --network=host --privileged --ipc=host --gpus all --ulimit memlock=-1 stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||
docker run --rm -it --privileged --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||
|
||||
```shell
|
||||
nvidia-smi
|
||||
@@ -120,16 +121,23 @@ pytorch-notebook
|
||||
|
||||
```shell
|
||||
git clone https://github.com/jupyter/docker-stacks.git
|
||||
cd docker-stacks/images/docker-stacks-foundation
|
||||
docker buildx build --build-arg ROOT_CONTAINER=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
|
||||
cd spawnerdockerfile/docker-stacks/images/docker-stacks-foundation
|
||||
nerdctl --namespace buildkit build --no-cache --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
|
||||
nerdctl --namespace buildkit build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
|
||||
nerdctl --namespace buildkit push quay.io/hotwa/docker-stacks-foundation:latest
|
||||
docker buildx build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
|
||||
cd ../base-notebook
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest .
|
||||
cd ../minimal-notebook/
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest .
|
||||
cd ../scipy-notebook
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest .
|
||||
cd ../pytorch-notebook
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest .
|
||||
```
|
||||
|
||||
# 然后构建自己的基础镜像
|
||||
|
||||
@@ -45,7 +45,7 @@ services:
|
||||
# tty: true
|
||||
# stdin_open: true
|
||||
restart: unless-stopped
|
||||
image: hotwa/notebook:ngc
|
||||
image: quay.io/hotwa/ngc:latest
|
||||
privileged: true
|
||||
ipc: host
|
||||
network_mode: host
|
||||
|
||||
Reference in New Issue
Block a user