Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ab0838c738 | ||
|
|
82ddb287fe | ||
|
|
c885d79433 | ||
|
|
2d6bb7c2cc | ||
| f2b9b5033b | |||
| 96b87a8f82 | |||
| 00a7a5ee6a | |||
| 4da2a4c813 | |||
| 4a6697ef69 | |||
| 305fd85088 |
@@ -69,7 +69,7 @@ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sud
|
|||||||
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
|
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
|
||||||
apt-get update
|
apt-get update
|
||||||
apt install --no-install-recommends r-base
|
apt install --no-install-recommends r-base
|
||||||
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2023.06.1-524-amd64.deb" -O /tmp/rstudio-server.deb
|
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2024.09.1-394-amd64.deb" -O /tmp/rstudio-server.deb
|
||||||
sudo chmod +x /tmp/rstudio-server.deb
|
sudo chmod +x /tmp/rstudio-server.deb
|
||||||
sudo gdebi -n /tmp/rstudio-server.deb
|
sudo gdebi -n /tmp/rstudio-server.deb
|
||||||
sudo rm -rf /tmp/rstudio-server.deb
|
sudo rm -rf /tmp/rstudio-server.deb
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ python3 -m pip install aiohttp -i http://mirrors.aliyun.com/pypi/simple/ --trust
|
|||||||
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
|
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
|
||||||
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|||||||
@@ -17,23 +17,19 @@ SHELL ["/bin/bash", "-c"]
|
|||||||
|
|
||||||
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
|
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
|
||||||
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
|
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
|
||||||
|
ENV STAGE_DIR=/tmp
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# SYSTEM_NAME=$(lsb_release -cs) # 查看发行版本
|
# install Mellanox OFED prepare
|
||||||
# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
apt-get update
|
||||||
python3 -m pip uninstall -y deepspeed
|
apt install -y libnvidia-compute-535
|
||||||
# This has to be run (again) inside the GPU VMs running the tests.
|
apt-get install -y pciutils tk kmod libusb-1.0-0 tcl chrpath libpci3 bison lsof graphviz ethtool swig udev libltdl-dev libelf1 libmnl0 debhelper flex libfuse2
|
||||||
# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
|
|
||||||
# TODO: Find out why test fail. install deepspeed
|
|
||||||
# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
|
||||||
# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail
|
|
||||||
# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
|
|
||||||
# install deepspeed prepare
|
|
||||||
# install Mellanox OFED
|
# install Mellanox OFED
|
||||||
mkdir -p ${STAGE_DIR}
|
mkdir -p ${STAGE_DIR}
|
||||||
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
|
wget http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -O ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz
|
||||||
cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
tar xzf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -C ${STAGE_DIR}
|
||||||
./mlnxofedinstall --user-space-only --without-fw-update --all -q
|
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
||||||
|
./mlnxofedinstall --user-space-only --without-fw-update --all -q > ${STAGE_DIR}/mlnxofedinstall.log 2>&1
|
||||||
cd ${STAGE_DIR}
|
cd ${STAGE_DIR}
|
||||||
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
|
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
|
||||||
EOT
|
EOT
|
||||||
@@ -86,7 +82,6 @@ pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=
|
|||||||
pip install git+https://github.com/huggingface/transformers
|
pip install git+https://github.com/huggingface/transformers
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
ENV STAGE_DIR=/tmp
|
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
|
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ npm install -g configurable-http-proxy
|
|||||||
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
|
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
|
||||||
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|||||||
@@ -29,7 +29,8 @@ NCCL_IB_DISABLE=0
|
|||||||
## test command
|
## test command
|
||||||
|
|
||||||
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
|
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
|
||||||
docker run --rm -it --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
|
docker run --rm -it --network=host --privileged --ipc=host --gpus all --ulimit memlock=-1 stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||||
|
docker run --rm -it --privileged --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
@@ -120,16 +121,23 @@ pytorch-notebook
|
|||||||
|
|
||||||
```shell
|
```shell
|
||||||
git clone https://github.com/jupyter/docker-stacks.git
|
git clone https://github.com/jupyter/docker-stacks.git
|
||||||
cd docker-stacks/images/docker-stacks-foundation
|
cd spawnerdockerfile/docker-stacks/images/docker-stacks-foundation
|
||||||
docker buildx build --build-arg ROOT_CONTAINER=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
|
nerdctl --namespace buildkit build --no-cache --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
|
||||||
|
nerdctl --namespace buildkit build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
|
||||||
|
nerdctl --namespace buildkit push quay.io/hotwa/docker-stacks-foundation:latest
|
||||||
|
docker buildx build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
|
||||||
cd ../base-notebook
|
cd ../base-notebook
|
||||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest . --load
|
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest . --load
|
||||||
|
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest .
|
||||||
cd ../minimal-notebook/
|
cd ../minimal-notebook/
|
||||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest . --load
|
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest . --load
|
||||||
|
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest .
|
||||||
cd ../scipy-notebook
|
cd ../scipy-notebook
|
||||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest . --load
|
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest . --load
|
||||||
|
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest .
|
||||||
cd ../pytorch-notebook
|
cd ../pytorch-notebook
|
||||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest . --load
|
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest . --load
|
||||||
|
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest .
|
||||||
```
|
```
|
||||||
|
|
||||||
# 然后构建自己的基础镜像
|
# 然后构建自己的基础镜像
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ services:
|
|||||||
# tty: true
|
# tty: true
|
||||||
# stdin_open: true
|
# stdin_open: true
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
image: hotwa/notebook:ngc
|
image: quay.io/hotwa/ngc:latest
|
||||||
privileged: true
|
privileged: true
|
||||||
ipc: host
|
ipc: host
|
||||||
network_mode: host
|
network_mode: host
|
||||||
|
|||||||
Reference in New Issue
Block a user