10 Commits
devgpu ... main

Author SHA1 Message Date
Your Name
ab0838c738 update 2024-11-13 17:44:41 +08:00
Your Name
82ddb287fe add example cmd 2024-11-12 21:02:24 +08:00
Your Name
c885d79433 update rstudio 2024-11-12 21:02:11 +08:00
Your Name
2d6bb7c2cc update to new rstudio 2024-11-12 16:26:45 +08:00
f2b9b5033b udpate 2024-07-18 13:07:39 +08:00
96b87a8f82 change name 2024-07-18 12:08:35 +08:00
00a7a5ee6a update 2024-07-18 12:05:40 +08:00
4da2a4c813 add system package 2024-07-18 10:43:06 +08:00
4a6697ef69 add env stage_dir at start 2024-07-18 10:19:53 +08:00
305fd85088 update 2024-07-18 09:57:12 +08:00
6 changed files with 24 additions and 21 deletions

View File

@@ -69,7 +69,7 @@ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sud
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
apt-get update
apt install --no-install-recommends r-base
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2023.06.1-524-amd64.deb" -O /tmp/rstudio-server.deb
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2024.09.1-394-amd64.deb" -O /tmp/rstudio-server.deb
sudo chmod +x /tmp/rstudio-server.deb
sudo gdebi -n /tmp/rstudio-server.deb
sudo rm -rf /tmp/rstudio-server.deb

View File

@@ -110,7 +110,7 @@ python3 -m pip install aiohttp -i http://mirrors.aliyun.com/pypi/simple/ --trust
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
EOT
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
ENV RSERVER_VERSION=${RSERVER_VERSION}
RUN <<EOT
#!/bin/bash

View File

@@ -17,23 +17,19 @@ SHELL ["/bin/bash", "-c"]
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
ENV STAGE_DIR=/tmp
RUN <<EOT
#!/bin/bash
# SYSTEM_NAME=$(lsb_release -cs) # 查看发行版本
# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
# TODO: Find out why test fail. install deepspeed
# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail
# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
# install deepspeed prepare
# install Mellanox OFED prepare
apt-get update
apt install -y libnvidia-compute-535
apt-get install -y pciutils tk kmod libusb-1.0-0 tcl chrpath libpci3 bison lsof graphviz ethtool swig udev libltdl-dev libelf1 libmnl0 debhelper flex libfuse2
# install Mellanox OFED
mkdir -p ${STAGE_DIR}
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
./mlnxofedinstall --user-space-only --without-fw-update --all -q
wget http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -O ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz
tar xzf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -C ${STAGE_DIR}
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
./mlnxofedinstall --user-space-only --without-fw-update --all -q > ${STAGE_DIR}/mlnxofedinstall.log 2>&1
cd ${STAGE_DIR}
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
EOT
@@ -86,7 +82,6 @@ pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=
pip install git+https://github.com/huggingface/transformers
EOT
ENV STAGE_DIR=/tmp
RUN <<EOT
#!/bin/bash
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels

View File

@@ -104,7 +104,7 @@ npm install -g configurable-http-proxy
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
EOT
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
ENV RSERVER_VERSION=${RSERVER_VERSION}
RUN <<EOT
#!/bin/bash

View File

@@ -29,7 +29,8 @@ NCCL_IB_DISABLE=0
## test command
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
docker run --rm -it --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
docker run --rm -it --network=host --privileged --ipc=host --gpus all --ulimit memlock=-1 stack=67108864 hotwa/notebook:ngc /bin/bash
docker run --rm -it --privileged --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
```shell
nvidia-smi
@@ -120,16 +121,23 @@ pytorch-notebook
```shell
git clone https://github.com/jupyter/docker-stacks.git
cd docker-stacks/images/docker-stacks-foundation
docker buildx build --build-arg ROOT_CONTAINER=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
cd spawnerdockerfile/docker-stacks/images/docker-stacks-foundation
nerdctl --namespace buildkit build --no-cache --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
nerdctl --namespace buildkit build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
nerdctl --namespace buildkit push quay.io/hotwa/docker-stacks-foundation:latest
docker buildx build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
cd ../base-notebook
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest .
cd ../minimal-notebook/
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest .
cd ../scipy-notebook
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest .
cd ../pytorch-notebook
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest .
```
# 然后构建自己的基础镜像

View File

@@ -45,7 +45,7 @@ services:
# tty: true
# stdin_open: true
restart: unless-stopped
image: hotwa/notebook:ngc
image: quay.io/hotwa/ngc:latest
privileged: true
ipc: host
network_mode: host