10 Commits
devgpu ... main

Author SHA1 Message Date
Your Name
ab0838c738 update 2024-11-13 17:44:41 +08:00
Your Name
82ddb287fe add example cmd 2024-11-12 21:02:24 +08:00
Your Name
c885d79433 update rstudio 2024-11-12 21:02:11 +08:00
Your Name
2d6bb7c2cc update to new rstudio 2024-11-12 16:26:45 +08:00
f2b9b5033b udpate 2024-07-18 13:07:39 +08:00
96b87a8f82 change name 2024-07-18 12:08:35 +08:00
00a7a5ee6a update 2024-07-18 12:05:40 +08:00
4da2a4c813 add system package 2024-07-18 10:43:06 +08:00
4a6697ef69 add env stage_dir at start 2024-07-18 10:19:53 +08:00
305fd85088 update 2024-07-18 09:57:12 +08:00
11 changed files with 130 additions and 148 deletions

1
.gitignore vendored
View File

@@ -1,2 +1,3 @@
/docker-stacks
jupyterhub-data
*.tar

View File

@@ -1,3 +1,4 @@
# syntax=docker/dockerfile:1
FROM debian:bullseye
ARG CREATE_USER="jovyan"
ARG CREATE_USER_PASSWD="password"

View File

@@ -8,6 +8,8 @@ ARG ROOT_PASSWD="password"
ARG HOME="/home/${CREATE_USER}"
ARG DEBIAN_FRONTEND="noninteractive"
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
ARG NODEJS_VERSION='18'
ENV NODEJS_VERSION=${NODEJS_VERSION}
USER root
RUN <<EOT
@@ -19,6 +21,8 @@ apt-get install -y tzdata
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'Asia/Shanghai' > /etc/timezone
dpkg-reconfigure -f noninteractive tzdata
sudo apt-get remove --purge libnode72:amd64 -y
curl -fsSL https://deb.nodesource.com/setup_${NODEJS_VERSION}.x | sudo -E bash -
# 安装所需的软件包
apt-get install -y python3 python3-pip gcc g++ build-essential nodejs npm gdebi-core curl wget openssh-server vim lrzsz net-tools sudo git
# 创建新用户
@@ -29,17 +33,26 @@ EOT
RUN <<EOT
#!/bin/bash
mkdir -p ~/.pip
echo "
[global]
index-url = https://mirrors.aliyun.com/pypi/simple/
[install]
trusted-host=mirrors.aliyun.com
" >> ~/.pip/pip.conf
# 安装 Jupyter 和相关软件
npm install -g configurable-http-proxy
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab-unfold jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector
npm install -g configurable-http-proxy yarn --registry=https://registry.npmmirror.com
python3 -m pip install ipython jupyter_packaging jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy \
ipykernel jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator \
jupyter_contrib_nbextensions jupyterlab-unfold jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor \
jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse \
ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 \
xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
git clone https://ghproxy.dockless.eu.org/https://github.com/arose/nglview
cd nglview
python3 setup.py install
cd js
rm -rf node_modules package-lock.json
npm install typescript@latest --registry=https://registry.npmmirror.com
npm install --registry=https://registry.npmmirror.com
cd ../..
python -m ipykernel install --sys-prefix
jupyter nbextension enable --py --sys-prefix widgetsnbextension
jupyter nbextension enable --py --sys-prefix nglview
jupyter-nbextension enable nglview --py --sys-prefix
# 创建 JupyterHub 配置目录
mkdir -p /root/.jupyterhub
EOT
@@ -56,26 +69,13 @@ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sud
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
apt-get update
apt install --no-install-recommends r-base
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2023.06.1-524-amd64.deb" -O /tmp/rstudio-server.deb
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2024.09.1-394-amd64.deb" -O /tmp/rstudio-server.deb
sudo chmod +x /tmp/rstudio-server.deb
sudo gdebi -n /tmp/rstudio-server.deb
sudo rm -rf /tmp/rstudio-server.deb
EOT
RUN <<EOT
#!/bin/bash
# 安装micromamba
echo "1" | bash <(curl -s https://raw.githubusercontent.com/hotwa/MicroMamba_Installer/main/install.sh)
mkdir -p /root/workspace/personal /root/workspace/project
EOT
RUN <<EOT
#!/bin/bash
# 清理和减小镜像大小
apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man/?? /usr/share/man/??_*
EOT
CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]

View File

@@ -12,7 +12,7 @@ services:
args:
JUPYTERHUB_VERSION: latest
restart: always
image: hotwa/jupyterhub # registry.cn-hangzhou.aliyuncs.com/hotwa/jupyterhub
image: quay.io/hotwa/jupyterhub:latest
container_name: jupyterhub
networks:
- jupyterhub-network
@@ -23,16 +23,17 @@ services:
# within the container
- "/var/run/docker.sock:/var/run/docker.sock:rw"
# Bind Docker volume on host for JupyterHub database and cookie secrets
- "jupyterhub-data:/data"
- "./jupyterhub-data:/data"
ports:
- "8000:8000"
- "8080:8080"
environment:
# This username will be a JupyterHub admin
JUPYTERHUB_ADMIN: admin
# All containers will join this network
DOCKER_NETWORK_NAME: jupyterhub-network
# JupyterHub will spawn this Notebook image for users
DOCKER_NOTEBOOK_IMAGE: hotwa/notebook:latest
DOCKER_NOTEBOOK_IMAGE: quay.io/hotwa/notebook:latest
# Notebook directory inside user image
DOCKER_NOTEBOOK_DIR: /home/jovyan/work

49
id_rsa
View File

@@ -1,49 +0,0 @@
-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn
NhAAAAAwEAAQAAAgEArjozHdq/VMgEmQZn5i/3UiRxmU2EZ0J7h0bMV/dIl7dALHDQeGuh
Y8rwcCXsCwNBYGvRhBYkaMIgM+Gi/BTDufgHyJ7UKsYWACdxrj8Ycy1nS/qB2ppuLcRrzi
us/XDuU8eBdqrvjL7KXn6FcBXE3wPSas3rPnBp1o0Fc4ipu91U/LDpZ8RvAesvMFfZc4cm
QO/6zNm7zQ7vEemhXmFKnafe+EGJl9YIJuHud8EkRKwb7YMCjSdUxk/LRN4hlFel5+Hgf5
tMmJpXdIyusCtGVG1IhvQv/R6ojLX+5qZE+3P7FFJWY/KeLqymiC9VCZ7nOmYzhhU7ROIv
I4Eszdpp+vVn93lpfa1jjYIl1WbU9rhCbmjdbthDrK/8R/9NRIgLJbCIdXUOKv+JS2aX+a
0dmPKDTuUAZwU7K5c4ftlfi7eb5D7gHKR4XE9NuOZncG8Idb0OQg79txOhYn/nheV3yv34
VcSQytR/9EoEgjEfdTUhaqovSGEkMWPC7JU3wZnvM0q46xdC/QCjqAciDVtkRJH9GnKpJ1
pnIvHSQMfy65xKcWA22SzlfL6hlEAVAsIj7/g85JkOkOxy/uSxPR0l/lKfPvu/JpaIudTG
R12K3aPoiNsJPR4UGIm1IBjv4f6QiKYjKImAY28MBtwAYKyZrbQBwMcMcsLyMowcU7HHtJ
0AAAdQYDap4GA2qeAAAAAHc3NoLXJzYQAAAgEArjozHdq/VMgEmQZn5i/3UiRxmU2EZ0J7
h0bMV/dIl7dALHDQeGuhY8rwcCXsCwNBYGvRhBYkaMIgM+Gi/BTDufgHyJ7UKsYWACdxrj
8Ycy1nS/qB2ppuLcRrzius/XDuU8eBdqrvjL7KXn6FcBXE3wPSas3rPnBp1o0Fc4ipu91U
/LDpZ8RvAesvMFfZc4cmQO/6zNm7zQ7vEemhXmFKnafe+EGJl9YIJuHud8EkRKwb7YMCjS
dUxk/LRN4hlFel5+Hgf5tMmJpXdIyusCtGVG1IhvQv/R6ojLX+5qZE+3P7FFJWY/KeLqym
iC9VCZ7nOmYzhhU7ROIvI4Eszdpp+vVn93lpfa1jjYIl1WbU9rhCbmjdbthDrK/8R/9NRI
gLJbCIdXUOKv+JS2aX+a0dmPKDTuUAZwU7K5c4ftlfi7eb5D7gHKR4XE9NuOZncG8Idb0O
Qg79txOhYn/nheV3yv34VcSQytR/9EoEgjEfdTUhaqovSGEkMWPC7JU3wZnvM0q46xdC/Q
CjqAciDVtkRJH9GnKpJ1pnIvHSQMfy65xKcWA22SzlfL6hlEAVAsIj7/g85JkOkOxy/uSx
PR0l/lKfPvu/JpaIudTGR12K3aPoiNsJPR4UGIm1IBjv4f6QiKYjKImAY28MBtwAYKyZrb
QBwMcMcsLyMowcU7HHtJ0AAAADAQABAAACAQCh2Xz84eXVy43B3hqM0shNLX++Ky2xgygf
7dwMZ7KvBdP+tLpk849UlgSwGfph7J9CBEb5Dly6WIPzzktLI7sDOm94pltwdN2hPQM1HH
YJIQeRpGAXBFkP+SUwvJASTGOJvo78/yRTujVifORW0FI33mQNRcegFGutnQmQX0dWNvEp
YOsis29v99u5bBKcaNNXhdeVjzsP7iyWllOKypS3SmgudSttuq2N+Vo/M1QO21eCV676E6
yEIUIycqv9E0MS21ljEZChPte5dKsfCsRlz7ONcrBV0rI/LR5jg9gXQ5HeOfL5HDxoHYkC
f/3eVxh0vtZkW7rYbcnekLRfcuP5brfyvdtzXGHGy1upVhmsYIJJ6s2OCEqi5NmQ36SgGO
rVwJMaX9WZbxXFP2Z8Kn/8Acx/pbxaco4X66vRtw6d8GE6WldlzTJF5okq0KSbCnTfyHvm
1j6TaHnFG0GqzX4D3THW7ZOGeud5unvI1MnP64P6q2hGax/KWFuQEl/WTxw3NDAC82kj+4
LqOpPt0OF8m9j+agVxb+dcdczw6MoB8tCWWQwZxYzUNqWAio6V82mTbCu/KFtuTIrd5orq
vLt2dIXeEPkGy2Q4f8RvlqigDQbwQW1lleWdCbX56SFPlASWDGTXF8EsP9m+LDsz9OY5Lu
2aue4YDaFAtOnOm332wQAAAQEAun/OvgihnXYyFdWNr6zlufEqDbHaR7hT7iefqpSM8NsO
rOS9WsSIyVYpIg6Od+/HWTrdpCDDUdRgX0znIhMZF+YjPV8+J8rVbwTCsmpxjBBtVPG3DX
ICb3Y+WHtMznyrk+3vKG+FhWFEip6BXiQQW/9r8by9/Ho19eK8IzmcYDWx81a6CaxgxGM3
OhdcVVxrMHrlX7Lj6guM81pc5L3HM4lYYihEuIDGMC6GqaKqPMISzroCJ1gIvlE+mdyK/u
lXV5tT41OIsnNOdNzTDPc6Iff62Of7KceuQpRAgUXKIReei6JCrGl66Q1EDY8f4RiwGU1o
tSUPJ3oc9KYTsNw6pgAAAQEA06Jk8wSN9siVTazsE0aNcE+cQUAdqjCcfgNt/PR8kfqUGs
9FA33icsulTiOXyzPr5Ua96v6mAtscbkWLPPXVYm9DifarC787EfaeX2SZrmNpELV4I4oj
U/mbKKDoMwtZTuzkClswBPa5o5yoo8TzFISTAdbK26xquS4THaOUsXv0zI26HUMoCf4Idr
06xtUwljLdVdEjBkpXTRMLUbSfMoOOYTPmsMN2yCyz2AI5XCs5ChXF0q7rFdY68VG6n7Go
6Un4HKfpUxdouHexTX4PJsU225oeLtCSMfsil1jf25Z7Kj3/VuEEQ4h/DW+iE7ZBz3Czgd
iiUTZHgTrWm18wkQAAAQEA0sBQdC2Af3psNbk0vQ7MVwMQED7q0OHSpDfKxQdLgo0xASJ0
nTX5HamlA57Owrln0HgW332Xp3vHwL9170pk260xx0R2gndX6JxZTbI6RBhggEfSDj7YOt
aAWEk1zzcqi0IBv51x8gd7xqwOezkxpM2k3Ej93/+/qzEJDwcn/QxCjqwRRQgY3KGr+H+q
R/cFkD8b+lFaLumQu1v7dMJltOD5Ls+rPglIUqWBeMILuyPbmBXxFF00yOuhkcpIwPN266
nTtSishRCLkDL83lWVGw3PmOtifEmlvU8cb+t7d+T/kvwgTUSd6m8oVHphxvx3NBUmpzhO
H0lsz1qPDuEJTQAAABJweWx5emVuZ0BnbWFpbC5jb20BAgMEBQYH
-----END OPENSSH PRIVATE KEY-----

View File

@@ -10,7 +10,21 @@ c = get_config() # noqa: F821
# avoid having to rebuild the JupyterHub container every time we change a
# configuration parameter.
# from dockerspawner import DockerSpawner
# class MyDockerSpawner(DockerSpawner):
# def start(self):
# # 启动父类的start方法
# self.user_options['environment']['JUPYTER_ENABLE_NBEXTENSIONS'] = 'true'
# self.user_options['cmd'] = [
# 'bash',
# '-c',
# 'pip install nglview jupyter_packaging -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com && jupyter nbextension enable nglview --py --sys-prefix && jupyter labextension install nglview-js-widgets && jupyter labextension install @jupyter-widgets/jupyterlab-manager && start-singleuser.sh'
# ]
# return super().start()
# Spawn single-user servers as Docker containers
c.Authenticator.allow_all = True
c.JupyterHub.spawner_class = "dockerspawner.DockerSpawner"
# Spawn containers from this image

View File

@@ -6,7 +6,7 @@ http {
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
'' close;
}
server {
@@ -33,6 +33,7 @@ http {
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Port $server_port; # 添加这一行,保持原有的端口号
# WebSocket support
proxy_http_version 1.1;

View File

@@ -78,6 +78,8 @@ ARG ROOT_PASSWD="password"
ARG HOME="/home/${CREATE_USER}"
ARG DEBIAN_FRONTEND="noninteractive"
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
ARG NODEJS_VERSION='18'
ENV NODEJS_VERSION=${NODEJS_VERSION}
COPY install.sh /tmp
RUN <<EOT
@@ -90,7 +92,10 @@ ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'Asia/Shanghai' > /etc/timezone
dpkg-reconfigure -f noninteractive tzdata
# 安装所需的软件包
sudo apt-get remove --purge libnode72:amd64 -y
curl -fsSL https://deb.nodesource.com/setup_${NODEJS_VERSION}.x | sudo -E bash -
apt-get install -y python3 python3-pip gcc g++ build-essential nodejs npm gdebi-core curl wget openssh-server vim lrzsz net-tools sudo git nodejs
npm install -g configurable-http-proxy yarn --registry=https://registry.npmmirror.com
# 创建新用户
useradd -m -s /bin/bash ${CREATE_USER}
echo "${CREATE_USER}:${CREATE_USER_PASSWD}" | chpasswd
@@ -100,16 +105,12 @@ EOT
# 安装 Jupyter 和相关软件
RUN <<EOT
#!/bin/bash
npm install -g configurable-http-proxy
git clone https://github.com/arose/nglview
cd nglview
python setup.py install
cd ..
python3 -m pip install ipython jupyterhub jupyterlab notebook jupyter_packaging -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
python3 -m pip install aiohttp -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
EOT
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
ENV RSERVER_VERSION=${RSERVER_VERSION}
RUN <<EOT
#!/bin/bash
@@ -138,23 +139,58 @@ USER ${NB_UID}
ARG MODULAR_HOME="/home/${CREATE_USER}/.modular"
ENV MODULAR_HOME=$MODULAR_HOME
ENV PATH="$MODULAR_HOME/pkg/packages.modular.com_mojo/bin:/home/${CREATE_USER}/.local/bin:$PATH"
ARG HTTP_PROXY=""
ARG HTTPS_PROXY=""
ARG NO_PROXY="localhost,127.0.0.1"
ENV HTTP_PROXY=$HTTP_PROXY
ENV HTTPS_PROXY=$HTTPS_PROXY
RUN <<EOT
#!/bin/bash
curl https://get.modular.com | sh -
mamba create -n mojo python=3.10 -c conda-forge -y
mamba activate mojo
modular install mojo
curl --retry 5 https://get.modular.com | sh -
mamba create -n mojo python=3.10 ipykernel ipython nglview -c conda-forge -y
mamba activate mojo && modular install mojo
mamba run -n mojo python -m ipykernel install --user --name="mojo" --display-name="mojo_env"
EOT
# Install Rust
# Install Rust https://rsproxy.cn/#getStarted
ENV RUSTUP_DIST_SERVER="https://rsproxy.cn"
ENV RUSTUP_UPDATE_ROOT="https://rsproxy.cn/rustup"
ENV PATH="/home/${CREATE_USER}/.cargo/bin:$PATH"
ENV CARGO_UNSTABLE_SPARSE_REGISTRY="true"
RUN <<EOT
#!/bin/bash
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
curl --proto '=https' --tlsv1.2 -sSf https://rsproxy.cn/rustup-init.sh | sh -s -- -y
echo '[source.crates-io]
replace-with = "rsproxy-sparse"
[source.rsproxy]
registry = "https://rsproxy.cn/crates.io-index"
[source.rsproxy-sparse]
registry = "sparse+https://rsproxy.cn/index/"
[registries.rsproxy]
index = "https://rsproxy.cn/crates.io-index"
[net]
git-fetch-with-cli = true' >> ~/.cargo/config.toml
source $HOME/.cargo/env
cargo install evcxr_jupyter
evcxr_jupyter --install
EOT
RUN <<EOT
#!/bin/bash
sudo apt update
sudo apt install -y texlive-full
mamba create -n torch tensorboard jupyter_packaging pytorch biopython biopandas pymol-open-source mdtraj torchvision torchaudio python=3 ipython requests scienceplots autopep8 ipykernel\
pandas numpy matplotlib scipy seaborn orange3 -c pytorch -c nvidia -c conda-forge -y
git clone https://ghproxy.dockless.eu.org/https://github.com/arose/nglview
cd nglview
mamba run -n torch python3 setup.py install
cd js
rm -rf node_modules package-lock.json
npm install typescript@latest --registry=https://registry.npmmirror.com
npm install --registry=https://registry.npmmirror.com
cd ../..
mamba run -n torch python -m ipykernel install --user --name="torch" --display-name="torch_env"
EOT
WORKDIR "${HOME}"

View File

@@ -15,68 +15,44 @@ ENV ROOT_PASSWD=${ROOT_PASSWD}
WORKDIR /root
SHELL ["/bin/bash", "-c"]
ENV STAGE_DIR=/tmp
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
ENV STAGE_DIR=/tmp
RUN <<EOT
#!/bin/bash
# SYSTEM_NAME=$(lsb_release -cs) # 查看发行版本
# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
# TODO: Find out why test fail. install deepspeed
# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail
# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
# install deepspeed prepare
# install Mellanox OFED prepare
apt-get update
apt-get install -y libnvidia-compute-535
apt-get install -y flex tk ethtool libpci3 libltdl-dev bison lsof tcl libelf1 pciutils kmod libmnl0 debhelper libusb-1.0-0 graphviz chrpath swig libfuse2 udev
apt install -y libnvidia-compute-535
apt-get install -y pciutils tk kmod libusb-1.0-0 tcl chrpath libpci3 bison lsof graphviz ethtool swig udev libltdl-dev libelf1 libmnl0 debhelper flex libfuse2
# install Mellanox OFED
mkdir -p ${STAGE_DIR}
cd ${STAGE_DIR}
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
wget http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -O ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz
tar xzf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -C ${STAGE_DIR}
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
./mlnxofedinstall --user-space-only --without-fw-update --all -q > /tmp/mlnxofedinstall.log 2>&1
./mlnxofedinstall --user-space-only --without-fw-update --all -q > ${STAGE_DIR}/mlnxofedinstall.log 2>&1
cd ${STAGE_DIR}
# rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
EOT
ARG NV_PEER_MEM_VERSION="1.2"
ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
RUN mkdir -p ${STAGE_DIR} && \
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory && \
cd ${STAGE_DIR}/nv_peer_memory && \
./build_module.sh && \
cd ${STAGE_DIR} && \
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz && \
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} && \
apt-get update && \
apt-get --fix-broken install -y && \
apt-get install -y dkms && \
dpkg-buildpackage -us -uc && \
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
# ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
# ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
# RUN <<EOT
# #!/bin/bash
# # install nv_peer_mem
# mkdir -p ${STAGE_DIR}
# git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
# cd ${STAGE_DIR}/nv_peer_memory
# ./build_module.sh
# cd ${STAGE_DIR}
# tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
# cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
# apt-get update
# apt-get --fix-broken install -y
# apt-get install -y dkms
# dpkg-buildpackage -us -uc
# dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
# EOT
RUN <<EOT
#!/bin/bash
# install nv_peer_mem
mkdir -p ${STAGE_DIR}
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
cd ${STAGE_DIR}/nv_peer_memory
./build_module.sh
cd ${STAGE_DIR}
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
apt-get update
apt --fix-broken install -y
apt-get install -y dkms
dpkg-buildpackage -us -uc
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
EOT
# base tools
RUN <<EOT
@@ -108,7 +84,6 @@ EOT
RUN <<EOT
#!/bin/bash
apt install -y cmake gcc g++ libssl-dev
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
cd ${STAGE_DIR}/DeepSpeed-Kernels
python -m pip install -v .
@@ -162,9 +137,6 @@ python -m pip install black~=23.1 ruff==0.1.5 diffusers>=0.17.0
python -m pip install --no-deps git+https://github.com/huggingface/optimum.git#egg=optimum[diffusers,quality]
EOT
# SSH daemon port inside container cannot conflict with host OS port
ENV SSH_PORT=2222
RUN <<EOT
#!/bin/bash
# 项目目录中的定义通常会覆盖用户家目录中的定义
@@ -179,8 +151,6 @@ CUDA_HOME=${CUDA_HOME}
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
EOF
unset https_proxy http_proxy
cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config && \
sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
EOT
CMD ["/usr/sbin/sshd", "-D"]

View File

@@ -104,7 +104,7 @@ npm install -g configurable-http-proxy
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
EOT
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
ENV RSERVER_VERSION=${RSERVER_VERSION}
RUN <<EOT
#!/bin/bash

View File

@@ -29,8 +29,8 @@ NCCL_IB_DISABLE=0
## test command
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
docker run --rm -it --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/pytorch:24.06-py3 /bin/bash
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all quay.io/hotwa/ngc:latest /bin/bash
docker run --rm -it --network=host --privileged --ipc=host --gpus all --ulimit memlock=-1 stack=67108864 hotwa/notebook:ngc /bin/bash
docker run --rm -it --privileged --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
```shell
nvidia-smi
@@ -121,16 +121,23 @@ pytorch-notebook
```shell
git clone https://github.com/jupyter/docker-stacks.git
cd docker-stacks/images/docker-stacks-foundation
docker buildx build --build-arg ROOT_CONTAINER=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
cd spawnerdockerfile/docker-stacks/images/docker-stacks-foundation
nerdctl --namespace buildkit build --no-cache --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
nerdctl --namespace buildkit build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
nerdctl --namespace buildkit push quay.io/hotwa/docker-stacks-foundation:latest
docker buildx build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
cd ../base-notebook
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest .
cd ../minimal-notebook/
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest .
cd ../scipy-notebook
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest .
cd ../pytorch-notebook
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest . --load
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest .
```
# 然后构建自己的基础镜像