Revert "Merge branch 'main' into devgpu"

This reverts commit c7bff5448a, reversing changes made to cea7bc59f3.
2024-07-17 04:57:56 +00:00
parent c7bff5448a
commit 86efe1122c
12 changed files with 79 additions and 393 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,2 @@
 /docker-stacks
-jupyterhub-data
 *.tar
--- a/1
+++ b/1
@@ -1,4 +1,3 @@
-# syntax=docker/dockerfile:1
 FROM debian:bullseye
 ARG CREATE_USER="jovyan"
 ARG CREATE_USER_PASSWD="password"
--- a/Dockerfile.jupyterhub
+++ b/Dockerfile.jupyterhub
@@ -8,8 +8,6 @@ ARG ROOT_PASSWD="password"
 ARG HOME="/home/${CREATE_USER}"
 ARG DEBIAN_FRONTEND="noninteractive"
 ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
-ARG NODEJS_VERSION='18'
-ENV NODEJS_VERSION=${NODEJS_VERSION}

 USER root
 RUN <<EOT
@@ -21,8 +19,6 @@ apt-get install -y tzdata
 ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
 echo 'Asia/Shanghai' > /etc/timezone
 dpkg-reconfigure -f noninteractive tzdata
-sudo apt-get remove --purge libnode72:amd64 -y
-curl -fsSL https://deb.nodesource.com/setup_${NODEJS_VERSION}.x | sudo -E bash - 
 # 安装所需的软件包
 apt-get install -y python3 python3-pip gcc g++ build-essential nodejs npm gdebi-core curl wget openssh-server vim lrzsz net-tools sudo git
 # 创建新用户
@@ -42,25 +38,8 @@ index-url = https://mirrors.aliyun.com/pypi/simple/
 trusted-host=mirrors.aliyun.com
 " >> ~/.pip/pip.conf
 # 安装 Jupyter 和相关软件
-npm install -g configurable-http-proxy yarn --registry=https://registry.npmmirror.com
-python3 -m pip install ipython jupyter_packaging jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy \
-ipykernel jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator \
-jupyter_contrib_nbextensions jupyterlab-unfold jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor \
-jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse \
-ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 \
-xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
-git clone https://ghproxy.dockless.eu.org/https://github.com/arose/nglview
-cd nglview
-python3 setup.py install
-cd js
-rm -rf node_modules package-lock.json
-npm install typescript@latest --registry=https://registry.npmmirror.com
-npm install --registry=https://registry.npmmirror.com
-cd ../..
-python -m ipykernel install --sys-prefix
-jupyter nbextension enable --py --sys-prefix widgetsnbextension
-jupyter nbextension enable --py --sys-prefix nglview
-jupyter-nbextension enable nglview --py --sys-prefix
+npm install -g configurable-http-proxy
+python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy ipykernel jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab-unfold jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector
 # 创建 JupyterHub 配置目录
 mkdir -p /root/.jupyterhub
 EOT
@@ -81,9 +60,22 @@ sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio
 sudo chmod +x /tmp/rstudio-server.deb 
 sudo gdebi -n /tmp/rstudio-server.deb
 sudo rm -rf /tmp/rstudio-server.deb
+EOT
+
+RUN <<EOT
+#!/bin/bash
+# 安装micromamba
+echo "1" | bash <(curl -s https://raw.githubusercontent.com/hotwa/MicroMamba_Installer/main/install.sh)
+mkdir -p /root/workspace/personal /root/workspace/project
+EOT
+
+RUN <<EOT
+#!/bin/bash
 # 清理和减小镜像大小
 apt-get clean 
 rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man/?? /usr/share/man/??_*
 EOT

+
+
 CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,7 +12,7 @@ services:
      args:
        JUPYTERHUB_VERSION: latest
    restart: always
-    image: quay.io/hotwa/jupyterhub:latest
+    image: hotwa/jupyterhub # registry.cn-hangzhou.aliyuncs.com/hotwa/jupyterhub
    container_name: jupyterhub
    networks:
      - jupyterhub-network
@@ -23,17 +23,16 @@ services:
      # within the container
      - "/var/run/docker.sock:/var/run/docker.sock:rw"
      # Bind Docker volume on host for JupyterHub database and cookie secrets
-      - "./jupyterhub-data:/data"
+      - "jupyterhub-data:/data"
    ports:
      - "8000:8000"
-      - "8080:8080"
    environment:
      # This username will be a JupyterHub admin
      JUPYTERHUB_ADMIN: admin
      # All containers will join this network
      DOCKER_NETWORK_NAME: jupyterhub-network
      # JupyterHub will spawn this Notebook image for users
-      DOCKER_NOTEBOOK_IMAGE: quay.io/hotwa/notebook:latest
+      DOCKER_NOTEBOOK_IMAGE: hotwa/notebook:latest
      # Notebook directory inside user image
      DOCKER_NOTEBOOK_DIR: /home/jovyan/work

--- a/49
+++ b/49
@@ -0,0 +1,49 @@
+-----BEGIN OPENSSH PRIVATE KEY-----
+b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn
+NhAAAAAwEAAQAAAgEArjozHdq/VMgEmQZn5i/3UiRxmU2EZ0J7h0bMV/dIl7dALHDQeGuh
+Y8rwcCXsCwNBYGvRhBYkaMIgM+Gi/BTDufgHyJ7UKsYWACdxrj8Ycy1nS/qB2ppuLcRrzi
+us/XDuU8eBdqrvjL7KXn6FcBXE3wPSas3rPnBp1o0Fc4ipu91U/LDpZ8RvAesvMFfZc4cm
+QO/6zNm7zQ7vEemhXmFKnafe+EGJl9YIJuHud8EkRKwb7YMCjSdUxk/LRN4hlFel5+Hgf5
+tMmJpXdIyusCtGVG1IhvQv/R6ojLX+5qZE+3P7FFJWY/KeLqymiC9VCZ7nOmYzhhU7ROIv
+I4Eszdpp+vVn93lpfa1jjYIl1WbU9rhCbmjdbthDrK/8R/9NRIgLJbCIdXUOKv+JS2aX+a
+0dmPKDTuUAZwU7K5c4ftlfi7eb5D7gHKR4XE9NuOZncG8Idb0OQg79txOhYn/nheV3yv34
+VcSQytR/9EoEgjEfdTUhaqovSGEkMWPC7JU3wZnvM0q46xdC/QCjqAciDVtkRJH9GnKpJ1
+pnIvHSQMfy65xKcWA22SzlfL6hlEAVAsIj7/g85JkOkOxy/uSxPR0l/lKfPvu/JpaIudTG
+R12K3aPoiNsJPR4UGIm1IBjv4f6QiKYjKImAY28MBtwAYKyZrbQBwMcMcsLyMowcU7HHtJ
+0AAAdQYDap4GA2qeAAAAAHc3NoLXJzYQAAAgEArjozHdq/VMgEmQZn5i/3UiRxmU2EZ0J7
+h0bMV/dIl7dALHDQeGuhY8rwcCXsCwNBYGvRhBYkaMIgM+Gi/BTDufgHyJ7UKsYWACdxrj
+8Ycy1nS/qB2ppuLcRrzius/XDuU8eBdqrvjL7KXn6FcBXE3wPSas3rPnBp1o0Fc4ipu91U
+/LDpZ8RvAesvMFfZc4cmQO/6zNm7zQ7vEemhXmFKnafe+EGJl9YIJuHud8EkRKwb7YMCjS
+dUxk/LRN4hlFel5+Hgf5tMmJpXdIyusCtGVG1IhvQv/R6ojLX+5qZE+3P7FFJWY/KeLqym
+iC9VCZ7nOmYzhhU7ROIvI4Eszdpp+vVn93lpfa1jjYIl1WbU9rhCbmjdbthDrK/8R/9NRI
+gLJbCIdXUOKv+JS2aX+a0dmPKDTuUAZwU7K5c4ftlfi7eb5D7gHKR4XE9NuOZncG8Idb0O
+Qg79txOhYn/nheV3yv34VcSQytR/9EoEgjEfdTUhaqovSGEkMWPC7JU3wZnvM0q46xdC/Q
+CjqAciDVtkRJH9GnKpJ1pnIvHSQMfy65xKcWA22SzlfL6hlEAVAsIj7/g85JkOkOxy/uSx
+PR0l/lKfPvu/JpaIudTGR12K3aPoiNsJPR4UGIm1IBjv4f6QiKYjKImAY28MBtwAYKyZrb
+QBwMcMcsLyMowcU7HHtJ0AAAADAQABAAACAQCh2Xz84eXVy43B3hqM0shNLX++Ky2xgygf
+7dwMZ7KvBdP+tLpk849UlgSwGfph7J9CBEb5Dly6WIPzzktLI7sDOm94pltwdN2hPQM1HH
+YJIQeRpGAXBFkP+SUwvJASTGOJvo78/yRTujVifORW0FI33mQNRcegFGutnQmQX0dWNvEp
+YOsis29v99u5bBKcaNNXhdeVjzsP7iyWllOKypS3SmgudSttuq2N+Vo/M1QO21eCV676E6
+yEIUIycqv9E0MS21ljEZChPte5dKsfCsRlz7ONcrBV0rI/LR5jg9gXQ5HeOfL5HDxoHYkC
+f/3eVxh0vtZkW7rYbcnekLRfcuP5brfyvdtzXGHGy1upVhmsYIJJ6s2OCEqi5NmQ36SgGO
+rVwJMaX9WZbxXFP2Z8Kn/8Acx/pbxaco4X66vRtw6d8GE6WldlzTJF5okq0KSbCnTfyHvm
+1j6TaHnFG0GqzX4D3THW7ZOGeud5unvI1MnP64P6q2hGax/KWFuQEl/WTxw3NDAC82kj+4
+LqOpPt0OF8m9j+agVxb+dcdczw6MoB8tCWWQwZxYzUNqWAio6V82mTbCu/KFtuTIrd5orq
+vLt2dIXeEPkGy2Q4f8RvlqigDQbwQW1lleWdCbX56SFPlASWDGTXF8EsP9m+LDsz9OY5Lu
+2aue4YDaFAtOnOm332wQAAAQEAun/OvgihnXYyFdWNr6zlufEqDbHaR7hT7iefqpSM8NsO
+rOS9WsSIyVYpIg6Od+/HWTrdpCDDUdRgX0znIhMZF+YjPV8+J8rVbwTCsmpxjBBtVPG3DX
+ICb3Y+WHtMznyrk+3vKG+FhWFEip6BXiQQW/9r8by9/Ho19eK8IzmcYDWx81a6CaxgxGM3
+OhdcVVxrMHrlX7Lj6guM81pc5L3HM4lYYihEuIDGMC6GqaKqPMISzroCJ1gIvlE+mdyK/u
+lXV5tT41OIsnNOdNzTDPc6Iff62Of7KceuQpRAgUXKIReei6JCrGl66Q1EDY8f4RiwGU1o
+tSUPJ3oc9KYTsNw6pgAAAQEA06Jk8wSN9siVTazsE0aNcE+cQUAdqjCcfgNt/PR8kfqUGs
+9FA33icsulTiOXyzPr5Ua96v6mAtscbkWLPPXVYm9DifarC787EfaeX2SZrmNpELV4I4oj
+U/mbKKDoMwtZTuzkClswBPa5o5yoo8TzFISTAdbK26xquS4THaOUsXv0zI26HUMoCf4Idr
+06xtUwljLdVdEjBkpXTRMLUbSfMoOOYTPmsMN2yCyz2AI5XCs5ChXF0q7rFdY68VG6n7Go
+6Un4HKfpUxdouHexTX4PJsU225oeLtCSMfsil1jf25Z7Kj3/VuEEQ4h/DW+iE7ZBz3Czgd
+iiUTZHgTrWm18wkQAAAQEA0sBQdC2Af3psNbk0vQ7MVwMQED7q0OHSpDfKxQdLgo0xASJ0
+nTX5HamlA57Owrln0HgW332Xp3vHwL9170pk260xx0R2gndX6JxZTbI6RBhggEfSDj7YOt
+aAWEk1zzcqi0IBv51x8gd7xqwOezkxpM2k3Ej93/+/qzEJDwcn/QxCjqwRRQgY3KGr+H+q
+R/cFkD8b+lFaLumQu1v7dMJltOD5Ls+rPglIUqWBeMILuyPbmBXxFF00yOuhkcpIwPN266
+nTtSishRCLkDL83lWVGw3PmOtifEmlvU8cb+t7d+T/kvwgTUSd6m8oVHphxvx3NBUmpzhO
+H0lsz1qPDuEJTQAAABJweWx5emVuZ0BnbWFpbC5jb20BAgMEBQYH
+-----END OPENSSH PRIVATE KEY-----
--- a/jupyterhub_config.py
+++ b/jupyterhub_config.py
@@ -10,21 +10,7 @@ c = get_config()  # noqa: F821
 # avoid having to rebuild the JupyterHub container every time we change a
 # configuration parameter.

-# from dockerspawner import DockerSpawner
-
-# class MyDockerSpawner(DockerSpawner):
-#     def start(self):
-#         # 启动父类的start方法
-#         self.user_options['environment']['JUPYTER_ENABLE_NBEXTENSIONS'] = 'true'
-#         self.user_options['cmd'] = [
-#             'bash',
-#             '-c',
-#             'pip install nglview jupyter_packaging -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com && jupyter nbextension enable nglview --py --sys-prefix && jupyter labextension install nglview-js-widgets && jupyter labextension install @jupyter-widgets/jupyterlab-manager && start-singleuser.sh'
-#         ]
-#         return super().start()
-        
 # Spawn single-user servers as Docker containers
-c.Authenticator.allow_all = True
 c.JupyterHub.spawner_class = "dockerspawner.DockerSpawner"

 # Spawn containers from this image
--- a/nginx.conf
+++ b/nginx.conf
@@ -33,7 +33,6 @@ http {
            proxy_set_header Host $host;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Port $server_port; # 添加这一行,保持原有的端口号

            # WebSocket support
            proxy_http_version 1.1;
--- a/spawnerdockerfile/Dockerfile.base-notebook
+++ b/spawnerdockerfile/Dockerfile.base-notebook
@@ -78,8 +78,6 @@ ARG ROOT_PASSWD="password"
 ARG HOME="/home/${CREATE_USER}"
 ARG DEBIAN_FRONTEND="noninteractive"
 ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
-ARG NODEJS_VERSION='18'
-ENV NODEJS_VERSION=${NODEJS_VERSION}

 COPY install.sh /tmp
 RUN <<EOT
@@ -92,10 +90,7 @@ ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
 echo 'Asia/Shanghai' > /etc/timezone
 dpkg-reconfigure -f noninteractive tzdata
 # 安装所需的软件包
-sudo apt-get remove --purge libnode72:amd64 -y
-curl -fsSL https://deb.nodesource.com/setup_${NODEJS_VERSION}.x | sudo -E bash - 
 apt-get install -y python3 python3-pip gcc g++ build-essential nodejs npm gdebi-core curl wget openssh-server vim lrzsz net-tools sudo git nodejs
-npm install -g configurable-http-proxy yarn --registry=https://registry.npmmirror.com
 # 创建新用户
 useradd -m -s /bin/bash ${CREATE_USER}
 echo "${CREATE_USER}:${CREATE_USER_PASSWD}" | chpasswd
@@ -105,7 +100,11 @@ EOT
 # 安装 Jupyter 和相关软件
 RUN <<EOT
 #!/bin/bash
-python3 -m pip install ipython jupyterhub jupyterlab notebook jupyter_packaging -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
+npm install -g configurable-http-proxy
+git clone https://github.com/arose/nglview
+cd nglview
+python setup.py install
+cd ..
 python3 -m pip install aiohttp -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
 python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy ipykernel>=6.25.0 jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
 EOT
@@ -139,58 +138,23 @@ USER ${NB_UID}
 ARG MODULAR_HOME="/home/${CREATE_USER}/.modular"
 ENV MODULAR_HOME=$MODULAR_HOME
 ENV PATH="$MODULAR_HOME/pkg/packages.modular.com_mojo/bin:/home/${CREATE_USER}/.local/bin:$PATH"
-ARG HTTP_PROXY=""
-ARG HTTPS_PROXY=""
-ARG NO_PROXY="localhost,127.0.0.1"
-ENV HTTP_PROXY=$HTTP_PROXY
-ENV HTTPS_PROXY=$HTTPS_PROXY
 RUN <<EOT
 #!/bin/bash
-curl --retry 5 https://get.modular.com | sh - 
-mamba create -n mojo python=3.10 ipykernel ipython nglview -c conda-forge -y 
-mamba activate mojo && modular install mojo
-mamba run -n mojo python -m ipykernel install --user --name="mojo" --display-name="mojo_env"
+curl https://get.modular.com | sh - 
+mamba create -n mojo python=3.10 -c conda-forge -y 
+mamba activate mojo
+modular install mojo
 EOT

-# Install Rust https://rsproxy.cn/#getStarted
-ENV RUSTUP_DIST_SERVER="https://rsproxy.cn"
-ENV RUSTUP_UPDATE_ROOT="https://rsproxy.cn/rustup"
+# Install Rust
 ENV PATH="/home/${CREATE_USER}/.cargo/bin:$PATH"
-ENV CARGO_UNSTABLE_SPARSE_REGISTRY="true"
 RUN <<EOT
 #!/bin/bash
-curl --proto '=https' --tlsv1.2 -sSf https://rsproxy.cn/rustup-init.sh | sh -s -- -y
-echo '[source.crates-io]
-replace-with = "rsproxy-sparse"
-[source.rsproxy]
-registry = "https://rsproxy.cn/crates.io-index"
-[source.rsproxy-sparse]
-registry = "sparse+https://rsproxy.cn/index/"
-[registries.rsproxy]
-index = "https://rsproxy.cn/crates.io-index"
-[net]
-git-fetch-with-cli = true' >> ~/.cargo/config.toml
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
 source $HOME/.cargo/env
 cargo install evcxr_jupyter
 evcxr_jupyter --install
 EOT

-RUN <<EOT
-#!/bin/bash
-sudo apt update
-sudo apt install -y texlive-full
-mamba create -n torch tensorboard jupyter_packaging pytorch biopython biopandas pymol-open-source mdtraj torchvision torchaudio python=3 ipython requests scienceplots autopep8 ipykernel\
- pandas numpy matplotlib scipy seaborn orange3 -c pytorch -c nvidia -c conda-forge -y 
-git clone https://ghproxy.dockless.eu.org/https://github.com/arose/nglview
-cd nglview
-mamba run -n torch python3 setup.py install
-cd js
-rm -rf node_modules package-lock.json
-npm install typescript@latest --registry=https://registry.npmmirror.com
-npm install --registry=https://registry.npmmirror.com
-cd ../..
-mamba run -n torch python -m ipykernel install --user --name="torch" --display-name="torch_env"
-EOT
-
 WORKDIR "${HOME}"

--- a/spawnerdockerfile/Dockerfile.ngc
+++ b/spawnerdockerfile/Dockerfile.ngc
@@ -1,161 +0,0 @@
-ARG REGISTRY=quay.io
-ARG OWNER=jupyter
-ARG LABEL=notebook
-ARG VERSION
-ARG BASE_CONTAINER=$REGISTRY/$OWNER/$LABEL:$VERSION
-FROM $BASE_CONTAINER
-ARG HTTP_PROXY
-ARG HTTPS_PROXY
-ENV http_proxy=${HTTP_PROXY}
-ENV https_proxy=${HTTPS_PROXY}
-ARG DEBIAN_FRONTEND="noninteractive"
-ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
-ARG ROOT_PASSWD="root"
-ENV ROOT_PASSWD=${ROOT_PASSWD}
-WORKDIR /root
-SHELL ["/bin/bash", "-c"]
-
-# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
-ENV MLNX_OFED_VERSION=23.10-3.2.2.0 
-RUN <<EOT
-#!/bin/bash
-# SYSTEM_NAME=$(lsb_release -cs) # 查看发行版本
-# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
-python3 -m pip uninstall -y deepspeed
-# This has to be run (again) inside the GPU VMs running the tests.
-# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
-# TODO: Find out why test fail. install deepspeed
-# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
-# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail 
-# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
-# install deepspeed prepare
-# install Mellanox OFED
-mkdir -p ${STAGE_DIR}
-wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf -
-cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
-./mlnxofedinstall --user-space-only --without-fw-update --all -q 
-cd ${STAGE_DIR} 
-rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
-EOT
-
-ARG NV_PEER_MEM_VERSION="1.2"
-ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
-ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
-RUN <<EOT
-#!/bin/bash
-# install nv_peer_mem
-mkdir -p ${STAGE_DIR}
-git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
-cd ${STAGE_DIR}/nv_peer_memory
-./build_module.sh
-cd ${STAGE_DIR}
-tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
-cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
-apt-get update
-apt --fix-broken install -y
-apt-get install -y dkms
-dpkg-buildpackage -us -uc
-dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
-EOT
-
-# base tools
-RUN <<EOT
-#!/bin/bash
-apt-get update
-apt-get install -y bash-completion wget curl htop jq vim bash libaio-dev build-essential openssh-server python3 python3-pip bzip2 sudo 
-apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
-add-apt-repository ppa:git-core/ppa -y
-apt-get install -y git libnuma-dev wget
-# Configure SSH for password and public key authentication
-sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
-sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
-sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config
-sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
-sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
-mkdir /var/run/sshd
-echo "root:${ROOT_PASSWD}" | chpasswd
-mkdir -p ~/.pip
-eval "$(curl https://get.x-cmd.com)"
-# install pixi
-curl -fsSL https://pixi.sh/install.sh | bash
-EOT
-
-RUN <<EOT
-#!/bin/bash
-pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
-pip install git+https://github.com/huggingface/transformers
-EOT
-
-ENV STAGE_DIR=/tmp
-RUN <<EOT
-#!/bin/bash
-git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
-cd ${STAGE_DIR}/DeepSpeed-Kernels
-python -m pip install -v .
-EOT
-
-RUN <<EOT
-#!/bin/bash
-git clone https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
-cd ${STAGE_DIR}/oneCCL
-git checkout . 
-git checkout master
-mkdir build
-cd build 
-cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
-make -j"$(nproc)" install
-EOT
-
-ARG DEEPSPEED_VERSION="v0.14.3"
-ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
-ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose"
-ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
-ARG DS_BUILD_SPARSE_ATTN=0
-ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
-ARG DS_BUILD_FUSED_ADAM=1
-ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
-ARG DS_BUILD_CPU_ADAM=1
-ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
-ARG DS_BUILD_OPS=1
-ENV DS_BUILD_OPS=${DS_BUILD_OPS}
-ARG HOSTFILE_CONTENT=""
-ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
-ENV CUTLASS_PATH="/opt/pytorch/pytorch/third_party/cutlass"
-ENV CUDA_HOME="/usr/local/cuda"
-ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
-ENV PATH=${CUDA_HOME}/bin:${PATH}
-RUN <<EOT
-#!/bin/bash
-git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
-cd ${STAGE_DIR}/DeepSpeed
-git checkout ${DEEPSPEED_VERSION}
-./install.sh ${DEEPSPEED_INSTALL_FLAGS}
-ds_report
-EOT
-
-RUN <<EOT
-#!/bin/bash
-python -m pip install --upgrade pip
-python -m pip install peft tiktoken seaborn blobfile open_clip_torch zstandard mpi4py
-# optimum 手动解决依赖
-python -m pip install black~=23.1 ruff==0.1.5 diffusers>=0.17.0
-python -m pip install --no-deps git+https://github.com/huggingface/optimum.git#egg=optimum[diffusers,quality]
-EOT
-
-RUN <<EOT
-#!/bin/bash
-# 项目目录中的定义通常会覆盖用户家目录中的定义
-# 配置 .deepspeed_env 文件
-cat <<EOF > ~/.deepspeed_env
-TORCH_USE_CUDA_DSA=1
-DEEPSPEED_VERBOSE=1
-DEEPSPEED_LOG_LEVEL=DEBUG
-CUTLASS_PATH=${CUTLASS_PATH}
-TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
-CUDA_HOME=${CUDA_HOME}
-LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
-EOF
-unset https_proxy http_proxy
-EOT
-
-CMD ["/usr/sbin/sshd", "-D"]
--- a/spawnerdockerfile/README.md
+++ b/spawnerdockerfile/README.md
@@ -1,53 +1,5 @@
 # Base Jupyter Notebook Stack

-## ds_report
-
-```shell
-[2024-07-17 02:25:56,956] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
- [WARNING]  async_io requires the dev libaio .so object and headers but these were not found.
- [WARNING]  async_io: please install the libaio-dev package with apt
- [WARNING]  If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.
- [WARNING]  Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH
- [WARNING]  sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4
- [WARNING]  using untested triton version (3.0.0), only 1.0.0 is known to be compatible
-
-(deepspeed) root@ubuntu-finetune:~/binbbt/train/pretrain# cat .deepspeed_env 
-CUDA_HOME=/usr/local/cuda/
-TORCH_USE_CUDA_DSA=1
-CUTLASS_PATH=/opt/cutlass
-TORCH_CUDA_ARCH_LIST="80;89;90;90a"
-LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
-NCCL_DEBUG=WARN
-NCCL_SOCKET_IFNAME=bond0
-NCCL_IB_HCA=mlx5_0:1,mlx5_2:1,mlx5_4:1,mlx5_6:1
-NCCL_IB_GID_INDEX=3
-NCCL_NET_GDR_LEVEL=2
-NCCL_P2P_DISABLE=0
-NCCL_IB_DISABLE=0
-```
-
-## test command
-
-docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
-docker run --rm -it --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
-
-```shell
-nvidia-smi
-nvcc -V
-ninja --version
-ds_report
-python -c "import torch; print('torch:', torch.__version__, torch)"
-python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
-python -c "import deepspeed; deepspeed.ops.op_builder.CPUAdamBuilder().load()"
-python -c "from flash_attn import flash_attn_func, flash_attn_varlen_func"
-python -c "import apex.amp; print('Apex is installed and the amp module is available.')"
-python -c "from xformers import ops as xops"
-ibstat
-ofed_info  -s # 如果输出显示了 OFED 版本号，则说明 OFED 驱动已安装。
-mst version
-mpirun --version
-```
-
 > **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/base-notebook)**

 [![docker pulls](https://img.shields.io/docker/pulls/jupyter/base-notebook.svg)](https://hub.docker.com/r/jupyter/base-notebook/)
--- a/spawnerdockerfile/docker-compose_ngc.yml
+++ b/spawnerdockerfile/docker-compose_ngc.yml
@@ -1,72 +0,0 @@
-version: '3.9'
-
-# DeepSpeed支持多种C++/CUDA扩展（ops），这些ops旨在优化深度学习的训练和推理过程。以下是一些主要的DeepSpeed ops及其功能：
-
-# FusedAdam - 提供融合优化的Adam优化器，适用于GPU。
-# FusedLamb - 类似FusedAdam，针对LAMB优化器，适用于大规模分布式训练。
-# SparseAttention - 用于高效计算稀疏注意力机制。
-# Transformer - 提供Transformer模型的高效实现。
-# TransformerInference - 专门用于Transformer模型的推理优化。
-# CPUAdam - 针对CPU优化的Adam优化器。
-# CPULion - 针对CPU的Lion优化器。
-# Quantizer - 提供量化支持，以减少模型大小和提高推理速度。
-# RandomLTD - 用于随机层裁剪的优化器。
-# StochasticTransformer - 支持随机Transformer模型的训练和推理。
-
-# 检测系统总内存（以GB为单位）
-# TOTAL_MEM=$(awk '/MemTotal/ {printf "%.0f\n", $2/1024/1024}' /proc/meminfo)
-# echo "Docker Compose 文件已生成，shm_size 设置为 ${TOTAL_MEM}GB。"
-
-services:
-  ubuntu-finetune:
-    build: 
-      context: .
-      dockerfile: Dockerfile.ngc
-      args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
-        REGISTRY: "nvcr.io"
-        OWNER: "nvidia" # nvcr.io/nvidia/pytorch:24.06-py3
-        LABEL: "pytorch"
-        VERSION: "24.06-py3"
-        DS_BUILD_OPS: 1
-        DEEPSPEED_VERSION: "master"
-        DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
-        HTTP_PROXY: "http://127.0.0.1:15777"
-        HTTPS_PROXY: "http://127.0.0.1:15777"
-        CACHEBUST: 1
-    # volumes:
-    #   - ./workspace:/workspace
-      # - /tmp:/tmp
-    container_name: ubuntu-ngc
-    pull_policy: if_not_present
-    ulimits:
-      memlock:
-        soft: -1
-        hard: -1
-    # tty: true
-    # stdin_open: true
-    restart: unless-stopped
-    image: hotwa/notebook:ngc
-    privileged: true
-    ipc: host
-    network_mode: host
-    shm_size: '128gb'
-    # ports:
-    #   - 3228:2222
-    environment:
-      - NVIDIA_VISIBLE_DEVICES=all
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-      - TMPDIR=/var/tmp
-    # networks:
-    #   - network_finetune
-    # command: ["/usr/sbin/sshd", "-D"]
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-
-# networks:
-#   network_finetune:
-#     name: network_finetune
--- a/spawnerdockerfile/install_conda.sh
+++ b/spawnerdockerfile/install_conda.sh
@@ -1,20 +0,0 @@
-# install miniconda
-wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
-bash /tmp/miniconda.sh -b -p /opt/conda 
-rm /tmp/miniconda.sh 
-ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
-echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc 
-. /opt/conda/etc/profile.d/conda.sh 
-conda init bash
-conda config --set show_channel_urls true
-# 配置 .condarc 文件
-cat <<EOF > ~/.condarc
-channels:
-  - conda-forge
-  - bioconda
-  - pytorch
-  - pytorch-nightly
-  - nvidia
-  - defaults
-show_channel_urls: true
-EOF