Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ab0838c738 | ||
|
|
82ddb287fe | ||
|
|
c885d79433 | ||
|
|
2d6bb7c2cc | ||
| f2b9b5033b | |||
| 96b87a8f82 | |||
| 00a7a5ee6a | |||
| 4da2a4c813 | |||
| 4a6697ef69 | |||
| 305fd85088 | |||
|
|
adc45bc432 | ||
|
|
f685bf6d80 | ||
|
|
0ced7fca49 |
@@ -69,7 +69,7 @@ wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sud
|
||||
sudo add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
|
||||
apt-get update
|
||||
apt install --no-install-recommends r-base
|
||||
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2023.06.1-524-amd64.deb" -O /tmp/rstudio-server.deb
|
||||
sudo wget "https://download2.rstudio.org/server/$(lsb_release -cs)/amd64/rstudio-server-2024.09.1-394-amd64.deb" -O /tmp/rstudio-server.deb
|
||||
sudo chmod +x /tmp/rstudio-server.deb
|
||||
sudo gdebi -n /tmp/rstudio-server.deb
|
||||
sudo rm -rf /tmp/rstudio-server.deb
|
||||
|
||||
@@ -110,7 +110,7 @@ python3 -m pip install aiohttp -i http://mirrors.aliyun.com/pypi/simple/ --trust
|
||||
# Install JupyterHub/JupyterLab and extensions from the Aliyun PyPI mirror.
# The ipykernel requirement is quoted: unquoted, the shell parses ">=6.25.0" as a
# redirection of stdout into a file named "=6.25.0" and the version bound is lost.
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl aiohttp jupyter-rsession-proxy "ipykernel>=6.25.0" jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
||||
EOT
|
||||
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
|
||||
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
|
||||
156
spawnerdockerfile/Dockerfile.ngc
Normal file
156
spawnerdockerfile/Dockerfile.ngc
Normal file
@@ -0,0 +1,156 @@
|
||||
ARG REGISTRY=quay.io
|
||||
ARG OWNER=jupyter
|
||||
ARG LABEL=notebook
|
||||
ARG VERSION
|
||||
ARG BASE_CONTAINER=$REGISTRY/$OWNER/$LABEL:$VERSION
|
||||
FROM $BASE_CONTAINER
|
||||
ARG HTTP_PROXY
|
||||
ARG HTTPS_PROXY
|
||||
ENV http_proxy=${HTTP_PROXY}
|
||||
ENV https_proxy=${HTTPS_PROXY}
|
||||
ARG DEBIAN_FRONTEND="noninteractive"
|
||||
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
|
||||
ARG ROOT_PASSWD="root"
|
||||
ENV ROOT_PASSWD=${ROOT_PASSWD}
|
||||
WORKDIR /root
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
|
||||
ENV MLNX_OFED_VERSION=23.10-3.2.2.0
|
||||
ENV STAGE_DIR=/tmp
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
# install Mellanox OFED prepare
|
||||
apt-get update
|
||||
apt install -y libnvidia-compute-535
|
||||
apt-get install -y pciutils tk kmod libusb-1.0-0 tcl chrpath libpci3 bison lsof graphviz ethtool swig udev libltdl-dev libelf1 libmnl0 debhelper flex libfuse2
|
||||
# install Mellanox OFED
|
||||
mkdir -p ${STAGE_DIR}
|
||||
wget http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -O ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz
|
||||
tar xzf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz -C ${STAGE_DIR}
|
||||
cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64
|
||||
./mlnxofedinstall --user-space-only --without-fw-update --all -q > ${STAGE_DIR}/mlnxofedinstall.log 2>&1
|
||||
cd ${STAGE_DIR}
|
||||
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64*
|
||||
EOT
|
||||
|
||||
ARG NV_PEER_MEM_VERSION="1.2"
|
||||
ENV NV_PEER_MEM_VERSION=${NV_PEER_MEM_VERSION}
|
||||
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
# install nv_peer_mem
|
||||
mkdir -p ${STAGE_DIR}
|
||||
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
|
||||
cd ${STAGE_DIR}/nv_peer_memory
|
||||
./build_module.sh
|
||||
cd ${STAGE_DIR}
|
||||
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
|
||||
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
|
||||
apt-get update
|
||||
apt --fix-broken install -y
|
||||
apt-get install -y dkms
|
||||
dpkg-buildpackage -us -uc
|
||||
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
|
||||
EOT
|
||||
|
||||
# base tools
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt-get update
|
||||
apt-get install -y bash-completion wget curl htop jq vim bash libaio-dev build-essential openssh-server python3 python3-pip bzip2 sudo
|
||||
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
|
||||
add-apt-repository ppa:git-core/ppa -y
|
||||
apt-get install -y git libnuma-dev wget
|
||||
# Configure SSH for password and public key authentication
|
||||
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
|
||||
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
|
||||
mkdir /var/run/sshd
|
||||
echo "root:${ROOT_PASSWD}" | chpasswd
|
||||
mkdir -p ~/.pip
|
||||
eval "$(curl https://get.x-cmd.com)"
|
||||
# install pixi
|
||||
curl -fsSL https://pixi.sh/install.sh | bash
|
||||
EOT
|
||||
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
|
||||
pip install git+https://github.com/huggingface/transformers
|
||||
EOT
|
||||
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
|
||||
cd ${STAGE_DIR}/DeepSpeed-Kernels
|
||||
python -m pip install -v .
|
||||
EOT
|
||||
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
git clone https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
|
||||
cd ${STAGE_DIR}/oneCCL
|
||||
git checkout .
|
||||
git checkout master
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
|
||||
make -j"$(nproc)" install
|
||||
EOT
|
||||
|
||||
ARG DEEPSPEED_VERSION="v0.14.3"
|
||||
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
|
||||
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose"
|
||||
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
||||
ARG DS_BUILD_SPARSE_ATTN=0
|
||||
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
|
||||
ARG DS_BUILD_FUSED_ADAM=1
|
||||
ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
|
||||
ARG DS_BUILD_CPU_ADAM=1
|
||||
ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
|
||||
ARG DS_BUILD_OPS=1
|
||||
ENV DS_BUILD_OPS=${DS_BUILD_OPS}
|
||||
ARG HOSTFILE_CONTENT=""
|
||||
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
||||
ENV CUTLASS_PATH="/opt/pytorch/pytorch/third_party/cutlass"
|
||||
ENV CUDA_HOME="/usr/local/cuda"
|
||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
||||
ENV PATH=${CUDA_HOME}/bin:${PATH}
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
|
||||
cd ${STAGE_DIR}/DeepSpeed
|
||||
git checkout ${DEEPSPEED_VERSION}
|
||||
./install.sh ${DEEPSPEED_INSTALL_FLAGS}
|
||||
ds_report
|
||||
EOT
|
||||
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install peft tiktoken seaborn blobfile open_clip_torch zstandard mpi4py
|
||||
# optimum 手动解决依赖
|
||||
# Pin the optimum prerequisites by hand. Each requirement is quoted so the shell
# does not parse ">=0.17.0" as an output redirection to a file named "=0.17.0",
# which would silently drop the diffusers version bound.
python -m pip install "black~=23.1" "ruff==0.1.5" "diffusers>=0.17.0"
|
||||
python -m pip install --no-deps git+https://github.com/huggingface/optimum.git#egg=optimum[diffusers,quality]
|
||||
EOT
|
||||
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
# 项目目录中的定义通常会覆盖用户家目录中的定义
|
||||
# 配置 .deepspeed_env 文件
|
||||
cat <<EOF > ~/.deepspeed_env
|
||||
TORCH_USE_CUDA_DSA=1
|
||||
DEEPSPEED_VERBOSE=1
|
||||
DEEPSPEED_LOG_LEVEL=DEBUG
|
||||
CUTLASS_PATH=${CUTLASS_PATH}
|
||||
TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
|
||||
CUDA_HOME=${CUDA_HOME}
|
||||
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
|
||||
EOF
|
||||
# Clear the proxy variables for the remainder of this RUN shell.
# NOTE(review): http_proxy/https_proxy are also set via ENV earlier in this
# Dockerfile; an unset inside RUN presumably does not remove them from later
# layers or from containers started from the image — confirm this is intended.
unset https_proxy http_proxy
|
||||
EOT
|
||||
|
||||
CMD ["/usr/sbin/sshd", "-D"]
|
||||
@@ -104,7 +104,7 @@ npm install -g configurable-http-proxy
|
||||
# Install JupyterHub/JupyterLab and extensions from the Aliyun PyPI mirror.
# The ipykernel requirement is quoted: unquoted, the shell parses ">=6.25.0" as a
# redirection of stdout into a file named "=6.25.0" and the version bound is lost.
python3 -m pip install jupyterhub jupyterlab notebook radian pycurl jupyter-rsession-proxy "ipykernel>=6.25.0" jupyterlab-language-pack-zh-CN jupyterlab-git jupyterlab-system-monitor jupyter_nbextensions_configurator jupyter_contrib_nbextensions jupyterlab_widgets jupyterlab-drawio jupyterlab-spreadsheet-editor jupyterlab-cell-flash jedi-language-server jupyterlab_code_formatter jupyterlab-spellchecker jupyterlab_vim nbresuse ipydrawio jedi ipympl black isort theme-darcula ipywidgets tensorboard jupyterlab_latex jupyter_bokeh autopep8 xeus-python jupyterlab-lsp python-lsp-server nglview dockerspawner jupyterhub-nativeauthenticator lckr_jupyterlab_variableinspector -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
|
||||
EOT
|
||||
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.04.1-748-amd64.deb"
|
||||
ARG RSERVER_VERSION="rstudio-server-2024.09.1-394-amd64.deb"
|
||||
ENV RSERVER_VERSION=${RSERVER_VERSION}
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
|
||||
@@ -1,5 +1,54 @@
|
||||
# Base Jupyter Notebook Stack
|
||||
|
||||
## ds_report
|
||||
|
||||
```shell
|
||||
[2024-07-17 02:25:56,956] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
||||
[WARNING] async_io requires the dev libaio .so object and headers but these were not found.
|
||||
[WARNING] async_io: please install the libaio-dev package with apt
|
||||
[WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.
|
||||
[WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH
|
||||
[WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4
|
||||
[WARNING] using untested triton version (3.0.0), only 1.0.0 is known to be compatible
|
||||
|
||||
(deepspeed) root@ubuntu-finetune:~/binbbt/train/pretrain# cat .deepspeed_env
|
||||
CUDA_HOME=/usr/local/cuda/
|
||||
TORCH_USE_CUDA_DSA=1
|
||||
CUTLASS_PATH=/opt/cutlass
|
||||
TORCH_CUDA_ARCH_LIST="80;89;90;90a"
|
||||
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
|
||||
NCCL_DEBUG=WARN
|
||||
NCCL_SOCKET_IFNAME=bond0
|
||||
NCCL_IB_HCA=mlx5_0:1,mlx5_2:1,mlx5_4:1,mlx5_6:1
|
||||
NCCL_IB_GID_INDEX=3
|
||||
NCCL_NET_GDR_LEVEL=2
|
||||
NCCL_P2P_DISABLE=0
|
||||
NCCL_IB_DISABLE=0
|
||||
```
|
||||
|
||||
## test command
|
||||
|
||||
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/notebook:ngc
|
||||
docker run --rm -it --network=host --privileged --ipc=host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||
docker run --rm -it --privileged --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 hotwa/notebook:ngc /bin/bash
|
||||
|
||||
```shell
|
||||
nvidia-smi
|
||||
nvcc -V
|
||||
ninja --version
|
||||
ds_report
|
||||
python -c "import torch; print('torch:', torch.__version__, torch)"
|
||||
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
|
||||
python -c "import deepspeed; deepspeed.ops.op_builder.CPUAdamBuilder().load()"
|
||||
python -c "from flash_attn import flash_attn_func, flash_attn_varlen_func"
|
||||
python -c "import apex.amp; print('Apex is installed and the amp module is available.')"
|
||||
python -c "from xformers import ops as xops"
|
||||
ibstat
|
||||
ofed_info -s # 如果输出显示了 OFED 版本号,则说明 OFED 驱动已安装。
|
||||
mst version
|
||||
mpirun --version
|
||||
```
|
||||
|
||||
> **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/base-notebook)**
|
||||
|
||||
[Docker Hub: jupyter/base-notebook](https://hub.docker.com/r/jupyter/base-notebook/)
|
||||
@@ -72,16 +121,23 @@ pytorch-notebook
|
||||
|
||||
```shell
|
||||
git clone https://github.com/jupyter/docker-stacks.git
|
||||
cd docker-stacks/images/docker-stacks-foundation
|
||||
docker buildx build --build-arg ROOT_CONTAINER=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
|
||||
cd spawnerdockerfile/docker-stacks/images/docker-stacks-foundation
|
||||
nerdctl --namespace buildkit build --no-cache --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
|
||||
nerdctl --namespace buildkit build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest .
|
||||
nerdctl --namespace buildkit push quay.io/hotwa/docker-stacks-foundation:latest
|
||||
docker buildx build --build-arg ROOT_IMAGE=nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 -t quay.io/hotwa/docker-stacks-foundation:latest . --load # docker pull nvidia/cuda:12.4.1-devel-ubuntu22.04
|
||||
cd ../base-notebook
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/base-notebook:latest .
|
||||
cd ../minimal-notebook/
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/minimal-notebook:latest .
|
||||
cd ../scipy-notebook
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/scipy-notebook:latest .
|
||||
cd ../pytorch-notebook
|
||||
docker buildx build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest . --load
|
||||
nerdctl build --build-arg OWNER=hotwa -t quay.io/hotwa/pytorch-notebook:latest .
|
||||
```
|
||||
|
||||
# 然后构建自己的基础镜像
|
||||
|
||||
72
spawnerdockerfile/docker-compose_ngc.yml
Normal file
72
spawnerdockerfile/docker-compose_ngc.yml
Normal file
@@ -0,0 +1,72 @@
|
||||
version: '3.9'
|
||||
|
||||
# DeepSpeed支持多种C++/CUDA扩展(ops),这些ops旨在优化深度学习的训练和推理过程。以下是一些主要的DeepSpeed ops及其功能:
|
||||
|
||||
# FusedAdam - 提供融合优化的Adam优化器,适用于GPU。
|
||||
# FusedLamb - 类似FusedAdam,针对LAMB优化器,适用于大规模分布式训练。
|
||||
# SparseAttention - 用于高效计算稀疏注意力机制。
|
||||
# Transformer - 提供Transformer模型的高效实现。
|
||||
# TransformerInference - 专门用于Transformer模型的推理优化。
|
||||
# CPUAdam - 针对CPU优化的Adam优化器。
|
||||
# CPULion - 针对CPU的Lion优化器。
|
||||
# Quantizer - 提供量化支持,以减少模型大小和提高推理速度。
|
||||
# RandomLTD - 用于随机层裁剪的优化器。
|
||||
# StochasticTransformer - 支持随机Transformer模型的训练和推理。
|
||||
|
||||
# 检测系统总内存(以GB为单位)
|
||||
# TOTAL_MEM=$(awk '/MemTotal/ {printf "%.0f\n", $2/1024/1024}' /proc/meminfo)
|
||||
# echo "Docker Compose 文件已生成,shm_size 设置为 ${TOTAL_MEM}GB。"
|
||||
|
||||
services:
|
||||
ubuntu-finetune:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.ngc
|
||||
args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
|
||||
REGISTRY: "nvcr.io"
|
||||
OWNER: "nvidia" # nvcr.io/nvidia/pytorch:24.06-py3
|
||||
LABEL: "pytorch"
|
||||
VERSION: "24.06-py3"
|
||||
DS_BUILD_OPS: 1
|
||||
DEEPSPEED_VERSION: "master"
|
||||
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
|
||||
HTTP_PROXY: "http://127.0.0.1:15777"
|
||||
HTTPS_PROXY: "http://127.0.0.1:15777"
|
||||
CACHEBUST: 1
|
||||
# volumes:
|
||||
# - ./workspace:/workspace
|
||||
# - /tmp:/tmp
|
||||
container_name: ubuntu-ngc
|
||||
pull_policy: if_not_present
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
# tty: true
|
||||
# stdin_open: true
|
||||
restart: unless-stopped
|
||||
image: quay.io/hotwa/ngc:latest
|
||||
privileged: true
|
||||
ipc: host
|
||||
network_mode: host
|
||||
shm_size: '128gb'
|
||||
# ports:
|
||||
# - 3228:2222
|
||||
environment:
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
- TMPDIR=/var/tmp
|
||||
# networks:
|
||||
# - network_finetune
|
||||
# command: ["/usr/sbin/sshd", "-D"]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
|
||||
# networks:
|
||||
# network_finetune:
|
||||
# name: network_finetune
|
||||
20
spawnerdockerfile/install_conda.sh
Normal file
20
spawnerdockerfile/install_conda.sh
Normal file
@@ -0,0 +1,20 @@
|
||||
# install miniconda
|
||||
# Download the Miniconda installer quietly to /tmp/miniconda.sh.
# A single -O destination is given; combining "-O-" (stdout) with "-O <file>"
# on the same command line is contradictory and obscures where the file lands.
wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
|
||||
bash /tmp/miniconda.sh -b -p /opt/conda
|
||||
rm /tmp/miniconda.sh
|
||||
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
|
||||
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
|
||||
. /opt/conda/etc/profile.d/conda.sh
|
||||
conda init bash
|
||||
conda config --set show_channel_urls true
|
||||
# 配置 .condarc 文件
|
||||
cat <<EOF > ~/.condarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
- defaults
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
Reference in New Issue
Block a user