update
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*.tar
|
||||
build_d/
|
||||
78
Dockerfile.base
Normal file
78
Dockerfile.base
Normal file
@@ -0,0 +1,78 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
|
||||
ARG DEBIAN_FRONTEND="noninteractive"
|
||||
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
|
||||
# Docker's ENV does not expand "~", so use the absolute path of root's home (see WORKDIR /root).
ENV MAMBA_ROOT_PREFIX=/root/micromamba
|
||||
ARG CONDA_ENV_NAME="ldh"
|
||||
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
|
||||
ARG PYTHON_VERSION=3.10
|
||||
ENV PYTHON_VERSION=${PYTHON_VERSION}
|
||||
ARG ROOT_PASSWD="root"
|
||||
ENV ROOT_PASSWD=${ROOT_PASSWD}
|
||||
# Put the conda base and the named conda env's bin directories ahead of the existing PATH.
ENV PATH=/opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH
|
||||
WORKDIR /root
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
# base tools
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt-get update
|
||||
apt-get install -y wget curl htop jq vim bash libaio-dev build-essential openssh-server
|
||||
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
|
||||
add-apt-repository ppa:git-core/ppa -y
|
||||
apt-get install -y git libnuma-dev wget
|
||||
# install latest cmake
|
||||
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo apt-key add -
|
||||
sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main"
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake
|
||||
# Configure SSH for password and public key authentication
|
||||
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
|
||||
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
|
||||
mkdir /var/run/sshd
|
||||
# Double quotes let the shell expand ROOT_PASSWD; single quotes would set the literal text as the password.
echo "root:${ROOT_PASSWD}" | chpasswd
|
||||
mkdir -p ~/.pip
|
||||
# install miniconda
|
||||
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
|
||||
bash /tmp/miniconda.sh -b -p /opt/conda
|
||||
rm /tmp/miniconda.sh
|
||||
conda init bash
|
||||
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} pyyaml ipython -y
|
||||
# Install Python packages into the conda env; nvidia-ml-py3 is the NVML binding package name on PyPI.
conda run -n ${CONDA_ENV_NAME} python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python
|
||||
conda clean -afy
|
||||
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
|
||||
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
|
||||
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
|
||||
# Configure the .condarc file
|
||||
cat <<EOF > ~/.condarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
- defaults
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
# Install micromamba
|
||||
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
||||
micromamba shell init -s bash -p ~/micromamba
|
||||
cat <<'EOF' >> ~/.bashrc
|
||||
source ~/micromamba/etc/profile.d/micromamba.sh
|
||||
alias mamba=micromamba
|
||||
alias mba=mamba
|
||||
EOF
|
||||
# Configure the .mambarc file
|
||||
cat <<EOF > ~/.mambarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
- defaults
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
EOT
|
||||
@@ -21,9 +21,23 @@ echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config
|
||||
echo "Port 22" >> /etc/ssh/sshd_config
|
||||
mkdir /var/run/sshd
|
||||
echo 'root:cdcdocker' | chpasswd
|
||||
# Install Micromamba
|
||||
# 安装 micromamba 并配置 mambarc
|
||||
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
||||
micromamba shell init -s bash -p ~/micromamba
|
||||
cat <<'EOF' >> ~/.bashrc
|
||||
source ~/micromamba/etc/profile.d/micromamba.sh
|
||||
alias mamba=micromamba
|
||||
alias mba=mamba
|
||||
EOF
|
||||
# 配置 .mambarc 文件
|
||||
cat <<EOF > ~/.mambarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
EOF
|
||||
mkdir -p ~/.pip
|
||||
echo "
|
||||
[global]
|
||||
|
||||
@@ -1,163 +1,204 @@
|
||||
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
|
||||
|
||||
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
|
||||
ARG DEBIAN_FRONTEND="noninteractive"
|
||||
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
|
||||
ENV MAMBA_ROOT_PREFIX=~/micromamba
|
||||
ARG CONDA_ENV_NAME="ldh"
|
||||
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
|
||||
ARG PYTHON_VERSION=3.10
|
||||
ENV PYTHON_VERSION=${PYTHON_VERSION}
|
||||
ARG ROOT_PASSWD="root"
|
||||
ENV ROOT_PASSWD=${ROOT_PASSWD}
|
||||
ENV PATH /opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH
|
||||
WORKDIR /root
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# base tools
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt-get update
|
||||
apt-get install -y wget curl git jq vim bash libaio-dev build-essential openssh-server
|
||||
apt-get install -y wget curl htop jq vim bash libaio-dev build-essential openssh-server
|
||||
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
|
||||
add-apt-repository ppa:git-core/ppa -y
|
||||
apt-get install -y git libnuma-dev wget
|
||||
# install latest cmake
|
||||
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo apt-key add -
|
||||
sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main"
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake
|
||||
# Configure SSH for password and public key authentication
|
||||
echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
|
||||
echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config
|
||||
echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config
|
||||
echo "Port 22" >> /etc/ssh/sshd_config
|
||||
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
|
||||
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
|
||||
mkdir /var/run/sshd
|
||||
echo 'root:root' | chpasswd
|
||||
# Double quotes let the shell expand ROOT_PASSWD; single quotes would set the literal text as the password.
echo "root:${ROOT_PASSWD}" | chpasswd
|
||||
mkdir -p ~/.pip
|
||||
echo "
|
||||
[global]
|
||||
index-url = https://mirrors.aliyun.com/pypi/simple/
|
||||
|
||||
[install]
|
||||
trusted-host=mirrors.aliyun.com
|
||||
" >> ~/.pip/pip.conf
|
||||
# install miniconda
|
||||
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
|
||||
bash /tmp/miniconda.sh -b -p /opt/conda
|
||||
rm /tmp/miniconda.sh
|
||||
conda init bash
|
||||
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} pyyaml ipython -y
|
||||
# Install Python packages into the conda env; nvidia-ml-py3 is the NVML binding package name on PyPI.
conda run -n ${CONDA_ENV_NAME} python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python
|
||||
conda clean -afy
|
||||
ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
|
||||
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
|
||||
echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
|
||||
# 配置 .condarc 文件
|
||||
cat <<EOF > ~/.condarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
- defaults
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
# 安装 micromamba
|
||||
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
||||
micromamba shell init -s bash -p ~/micromamba
|
||||
cat <<'EOF' >> ~/.bashrc
|
||||
source ~/micromamba/etc/profile.d/micromamba.sh
|
||||
alias mamba=micromamba
|
||||
alias mba=mamba
|
||||
EOF
|
||||
# 配置 .mambarc 文件
|
||||
cat <<EOF > ~/.mambarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
- defaults
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
EOT
|
||||
|
||||
# deepspeed
|
||||
ENV STAGE_DIR=/tmp
|
||||
|
||||
# 安装 ninja 并测试
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
mkdir -p ${STAGE_DIR}
|
||||
apt-get update
|
||||
# The trailing backslash joins the package list on the following lines into this install command.
apt-get install -y --no-install-recommends \
|
||||
software-properties-common build-essential autotools-dev \
|
||||
nfs-common pdsh \
|
||||
cmake g++ gcc \
|
||||
curl wget vim tmux emacs less unzip \
|
||||
htop iftop iotop ca-certificates openssh-client openssh-server \
|
||||
rsync iputils-ping net-tools sudo \
|
||||
llvm-dev
|
||||
add-apt-repository ppa:git-core/ppa -y
|
||||
apt-get install -y git
|
||||
echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config
|
||||
cp /etc/ssh/sshd_config ${STAGE_DIR}/sshd_config
|
||||
sed "0,/^#Port 22/s//Port 22/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
|
||||
# 安装 ninja
|
||||
source /opt/conda/etc/profile.d/conda.sh
|
||||
conda activate ${CONDA_ENV_NAME}
|
||||
# 克隆 ninja 源码并编译
|
||||
git clone https://github.com/ninja-build/ninja.git
|
||||
cd ninja
|
||||
# 克隆 GoogleTest 源码
|
||||
git clone https://github.com/google/googletest.git
|
||||
conda run -n ${CONDA_ENV_NAME} python ./configure.py --bootstrap
|
||||
# 配置并构建 Ninja 测试,添加 pthread 链接选项
|
||||
CXXFLAGS="-pthread" LDFLAGS="-pthread" ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest
|
||||
./ninja all
|
||||
# 运行 Ninja 单元测试
|
||||
./ninja_test
|
||||
EOT
|
||||
|
||||
# Mellanox OFED
|
||||
ENV MLNX_OFED_VERSION=4.9-7.1.0.0
|
||||
RUN apt-get install -y libnuma-dev
|
||||
RUN cd ${STAGE_DIR} && \
|
||||
wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf - && \
|
||||
cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 && \
|
||||
./mlnxofedinstall --user-space-only --without-fw-update --all -q && \
|
||||
cd ${STAGE_DIR} && \
|
||||
rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64*
|
||||
# nv_peer_mem
|
||||
ENV NV_PEER_MEM_VERSION=1.2
|
||||
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
|
||||
RUN mkdir -p ${STAGE_DIR} && \
|
||||
git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory && \
|
||||
cd ${STAGE_DIR}/nv_peer_memory && \
|
||||
./build_module.sh && \
|
||||
cd ${STAGE_DIR} && \
|
||||
tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz && \
|
||||
cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} && \
|
||||
apt-get update && \
|
||||
apt-get install -y dkms && \
|
||||
dpkg-buildpackage -us -uc && \
|
||||
dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
|
||||
# OPENMPI
|
||||
ENV OPENMPI_BASEVERSION=4.1
|
||||
ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6
|
||||
RUN cd ${STAGE_DIR} && \
|
||||
wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf - && \
|
||||
cd openmpi-${OPENMPI_VERSION} && \
|
||||
./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION} && \
|
||||
make -j"$(nproc)" install && \
|
||||
ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi && \
|
||||
# Sanity check:
|
||||
test -f /usr/local/mpi/bin/mpic++ && \
|
||||
cd ${STAGE_DIR} && \
|
||||
rm -r ${STAGE_DIR}/openmpi-${OPENMPI_VERSION}
|
||||
ENV PATH=/usr/local/mpi/bin:${PATH} \
|
||||
LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}
|
||||
# Create a wrapper for OpenMPI to allow running as root by default
|
||||
RUN mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real && \
|
||||
echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun && \
|
||||
echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun && \
|
||||
chmod a+x /usr/local/mpi/bin/mpirun
|
||||
# Python
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHON_VERSION=3
|
||||
RUN apt-get install -y python3 python3-dev && \
|
||||
rm -f /usr/bin/python && \
|
||||
ln -s /usr/bin/python3 /usr/bin/python && \
|
||||
curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py && \
|
||||
python get-pip.py && \
|
||||
rm get-pip.py && \
|
||||
pip install --upgrade pip && \
|
||||
# Print python and pip versions
|
||||
python -V && pip -V
|
||||
RUN pip install pyyaml
|
||||
RUN pip install ipython
|
||||
# Some Packages
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libsndfile-dev \
|
||||
libcupti-dev \
|
||||
libjpeg-dev \
|
||||
libpng-dev \
|
||||
screen \
|
||||
libaio-dev
|
||||
RUN pip install psutil \
|
||||
yappi \
|
||||
cffi \
|
||||
ipdb \
|
||||
pandas \
|
||||
matplotlib \
|
||||
py3nvml \
|
||||
pyarrow \
|
||||
graphviz \
|
||||
astor \
|
||||
boto3 \
|
||||
tqdm \
|
||||
sentencepiece \
|
||||
msgpack \
|
||||
requests \
|
||||
pandas \
|
||||
sphinx \
|
||||
sphinx_rtd_theme \
|
||||
scipy \
|
||||
numpy \
|
||||
scikit-learn \
|
||||
nvidia-ml-py3 \
|
||||
mpi4py
|
||||
# # deepspeed
|
||||
# ENV STAGE_DIR=/tmp
|
||||
# RUN <<EOT
|
||||
# #!/bin/bash
|
||||
# mkdir -p ${STAGE_DIR}
|
||||
# echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config
|
||||
# cp /etc/ssh/sshd_config ${STAGE_DIR}/sshd_config
|
||||
# sed "0,/^#Port 22/s//Port 22/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
|
||||
# EOT
|
||||
|
||||
# # Mellanox OFED
|
||||
# WORKDIR ${STAGE_DIR}
|
||||
# ENV MLNX_OFED_VERSION=4.9-7.1.0.0
|
||||
# RUN wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf - && \
|
||||
# cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 && \
|
||||
# ./mlnxofedinstall --user-space-only --without-fw-update --all -q && \
|
||||
# cd ${STAGE_DIR} && \
|
||||
# rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64*
|
||||
# # nv_peer_mem
|
||||
# ENV NV_PEER_MEM_VERSION=1.2
|
||||
# ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
|
||||
# RUN mkdir -p ${STAGE_DIR} && \
|
||||
# git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory && \
|
||||
# cd ${STAGE_DIR}/nv_peer_memory && \
|
||||
# ./build_module.sh && \
|
||||
# cd ${STAGE_DIR} && \
|
||||
# tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz && \
|
||||
# cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} && \
|
||||
# apt-get update && \
|
||||
# apt-get install -y dkms && \
|
||||
# dpkg-buildpackage -us -uc && \
|
||||
# dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
|
||||
# # OPENMPI
|
||||
# ENV OPENMPI_BASEVERSION=4.1
|
||||
# ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6
|
||||
# RUN <<EOT
|
||||
# #!/bin/bash
|
||||
# cd ${STAGE_DIR}
|
||||
# wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf -
|
||||
# cd openmpi-${OPENMPI_VERSION}
|
||||
# ./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION}
|
||||
# make -j"$(nproc)" install
|
||||
# ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi
|
||||
# # Sanity check:
|
||||
# test -f /usr/local/mpi/bin/mpic++
|
||||
# cd ${STAGE_DIR}
|
||||
# rm -r ${STAGE_DIR}/openmpi-${OPENMPI_VERSION}
|
||||
# EOT
|
||||
|
||||
# ENV PATH=/usr/local/mpi/bin:${PATH}
|
||||
# ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}
|
||||
# # Create a wrapper for OpenMPI to allow running as root by default
|
||||
# RUN mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real && \
|
||||
# echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun && \
|
||||
# echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun && \
|
||||
# chmod a+x /usr/local/mpi/bin/mpirun
|
||||
# # Some Packages
|
||||
# RUN <<EOT
|
||||
# apt-get update
|
||||
# apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
|
||||
# source /opt/conda/etc/profile.d/conda.sh
|
||||
# conda activate ${CONDA_ENV_NAME}
|
||||
# conda install -y mpi4py
|
||||
# python -m pip install psutil \
|
||||
# yappi \
|
||||
# cffi \
|
||||
# ipdb \
|
||||
# pandas \
|
||||
# matplotlib \
|
||||
# py3nvml \
|
||||
# pyarrow \
|
||||
# graphviz \
|
||||
# astor \
|
||||
# boto3 \
|
||||
# tqdm \
|
||||
# sentencepiece \
|
||||
# msgpack \
|
||||
# requests \
|
||||
# pandas \
|
||||
# sphinx \
|
||||
# sphinx_rtd_theme \
|
||||
# scipy \
|
||||
# numpy \
|
||||
# scikit-learn \
|
||||
# nvidia-ml-py3
|
||||
# EOT
|
||||
|
||||
# PyTorch
|
||||
ARG PYTORCH_VERSION=1.13.0
|
||||
ENV PYTORCH_VERSION=${PYTORCH_VERSION}
|
||||
RUN pip install torch==${PYTORCH_VERSION}
|
||||
RUN rm -rf /usr/lib/python3/dist-packages/yaml && \
|
||||
rm -rf /usr/lib/python3/dist-packages/PyYAML-*
|
||||
## Add deepspeed user
|
||||
# Add a deepspeed user with user id 8877
|
||||
#RUN useradd --create-home --uid 8877 deepspeed
|
||||
RUN useradd --create-home --uid 1000 --shell /bin/bash deepspeed
|
||||
RUN usermod -aG sudo deepspeed
|
||||
RUN echo "deepspeed ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
|
||||
# # Change to non-root privilege
|
||||
USER deepspeed
|
||||
# DeepSpeed
|
||||
RUN git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
|
||||
RUN cd ${STAGE_DIR}/DeepSpeed && \
|
||||
git checkout . && \
|
||||
git checkout master && \
|
||||
./install.sh --pip_sudo
|
||||
RUN rm -rf ${STAGE_DIR}/DeepSpeed
|
||||
RUN python -c "import deepspeed; print(deepspeed.__version__)"
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
source /opt/conda/etc/profile.d/conda.sh
|
||||
conda activate ${CONDA_ENV_NAME}
|
||||
pip install deepspeed torch==${PYTORCH_VERSION} torchvision torchaudio bitsandbytes accelerate transformers optimum
|
||||
pip install \
|
||||
torch torchvision torchaudio \
|
||||
pydantic transformers datasets accelerate evaluate peft deepspeed tiktoken \
|
||||
sentencepiece tqdm nltk matplotlib seaborn numpy pandas scikit-learn diffusers \
|
||||
huggingface_hub spacy Pillow blobfile requests scipy pycocotools protobuf timm \
|
||||
pyyaml ipython xformers opencv-contrib-python open_clip_torch flash-attn \
|
||||
packaging psutil zstandard
|
||||
python -c "import deepspeed; print(deepspeed.__version__)"
|
||||
EOT
|
||||
|
||||
# Start the SSH service
|
||||
CMD ["/bin/bash", "-c", "service ssh start; tail -f /dev/null"]
|
||||
Reference in New Issue
Block a user