From be77975ce2f37f73fee5b40cfeb654ff51a07ccc Mon Sep 17 00:00:00 2001
From: hotwa <pylyzeng@gmail.com>
Date: Fri, 21 Jun 2024 15:12:44 +0800
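Subject: [PATCH] finetune: bump DeepSpeed to 0.14.3 and drop conda Dockerfiles

Raise the default DEEPSPEED_VERSION in finetune/Dockerfile from 0.8.3 to
0.14.3, fetch oneCCL, DeepSpeed-Kernels and DeepSpeed through the
ghproxy.dockless.eu.org mirror, and build DeepSpeed-Kernels and DeepSpeed
in their own RUN steps. Remove Dockerfile.conda, Dockerfile.conda1 and the
two *_conda docker-compose files.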

---
 finetune/Dockerfile                           |  22 +-
 finetune/Dockerfile.conda                     | 404 ------------------
 finetune/Dockerfile.conda1                    | 372 ----------------
 finetune/docker-compose_pytorch1.13.yml       |   2 +-
 finetune/docker-compose_pytorch1.13_conda.yml |  53 ---
 finetune/docker-compose_pytorch2.3.yml        |   4 +-
 finetune/docker-compose_pytorch2.3_conda.yml  |  63 ---
 7 files changed, 20 insertions(+), 900 deletions(-)
 delete mode 100644 finetune/Dockerfile.conda
 delete mode 100644 finetune/Dockerfile.conda1
 delete mode 100644 finetune/docker-compose_pytorch1.13_conda.yml
 delete mode 100644 finetune/docker-compose_pytorch2.3_conda.yml

diff --git a/finetune/Dockerfile b/finetune/Dockerfile
index 4b9ba41..cb3af52 100644
--- a/finetune/Dockerfile
+++ b/finetune/Dockerfile
@@ -317,7 +317,7 @@ cd ..
 EOT
 
 # CUDA_ARCH_LIST="80;86;89;90"
-ARG DEEPSPEED_VERSION="0.8.3"
+ARG DEEPSPEED_VERSION="0.14.3"
 ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
 ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
 ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
@@ -339,7 +339,7 @@ source /opt/conda/etc/profile.d/conda.sh
 conda activate ${CONDA_ENV_NAME}
 python -m pip install setuptools==${SETUPTOOLS_VERSION}
 # install oneapi for deepspeed
-git clone https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
+git clone https://ghproxy.dockless.eu.org/https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
 cd ${STAGE_DIR}/oneCCL
 git checkout . 
 git checkout master
@@ -347,15 +347,27 @@ mkdir build
 cd build 
 cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
 make -j"$(nproc)" install
-git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
+EOT
+
+RUN <<EOT
+#!/bin/bash
+source /opt/conda/etc/profile.d/conda.sh
+conda activate ${CONDA_ENV_NAME}
+git clone https://ghproxy.dockless.eu.org/https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
 cd ${STAGE_DIR}/DeepSpeed-Kernels
 CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python setup.py bdist_wheel
 pip install dist/deepspeed_kernels-*.whl
 # CUDA_ARCH_LIST=${CUDA_ARCH_LIST} pip install -v .
-git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
+EOT
+
+RUN <<EOT
+#!/bin/bash
+source /opt/conda/etc/profile.d/conda.sh
+conda activate ${CONDA_ENV_NAME}
+git clone https://ghproxy.dockless.eu.org/https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
 cd ${STAGE_DIR}/DeepSpeed 
 git checkout . 
-git checkout v0.8.3
+git checkout v${DEEPSPEED_VERSION}  # build the release tag selected by the DEEPSPEED_VERSION build arg, not the default branch
 python setup.py bdist_wheel
 DS_BUILD_OPS=${DS_BUILD_OPS} pip install dist/deepspeed*.whl --force-reinstall
 # DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt
diff --git a/finetune/Dockerfile.conda b/finetune/Dockerfile.conda
deleted file mode 100644
index de5f50f..0000000
--- a/finetune/Dockerfile.conda
+++ /dev/null
@@ -1,404 +0,0 @@
-# syntax=docker/dockerfile:1
-
-# NOTE: Building this image require's docker version >= 23.0.
-#
-# For reference:
-# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
-ARG CUDA_VERSION=12.1.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
-ARG DEBIAN_FRONTEND="noninteractive"
-ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
-ENV MAMBA_ROOT_PREFIX=~/micromamba
-ARG ROOT_PASSWD="root"
-ENV ROOT_PASSWD=${ROOT_PASSWD}
-WORKDIR /root
-SHELL ["/bin/bash", "-c"]
-# base tools
-RUN <<EOT
-#!/bin/bash
-apt-get update
-apt-get install -y wget curl htop jq vim bash libaio-dev build-essential openssh-server python3 python3-pip
-apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
-add-apt-repository ppa:git-core/ppa -y
-apt-get install -y git libnuma-dev wget
-# # install latest cmake
-# wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo apt-key add -
-# sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main"
-# sudo apt-get update
-# sudo apt-get install -y cmake
-# Configure SSH for password and public key authentication
-sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
-sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
-sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config
-sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
-sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
-mkdir /var/run/sshd
-echo 'root:${ROOT_PASSWD}' | chpasswd
-mkdir -p ~/.pip
-# install miniconda
-wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
-bash /tmp/miniconda.sh -b -p /opt/conda 
-rm /tmp/miniconda.sh 
-conda init bash 
-ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
-echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc 
-# 配置 .condarc 文件
-cat <<EOF > ~/.condarc
-channels:
-  - conda-forge
-  - bioconda
-  - pytorch
-  - pytorch-nightly
-  - nvidia
-  - defaults
-show_channel_urls: true
-EOF
-# 安装 micromamba
-echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
-micromamba shell init -s bash -p ~/micromamba
-cat <<'EOF' >> ~/.bashrc
-source ~/micromamba/etc/profile.d/micromamba.sh
-alias mamba=micromamba
-alias mba=mamba
-EOF
-# 配置 .mambarc 文件
-cat <<EOF > ~/.mambarc
-channels:
-  - conda-forge
-  - bioconda
-  - pytorch
-  - pytorch-nightly
-  - nvidia
-  - defaults
-show_channel_urls: true
-EOF
-EOT
-
-# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
-# PyTorch
-ARG CONDA_ENV_NAME="deepspeed"
-ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
-ARG PYTHON_VERSION=3.10
-ENV PYTHON_VERSION=${PYTHON_VERSION}
-ENV PATH /opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH
-ENV REF='main'
-ENV STAGE_DIR=/tmp
-ENV NV_PEER_MEM_VERSION=1.2
-ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
-ENV OPENMPI_BASEVERSION=4.1
-ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6
-ARG CUDA_NUM='121'
-ENV CUDA_NUM=${CUDA_NUM}
-ARG CUDA='cu121'
-ENV CUDA=${CUDA}
-ARG PYTORCH_VERSION=2.3.0
-ENV PYTORCH_VERSION=${PYTORCH_VERSION}
-ARG TORCHVISION_VERSION=0.18.0
-ENV TORCHVISION_VERSION=${TORCHVISION_VERSION}
-ARG TORCHAUDIO_VERSION=2.3.0
-ENV TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION}
-ARG PYTORCH_CUDA_VERSION=12.1
-ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION}
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0 9.0a"
-ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
-ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
-ENV MLNX_OFED_VERSION=4.9-7.1.0.0
-ARG SETUPTOOLS_VERSION=69.5.1
-ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION}
-ARG USE_CUDA=1
-ENV USE_CUDA=${USE_CUDA}
-ARG USE_ROCM=0
-ENV USE_ROCM=${USE_ROCM}
-ARG USE_XPU=0
-ENV USE_XPU=${USE_XPU}
-ARG _GLIBCXX_USE_CXX11_ABI=1
-ENV _GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI}
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} cmake ninja -c conda-forge -y
-echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
-conda activate ${CONDA_ENV_NAME}
-python3 -m pip install --no-cache-dir --upgrade pip
-python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python 
-conda clean -afy 
-git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF && cd ..
-python -m pip install setuptools==${SETUPTOOLS_VERSION}
-python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
-# # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
-# # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
-python3 -m pip uninstall -y torch torchvision torchaudio 
-# # install pytorch create conda env aleay exists
-git clone --recursive https://github.com/pytorch/pytorch ${STAGE_DIR}/pytorch
-cd ${STAGE_DIR}/pytorch
-git checkout v${PYTORCH_VERSION}
-git submodule sync
-git submodule update --init --recursive
-pip install -r requirements.txt
-conda install -y intel::mkl-static intel::mkl-include
-conda install -y -c pytorch magma-cuda${CUDA_NUM}
-export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-python setup.py install
-# python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA}
-python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-python3 -m pip uninstall -y transformer-engine
-python3 -m pip uninstall -y torch-tensorrt
-python3 -m pip uninstall -y apex
-EOT
-
-# install apex
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-git clone https://github.com/NVIDIA/apex ${STAGE_DIR}/apex
-cd apex
-# if pip >= 23.1 (ref: https://pip.pypa.io/en/stable/news/#v23-1) which supports multiple `--config-settings` with the same key... 
-MAX_JOBS=1 python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
-python -c "import apex.amp; print('Apex is installed and the amp module is available.')"
-cd ..
-rm -rf ${STAGE_DIR}/apex
-EOT
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
-python3 -m pip uninstall -y deepspeed
-# This has to be run (again) inside the GPU VMs running the tests.
-# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
-# TODO: Find out why test fail. install deepspeed
-# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
-# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail 
-# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
-# install deepspeed prepare
-# install Mellanox OFED
-mkdir -p ${STAGE_DIR}
-wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf -
-cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 
-./mlnxofedinstall --user-space-only --without-fw-update --all -q 
-cd ${STAGE_DIR} 
-rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64*
-cd ..
-# install nv_peer_mem
-rm -rf ${STAGE_DIR}
-mkdir -p ${STAGE_DIR}
-git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
-cd ${STAGE_DIR}/nv_peer_memory
-./build_module.sh
-cd ${STAGE_DIR}
-tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
-cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
-apt-get update
-apt-get install -y dkms
-dpkg-buildpackage -us -uc
-dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
-EOT
-
-# install mpi
-ENV PATH=/usr/local/mpi/bin:${PATH} 
-ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-# OPENMPI
-rm -rf ${STAGE_DIR}
-mkdir -p ${STAGE_DIR}
-cd ${STAGE_DIR}
-wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf - 
-cd openmpi-${OPENMPI_VERSION} 
-./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION} 
-make -j"$(nproc)" install 
-ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi 
-# Sanity check:
-test -f /usr/local/mpi/bin/mpic++ 
-cd ${STAGE_DIR} 
-rm -r ${STAGE_DIR}/openmpi-${OPENMPI_VERSION}
-# Create a wrapper for OpenMPI to allow running as root by default
-mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real
-echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun
-echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun
-chmod a+x /usr/local/mpi/bin/mpirun
-EOT
-
-# Some Packages
-RUN <<EOT
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-apt-get update 
-apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
-python -m pip install https://github.com/mpi4py/mpi4py/tarball/master
-python -m pip install psutil \
-yappi \
-cffi \
-ipdb \
-pandas \
-matplotlib \
-py3nvml \
-pyarrow \
-graphviz \
-astor \
-boto3 \
-tqdm \
-sentencepiece \
-msgpack \
-requests \
-pandas \
-sphinx \
-sphinx_rtd_theme \
-scipy \
-numpy \
-scikit-learn \
-nvidia-ml-py3
-EOT
-
-# SSH daemon port inside container cannot conflict with host OS port
-ENV SSH_PORT=2222
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config && \
-sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
-EOT
-
-# 29.78 Usage: install.sh [options...]
-# 29.78 
-# 29.78 By default will install deepspeed and all third party dependencies across all machines listed in
-# 29.78 hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally
-# 29.78 
-# 29.78 [optional]
-# 29.78     -l, --local_only        Install only on local machine
-# 29.78     -s, --pip_sudo          Run pip install with sudo (default: no sudo)
-# 29.78     -r, --allow_sudo        Allow script to be run by root (probably don't want this, instead use --pip_sudo)
-# 29.78     -n, --no_clean          Do not clean prior build state, by default prior build files are removed before building wheels
-# 29.78     -m, --pip_mirror        Use the specified pip mirror (default: the default pip mirror)
-# 29.78     -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
-# 29.78     -e, --examples          Checkout deepspeed example submodule (no install)
-# 29.78     -v, --verbose           Verbose logging
-# 29.78     -h, --help              This help text
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-useradd --create-home --uid 1000 --shell /bin/bash deepspeed
-usermod -aG sudo deepspeed
-echo "deepspeed ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
-EOT
-
-# install cutlass https://github.com/NVIDIA/cutlass
-# H100: architecture is Hopper (cutlass need add : cmake .. -DCUTLASS_NVCC_ARCHS="90a" )
-# A100: architecture is Ampere 
-# V100: architecture is Volta 
-# T4: architecture is Turing 
-# ENV CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc
-# 70：适用于 NVIDIA Volta 架构（如 Tesla V100）。
-# 75：适用于 NVIDIA Turing 架构（如 Tesla T4）。
-# 80：适用于 NVIDIA Ampere 架构（如 A100）。
-# 90a：适用于 NVIDIA Hopper 架构（如 H100）。
-# 89:GeForce RTX 4090 
-ARG DCUTLASS_NVCC_ARCHS="89"
-ENV DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS}
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-git clone https://github.com/NVIDIA/cutlass /opt/cutlass
-cd /opt/cutlass
-git checkout . 
-git checkout master
-mkdir build
-cd build
-cmake .. -DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS} -DCUTLASS_ENABLE_TESTS=OFF -DCUTLASS_UNITY_BUILD_ENABLED=ON            # compiles for NVIDIA Hopper GPU architecture, like H100
-make -j"$(nproc)" install
-cd ..
-# make test_unit -j"$(nproc)"
-# make test_unit_gemm_warp -j"$(nproc)"
-EOT
-
-# CUDA_ARCH_LIST="80;86;89;90"
-ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
-ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
-ARG CUDA_ARCH_LIST="80;86;89;90"
-ENV CUDA_ARCH_LIST=${CUDA_ARCH_LIST}
-ARG DS_BUILD_SPARSE_ATTN=0
-ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
-ARG DS_BUILD_FUSED_ADAM=1
-ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
-ARG DS_BUILD_CPU_ADAM=0
-ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
-ARG DS_BUILD_OPS=1
-ENV DS_BUILD_OPS=${DS_BUILD_OPS}
-ENV CUTLASS_PATH=/opt/cutlass
-# install deepspeed
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-python -m pip install setuptools==${SETUPTOOLS_VERSION}
-# install oneapi for deepspeed
-git clone https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
-cd ${STAGE_DIR}/oneCCL
-git checkout . 
-git checkout master
-mkdir build
-cd build 
-cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
-make -j install
-git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
-cd ${STAGE_DIR}/DeepSpeed-Kernels
-CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python setup.py bdist_wheel
-# pip install dist/deepspeed_kernels-*.whl
-CUDA_ARCH_LIST=${CUDA_ARCH_LIST} pip install -v .
-git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
-cd ${STAGE_DIR}/DeepSpeed 
-git checkout . 
-git checkout master 
-python setup.py bdist_wheel
-DS_BUILD_OPS=${DS_BUILD_OPS} pip install dist/deepspeed*.whl --force-reinstall
-# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt
-# DS_BUILD_OPS=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_CPU_ADAM=0 DS_BUILD_FUSED_ADAM=1 pip install -U --no-cache-dir .
-# ./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /job/hostfile # ./install.sh --allow_sudo --pip_sudo --no_clean --hostfile /path/to/your/hostfile
-cd ..
-# rm -rf ${STAGE_DIR}/DeepSpeed
-EOT
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-# install transformers
-git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
-cd ${STAGE_DIR}/transformers
-python3 ./setup.py develop
-python3 -m pip install -U --no-cache-dir "pydantic<2"
-# install flash-attn
-# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
-pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
-EOT
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-pip install optimum
-pip install peft tiktoken \
-    tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
-    huggingface_hub spacy blobfile pycocotools \
-    xformers open_clip_torch \
-    zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
-EOT
-
-# add vscode server
-# RUN <<EOT
-# #!/bin/bash
-# wget -qO- https://update.code.visualstudio.com/commit:${commit_id}/server-linux-x64/stable
-# code-server --install-extension ms-python.vscode-pylance
-# EOT
-
-# 启动 ssh 服务
-# CMD ["/bin/bash", "-c", "service ssh start; tail -f /dev/null"]
-CMD ["/usr/sbin/sshd", "-D"]
\ No newline at end of file
diff --git a/finetune/Dockerfile.conda1 b/finetune/Dockerfile.conda1
deleted file mode 100644
index 6fddb95..0000000
--- a/finetune/Dockerfile.conda1
+++ /dev/null
@@ -1,372 +0,0 @@
-# syntax=docker/dockerfile:1
-
-# NOTE: Building this image requires Docker version >= 23.0.
-#
-# For reference:
-# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
-
-ARG CUDA_VERSION=12.1.0
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu20.04
-ARG DEBIAN_FRONTEND="noninteractive"
-ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
-ENV MAMBA_ROOT_PREFIX=~/micromamba
-ARG ROOT_PASSWD="root"
-ENV ROOT_PASSWD=${ROOT_PASSWD}
-WORKDIR /root
-SHELL ["/bin/bash", "-c"]
-
-# Base tools
-RUN <<EOT
-apt-get update
-apt-get install -y wget curl htop jq vim bash libaio-dev build-essential openssh-server python3 python3-pip
-apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
-add-apt-repository ppa:git-core/ppa -y
-apt-get install -y git libnuma-dev wget
-sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
-sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
-sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config
-sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
-sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
-mkdir /var/run/sshd
-echo 'root:${ROOT_PASSWD}' | chpasswd
-mkdir -p ~/.pip
-wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
-bash /tmp/miniconda.sh -b -p /opt/conda 
-rm /tmp/miniconda.sh 
-conda init bash 
-ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
-echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc 
-cat <<EOF > ~/.condarc
-channels:
-  - conda-forge
-  - bioconda
-  - pytorch
-  - pytorch-nightly
-  - nvidia
-  - defaults
-show_channel_urls: true
-EOF
-echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
-micromamba shell init -s bash -p ~/micromamba
-cat <<'EOF' >> ~/.bashrc
-source ~/micromamba/etc/profile.d/micromamba.sh
-alias mamba=micromamba
-alias mba=mamba
-EOF
-cat <<EOF > ~/.mambarc
-channels:
-  - conda-forge
-  - bioconda
-  - pytorch
-  - pytorch-nightly
-  - nvidia
-  - defaults
-show_channel_urls: true
-EOF
-EOT
-
-# PyTorch
-ARG CONDA_ENV_NAME="deepspeed"
-ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
-ARG PYTHON_VERSION=3.10
-ENV PYTHON_VERSION=${PYTHON_VERSION}
-ENV PATH /opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH
-ARG PYTORCH_VERSION=2.3.0
-ENV PYTORCH_VERSION=${PYTORCH_VERSION}
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0 9.0a"
-ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
-ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
-ARG CUDA_NUM='121'
-ENV CUDA_NUM=${CUDA_NUM}
-
-RUN <<EOT
-source /opt/conda/etc/profile.d/conda.sh
-conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} cmake ninja -c conda-forge -y
-echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc
-conda activate ${CONDA_ENV_NAME}
-python3 -m pip install --no-cache-dir --upgrade pip
-conda clean -afy 
-
-# 获取指定版本的 PyTorch 源代码
-git clone --recursive https://github.com/pytorch/pytorch ${STAGE_DIR}/pytorch
-cd ${STAGE_DIR}/pytorch
-git checkout v${PYTORCH_VERSION}
-git submodule sync
-git submodule update --init --recursive
-
-# 安装依赖项
-conda install -y intel::mkl-static intel::mkl-include
-conda install -y -c pytorch magma-cuda${CUDA_NUM}
-
-# 构建和安装 PyTorch
-export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-python setup.py install
-
-# 安装其他必要的依赖项
-python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python 
-python -m pip install setuptools==69.5.1
-python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
-python3 -m pip uninstall -y torch torchvision torchaudio 
-python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==0.18.0+${CUDA} torchaudio==2.3.0 --extra-index-url https://download.pytorch.org/whl/${CUDA}
-python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-python3 -m pip uninstall -y transformer-engine
-python3 -m pip uninstall -y torch-tensorrt
-python3 -m pip uninstall -y apex
-EOT
-
-# install apex
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-git clone https://github.com/NVIDIA/apex ${STAGE_DIR}/apex
-cd apex
-# if pip >= 23.1 (ref: https://pip.pypa.io/en/stable/news/#v23-1) which supports multiple `--config-settings` with the same key... 
-MAX_JOBS=1 python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
-python -c "import apex.amp; print('Apex is installed and the amp module is available.')"
-cd ..
-rm -rf ${STAGE_DIR}/apex
-EOT
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
-python3 -m pip uninstall -y deepspeed
-# This has to be run (again) inside the GPU VMs running the tests.
-# The installation works here, but some tests fail, if we do not pre-build deepspeed again in the VMs running the tests.
-# TODO: Find out why test fail. install deepspeed
-# DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} DS_BUILD_FUSED_ADAM={DS_BUILD_FUSED_ADAM} python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
-# from https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile install deepspeed fail 
-# reference deepspeed install from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
-# install deepspeed prepare
-# install Mellanox OFED
-mkdir -p ${STAGE_DIR}
-wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf -
-cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 
-./mlnxofedinstall --user-space-only --without-fw-update --all -q 
-cd ${STAGE_DIR} 
-rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64*
-cd ..
-# install nv_peer_mem
-rm -rf ${STAGE_DIR}
-mkdir -p ${STAGE_DIR}
-git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory
-cd ${STAGE_DIR}/nv_peer_memory
-./build_module.sh
-cd ${STAGE_DIR}
-tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz
-cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION}
-apt-get update
-apt-get install -y dkms
-dpkg-buildpackage -us -uc
-dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb
-EOT
-
-# install mpi
-ENV PATH=/usr/local/mpi/bin:${PATH} 
-ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-# OPENMPI
-rm -rf ${STAGE_DIR}
-mkdir -p ${STAGE_DIR}
-cd ${STAGE_DIR}
-wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf - 
-cd openmpi-${OPENMPI_VERSION} 
-./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION} 
-make -j"$(nproc)" install 
-ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi 
-# Sanity check:
-test -f /usr/local/mpi/bin/mpic++ 
-cd ${STAGE_DIR} 
-rm -r ${STAGE_DIR}/openmpi-${OPENMPI_VERSION}
-# Create a wrapper for OpenMPI to allow running as root by default
-mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real
-echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun
-echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun
-chmod a+x /usr/local/mpi/bin/mpirun
-EOT
-
-# Some Packages
-RUN <<EOT
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-apt-get update 
-apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
-python -m pip install https://github.com/mpi4py/mpi4py/tarball/master
-python -m pip install psutil \
-yappi \
-cffi \
-ipdb \
-pandas \
-matplotlib \
-py3nvml \
-pyarrow \
-graphviz \
-astor \
-boto3 \
-tqdm \
-sentencepiece \
-msgpack \
-requests \
-pandas \
-sphinx \
-sphinx_rtd_theme \
-scipy \
-numpy \
-scikit-learn \
-nvidia-ml-py3
-EOT
-
-# SSH daemon port inside container cannot conflict with host OS port
-ENV SSH_PORT=2222
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-cat /etc/ssh/sshd_config > ${STAGE_DIR}/sshd_config && \
-sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config
-EOT
-
-# 29.78 Usage: install.sh [options...]
-# 29.78 
-# 29.78 By default will install deepspeed and all third party dependencies across all machines listed in
-# 29.78 hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally
-# 29.78 
-# 29.78 [optional]
-# 29.78     -l, --local_only        Install only on local machine
-# 29.78     -s, --pip_sudo          Run pip install with sudo (default: no sudo)
-# 29.78     -r, --allow_sudo        Allow script to be run by root (probably don't want this, instead use --pip_sudo)
-# 29.78     -n, --no_clean          Do not clean prior build state, by default prior build files are removed before building wheels
-# 29.78     -m, --pip_mirror        Use the specified pip mirror (default: the default pip mirror)
-# 29.78     -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
-# 29.78     -e, --examples          Checkout deepspeed example submodule (no install)
-# 29.78     -v, --verbose           Verbose logging
-# 29.78     -h, --help              This help text
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-useradd --create-home --uid 1000 --shell /bin/bash deepspeed
-usermod -aG sudo deepspeed
-echo "deepspeed ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
-EOT
-
-# install cutlass https://github.com/NVIDIA/cutlass
-# H100: architecture is Hopper (cutlass need add : cmake .. -DCUTLASS_NVCC_ARCHS="90a" )
-# A100: architecture is Ampere 
-# V100: architecture is Volta 
-# T4: architecture is Turing 
-# ENV CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc
-# 70：适用于 NVIDIA Volta 架构（如 Tesla V100）。
-# 75：适用于 NVIDIA Turing 架构（如 Tesla T4）。
-# 80：适用于 NVIDIA Ampere 架构（如 A100）。
-# 90a：适用于 NVIDIA Hopper 架构（如 H100）。
-# 89:GeForce RTX 4090 
-ARG DCUTLASS_NVCC_ARCHS="89"
-ENV DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS}
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-git clone https://github.com/NVIDIA/cutlass /opt/cutlass
-cd /opt/cutlass
-git checkout . 
-git checkout master
-mkdir build
-cd build
-cmake .. -DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS} -DCUTLASS_ENABLE_TESTS=OFF -DCUTLASS_UNITY_BUILD_ENABLED=ON            # compiles for NVIDIA Hopper GPU architecture, like H100
-make -j"$(nproc)" install
-cd ..
-# make test_unit -j"$(nproc)"
-# make test_unit_gemm_warp -j"$(nproc)"
-EOT
-
-# CUDA_ARCH_LIST="80;86;89;90"
-ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
-ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
-ARG CUDA_ARCH_LIST="80;86;89;90"
-ENV CUDA_ARCH_LIST=${CUDA_ARCH_LIST}
-ARG DS_BUILD_SPARSE_ATTN=0
-ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
-ARG DS_BUILD_FUSED_ADAM=1
-ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
-ARG DS_BUILD_CPU_ADAM=0
-ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
-ARG DS_BUILD_OPS=1
-ENV DS_BUILD_OPS=${DS_BUILD_OPS}
-ENV CUTLASS_PATH=/opt/cutlass
-# install deepspeed
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-python -m pip install setuptools==${SETUPTOOLS_VERSION}
-# install oneapi for deepspeed
-git clone https://github.com/oneapi-src/oneCCL.git ${STAGE_DIR}/oneCCL
-cd ${STAGE_DIR}/oneCCL
-git checkout . 
-git checkout master
-mkdir build
-cd build 
-cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
-make -j install
-git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
-cd ${STAGE_DIR}/DeepSpeed-Kernels
-CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python setup.py bdist_wheel
-# pip install dist/deepspeed_kernels-*.whl
-CUDA_ARCH_LIST=${CUDA_ARCH_LIST} pip install -v .
-git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
-cd ${STAGE_DIR}/DeepSpeed 
-git checkout . 
-git checkout master 
-python setup.py bdist_wheel
-DS_BUILD_OPS=${DS_BUILD_OPS} pip install dist/deepspeed*.whl --force-reinstall
-# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt
-# DS_BUILD_OPS=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_CPU_ADAM=0 DS_BUILD_FUSED_ADAM=1 pip install -U --no-cache-dir .
-# ./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /job/hostfile # ./install.sh --allow_sudo --pip_sudo --no_clean --hostfile /path/to/your/hostfile
-cd ..
-# rm -rf ${STAGE_DIR}/DeepSpeed
-EOT
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-# install transformers
-git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
-cd ${STAGE_DIR}/transformers
-python3 ./setup.py develop
-python3 -m pip install -U --no-cache-dir "pydantic<2"
-# install flash-attn
-# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
-pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
-EOT
-
-RUN <<EOT
-#!/bin/bash
-source /opt/conda/etc/profile.d/conda.sh
-conda activate ${CONDA_ENV_NAME}
-pip install optimum
-pip install peft tiktoken \
-    tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
-    huggingface_hub spacy blobfile pycocotools \
-    xformers open_clip_torch \
-    zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
-EOT
-
-# add vscode server
-# RUN <<EOT
-# #!/bin/bash
-# wget -qO- https://update.code.visualstudio.com/commit:${commit_id}/server-linux-x64/stable
-# code-server --install-extension ms-python.vscode-pylance
-# EOT
-
-# 启动 ssh 服务
-# CMD ["/bin/bash", "-c", "service ssh start; tail -f /dev/null"]
-CMD ["/usr/sbin/sshd", "-D"]
diff --git a/finetune/docker-compose_pytorch1.13.yml b/finetune/docker-compose_pytorch1.13.yml
index a42c597..e39c544 100644
--- a/finetune/docker-compose_pytorch1.13.yml
+++ b/finetune/docker-compose_pytorch1.13.yml
@@ -19,7 +19,7 @@ services:
         USE_ROCM: 0
         USE_XPU: 0
         CUDA: cu117
-        CUDA_ARCH_LIST: "80;86" # for RTX 4090, all : "80;86;89;90"
+        CUDA_ARCH_LIST: "80;86" # for RTX 4090, all : "80;86;89;90"; needed to compile the DeepSpeed kernels, and this value is strict. NOTE(review): 89 (RTX 4090 per the DCUTLASS_NVCC_ARCHS note below) is not in the current list - confirm
         SETUPTOOLS_VERSION: "69.5.1"
         ROOT_PASSWD: "root"
         DCUTLASS_NVCC_ARCHS: "90a" # 90a for H100 ,89:GeForce RTX 4090 
diff --git a/finetune/docker-compose_pytorch1.13_conda.yml b/finetune/docker-compose_pytorch1.13_conda.yml
deleted file mode 100644
index 134aa0d..0000000
--- a/finetune/docker-compose_pytorch1.13_conda.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-version: '3.8'
-
-services:
-  ubuntu-finetune:
-    build: 
-      context: .
-      dockerfile: Dockerfile.conda1
-      args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
-        PYTHON_VERSION: 3.9 # sparse attion 最新支持到3.9
-        CUDA_VERSION: 11.7.1 # pytorch 1.13.1 对应cuda 11.7.1
-        PYTORCH_VERSION: 1.13.1
-        TORCHVISION_VERSION: 0.14.1
-        TORCHAUDIO_VERSION: 0.13.1
-        DS_BUILD_OPS: 1
-        DS_BUILD_SPARSE_ATTN: 1
-        DS_BUILD_FUSED_ADAM: 1
-        DS_BUILD_CPU_ADAM: 1
-        USE_CUDA: 1
-        USE_ROCM: 0
-        USE_XPU: 0
-        CUDA_NUM: 117
-        CUDA: cu117
-        CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
-        SETUPTOOLS_VERSION: "69.5.1"
-        ROOT_PASSWD: "root"
-        DCUTLASS_NVCC_ARCHS: "90a" # 90a for H100 ,89:GeForce RTX 4090 
-    volumes:
-      - ./src:/bbtft
-    container_name: ubuntu-finetune
-    pull_policy: if_not_present
-    tty: true
-    restart: unless-stopped
-    image: hotwa/deepspeed:pt113
-    shm_size: '32gb'
-    ports:
-      - 3227:2222
-    command: ["/usr/sbin/sshd", "-D"]
-    environment:
-      - NVIDIA_VISIBLE_DEVICES=all
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-    networks:
-      - network_finetune
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-
-networks:
-  network_finetune:
-    name: network_finetune
diff --git a/finetune/docker-compose_pytorch2.3.yml b/finetune/docker-compose_pytorch2.3.yml
index e96f2be..45d2bed 100644
--- a/finetune/docker-compose_pytorch2.3.yml
+++ b/finetune/docker-compose_pytorch2.3.yml
@@ -19,9 +19,9 @@ services:
       context: .
       dockerfile: Dockerfile
       args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
-        PYTHON_VERSION: 3.9
+        PYTHON_VERSION: "3.10"  # keep quoted: a bare 3.10 is resolved as the YAML float 3.1, so the build arg would become "3.1"
         CUDA_VERSION: 12.1.0
-        PYTORCH_VERSION: 2.3.0
+        PYTORCH_VERSION: 2.3.1
         TORCHVISION_VERSION: 0.18.0
         TORCHAUDIO_VERSION: 2.3.0
         DS_BUILD_OPS: 1
diff --git a/finetune/docker-compose_pytorch2.3_conda.yml b/finetune/docker-compose_pytorch2.3_conda.yml
deleted file mode 100644
index 421e9e8..0000000
--- a/finetune/docker-compose_pytorch2.3_conda.yml
+++ /dev/null
@@ -1,63 +0,0 @@
-version: '3.8'
-
-# DeepSpeed支持多种C++/CUDA扩展（ops），这些ops旨在优化深度学习的训练和推理过程。以下是一些主要的DeepSpeed ops及其功能：
-
-# FusedAdam - 提供融合优化的Adam优化器，适用于GPU。
-# FusedLamb - 类似FusedAdam，针对LAMB优化器，适用于大规模分布式训练。
-# SparseAttention - 用于高效计算稀疏注意力机制。
-# Transformer - 提供Transformer模型的高效实现。
-# TransformerInference - 专门用于Transformer模型的推理优化。
-# CPUAdam - 针对CPU优化的Adam优化器。
-# CPULion - 针对CPU的Lion优化器。
-# Quantizer - 提供量化支持，以减少模型大小和提高推理速度。
-# RandomLTD - 用于随机层裁剪的优化器。
-# StochasticTransformer - 支持随机Transformer模型的训练和推理。
-
-services:
-  ubuntu-finetune:
-    build: 
-      context: .
-      dockerfile: Dockerfile.conda
-      args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
-        PYTHON_VERSION: 3.9
-        CUDA_VERSION: 12.1.0
-        PYTORCH_VERSION: 2.3.1
-        TORCHVISION_VERSION: 0.18.0
-        TORCHAUDIO_VERSION: 2.3.0
-        DS_BUILD_OPS: 1
-        DS_BUILD_SPARSE_ATTN: 0
-        DS_BUILD_FUSED_ADAM: 1
-        DS_BUILD_CPU_ADAM: 1
-        USE_CUDA: 1
-        USE_ROCM: 0
-        USE_XPU: 0
-        CUDA: cu121
-        CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
-        SETUPTOOLS_VERSION: "69.5.1"
-        DCUTLASS_NVCC_ARCHS: "90a" # 90a for H100 GPU 89:GeForce RTX 4090 
-    volumes:
-      - ./src:/bbtft
-    container_name: ubuntu-finetune
-    pull_policy: if_not_present
-    tty: true
-    restart: unless-stopped
-    image: hotwa/deepspeed:pt23
-    shm_size: '32gb'
-    ports:
-      - 3227:2222
-    environment:
-      - NVIDIA_VISIBLE_DEVICES=all
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
-    networks:
-      - network_finetune
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-
-networks:
-  network_finetune:
-    name: network_finetune