From 633a22ec82de26d0476247080671e5f2b79bcb91 Mon Sep 17 00:00:00 2001 From: lingyuzeng Date: Sat, 13 Jul 2024 23:18:51 +0800 Subject: [PATCH] update version --- finetune/Dockerfile.update | 460 +++++++++++++++++++++++++++++ finetune/docker-compose_update.yml | 67 +++++ 2 files changed, 527 insertions(+) create mode 100644 finetune/Dockerfile.update create mode 100644 finetune/docker-compose_update.yml diff --git a/finetune/Dockerfile.update b/finetune/Dockerfile.update new file mode 100644 index 0000000..7895280 --- /dev/null +++ b/finetune/Dockerfile.update @@ -0,0 +1,460 @@ + +# NOTE: Building this image requires Docker version >= 23.0 (the RUN steps use heredoc syntax). +# +# For reference: +# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel +ARG CUDA_VERSION=12.1.0 +FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04 +ARG HTTP_PROXY +ARG HTTPS_PROXY +ENV http_proxy=${HTTP_PROXY} +ENV https_proxy=${HTTPS_PROXY} +ARG DEBIAN_FRONTEND="noninteractive" +ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND} +ARG ROOT_PASSWD="root" +ENV ROOT_PASSWD=${ROOT_PASSWD} +WORKDIR /root +SHELL ["/bin/bash", "-c"] +# base tools +RUN <> ~/.bashrc +# Write the conda channel configuration to ~/.condarc +cat < ~/.condarc +channels: + - conda-forge + - bioconda + - pytorch + - pytorch-nightly + - nvidia + - defaults +show_channel_urls: true +EOF +# install pixi +curl -fsSL https://pixi.sh/install.sh | bash +EOT + +# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +# PyTorch +ARG CONDA_ENV_NAME="deepspeed" +ENV CONDA_ENV_NAME=${CONDA_ENV_NAME} +ARG PYTHON_VERSION=3.10 +ENV PYTHON_VERSION=${PYTHON_VERSION} +ENV PATH=/opt/conda/envs/${CONDA_ENV_NAME}/bin:/usr/bin:/opt/conda/bin:$PATH +ENV DEEPSPEED_PYTHON="/opt/conda/envs/${CONDA_ENV_NAME}/bin/python3" +ENV REF='main' +ENV STAGE_DIR=/tmp +ARG CUDA='cu121' +ENV CUDA=${CUDA} +ARG PYTORCH_VERSION=2.3.1 +ENV PYTORCH_VERSION=${PYTORCH_VERSION} +ARG TORCHVISION_VERSION=0.18.1 +ENV TORCHVISION_VERSION=${TORCHVISION_VERSION} 
+ARG TORCHAUDIO_VERSION=2.3.1 +ENV TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION} +ARG PYTORCH_CUDA_VERSION=12.1 +ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION} +ARG SETUPTOOLS_VERSION=69.5.1 +ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION} +ARG USE_CUDA=1 +ENV USE_CUDA=${USE_CUDA} +ARG USE_ROCM=0 +ENV USE_ROCM=${USE_ROCM} +ARG USE_XPU=0 +ENV USE_XPU=${USE_XPU} +ARG _GLIBCXX_USE_CXX11_ABI=1 +ENV _GLIBCXX_USE_CXX11_ABI=${_GLIBCXX_USE_CXX11_ABI} +RUN <> ~/.bashrc +conda activate ${CONDA_ENV_NAME} +python3 -m pip install --no-cache-dir --upgrade pip +python3 -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python +conda clean -afy +git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF && cd .. +python -m pip install setuptools==${SETUPTOOLS_VERSION} +python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing] +# # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.) +# # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops) +python3 -m pip uninstall -y torch torchvision torchaudio +# # reinstall the pinned PyTorch build into the conda env, which already exists at this point +# To use the SJTU mirror, replace https://download.pytorch.org/whl from the PyTorch install instructions with https://mirror.sjtu.edu.cn/pytorch-wheels. +python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} xformers --extra-index-url https://mirror.sjtu.edu.cn/pytorch-wheels/${CUDA} +python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate +python3 -m pip uninstall -y transformer-engine +python3 -m pip uninstall -y torch-tensorrt +python3 -m pip uninstall -y apex +EOT + +# install apex +RUN <= 23.1 (ref: https://pip.pypa.io/en/stable/news/#v23-1) which supports multiple `--config-settings` with the same key... 
+MAX_JOBS=1 python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ +python -c "import apex.amp; print('Apex is installed and the amp module is available.')" +cd .. +rm -rf ${STAGE_DIR}/apex +EOT + +# Mellanox OFED releases: https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/ +ENV MLNX_OFED_VERSION=23.10-3.2.2.0 +RUN <&1 +# Installing DeepSpeed as done in https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile failed, +# so the DeepSpeed install below follows https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile +# DeepSpeed prerequisites +# install Mellanox OFED (NOTE(review): the archive is unpacked in the current directory but removed under ${STAGE_DIR} - confirm both are the same directory) +mkdir -p ${STAGE_DIR} +wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64.tgz | tar xzf - +cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64 +./mlnxofedinstall --user-space-only --without-fw-update --all -q +cd ${STAGE_DIR} +rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64* +EOT + +ENV NV_PEER_MEM_VERSION=1.2 +ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 +RUN < /usr/local/mpi/bin/mpirun +echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun +chmod a+x /usr/local/mpi/bin/mpirun +EOT + +# The SSH daemon port inside the container must not conflict with the host OS port +ENV SSH_PORT=2222 +RUN < ${STAGE_DIR}/sshd_config && \ +sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config +EOT + +# 29.78 Usage: install.sh [options...] +# 29.78 +# 29.78 By default will install deepspeed and all third party dependencies across all machines listed in +# 29.78 hostfile (hostfile: /job/hostfile). 
If no hostfile exists, will only install locally +# 29.78 +# 29.78 [optional] +# 29.78 -l, --local_only Install only on local machine +# 29.78 -s, --pip_sudo Run pip install with sudo (default: no sudo) +# 29.78 -r, --allow_sudo Allow script to be run by root (probably don't want this, instead use --pip_sudo) +# 29.78 -n, --no_clean Do not clean prior build state, by default prior build files are removed before building wheels +# 29.78 -m, --pip_mirror Use the specified pip mirror (default: the default pip mirror) +# 29.78 -H, --hostfile Path to MPI-style hostfile (default: /job/hostfile) +# 29.78 -e, --examples Checkout deepspeed example submodule (no install) +# 29.78 -v, --verbose Verbose logging +# 29.78 -h, --help This help text + +RUN <> /etc/sudoers +EOT + +# install cutlass https://github.com/NVIDIA/cutlass +# H100: architecture is Hopper (CUTLASS needs: cmake .. -DCUTLASS_NVCC_ARCHS="90a") +# A100: architecture is Ampere +# V100: architecture is Volta +# T4: architecture is Turing +# ENV CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc +# 70: NVIDIA Volta architecture (e.g. Tesla V100). +# 75: NVIDIA Turing architecture (e.g. Tesla T4). +# 80: NVIDIA Ampere architecture (e.g. A100). +# 90a: NVIDIA Hopper architecture (e.g. H100). +# 89: GeForce RTX 4090 +ARG DCUTLASS_NVCC_ARCHS="80;89;90a" +ENV DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS} +RUN < install_modified.sh +chmod +x ./install_modified.sh +# If HOSTFILE_CONTENT is set, write it to /tmp/hostfile and pass that file to the installer +if [ -n "${HOSTFILE_CONTENT}" ]; then + echo "${HOSTFILE_CONTENT}" > /tmp/hostfile + INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile" +else + INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}" +fi +eval $INSTALL_CMD +# compile deepspeed ops (first append the micromamba shell setup to ~/.bashrc; the quoted heredoc writes the echo lines verbatim) +cat <<'EOF' >> ~/.bashrc +source ~/micromamba/etc/profile.d/micromamba.sh +echo "alias mamba=micromamba" >> ~/.bashrc +echo "alias mba=mamba" >> ~/.bashrc +EOF +# Write the helper script that pre-compiles the DeepSpeed ops (NOTE(review): it is later run by relative path - confirm the working directory is the home directory) +cat < ~/compile_deepspeed_ops.py +import deepspeed + +def compile_ops(): + builders = [ + 
deepspeed.ops.op_builder.AsyncIOBuilder, + deepspeed.ops.op_builder.FusedAdamBuilder, + deepspeed.ops.op_builder.CPUAdamBuilder, + deepspeed.ops.op_builder.CPUAdagradBuilder, + deepspeed.ops.op_builder.CPULionBuilder, + deepspeed.ops.op_builder.EvoformerAttnBuilder, + deepspeed.ops.op_builder.FPQuantizerBuilder, + deepspeed.ops.op_builder.FusedLambBuilder, + deepspeed.ops.op_builder.FusedLionBuilder, + deepspeed.ops.op_builder.QuantizerBuilder, + deepspeed.ops.op_builder.RaggedOpsBuilder, + deepspeed.ops.op_builder.RandomLTDBuilder, + deepspeed.ops.op_builder.SparseAttnBuilder, + deepspeed.ops.op_builder.SpatialInferenceBuilder, + deepspeed.ops.op_builder.TransformerBuilder, + deepspeed.ops.op_builder.StochasticTransformerBuilder, + ] + + for builder in builders: + print(f"Compiling {builder.__name__}") + builder().load() + +if __name__ == "__main__": + compile_ops() +EOF +python compile_deepspeed_ops.py +ds_report +# clean up and wheel-based install (disabled; commands kept for reference) +# rm -f deepspeed/git_version_info_installed.py +# rm -rf dist build deepspeed.egg-info +# python setup.py bdist_wheel +# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl +# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt +# pip install numpy==1.22.4 # workaround for ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py); waiting for a fix for numpy 2.0.0 +EOT + +# install transformers and flash-attn +RUN < ~/.deepspeed_env +NCCL_IB_DISABLE=${NCCL_IB_DISABLE} +NCCL_SOCKET_IFNAME=${NCCL_SOCKET_IFNAME} +NCCL_DEBUG=INFO +CUTLASS_PATH=${CUTLASS_PATH} +CUDA_HOME=${CUDA_HOME} +EOF +#CUDA_VISIBLE_DEVICES=0,1,2,3 +#OMP_NUM_THREADS=8 +#MASTER_ADDR=192.168.1.1 +#MASTER_PORT=12345 +EOT + +CMD ["/usr/sbin/sshd", "-D"] diff --git a/finetune/docker-compose_update.yml b/finetune/docker-compose_update.yml new file mode 100644 index 0000000..c8247ba --- /dev/null +++ b/finetune/docker-compose_update.yml @@ -0,0 +1,67 @@ +version: '3.8' + +# 
DeepSpeed supports several C++/CUDA extensions (ops) intended to speed up deep-learning training and inference. Some of the main DeepSpeed ops and what they do: + +# FusedAdam - fused Adam optimizer for GPUs. +# FusedLamb - like FusedAdam but for the LAMB optimizer; suited to large-scale distributed training. +# SparseAttention - efficient computation of sparse attention. +# Transformer - efficient implementation of Transformer layers. +# TransformerInference - inference-specific optimizations for Transformer models. +# CPUAdam - Adam optimizer optimized for CPUs. +# CPULion - Lion optimizer for CPUs. +# Quantizer - quantization support to shrink models and speed up inference. +# RandomLTD - random layer-wise token dropping. +# StochasticTransformer - training and inference support for the stochastic Transformer variant. + +services: + ubuntu-finetune: + build: + context: . + dockerfile: Dockerfile.update + args: # Compatibility table for PyTorch, Python and pytorch_lightning versions: https://blog.csdn.net/qq_41813454/article/details/137421822 + PYTHON_VERSION: "3.10" + CUDA_VERSION: "12.1.0" + PYTORCH_VERSION: "2.3.0" + TORCHVISION_VERSION: "0.18.0" + TORCHAUDIO_VERSION: "2.3.0" + DS_BUILD_OPS: 1 + USE_CUDA: 1 + USE_ROCM: 0 + USE_XPU: 0 + CUDA: cu121 + CUDA_ARCH_LIST: "80;86;89;90" # 89 is enough for an RTX 4090; full list: "80;86;89;90" + SETUPTOOLS_VERSION: "69.5.1" + DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a" # 90a: H100 GPU, 89: GeForce RTX 4090 + DEEPSPEED_VERSION: "master" + DEEPSPEED_INSTALL_FLAGS: "--allow_sudo" + HTTP_PROXY: "http://127.0.0.1:15777" + HTTPS_PROXY: "http://127.0.0.1:15777" + CACHEBUST: 1 + volumes: + - ./src:/bbtft + - /tmp:/tmp + container_name: ubuntu-finetune + pull_policy: if_not_present + # tty: true + restart: unless-stopped + image: hotwa/deepspeed:pt23_update + shm_size: '32gb' + ports: + - "3228:22" # quoted so YAML never reads HOST:CONTAINER as a base-60 number; NOTE(review): Dockerfile.update sets SSH_PORT=2222 - confirm sshd really listens on 22 + environment: + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility + - TMPDIR=/var/tmp + networks: + - network_finetune + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + +networks: + network_finetune: + name: network_finetune