From 25ee079c2bbd7f510d2096afcd8e5553f7f9ead8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 24 Jun 2024 08:01:21 +0000 Subject: [PATCH] update --- finetune/Dockerfile | 81 +++++++++++++++++++------- finetune/docker-compose_pytorch2.3.yml | 1 + 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/finetune/Dockerfile b/finetune/Dockerfile index 70b72d3..fcf45d1 100644 --- a/finetune/Dockerfile +++ b/finetune/Dockerfile @@ -33,7 +33,7 @@ sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_confi sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config mkdir /var/run/sshd -echo 'root:${ROOT_PASSWD}' | chpasswd +echo "root:${ROOT_PASSWD}" | chpasswd mkdir -p ~/.pip # install miniconda wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh @@ -54,24 +54,24 @@ channels: show_channel_urls: true EOF # 安装 micromamba -echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh) -micromamba shell init -s bash -p ~/micromamba -cat <<'EOF' >> ~/.bashrc -source ~/micromamba/etc/profile.d/micromamba.sh -echo "alias mamba=micromamba" >> ~/.bashrc -echo "alias mba=mamba" >> ~/.bashrc -EOF -# 配置 .mambarc 文件 -cat < ~/.mambarc -channels: - - conda-forge - - bioconda - - pytorch - - pytorch-nightly - - nvidia - - defaults -show_channel_urls: true -EOF +# echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh) +# micromamba shell init -s bash -p ~/micromamba +# cat <<'EOF' >> ~/.bashrc +# source ~/micromamba/etc/profile.d/micromamba.sh +# echo "alias mamba=micromamba" >> ~/.bashrc +# echo "alias mba=mamba" >> ~/.bashrc +# EOF +# # 配置 .mambarc 文件 +# cat < ~/.mambarc +# channels: +# - conda-forge +# - bioconda +# - pytorch +# - pytorch-nightly +# - nvidia +# - defaults +# show_channel_urls: true +# EOF EOT # reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -364,8 +364,10 @@ ARG DS_BUILD_OPS=1 ENV DS_BUILD_OPS=${DS_BUILD_OPS} ARG HOSTFILE_CONTENT="" ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT} -ENV CUTLASS_PATH=/opt/cutlass +ENV CUTLASS_PATH='/opt/cutlass' ENV CUDA_HOME='/usr/local/cuda' +ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} +ENV PATH=${CUDA_HOME}/bin:${PATH} # install deepspeed step 3 RUN <> ~/.bashrc +source ~/micromamba/etc/profile.d/micromamba.sh +echo "alias mamba=micromamba" >> ~/.bashrc +echo "alias mba=mamba" >> ~/.bashrc +EOF +# 配置 .mambarc 文件 +cat < ~/compile_deepspeed_ops.py +import deepspeed + +def compile_ops(): + builders = [ + deepspeed.ops.op_builder.AsyncIOBuilder, + deepspeed.ops.op_builder.FusedAdamBuilder, + deepspeed.ops.op_builder.CPUAdamBuilder, + deepspeed.ops.op_builder.CPUAdagradBuilder, + deepspeed.ops.op_builder.CPULionBuilder, + deepspeed.ops.op_builder.EvoformerAttnBuilder, + deepspeed.ops.op_builder.FPQuantizerBuilder, + deepspeed.ops.op_builder.FusedLambBuilder, + deepspeed.ops.op_builder.FusedLionBuilder, + deepspeed.ops.op_builder.QuantizerBuilder, + deepspeed.ops.op_builder.RaggedOpsBuilder, + deepspeed.ops.op_builder.RandomLTDBuilder, + deepspeed.ops.op_builder.SparseAttnBuilder, + deepspeed.ops.op_builder.SpatialInferenceBuilder, + deepspeed.ops.op_builder.TransformerBuilder, + deepspeed.ops.op_builder.StochasticTransformerBuilder, + ] + + for builder in builders: + print(f"Compiling {builder.__name__}") + builder().load() + +if __name__ == "__main__": + compile_ops() +EOF +python compile_deepspeed_ops.py +ds_report # clean up # rm -f deepspeed/git_version_info_installed.py # rm -rf dist build deepspeed.egg-info diff --git a/finetune/docker-compose_pytorch2.3.yml b/finetune/docker-compose_pytorch2.3.yml index 4b20fe4..9dcc191 100644 --- a/finetune/docker-compose_pytorch2.3.yml +++ b/finetune/docker-compose_pytorch2.3.yml @@ -47,6 +47,7 @@ services: environment: - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=compute,utility + - TMPDIR=/var/tmp networks: - network_finetune deploy: