This commit is contained in:
2024-06-21 13:46:40 +08:00
parent 5aeb76da25
commit 6bd77c71ca
4 changed files with 35 additions and 33 deletions

View File

@@ -311,6 +311,8 @@ cd ..
EOT EOT
# CUDA_ARCH_LIST="80;86;89;90" # CUDA_ARCH_LIST="80;86;89;90"
ARG DEEPSPEED_VERSION="0.8.3"
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean" ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS} ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
ARG CUDA_ARCH_LIST="80;86;89;90" ARG CUDA_ARCH_LIST="80;86;89;90"
@@ -319,7 +321,7 @@ ARG DS_BUILD_SPARSE_ATTN=0
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN} ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
ARG DS_BUILD_FUSED_ADAM=1 ARG DS_BUILD_FUSED_ADAM=1
ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM} ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM}
ARG DS_BUILD_CPU_ADAM=0 ARG DS_BUILD_CPU_ADAM=1
ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM}
ARG DS_BUILD_OPS=1 ARG DS_BUILD_OPS=1
ENV DS_BUILD_OPS=${DS_BUILD_OPS} ENV DS_BUILD_OPS=${DS_BUILD_OPS}
@@ -338,50 +340,47 @@ git checkout master
mkdir build mkdir build
cd build cd build
cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
make -j install make -j"$(nproc)" install
git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels git clone https://github.com/microsoft/DeepSpeed-Kernels.git ${STAGE_DIR}/DeepSpeed-Kernels
cd ${STAGE_DIR}/DeepSpeed-Kernels cd ${STAGE_DIR}/DeepSpeed-Kernels
CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python setup.py bdist_wheel CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python setup.py bdist_wheel
# pip install dist/deepspeed_kernels-*.whl pip install dist/deepspeed_kernels-*.whl
CUDA_ARCH_LIST=${CUDA_ARCH_LIST} pip install -v . # CUDA_ARCH_LIST=${CUDA_ARCH_LIST} pip install -v .
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
cd ${STAGE_DIR}/DeepSpeed cd ${STAGE_DIR}/DeepSpeed
git checkout . git checkout .
git checkout master git checkout v0.8.3
python setup.py bdist_wheel python setup.py bdist_wheel
DS_BUILD_OPS=${DS_BUILD_OPS} pip install dist/deepspeed*.whl --force-reinstall DS_BUILD_OPS=${DS_BUILD_OPS} pip install dist/deepspeed*.whl --force-reinstall
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt # DS_BUILD_OPS=${DS_BUILD_OPS} pip install -r requirements/requirements.txt
# DS_BUILD_OPS=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_CPU_ADAM=0 DS_BUILD_FUSED_ADAM=1 pip install -U --no-cache-dir .
# ./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /job/hostfile # ./install.sh --allow_sudo --pip_sudo --no_clean --hostfile /path/to/your/hostfile # ./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /job/hostfile # ./install.sh --allow_sudo --pip_sudo --no_clean --hostfile /path/to/your/hostfile
cd ..
# rm -rf ${STAGE_DIR}/DeepSpeed
EOT EOT
RUN <<EOT # RUN <<EOT
#!/bin/bash # #!/bin/bash
source /opt/conda/etc/profile.d/conda.sh # source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME} # conda activate ${CONDA_ENV_NAME}
# install transformers # # install transformers
git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers # git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
cd ${STAGE_DIR}/transformers # cd ${STAGE_DIR}/transformers
python3 ./setup.py develop # python3 ./setup.py develop
python3 -m pip install -U --no-cache-dir "pydantic<2" # python3 -m pip install -U --no-cache-dir "pydantic<2"
# install flash-attn # # install flash-attn
# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org # # pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org # pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
EOT # EOT
RUN <<EOT # RUN <<EOT
#!/bin/bash # #!/bin/bash
source /opt/conda/etc/profile.d/conda.sh # source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME} # conda activate ${CONDA_ENV_NAME}
pip install optimum # pip install optimum
pip install peft tiktoken \ # pip install peft tiktoken \
tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \ # tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
huggingface_hub spacy blobfile pycocotools \ # huggingface_hub spacy blobfile pycocotools \
xformers open_clip_torch \ # xformers open_clip_torch \
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org # zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
EOT # EOT
# add vscode server # add vscode server
# RUN <<EOT # RUN <<EOT

View File

@@ -6,6 +6,7 @@ services:
context: . context: .
dockerfile: Dockerfile dockerfile: Dockerfile
args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822 args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
PYTHON_VERSION: 3.9
CUDA_VERSION: 11.7.1 CUDA_VERSION: 11.7.1
PYTORCH_VERSION: 1.13.1 PYTORCH_VERSION: 1.13.1
TORCHVISION_VERSION: 0.14.1 TORCHVISION_VERSION: 0.14.1

View File

@@ -19,6 +19,7 @@ services:
context: . context: .
dockerfile: Dockerfile dockerfile: Dockerfile
args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822 args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
PYTHON_VERSION: 3.9
CUDA_VERSION: 12.1.0 CUDA_VERSION: 12.1.0
PYTORCH_VERSION: 2.3.0 PYTORCH_VERSION: 2.3.0
TORCHVISION_VERSION: 0.18.0 TORCHVISION_VERSION: 0.18.0

View File

@@ -17,8 +17,9 @@ services:
ubuntu-finetune: ubuntu-finetune:
build: build:
context: . context: .
dockerfile: Dockerfile.conda1 dockerfile: Dockerfile.conda
args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822 args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
PYTHON_VERSION: 3.9
CUDA_VERSION: 12.1.0 CUDA_VERSION: 12.1.0
PYTORCH_VERSION: 2.3.0 PYTORCH_VERSION: 2.3.0
TORCHVISION_VERSION: 0.18.0 TORCHVISION_VERSION: 0.18.0