From d47f32d3c5f609d3bc9942c0d09e67fe4ed98121 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 4 Jul 2024 01:45:31 +0000 Subject: [PATCH] update dockerfile --- finetune/Dockerfile | 549 ++++++++++++++++++++++---------------------- 1 file changed, 275 insertions(+), 274 deletions(-) diff --git a/finetune/Dockerfile b/finetune/Dockerfile index f115e76..d3fb917 100644 --- a/finetune/Dockerfile +++ b/finetune/Dockerfile @@ -166,300 +166,301 @@ cd ${STAGE_DIR} rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64* EOT -# ENV NV_PEER_MEM_VERSION=1.2 -# ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 +ENV NV_PEER_MEM_VERSION=1.2 +ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 +RUN < /usr/local/mpi/bin/mpirun +echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun +chmod a+x /usr/local/mpi/bin/mpirun +EOT + +# SSH daemon port inside container cannot conflict with host OS port +# ENV SSH_PORT=2222 # RUN < ${STAGE_DIR}/sshd_config && \ +# sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config # EOT -# # install mpi -# ENV PATH=/usr/local/mpi/bin:${PATH} -# ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH} +# 29.78 Usage: install.sh [options...] +# 29.78 +# 29.78 By default will install deepspeed and all third party dependencies across all machines listed in +# 29.78 hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally +# 29.78 +# 29.78 [optional] +# 29.78 -l, --local_only Install only on local machine +# 29.78 -s, --pip_sudo Run pip install with sudo (default: no sudo) +# 29.78 -r, --allow_sudo Allow script to be run by root (probably don't want this, instead use --pip_sudo) +# 29.78 -n, --no_clean Do not clean prior build state, by default prior build files are removed before building wheels +# 29.78 -m, --pip_mirror Use the specified pip mirror (default: the default pip mirror) +# 29.78 -H, --hostfile Path to MPI-style hostfile (default: /job/hostfile) +# 29.78 -e, --examples Checkout deepspeed example submodule (no install) +# 29.78 -v, --verbose Verbose logging +# 29.78 -h, --help This help text + +RUN <> /etc/sudoers +EOT + +# install cutlass https://github.com/NVIDIA/cutlass +# H100: architecture is Hopper (cutlass need add : cmake .. -DCUTLASS_NVCC_ARCHS="90a" ) +# A100: architecture is Ampere +# V100: architecture is Volta +# T4: architecture is Turing +# ENV CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc +# 70:适用于 NVIDIA Volta 架构(如 Tesla V100)。 +# 75:适用于 NVIDIA Turing 架构(如 Tesla T4)。 +# 80:适用于 NVIDIA Ampere 架构(如 A100)。 +# 90a:适用于 NVIDIA Hopper 架构(如 H100)。 +# 89:GeForce RTX 4090 +ARG DCUTLASS_NVCC_ARCHS="80;89;90a" +ENV DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS} +RUN < /usr/local/mpi/bin/mpirun -# echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun -# chmod a+x /usr/local/mpi/bin/mpirun +# apt-get update +# apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev +# python -m pip install pipdeptree \ +# psutil \ +# yappi \ +# cffi \ +# ipdb \ +# pandas \ +# matplotlib \ +# py3nvml \ +# pyarrow \ +# graphviz \ +# astor \ +# boto3 \ +# tqdm \ +# sentencepiece \ +# msgpack \ +# requests \ +# pandas \ +# sphinx \ +# sphinx_rtd_theme \ +# scipy \ +# numpy \ +# scikit-learn \ +# nvidia-ml-py3 \ +# mpi4py # EOT -# # SSH daemon port inside container cannot conflict with host OS port -# # ENV SSH_PORT=2222 -# # RUN < ${STAGE_DIR}/sshd_config && \ -# # sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config -# # EOT +# install deepspeed step 1 +RUN <> /etc/sudoers -# EOT +ARG DEEPSPEED_VERSION="v0.14.3" +ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION} +ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose" +ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS} +ARG DS_BUILD_SPARSE_ATTN=0 +ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN} +ARG DS_BUILD_FUSED_ADAM=1 +ENV DS_BUILD_FUSED_ADAM=${DS_BUILD_FUSED_ADAM} +ARG DS_BUILD_CPU_ADAM=1 +ENV DS_BUILD_CPU_ADAM=${DS_BUILD_CPU_ADAM} +ARG DS_BUILD_OPS=1 +ENV DS_BUILD_OPS=${DS_BUILD_OPS} +ARG HOSTFILE_CONTENT="" +ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT} +ENV CUTLASS_PATH='/opt/cutlass' +ENV CUDA_HOME='/usr/local/cuda' +ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} +ENV PATH=${CUDA_HOME}/bin:${PATH} +# install deepspeed step 3 +RUN < install_modified.sh +chmod +x ./install_modified.sh +# 检查 HOSTFILE_CONTENT 并写入文件 +if [ -n "${HOSTFILE_CONTENT}" ]; then + echo "${HOSTFILE_CONTENT}" > /tmp/hostfile + INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile" +else + INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}" +fi +eval $INSTALL_CMD +# compile deepspeed ops +cat < ~/compile_deepspeed_ops.py +import deepspeed -# # install cutlass https://github.com/NVIDIA/cutlass -# # H100: architecture is Hopper (cutlass need add : cmake .. -DCUTLASS_NVCC_ARCHS="90a" ) -# # A100: architecture is Ampere -# # V100: architecture is Volta -# # T4: architecture is Turing -# # ENV CUDACXX=${CUDA_INSTALL_PATH}/bin/nvcc -# # 70:适用于 NVIDIA Volta 架构(如 Tesla V100)。 -# # 75:适用于 NVIDIA Turing 架构(如 Tesla T4)。 -# # 80:适用于 NVIDIA Ampere 架构(如 A100)。 -# # 90a:适用于 NVIDIA Hopper 架构(如 H100)。 -# # 89:GeForce RTX 4090 -# ARG DCUTLASS_NVCC_ARCHS="80;89;90a" -# ENV DCUTLASS_NVCC_ARCHS=${DCUTLASS_NVCC_ARCHS} -# RUN < install_modified.sh -# chmod +x ./install_modified.sh -# # 检查 HOSTFILE_CONTENT 并写入文件 -# if [ -n "${HOSTFILE_CONTENT}" ]; then -# echo "${HOSTFILE_CONTENT}" > /tmp/hostfile -# INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile" -# else -# INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}" -# fi -# eval $INSTALL_CMD -# # compile deepspeed ops -# cat < ~/compile_deepspeed_ops.py -# import deepspeed - -# def compile_ops(): -# builders = [ -# deepspeed.ops.op_builder.AsyncIOBuilder, -# deepspeed.ops.op_builder.FusedAdamBuilder, -# deepspeed.ops.op_builder.CPUAdamBuilder, -# deepspeed.ops.op_builder.CPUAdagradBuilder, -# deepspeed.ops.op_builder.CPULionBuilder, -# deepspeed.ops.op_builder.EvoformerAttnBuilder, -# deepspeed.ops.op_builder.FPQuantizerBuilder, -# deepspeed.ops.op_builder.FusedLambBuilder, -# deepspeed.ops.op_builder.FusedLionBuilder, -# deepspeed.ops.op_builder.QuantizerBuilder, -# deepspeed.ops.op_builder.RaggedOpsBuilder, -# deepspeed.ops.op_builder.RandomLTDBuilder, -# deepspeed.ops.op_builder.SparseAttnBuilder, -# deepspeed.ops.op_builder.SpatialInferenceBuilder, -# deepspeed.ops.op_builder.TransformerBuilder, -# deepspeed.ops.op_builder.StochasticTransformerBuilder, -# ] +def compile_ops(): + builders = [ + deepspeed.ops.op_builder.AsyncIOBuilder, + deepspeed.ops.op_builder.FusedAdamBuilder, + deepspeed.ops.op_builder.CPUAdamBuilder, + deepspeed.ops.op_builder.CPUAdagradBuilder, + deepspeed.ops.op_builder.CPULionBuilder, + deepspeed.ops.op_builder.EvoformerAttnBuilder, + deepspeed.ops.op_builder.FPQuantizerBuilder, + deepspeed.ops.op_builder.FusedLambBuilder, + deepspeed.ops.op_builder.FusedLionBuilder, + deepspeed.ops.op_builder.QuantizerBuilder, + deepspeed.ops.op_builder.RaggedOpsBuilder, + deepspeed.ops.op_builder.RandomLTDBuilder, + deepspeed.ops.op_builder.SparseAttnBuilder, + deepspeed.ops.op_builder.SpatialInferenceBuilder, + deepspeed.ops.op_builder.TransformerBuilder, + deepspeed.ops.op_builder.StochasticTransformerBuilder, + ] -# for builder in builders: -# print(f"Compiling {builder.__name__}") -# builder().load() + for builder in builders: + print(f"Compiling {builder.__name__}") + builder().load() -# if __name__ == "__main__": -# compile_ops() -# EOF -# python compile_deepspeed_ops.py -# ds_report -# # clean up -# # rm -f deepspeed/git_version_info_installed.py -# # rm -rf dist build deepspeed.egg-info -# # python setup.py bdist_wheel -# # DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl -# # DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt -# # pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0 -# EOT +if __name__ == "__main__": + compile_ops() +EOF +python compile_deepspeed_ops.py +ds_report +# clean up +# rm -f deepspeed/git_version_info_installed.py +# rm -rf dist build deepspeed.egg-info +# python setup.py bdist_wheel +# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl +# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt +# pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0 +EOT -# # install transformers and flash-attn -# RUN <> ~/.bashrc && \ -# # echo 'export PATH=${CUDA_HOME}/bin:${PATH}' >> ~/.bashrc && \ -# # echo 'export CUTLASS_PATH=/opt/cutlass' >> ~/.bashrc && \ -# # echo 'export PATH=/opt/conda/bin:$PATH' >> ~/.bashrc && \ -# # echo "source activate ${CONDA_ENV_NAME}" > ~/.bashrc +# RUN echo 'export CUDA_HOME=/usr/local/cuda' >> ~/.bashrc && \ +# echo 'export PATH=${CUDA_HOME}/bin:${PATH}' >> ~/.bashrc && \ +# echo 'export CUTLASS_PATH=/opt/cutlass' >> ~/.bashrc && \ +# echo 'export PATH=/opt/conda/bin:$PATH' >> ~/.bashrc && \ +# echo "source activate ${CONDA_ENV_NAME}" > ~/.bashrc CMD ["/usr/sbin/sshd", "-D"] # CMD ["/bin/bash", "-c", "/usr/sbin/sshd -D & while true; do sleep 1000; done"]