This commit is contained in:
Your Name
2024-06-23 16:46:22 +00:00
parent 8aebb93e89
commit 13ba53eaca
2 changed files with 45 additions and 43 deletions

View File

@@ -138,7 +138,7 @@ python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
# # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
python3 -m pip uninstall -y torch torchvision torchaudio
# # install pytorch (the conda env already exists)
python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA}
python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} xformers --extra-index-url https://download.pytorch.org/whl/${CUDA}
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
python3 -m pip uninstall -y transformer-engine
python3 -m pip uninstall -y torch-tensorrt
@@ -350,7 +350,7 @@ cd ${STAGE_DIR}/DeepSpeed-Kernels
CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python -m pip install -v .
EOT
# DeepSpeed version to build; the default can be overridden with --build-arg.
ARG DEEPSPEED_VERSION="0.14.3"
ARG DEEPSPEED_VERSION="v0.14.3"
# Re-export the build argument as an environment variable under the same name.
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
# Command-line flags appended to the DeepSpeed install script invocation.
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose"
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
@@ -373,51 +373,53 @@ source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
cd ${STAGE_DIR}/DeepSpeed
git checkout .
git checkout v${DEEPSPEED_VERSION}
# python setup.py bdist_wheel
# Rewrite the python interpreter path used in the install.sh script
# sed "s|\bpython\b|/opt/conda/envs/${CONDA_ENV_NAME}/bin/python|g" install.sh > install_modified.sh
# chmod +x ./install_modified.sh
git checkout ${DEEPSPEED_VERSION}
sed 's/pip install/python -m pip install/' install.sh > install_modified.sh
chmod +x ./install_modified.sh
# Check HOSTFILE_CONTENT and, if it is set, write it to a file
if [ -n "${HOSTFILE_CONTENT}" ]; then
echo "${HOSTFILE_CONTENT}" > /tmp/hostfile
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile"
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile"
else
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS}"
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
fi
# eval $INSTALL_CMD
eval $INSTALL_CMD
# clean up
# rm -f deepspeed/git_version_info_installed.py
# rm -rf dist build deepspeed.egg-info
# python setup.py bdist_wheel
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
# pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0
EOT
# install transformers and flash-attn
# RUN <<EOT
# #!/bin/bash
# source /opt/conda/etc/profile.d/conda.sh
# conda activate ${CONDA_ENV_NAME}
# # install transformers
# git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
# cd ${STAGE_DIR}/transformers
# python3 ./setup.py develop
# python3 -m pip install -U --no-cache-dir "pydantic<2"
# # install flash-attn
# # pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
# pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
# EOT
RUN <<EOT
#!/bin/bash
# Install Hugging Face transformers from a source checkout (development mode)
# and flash-attn into the conda environment named by CONDA_ENV_NAME.
source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
# Heredoc lines are not implicitly &&-chained: without errexit a failed clone
# or install would be ignored and the build would still succeed. It is enabled
# after activation in case the environment's activation hooks are not
# errexit-clean.
set -e
# install transformers
git clone https://github.com/huggingface/transformers "${STAGE_DIR}/transformers"
cd "${STAGE_DIR}/transformers"
python3 ./setup.py develop
python3 -m pip install -U --no-cache-dir "pydantic<2"
# install flash-attn
# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
# Invoke pip through the environment's interpreter (as the lines above do) so
# the package cannot land in a different Python, and skip the pip cache to keep
# the layer small. --no-build-isolation builds against the packages already
# installed in the environment.
python3 -m pip install --no-cache-dir flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
EOT
# other packages
# RUN <<EOT
# #!/bin/bash
# source /opt/conda/etc/profile.d/conda.sh
# conda activate ${CONDA_ENV_NAME}
# pip install optimum
# pip install peft tiktoken \
# tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
# huggingface_hub spacy blobfile pycocotools \
# xformers open_clip_torch \
# zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
# EOT
# GPU architectures targeted when CUDA extensions are compiled against PyTorch.
# PyTorch's extension builder expects dotted compute capabilities ("8.0"), not
# the undotted CMake-style form ("80"), which it rejects as an unknown arch.
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0"
RUN <<EOT
#!/bin/bash
# Install the remaining Python packages into the conda environment named by
# CONDA_ENV_NAME.
source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
# Heredoc lines are not implicitly &&-chained: stop at the first failing
# install instead of producing an image with packages silently missing.
# Enabled after activation in case the environment's activation hooks are not
# errexit-clean.
set -e
# Use the environment's interpreter to run pip so the packages cannot be
# installed into a different Python, and skip the pip cache to keep the layer
# small.
python3 -m pip install --no-cache-dir optimum
python3 -m pip install --no-cache-dir \
    blobfile \
    diffusers \
    huggingface_hub \
    matplotlib \
    numpy \
    open_clip_torch \
    pandas \
    peft \
    pycocotools \
    scikit-learn \
    seaborn \
    spacy \
    tiktoken \
    tqdm \
    zstandard \
    -i https://pypi.org/simple/ --trusted-host pypi.org
EOT
# Default container process: the OpenSSH daemon, kept in the foreground by -D
# (sshd does not detach), started in exec form so no shell wraps it.
CMD ["/usr/sbin/sshd", "-D"]