update
This commit is contained in:
@@ -138,7 +138,7 @@ python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
# # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
python3 -m pip uninstall -y torch torchvision torchaudio
|
||||
# # install pytorch create conda env already exists
|
||||
python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} --extra-index-url https://download.pytorch.org/whl/${CUDA}
|
||||
python3 -m pip install torch==${PYTORCH_VERSION}+${CUDA} torchvision==${TORCHVISION_VERSION}+${CUDA} torchaudio==${TORCHAUDIO_VERSION} xformers --extra-index-url https://download.pytorch.org/whl/${CUDA}
|
||||
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
python3 -m pip uninstall -y transformer-engine
|
||||
python3 -m pip uninstall -y torch-tensorrt
|
||||
@@ -350,7 +350,7 @@ cd ${STAGE_DIR}/DeepSpeed-Kernels
|
||||
CUDA_ARCH_LIST=${CUDA_ARCH_LIST} python -m pip install -v .
|
||||
EOT
|
||||
|
||||
ARG DEEPSPEED_VERSION="0.14.3"
|
||||
ARG DEEPSPEED_VERSION="v0.14.3"
|
||||
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
|
||||
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose"
|
||||
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
||||
@@ -373,51 +373,53 @@ source /opt/conda/etc/profile.d/conda.sh
|
||||
conda activate ${CONDA_ENV_NAME}
|
||||
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
|
||||
cd ${STAGE_DIR}/DeepSpeed
|
||||
git checkout .
|
||||
git checkout v${DEEPSPEED_VERSION}
|
||||
# python setup.py bdist_wheel
|
||||
# 修改 install.sh 脚本中的 python 解释器路径
|
||||
# sed "s|\bpython\b|/opt/conda/envs/${CONDA_ENV_NAME}/bin/python|g" install.sh > install_modified.sh
|
||||
# chmod +x ./install_modified.sh
|
||||
git checkout ${DEEPSPEED_VERSION}
|
||||
sed 's/pip install/python -m pip install/' install.sh > install_modified.sh
|
||||
chmod +x ./install_modified.sh
|
||||
# 检查 HOSTFILE_CONTENT 并写入文件
|
||||
if [ -n "${HOSTFILE_CONTENT}" ]; then
|
||||
echo "${HOSTFILE_CONTENT}" > /tmp/hostfile
|
||||
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile"
|
||||
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS} --hostfile /tmp/hostfile"
|
||||
else
|
||||
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
||||
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
||||
fi
|
||||
# eval $INSTALL_CMD
|
||||
eval $INSTALL_CMD
|
||||
# clean up
|
||||
# rm -f deepspeed/git_version_info_installed.py
|
||||
# rm -rf dist build deepspeed.egg-info
|
||||
# python setup.py bdist_wheel
|
||||
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl
|
||||
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
|
||||
# pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0
|
||||
EOT
|
||||
|
||||
# install transformers and flash-attn
|
||||
# RUN <<EOT
|
||||
# #!/bin/bash
|
||||
# source /opt/conda/etc/profile.d/conda.sh
|
||||
# conda activate ${CONDA_ENV_NAME}
|
||||
# # install transformers
|
||||
# git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
|
||||
# cd ${STAGE_DIR}/transformers
|
||||
# python3 ./setup.py develop
|
||||
# python3 -m pip install -U --no-cache-dir "pydantic<2"
|
||||
# # install flash-attn
|
||||
# # pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
# pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
# EOT
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
source /opt/conda/etc/profile.d/conda.sh
|
||||
conda activate ${CONDA_ENV_NAME}
|
||||
# install transformers
|
||||
git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
|
||||
cd ${STAGE_DIR}/transformers
|
||||
python3 ./setup.py develop
|
||||
python3 -m pip install -U --no-cache-dir "pydantic<2"
|
||||
# install flash-attn
|
||||
# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
EOT
|
||||
|
||||
# other packages
|
||||
# RUN <<EOT
|
||||
# #!/bin/bash
|
||||
# source /opt/conda/etc/profile.d/conda.sh
|
||||
# conda activate ${CONDA_ENV_NAME}
|
||||
# pip install optimum
|
||||
# pip install peft tiktoken \
|
||||
# tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
|
||||
# huggingface_hub spacy blobfile pycocotools \
|
||||
# xformers open_clip_torch \
|
||||
# zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
# EOT
|
||||
ENV TORCH_CUDA_ARCH_LIST="80;86;89;90"
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
source /opt/conda/etc/profile.d/conda.sh
|
||||
conda activate ${CONDA_ENV_NAME}
|
||||
pip3 install optimum
|
||||
pip3 install peft tiktoken \
|
||||
tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
|
||||
huggingface_hub spacy blobfile pycocotools \
|
||||
open_clip_torch \
|
||||
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
EOT
|
||||
|
||||
CMD ["/usr/sbin/sshd", "-D"]
|
||||
@@ -21,13 +21,13 @@ services:
|
||||
args: # PyTorch版本、Python版本与pytorch_lightning版本的对应关系表 https://blog.csdn.net/qq_41813454/article/details/137421822
|
||||
PYTHON_VERSION: "3.10"
|
||||
CUDA_VERSION: "12.1.0"
|
||||
PYTORCH_VERSION: "2.3.1"
|
||||
TORCHVISION_VERSION: "0.18.1"
|
||||
TORCHAUDIO_VERSION: "2.3.1"
|
||||
PYTORCH_VERSION: "2.3.0"
|
||||
TORCHVISION_VERSION: "0.18.0"
|
||||
TORCHAUDIO_VERSION: "2.3.0"
|
||||
DS_BUILD_OPS: 1
|
||||
DS_BUILD_SPARSE_ATTN: 0
|
||||
DS_BUILD_FUSED_ADAM: 1
|
||||
DS_BUILD_CPU_ADAM: 1
|
||||
# DS_BUILD_SPARSE_ATTN: 0
|
||||
# DS_BUILD_FUSED_ADAM: 1
|
||||
# DS_BUILD_CPU_ADAM: 1
|
||||
USE_CUDA: 1
|
||||
USE_ROCM: 0
|
||||
USE_XPU: 0
|
||||
@@ -35,8 +35,8 @@ services:
|
||||
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
||||
SETUPTOOLS_VERSION: "69.5.1"
|
||||
DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a" # 90a: H100 GPU; 89: GeForce RTX 4090
|
||||
DEEPSPEED_VERSION: "0.14.3"
|
||||
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo --pip_sudo --verbose"
|
||||
DEEPSPEED_VERSION: "master"
|
||||
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
|
||||
volumes:
|
||||
- ./src:/bbtft
|
||||
container_name: ubuntu-finetune
|
||||
|
||||
Reference in New Issue
Block a user