This commit is contained in:
Your Name
2024-06-23 11:18:52 +00:00
parent 273257efc3
commit 8aebb93e89
2 changed files with 61 additions and 61 deletions

View File

@@ -222,38 +222,6 @@ echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/loca
chmod a+x /usr/local/mpi/bin/mpirun chmod a+x /usr/local/mpi/bin/mpirun
EOT EOT
# Some Packages from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
RUN <<EOT
source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
apt-get update
apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
python -m pip install pipdeptree \
psutil \
yappi \
cffi \
ipdb \
pandas \
matplotlib \
py3nvml \
pyarrow \
graphviz \
astor \
boto3 \
tqdm \
sentencepiece \
msgpack \
requests \
pandas \
sphinx \
sphinx_rtd_theme \
scipy \
numpy \
scikit-learn \
nvidia-ml-py3 \
mpi4py
EOT
# SSH daemon port inside container cannot conflict with host OS port # SSH daemon port inside container cannot conflict with host OS port
ENV SSH_PORT=2222 ENV SSH_PORT=2222
RUN <<EOT RUN <<EOT
@@ -319,6 +287,38 @@ cd ..
# make test_unit_gemm_warp -j"$(nproc)" # make test_unit_gemm_warp -j"$(nproc)"
EOT EOT
# Some Packages from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
# RUN <<EOT
# source /opt/conda/etc/profile.d/conda.sh
# conda activate ${CONDA_ENV_NAME}
# apt-get update
# apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
# python -m pip install pipdeptree \
# psutil \
# yappi \
# cffi \
# ipdb \
# pandas \
# matplotlib \
# py3nvml \
# pyarrow \
# graphviz \
# astor \
# boto3 \
# tqdm \
# sentencepiece \
# msgpack \
# requests \
# pandas \
# sphinx \
# sphinx_rtd_theme \
# scipy \
# numpy \
# scikit-learn \
# nvidia-ml-py3 \
# mpi4py
# EOT
# install deepspeed step 1 # install deepspeed step 1
RUN <<EOT RUN <<EOT
#!/bin/bash #!/bin/bash
@@ -386,38 +386,38 @@ if [ -n "${HOSTFILE_CONTENT}" ]; then
else else
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS}" INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS}"
fi fi
eval $INSTALL_CMD # eval $INSTALL_CMD
DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl --force-reinstall # DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl
DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt # DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0 # pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0
EOT EOT
# install transformers and flash-attn # install transformers and flash-attn
RUN <<EOT # RUN <<EOT
#!/bin/bash # #!/bin/bash
source /opt/conda/etc/profile.d/conda.sh # source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME} # conda activate ${CONDA_ENV_NAME}
# install transformers # # install transformers
git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers # git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
cd ${STAGE_DIR}/transformers # cd ${STAGE_DIR}/transformers
python3 ./setup.py develop # python3 ./setup.py develop
python3 -m pip install -U --no-cache-dir "pydantic<2" # python3 -m pip install -U --no-cache-dir "pydantic<2"
# install flash-attn # # install flash-attn
# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org # # pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org # pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
EOT # EOT
# other packages # other packages
RUN <<EOT # RUN <<EOT
#!/bin/bash # #!/bin/bash
source /opt/conda/etc/profile.d/conda.sh # source /opt/conda/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME} # conda activate ${CONDA_ENV_NAME}
pip install optimum # pip install optimum
pip install peft tiktoken \ # pip install peft tiktoken \
tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \ # tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
huggingface_hub spacy blobfile pycocotools \ # huggingface_hub spacy blobfile pycocotools \
xformers open_clip_torch \ # xformers open_clip_torch \
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org # zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
EOT # EOT
CMD ["/usr/sbin/sshd", "-D"] CMD ["/usr/sbin/sshd", "-D"]

View File

@@ -22,7 +22,7 @@ services:
PYTHON_VERSION: "3.10" PYTHON_VERSION: "3.10"
CUDA_VERSION: "12.1.0" CUDA_VERSION: "12.1.0"
PYTORCH_VERSION: "2.3.1" PYTORCH_VERSION: "2.3.1"
TORCHVISION_VERSION: "0.18.0" TORCHVISION_VERSION: "0.18.1"
TORCHAUDIO_VERSION: "2.3.1" TORCHAUDIO_VERSION: "2.3.1"
DS_BUILD_OPS: 1 DS_BUILD_OPS: 1
DS_BUILD_SPARSE_ATTN: 0 DS_BUILD_SPARSE_ATTN: 0