update
This commit is contained in:
@@ -222,38 +222,6 @@ echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/loca
|
|||||||
chmod a+x /usr/local/mpi/bin/mpirun
|
chmod a+x /usr/local/mpi/bin/mpirun
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Some Packages from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
|
|
||||||
RUN <<EOT
|
|
||||||
source /opt/conda/etc/profile.d/conda.sh
|
|
||||||
conda activate ${CONDA_ENV_NAME}
|
|
||||||
apt-get update
|
|
||||||
apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
|
|
||||||
python -m pip install pipdeptree \
|
|
||||||
psutil \
|
|
||||||
yappi \
|
|
||||||
cffi \
|
|
||||||
ipdb \
|
|
||||||
pandas \
|
|
||||||
matplotlib \
|
|
||||||
py3nvml \
|
|
||||||
pyarrow \
|
|
||||||
graphviz \
|
|
||||||
astor \
|
|
||||||
boto3 \
|
|
||||||
tqdm \
|
|
||||||
sentencepiece \
|
|
||||||
msgpack \
|
|
||||||
requests \
|
|
||||||
pandas \
|
|
||||||
sphinx \
|
|
||||||
sphinx_rtd_theme \
|
|
||||||
scipy \
|
|
||||||
numpy \
|
|
||||||
scikit-learn \
|
|
||||||
nvidia-ml-py3 \
|
|
||||||
mpi4py
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# SSH daemon port inside container cannot conflict with host OS port
|
# SSH daemon port inside container cannot conflict with host OS port
|
||||||
ENV SSH_PORT=2222
|
ENV SSH_PORT=2222
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
@@ -319,6 +287,38 @@ cd ..
|
|||||||
# make test_unit_gemm_warp -j"$(nproc)"
|
# make test_unit_gemm_warp -j"$(nproc)"
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
|
# Some Packages from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
|
||||||
|
# RUN <<EOT
|
||||||
|
# source /opt/conda/etc/profile.d/conda.sh
|
||||||
|
# conda activate ${CONDA_ENV_NAME}
|
||||||
|
# apt-get update
|
||||||
|
# apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
|
||||||
|
# python -m pip install pipdeptree \
|
||||||
|
# psutil \
|
||||||
|
# yappi \
|
||||||
|
# cffi \
|
||||||
|
# ipdb \
|
||||||
|
# pandas \
|
||||||
|
# matplotlib \
|
||||||
|
# py3nvml \
|
||||||
|
# pyarrow \
|
||||||
|
# graphviz \
|
||||||
|
# astor \
|
||||||
|
# boto3 \
|
||||||
|
# tqdm \
|
||||||
|
# sentencepiece \
|
||||||
|
# msgpack \
|
||||||
|
# requests \
|
||||||
|
# pandas \
|
||||||
|
# sphinx \
|
||||||
|
# sphinx_rtd_theme \
|
||||||
|
# scipy \
|
||||||
|
# numpy \
|
||||||
|
# scikit-learn \
|
||||||
|
# nvidia-ml-py3 \
|
||||||
|
# mpi4py
|
||||||
|
# EOT
|
||||||
|
|
||||||
# install deepspeed step 1
|
# install deepspeed step 1
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
@@ -386,38 +386,38 @@ if [ -n "${HOSTFILE_CONTENT}" ]; then
|
|||||||
else
|
else
|
||||||
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
INSTALL_CMD="./install.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
||||||
fi
|
fi
|
||||||
eval $INSTALL_CMD
|
# eval $INSTALL_CMD
|
||||||
DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl --force-reinstall
|
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl
|
||||||
DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
|
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
|
||||||
pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0
|
# pip install numpy==1.22.4 # ImportError: cannot import name 'BUFSIZE' from 'numpy' (/opt/conda/envs/deepspeed/lib/python3.10/site-packages/numpy/__init__.py) wait for fix in numpy=2.0.0
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# install transformers and flash-attn
|
# install transformers and flash-attn
|
||||||
RUN <<EOT
|
# RUN <<EOT
|
||||||
#!/bin/bash
|
# #!/bin/bash
|
||||||
source /opt/conda/etc/profile.d/conda.sh
|
# source /opt/conda/etc/profile.d/conda.sh
|
||||||
conda activate ${CONDA_ENV_NAME}
|
# conda activate ${CONDA_ENV_NAME}
|
||||||
# install transformers
|
# # install transformers
|
||||||
git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
|
# git clone https://github.com/huggingface/transformers ${STAGE_DIR}/transformers
|
||||||
cd ${STAGE_DIR}/transformers
|
# cd ${STAGE_DIR}/transformers
|
||||||
python3 ./setup.py develop
|
# python3 ./setup.py develop
|
||||||
python3 -m pip install -U --no-cache-dir "pydantic<2"
|
# python3 -m pip install -U --no-cache-dir "pydantic<2"
|
||||||
# install flash-attn
|
# # install flash-attn
|
||||||
# pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
|
# # pip install packaging -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||||
pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
|
# pip install flash-attn --no-build-isolation -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||||
EOT
|
# EOT
|
||||||
|
|
||||||
# other packages
|
# other packages
|
||||||
RUN <<EOT
|
# RUN <<EOT
|
||||||
#!/bin/bash
|
# #!/bin/bash
|
||||||
source /opt/conda/etc/profile.d/conda.sh
|
# source /opt/conda/etc/profile.d/conda.sh
|
||||||
conda activate ${CONDA_ENV_NAME}
|
# conda activate ${CONDA_ENV_NAME}
|
||||||
pip install optimum
|
# pip install optimum
|
||||||
pip install peft tiktoken \
|
# pip install peft tiktoken \
|
||||||
tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
|
# tqdm matplotlib seaborn numpy pandas scikit-learn diffusers \
|
||||||
huggingface_hub spacy blobfile pycocotools \
|
# huggingface_hub spacy blobfile pycocotools \
|
||||||
xformers open_clip_torch \
|
# xformers open_clip_torch \
|
||||||
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
# zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||||
EOT
|
# EOT
|
||||||
|
|
||||||
CMD ["/usr/sbin/sshd", "-D"]
|
CMD ["/usr/sbin/sshd", "-D"]
|
||||||
@@ -22,7 +22,7 @@ services:
|
|||||||
PYTHON_VERSION: "3.10"
|
PYTHON_VERSION: "3.10"
|
||||||
CUDA_VERSION: "12.1.0"
|
CUDA_VERSION: "12.1.0"
|
||||||
PYTORCH_VERSION: "2.3.1"
|
PYTORCH_VERSION: "2.3.1"
|
||||||
TORCHVISION_VERSION: "0.18.0"
|
TORCHVISION_VERSION: "0.18.1"
|
||||||
TORCHAUDIO_VERSION: "2.3.1"
|
TORCHAUDIO_VERSION: "2.3.1"
|
||||||
DS_BUILD_OPS: 1
|
DS_BUILD_OPS: 1
|
||||||
DS_BUILD_SPARSE_ATTN: 0
|
DS_BUILD_SPARSE_ATTN: 0
|
||||||
|
|||||||
Reference in New Issue
Block a user