update
This commit is contained in:
@@ -222,14 +222,14 @@ echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/loca
|
|||||||
chmod a+x /usr/local/mpi/bin/mpirun
|
chmod a+x /usr/local/mpi/bin/mpirun
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# Some Packages
|
# Some Packages from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
source /opt/conda/etc/profile.d/conda.sh
|
source /opt/conda/etc/profile.d/conda.sh
|
||||||
conda activate ${CONDA_ENV_NAME}
|
conda activate ${CONDA_ENV_NAME}
|
||||||
apt-get update
|
apt-get update
|
||||||
apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
|
apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
|
||||||
python -m pip install https://github.com/mpi4py/mpi4py/tarball/master
|
python -m pip install pipdeptree \
|
||||||
python -m pip install psutil \
|
psutil \
|
||||||
yappi \
|
yappi \
|
||||||
cffi \
|
cffi \
|
||||||
ipdb \
|
ipdb \
|
||||||
@@ -250,7 +250,8 @@ sphinx_rtd_theme \
|
|||||||
scipy \
|
scipy \
|
||||||
numpy \
|
numpy \
|
||||||
scikit-learn \
|
scikit-learn \
|
||||||
nvidia-ml-py3
|
nvidia-ml-py3 \
|
||||||
|
mpi4py
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# SSH daemon port inside container cannot conflict with host OS port
|
# SSH daemon port inside container cannot conflict with host OS port
|
||||||
@@ -351,7 +352,7 @@ EOT
|
|||||||
|
|
||||||
ARG DEEPSPEED_VERSION="0.14.3"
|
ARG DEEPSPEED_VERSION="0.14.3"
|
||||||
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
|
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
|
||||||
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
|
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose"
|
||||||
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
||||||
ARG DS_BUILD_SPARSE_ATTN=0
|
ARG DS_BUILD_SPARSE_ATTN=0
|
||||||
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
|
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
|
||||||
@@ -364,7 +365,7 @@ ENV DS_BUILD_OPS=${DS_BUILD_OPS}
|
|||||||
ARG HOSTFILE_CONTENT=""
|
ARG HOSTFILE_CONTENT=""
|
||||||
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
||||||
ENV CUTLASS_PATH=/opt/cutlass
|
ENV CUTLASS_PATH=/opt/cutlass
|
||||||
|
ENV CUDA_HOME='/usr/local/cuda'
|
||||||
# install deepspeed step 3
|
# install deepspeed step 3
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
@@ -373,7 +374,10 @@ conda activate ${CONDA_ENV_NAME}
|
|||||||
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
|
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
|
||||||
cd ${STAGE_DIR}/DeepSpeed
|
cd ${STAGE_DIR}/DeepSpeed
|
||||||
git checkout .
|
git checkout .
|
||||||
git checkout ${DEEPSPEED_VERSION}
|
git checkout v${DEEPSPEED_VERSION}
|
||||||
|
# python setup.py bdist_wheel
|
||||||
|
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl --force-reinstall
|
||||||
|
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
|
||||||
# 修改 install.sh 脚本中的 python 解释器路径
|
# 修改 install.sh 脚本中的 python 解释器路径
|
||||||
# sed "s|\bpython\b|/opt/conda/envs/${CONDA_ENV_NAME}/bin/python|g" install.sh > install_modified.sh
|
# sed "s|\bpython\b|/opt/conda/envs/${CONDA_ENV_NAME}/bin/python|g" install.sh > install_modified.sh
|
||||||
# chmod +x ./install_modified.sh
|
# chmod +x ./install_modified.sh
|
||||||
@@ -415,11 +419,4 @@ pip install peft tiktoken \
|
|||||||
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# add vscode server
|
|
||||||
# RUN <<EOT
|
|
||||||
# #!/bin/bash
|
|
||||||
# wget -qO- https://update.code.visualstudio.com/commit:${commit_id}/server-linux-x64/stable
|
|
||||||
# code-server --install-extension ms-python.vscode-pylance
|
|
||||||
# EOT
|
|
||||||
|
|
||||||
CMD ["/usr/sbin/sshd", "-D"]
|
CMD ["/usr/sbin/sshd", "-D"]
|
||||||
@@ -35,6 +35,7 @@ services:
|
|||||||
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
||||||
SETUPTOOLS_VERSION: "69.5.1"
|
SETUPTOOLS_VERSION: "69.5.1"
|
||||||
DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a" # 90a for H100 GPU 89:GeForce RTX 4090
|
DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a" # 90a for H100 GPU 89:GeForce RTX 4090
|
||||||
|
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo --pip_sudo --verbose"
|
||||||
volumes:
|
volumes:
|
||||||
- ./src:/bbtft
|
- ./src:/bbtft
|
||||||
container_name: ubuntu-finetune
|
container_name: ubuntu-finetune
|
||||||
|
|||||||
3
finetune/hostfile
Normal file
3
finetune/hostfile
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
host1 slots=4
|
||||||
|
host2 slots=4
|
||||||
|
host3 slots=8
|
||||||
Reference in New Issue
Block a user