update
This commit is contained in:
@@ -222,14 +222,14 @@ echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/loca
|
||||
chmod a+x /usr/local/mpi/bin/mpirun
|
||||
EOT
|
||||
|
||||
# Some Packages
|
||||
# Some Packages from https://github.com/microsoft/DeepSpeed/blob/master/docker/Dockerfile
|
||||
RUN <<EOT
|
||||
source /opt/conda/etc/profile.d/conda.sh
|
||||
conda activate ${CONDA_ENV_NAME}
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends libsndfile-dev libcupti-dev libjpeg-dev libpng-dev screen libaio-dev
|
||||
python -m pip install https://github.com/mpi4py/mpi4py/tarball/master
|
||||
python -m pip install psutil \
|
||||
python -m pip install pipdeptree \
|
||||
psutil \
|
||||
yappi \
|
||||
cffi \
|
||||
ipdb \
|
||||
@@ -250,7 +250,8 @@ sphinx_rtd_theme \
|
||||
scipy \
|
||||
numpy \
|
||||
scikit-learn \
|
||||
nvidia-ml-py3
|
||||
nvidia-ml-py3 \
|
||||
mpi4py
|
||||
EOT
|
||||
|
||||
# SSH daemon port inside container cannot conflict with host OS port
|
||||
@@ -351,7 +352,7 @@ EOT
|
||||
|
||||
ARG DEEPSPEED_VERSION="0.14.3"
|
||||
ENV DEEPSPEED_VERSION=${DEEPSPEED_VERSION}
|
||||
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --no_clean"
|
||||
ARG DEEPSPEED_INSTALL_FLAGS="--allow_sudo --pip_sudo --verbose"
|
||||
ENV DEEPSPEED_INSTALL_FLAGS=${DEEPSPEED_INSTALL_FLAGS}
|
||||
ARG DS_BUILD_SPARSE_ATTN=0
|
||||
ENV DS_BUILD_SPARSE_ATTN=${DS_BUILD_SPARSE_ATTN}
|
||||
@@ -364,7 +365,7 @@ ENV DS_BUILD_OPS=${DS_BUILD_OPS}
|
||||
ARG HOSTFILE_CONTENT=""
|
||||
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
||||
ENV CUTLASS_PATH=/opt/cutlass
|
||||
|
||||
ENV CUDA_HOME='/usr/local/cuda'
|
||||
# install deepspeed step 3
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
@@ -373,7 +374,10 @@ conda activate ${CONDA_ENV_NAME}
|
||||
git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
|
||||
cd ${STAGE_DIR}/DeepSpeed
|
||||
git checkout .
|
||||
git checkout ${DEEPSPEED_VERSION}
|
||||
git checkout v${DEEPSPEED_VERSION}
|
||||
# python setup.py bdist_wheel
|
||||
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v dist/deepspeed*.whl --force-reinstall
|
||||
# DS_BUILD_OPS=${DS_BUILD_OPS} pip install -v -r requirements/requirements.txt
|
||||
# 修改 install.sh 脚本中的 python 解释器路径
|
||||
# sed "s|\bpython\b|/opt/conda/envs/${CONDA_ENV_NAME}/bin/python|g" install.sh > install_modified.sh
|
||||
# chmod +x ./install_modified.sh
|
||||
@@ -415,11 +419,4 @@ pip install peft tiktoken \
|
||||
zstandard -i https://pypi.org/simple/ --trusted-host pypi.org
|
||||
EOT
|
||||
|
||||
# add vscode server
|
||||
# RUN <<EOT
|
||||
# #!/bin/bash
|
||||
# wget -qO- https://update.code.visualstudio.com/commit:${commit_id}/server-linux-x64/stable
|
||||
# code-server --install-extension ms-python.vscode-pylance
|
||||
# EOT
|
||||
|
||||
CMD ["/usr/sbin/sshd", "-D"]
|
||||
@@ -35,6 +35,7 @@ services:
|
||||
CUDA_ARCH_LIST: "80;86;89;90" # for RTX 4090, all : "80;86;89;90"
|
||||
SETUPTOOLS_VERSION: "69.5.1"
|
||||
DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a" # 90a for H100 GPU 89:GeForce RTX 4090
|
||||
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo --pip_sudo --verbose"
|
||||
volumes:
|
||||
- ./src:/bbtft
|
||||
container_name: ubuntu-finetune
|
||||
|
||||
3
finetune/hostfile
Normal file
3
finetune/hostfile
Normal file
@@ -0,0 +1,3 @@
|
||||
host1 slots=4
|
||||
host2 slots=4
|
||||
host3 slots=8
|
||||
Reference in New Issue
Block a user