This commit is contained in:
Your Name
2024-06-24 08:01:21 +00:00
parent 0bcdcb72ef
commit 25ee079c2b
2 changed files with 62 additions and 20 deletions

View File

@@ -33,7 +33,7 @@ sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_confi
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
mkdir /var/run/sshd
echo 'root:${ROOT_PASSWD}' | chpasswd
echo "root:${ROOT_PASSWD}" | chpasswd
mkdir -p ~/.pip
# install miniconda
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
@@ -54,24 +54,24 @@ channels:
show_channel_urls: true
EOF
# 安装 micromamba
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
micromamba shell init -s bash -p ~/micromamba
cat <<'EOF' >> ~/.bashrc
source ~/micromamba/etc/profile.d/micromamba.sh
echo "alias mamba=micromamba" >> ~/.bashrc
echo "alias mba=mamba" >> ~/.bashrc
EOF
# 配置 .mambarc 文件
cat <<EOF > ~/.mambarc
channels:
- conda-forge
- bioconda
- pytorch
- pytorch-nightly
- nvidia
- defaults
show_channel_urls: true
EOF
# echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
# micromamba shell init -s bash -p ~/micromamba
# cat <<'EOF' >> ~/.bashrc
# source ~/micromamba/etc/profile.d/micromamba.sh
# echo "alias mamba=micromamba" >> ~/.bashrc
# echo "alias mba=mamba" >> ~/.bashrc
# EOF
# # 配置 .mambarc 文件
# cat <<EOF > ~/.mambarc
# channels:
# - conda-forge
# - bioconda
# - pytorch
# - pytorch-nightly
# - nvidia
# - defaults
# show_channel_urls: true
# EOF
EOT
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
@@ -364,8 +364,10 @@ ARG DS_BUILD_OPS=1
ENV DS_BUILD_OPS=${DS_BUILD_OPS}
ARG HOSTFILE_CONTENT=""
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
ENV CUTLASS_PATH=/opt/cutlass
ENV CUTLASS_PATH='/opt/cutlass'
# Location of the CUDA toolkit; the two variables below are derived from it.
ENV CUDA_HOME='/usr/local/cuda'
# Prepend the CUDA libraries. The ${VAR:+...} form emits the ':' separator only
# when LD_LIBRARY_PATH already has a value, so an unset variable does not leave
# a trailing empty entry (which the dynamic loader reads as the current directory).
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# Put the CUDA binaries ahead of the existing PATH entries.
ENV PATH=${CUDA_HOME}/bin:${PATH}
# install deepspeed step 3
RUN <<EOT
#!/bin/bash
@@ -384,6 +386,45 @@ else
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
fi
eval $INSTALL_CMD
# compile deepspeed ops
# Persist the micromamba shell hook and convenience aliases for future shells.
# The heredoc delimiter is quoted, so the text below lands in ~/.bashrc verbatim;
# the aliases are declared directly so that starting a shell never writes to
# ~/.bashrc again.
cat <<'EOF' >> ~/.bashrc
# Hook micromamba only when it is actually installed.
if [ -f ~/micromamba/etc/profile.d/micromamba.sh ]; then
    source ~/micromamba/etc/profile.d/micromamba.sh
    alias mamba=micromamba
    alias mba=mamba
fi
EOF
# Generate a helper script that loads every listed DeepSpeed op builder, then
# run it. The heredoc delimiter is quoted so the shell copies the Python text
# verbatim (no variable or command substitution).
cat <<'EOF' > ~/compile_deepspeed_ops.py
"""Load each listed DeepSpeed op builder in turn."""
from deepspeed.ops import op_builder


def compile_ops():
    """Instantiate every builder below and call its load() method.

    Any exception raised by a builder propagates and stops the run.
    """
    builders = [
        op_builder.AsyncIOBuilder,
        op_builder.FusedAdamBuilder,
        op_builder.CPUAdamBuilder,
        op_builder.CPUAdagradBuilder,
        op_builder.CPULionBuilder,
        op_builder.EvoformerAttnBuilder,
        op_builder.FPQuantizerBuilder,
        op_builder.FusedLambBuilder,
        op_builder.FusedLionBuilder,
        op_builder.QuantizerBuilder,
        op_builder.RaggedOpsBuilder,
        op_builder.RandomLTDBuilder,
        op_builder.SparseAttnBuilder,
        op_builder.SpatialInferenceBuilder,
        op_builder.TransformerBuilder,
        op_builder.StochasticTransformerBuilder,
    ]
    for builder in builders:
        print(f"Compiling {builder.__name__}")
        builder().load()


if __name__ == "__main__":
    compile_ops()
EOF
# Run the script from the path it was written to, independent of the current
# working directory.
python ~/compile_deepspeed_ops.py
ds_report
# clean up
# rm -f deepspeed/git_version_info_installed.py
# rm -rf dist build deepspeed.egg-info

View File

@@ -47,6 +47,7 @@ services:
environment:
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- TMPDIR=/var/tmp
networks:
- network_finetune
deploy: