This commit is contained in:
Your Name
2024-06-24 08:01:21 +00:00
parent 0bcdcb72ef
commit 25ee079c2b
2 changed files with 62 additions and 20 deletions

View File

@@ -33,7 +33,7 @@ sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_confi
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
mkdir /var/run/sshd mkdir /var/run/sshd
echo 'root:${ROOT_PASSWD}' | chpasswd echo "root:${ROOT_PASSWD}" | chpasswd
mkdir -p ~/.pip mkdir -p ~/.pip
# install miniconda # install miniconda
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
@@ -54,24 +54,24 @@ channels:
show_channel_urls: true show_channel_urls: true
EOF EOF
# 安装 micromamba # 安装 micromamba
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh) # echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
micromamba shell init -s bash -p ~/micromamba # micromamba shell init -s bash -p ~/micromamba
cat <<'EOF' >> ~/.bashrc # cat <<'EOF' >> ~/.bashrc
source ~/micromamba/etc/profile.d/micromamba.sh # source ~/micromamba/etc/profile.d/micromamba.sh
echo "alias mamba=micromamba" >> ~/.bashrc # echo "alias mamba=micromamba" >> ~/.bashrc
echo "alias mba=mamba" >> ~/.bashrc # echo "alias mba=mamba" >> ~/.bashrc
EOF # EOF
# 配置 .mambarc 文件 # # 配置 .mambarc 文件
cat <<EOF > ~/.mambarc # cat <<EOF > ~/.mambarc
channels: # channels:
- conda-forge # - conda-forge
- bioconda # - bioconda
- pytorch # - pytorch
- pytorch-nightly # - pytorch-nightly
- nvidia # - nvidia
- defaults # - defaults
show_channel_urls: true # show_channel_urls: true
EOF # EOF
EOT EOT
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile # reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
@@ -364,8 +364,10 @@ ARG DS_BUILD_OPS=1
ENV DS_BUILD_OPS=${DS_BUILD_OPS} ENV DS_BUILD_OPS=${DS_BUILD_OPS}
ARG HOSTFILE_CONTENT="" ARG HOSTFILE_CONTENT=""
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT} ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
ENV CUTLASS_PATH=/opt/cutlass ENV CUTLASS_PATH='/opt/cutlass'
ENV CUDA_HOME='/usr/local/cuda' ENV CUDA_HOME='/usr/local/cuda'
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${CUDA_HOME}/bin:${PATH}
# install deepspeed step 3 # install deepspeed step 3
RUN <<EOT RUN <<EOT
#!/bin/bash #!/bin/bash
@@ -384,6 +386,45 @@ else
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}" INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
fi fi
eval $INSTALL_CMD eval $INSTALL_CMD
# Register micromamba for future interactive shells.
# The delimiter is quoted, so the lines below are appended to ~/.bashrc
# verbatim and nothing is expanded at build time.
cat <<'EOF' >> ~/.bashrc
# Load micromamba only when it is installed; define the aliases directly
# instead of re-appending them to this file on every shell start.
if [ -f ~/micromamba/etc/profile.d/micromamba.sh ]; then
  source ~/micromamba/etc/profile.d/micromamba.sh
  alias mamba=micromamba
  alias mba=mamba
fi
EOF
# compile deepspeed ops
# Write a helper script that loads every listed DeepSpeed op builder, so any
# build problem shows up while the image is built.
# The delimiter is quoted so the shell never expands anything inside the
# Python source.
cat <<'EOF' > ~/compile_deepspeed_ops.py
import deepspeed


def compile_ops():
    """Call load() on each listed DeepSpeed op builder, in order."""
    builders = [
        deepspeed.ops.op_builder.AsyncIOBuilder,
        deepspeed.ops.op_builder.FusedAdamBuilder,
        deepspeed.ops.op_builder.CPUAdamBuilder,
        deepspeed.ops.op_builder.CPUAdagradBuilder,
        deepspeed.ops.op_builder.CPULionBuilder,
        deepspeed.ops.op_builder.EvoformerAttnBuilder,
        deepspeed.ops.op_builder.FPQuantizerBuilder,
        deepspeed.ops.op_builder.FusedLambBuilder,
        deepspeed.ops.op_builder.FusedLionBuilder,
        deepspeed.ops.op_builder.QuantizerBuilder,
        deepspeed.ops.op_builder.RaggedOpsBuilder,
        deepspeed.ops.op_builder.RandomLTDBuilder,
        deepspeed.ops.op_builder.SparseAttnBuilder,
        deepspeed.ops.op_builder.SpatialInferenceBuilder,
        deepspeed.ops.op_builder.TransformerBuilder,
        deepspeed.ops.op_builder.StochasticTransformerBuilder,
    ]
    for builder in builders:
        print(f"Compiling {builder.__name__}")
        builder().load()


if __name__ == "__main__":
    compile_ops()
EOF
# Run the helper from the location it was written to; the current working
# directory of this RUN step is not necessarily the home directory.
python ~/compile_deepspeed_ops.py
# Print the DeepSpeed environment/op report into the build log.
ds_report
# clean up # clean up
# rm -f deepspeed/git_version_info_installed.py # rm -f deepspeed/git_version_info_installed.py
# rm -rf dist build deepspeed.egg-info # rm -rf dist build deepspeed.egg-info

View File

@@ -47,6 +47,7 @@ services:
environment: environment:
- NVIDIA_VISIBLE_DEVICES=all - NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility - NVIDIA_DRIVER_CAPABILITIES=compute,utility
- TMPDIR=/var/tmp
networks: networks:
- network_finetune - network_finetune
deploy: deploy: