update
This commit is contained in:
@@ -33,7 +33,7 @@ sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_confi
|
||||
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
|
||||
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
|
||||
mkdir /var/run/sshd
|
||||
echo 'root:${ROOT_PASSWD}' | chpasswd
|
||||
echo "root:${ROOT_PASSWD}" | chpasswd
|
||||
mkdir -p ~/.pip
|
||||
# install miniconda
|
||||
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
|
||||
@@ -54,24 +54,24 @@ channels:
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
# Install micromamba
|
||||
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
||||
micromamba shell init -s bash -p ~/micromamba
|
||||
cat <<'EOF' >> ~/.bashrc
|
||||
source ~/micromamba/etc/profile.d/micromamba.sh
|
||||
echo "alias mamba=micromamba" >> ~/.bashrc
|
||||
echo "alias mba=mamba" >> ~/.bashrc
|
||||
EOF
|
||||
# Configure the .mambarc file
|
||||
cat <<EOF > ~/.mambarc
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
- pytorch-nightly
|
||||
- nvidia
|
||||
- defaults
|
||||
show_channel_urls: true
|
||||
EOF
|
||||
# echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
||||
# micromamba shell init -s bash -p ~/micromamba
|
||||
# cat <<'EOF' >> ~/.bashrc
|
||||
# source ~/micromamba/etc/profile.d/micromamba.sh
|
||||
# echo "alias mamba=micromamba" >> ~/.bashrc
|
||||
# echo "alias mba=mamba" >> ~/.bashrc
|
||||
# EOF
|
||||
# # Configure the .mambarc file
|
||||
# cat <<EOF > ~/.mambarc
|
||||
# channels:
|
||||
# - conda-forge
|
||||
# - bioconda
|
||||
# - pytorch
|
||||
# - pytorch-nightly
|
||||
# - nvidia
|
||||
# - defaults
|
||||
# show_channel_urls: true
|
||||
# EOF
|
||||
EOT
|
||||
|
||||
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
|
||||
@@ -364,8 +364,10 @@ ARG DS_BUILD_OPS=1
|
||||
ENV DS_BUILD_OPS=${DS_BUILD_OPS}
|
||||
ARG HOSTFILE_CONTENT=""
|
||||
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
||||
ENV CUTLASS_PATH=/opt/cutlass
|
||||
ENV CUTLASS_PATH='/opt/cutlass'
|
||||
ENV CUDA_HOME='/usr/local/cuda'
|
||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
||||
ENV PATH=${CUDA_HOME}/bin:${PATH}
|
||||
# install deepspeed step 3
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
@@ -384,6 +386,45 @@ else
|
||||
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
||||
fi
|
||||
eval $INSTALL_CMD
|
||||
# compile deepspeed ops
|
||||
cat <<'EOF' >> ~/.bashrc
|
||||
source ~/micromamba/etc/profile.d/micromamba.sh
|
||||
echo "alias mamba=micromamba" >> ~/.bashrc
|
||||
echo "alias mba=mamba" >> ~/.bashrc
|
||||
EOF
|
||||
# Write the script that loads (compiles) each DeepSpeed op builder
|
||||
cat <<EOF > ~/compile_deepspeed_ops.py
|
||||
import deepspeed
|
||||
|
||||
def compile_ops():
|
||||
builders = [
|
||||
deepspeed.ops.op_builder.AsyncIOBuilder,
|
||||
deepspeed.ops.op_builder.FusedAdamBuilder,
|
||||
deepspeed.ops.op_builder.CPUAdamBuilder,
|
||||
deepspeed.ops.op_builder.CPUAdagradBuilder,
|
||||
deepspeed.ops.op_builder.CPULionBuilder,
|
||||
deepspeed.ops.op_builder.EvoformerAttnBuilder,
|
||||
deepspeed.ops.op_builder.FPQuantizerBuilder,
|
||||
deepspeed.ops.op_builder.FusedLambBuilder,
|
||||
deepspeed.ops.op_builder.FusedLionBuilder,
|
||||
deepspeed.ops.op_builder.QuantizerBuilder,
|
||||
deepspeed.ops.op_builder.RaggedOpsBuilder,
|
||||
deepspeed.ops.op_builder.RandomLTDBuilder,
|
||||
deepspeed.ops.op_builder.SparseAttnBuilder,
|
||||
deepspeed.ops.op_builder.SpatialInferenceBuilder,
|
||||
deepspeed.ops.op_builder.TransformerBuilder,
|
||||
deepspeed.ops.op_builder.StochasticTransformerBuilder,
|
||||
]
|
||||
|
||||
for builder in builders:
|
||||
print(f"Compiling {builder.__name__}")
|
||||
builder().load()
|
||||
|
||||
if __name__ == "__main__":
|
||||
compile_ops()
|
||||
EOF
|
||||
python compile_deepspeed_ops.py
|
||||
ds_report
|
||||
# clean up
|
||||
# rm -f deepspeed/git_version_info_installed.py
|
||||
# rm -rf dist build deepspeed.egg-info
|
||||
|
||||
@@ -47,6 +47,7 @@ services:
|
||||
environment:
|
||||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
- TMPDIR=/var/tmp
|
||||
networks:
|
||||
- network_finetune
|
||||
deploy:
|
||||
|
||||
Reference in New Issue
Block a user