update
This commit is contained in:
@@ -33,7 +33,7 @@ sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_confi
|
|||||||
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
|
sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config
|
||||||
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
|
sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config
|
||||||
mkdir /var/run/sshd
|
mkdir /var/run/sshd
|
||||||
echo 'root:${ROOT_PASSWD}' | chpasswd
|
echo "root:${ROOT_PASSWD}" | chpasswd
|
||||||
mkdir -p ~/.pip
|
mkdir -p ~/.pip
|
||||||
# install miniconda
|
# install miniconda
|
||||||
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
|
wget -qO- https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
|
||||||
@@ -54,24 +54,24 @@ channels:
|
|||||||
show_channel_urls: true
|
show_channel_urls: true
|
||||||
EOF
|
EOF
|
||||||
# 安装 micromamba
|
# 安装 micromamba
|
||||||
echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
# echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh)
|
||||||
micromamba shell init -s bash -p ~/micromamba
|
# micromamba shell init -s bash -p ~/micromamba
|
||||||
cat <<'EOF' >> ~/.bashrc
|
# cat <<'EOF' >> ~/.bashrc
|
||||||
source ~/micromamba/etc/profile.d/micromamba.sh
|
# source ~/micromamba/etc/profile.d/micromamba.sh
|
||||||
echo "alias mamba=micromamba" >> ~/.bashrc
|
# echo "alias mamba=micromamba" >> ~/.bashrc
|
||||||
echo "alias mba=mamba" >> ~/.bashrc
|
# echo "alias mba=mamba" >> ~/.bashrc
|
||||||
EOF
|
# EOF
|
||||||
# 配置 .mambarc 文件
|
# # 配置 .mambarc 文件
|
||||||
cat <<EOF > ~/.mambarc
|
# cat <<EOF > ~/.mambarc
|
||||||
channels:
|
# channels:
|
||||||
- conda-forge
|
# - conda-forge
|
||||||
- bioconda
|
# - bioconda
|
||||||
- pytorch
|
# - pytorch
|
||||||
- pytorch-nightly
|
# - pytorch-nightly
|
||||||
- nvidia
|
# - nvidia
|
||||||
- defaults
|
# - defaults
|
||||||
show_channel_urls: true
|
# show_channel_urls: true
|
||||||
EOF
|
# EOF
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
|
# reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
|
||||||
@@ -364,8 +364,10 @@ ARG DS_BUILD_OPS=1
|
|||||||
ENV DS_BUILD_OPS=${DS_BUILD_OPS}
|
ENV DS_BUILD_OPS=${DS_BUILD_OPS}
|
||||||
ARG HOSTFILE_CONTENT=""
|
ARG HOSTFILE_CONTENT=""
|
||||||
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
ENV HOSTFILE_CONTENT=${HOSTFILE_CONTENT}
|
||||||
ENV CUTLASS_PATH=/opt/cutlass
|
ENV CUTLASS_PATH='/opt/cutlass'
|
||||||
ENV CUDA_HOME='/usr/local/cuda'
|
ENV CUDA_HOME='/usr/local/cuda'
|
||||||
|
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
||||||
|
ENV PATH=${CUDA_HOME}/bin:${PATH}
|
||||||
# install deepspeed step 3
|
# install deepspeed step 3
|
||||||
RUN <<EOT
|
RUN <<EOT
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
@@ -384,6 +386,45 @@ else
|
|||||||
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
INSTALL_CMD="./install_modified.sh ${DEEPSPEED_INSTALL_FLAGS}"
|
||||||
fi
|
fi
|
||||||
eval $INSTALL_CMD
|
eval $INSTALL_CMD
|
||||||
|
# compile deepspeed ops
|
||||||
|
cat <<'EOF' >> ~/.bashrc
|
||||||
|
source ~/micromamba/etc/profile.d/micromamba.sh
|
||||||
|
echo "alias mamba=micromamba" >> ~/.bashrc
|
||||||
|
echo "alias mba=mamba" >> ~/.bashrc
|
||||||
|
EOF
|
||||||
|
# write the helper script that compiles the DeepSpeed ops
|
||||||
|
cat <<EOF > ~/compile_deepspeed_ops.py
|
||||||
|
import deepspeed
|
||||||
|
|
||||||
|
def compile_ops():
|
||||||
|
builders = [
|
||||||
|
deepspeed.ops.op_builder.AsyncIOBuilder,
|
||||||
|
deepspeed.ops.op_builder.FusedAdamBuilder,
|
||||||
|
deepspeed.ops.op_builder.CPUAdamBuilder,
|
||||||
|
deepspeed.ops.op_builder.CPUAdagradBuilder,
|
||||||
|
deepspeed.ops.op_builder.CPULionBuilder,
|
||||||
|
deepspeed.ops.op_builder.EvoformerAttnBuilder,
|
||||||
|
deepspeed.ops.op_builder.FPQuantizerBuilder,
|
||||||
|
deepspeed.ops.op_builder.FusedLambBuilder,
|
||||||
|
deepspeed.ops.op_builder.FusedLionBuilder,
|
||||||
|
deepspeed.ops.op_builder.QuantizerBuilder,
|
||||||
|
deepspeed.ops.op_builder.RaggedOpsBuilder,
|
||||||
|
deepspeed.ops.op_builder.RandomLTDBuilder,
|
||||||
|
deepspeed.ops.op_builder.SparseAttnBuilder,
|
||||||
|
deepspeed.ops.op_builder.SpatialInferenceBuilder,
|
||||||
|
deepspeed.ops.op_builder.TransformerBuilder,
|
||||||
|
deepspeed.ops.op_builder.StochasticTransformerBuilder,
|
||||||
|
]
|
||||||
|
|
||||||
|
for builder in builders:
|
||||||
|
print(f"Compiling {builder.__name__}")
|
||||||
|
builder().load()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
compile_ops()
|
||||||
|
EOF
|
||||||
|
python compile_deepspeed_ops.py
|
||||||
|
ds_report
|
||||||
# clean up
|
# clean up
|
||||||
# rm -f deepspeed/git_version_info_installed.py
|
# rm -f deepspeed/git_version_info_installed.py
|
||||||
# rm -rf dist build deepspeed.egg-info
|
# rm -rf dist build deepspeed.egg-info
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
- NVIDIA_VISIBLE_DEVICES=all
|
- NVIDIA_VISIBLE_DEVICES=all
|
||||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
|
- TMPDIR=/var/tmp
|
||||||
networks:
|
networks:
|
||||||
- network_finetune
|
- network_finetune
|
||||||
deploy:
|
deploy:
|
||||||
|
|||||||
Reference in New Issue
Block a user