From 65c0338c74bb9b642387a0dda402bd1321668ee6 Mon Sep 17 00:00:00 2001 From: lingyuzeng Date: Sat, 13 Jul 2024 16:08:30 +0800 Subject: [PATCH] remove # syntax=docker/dockerfile:1 --- finetune/Dockerfile | 85 +++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 46 deletions(-) diff --git a/finetune/Dockerfile b/finetune/Dockerfile index 6c9ad48..3e8735e 100644 --- a/finetune/Dockerfile +++ b/finetune/Dockerfile @@ -1,4 +1,3 @@ -# syntax=docker/dockerfile:1 # NOTE: Building this image require's docker version >= 23.0. # @@ -6,10 +5,6 @@ # - https://docs.docker.com/build/dockerfile/frontend/#stable-channel ARG CUDA_VERSION=12.1.0 FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04 -ARG HTTP_PROXY -ARG HTTPS_PROXY -ENV http_proxy=${HTTP_PROXY} -ENV https_proxy=${HTTPS_PROXY} ARG DEBIAN_FRONTEND="noninteractive" ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND} ENV MAMBA_ROOT_PREFIX=~/micromamba @@ -17,16 +12,10 @@ ARG ROOT_PASSWD="root" ENV ROOT_PASSWD=${ROOT_PASSWD} WORKDIR /root SHELL ["/bin/bash", "-c"] -COPY id_rsa.pub /root/.ssh/id_rsa.pub # base tools RUN <> ~/.bashrc +# source ~/micromamba/etc/profile.d/micromamba.sh +# EOF +# # 配置 .mambarc 文件 +# cat < ~/.mambarc +# channels: +# - conda-forge +# - bioconda +# - pytorch +# - pytorch-nightly +# - nvidia +# - defaults +# show_channel_urls: true +# EOF EOT # reference: https://github.com/huggingface/transformers/blob/main/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -75,6 +81,8 @@ ENV PATH=/opt/conda/envs/${CONDA_ENV_NAME}/bin:/usr/bin:/opt/conda/bin:$PATH ENV DEEPSPEED_PYTHON="/opt/conda/envs/${CONDA_ENV_NAME}/bin/python3" ENV REF='main' ENV STAGE_DIR=/tmp +ENV NV_PEER_MEM_VERSION=1.2 +ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 ENV OPENMPI_BASEVERSION=4.1 ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6 ARG CUDA='cu121' @@ -87,6 +95,7 @@ ARG TORCHAUDIO_VERSION=2.3.1 ENV TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION} ARG PYTORCH_CUDA_VERSION=12.1 ENV PYTORCH_CUDA_VERSION=${PYTORCH_CUDA_VERSION} +ENV MLNX_OFED_VERSION=4.9-7.1.0.0 ARG SETUPTOOLS_VERSION=69.5.1 ENV SETUPTOOLS_VERSION=${SETUPTOOLS_VERSION} ARG USE_CUDA=1 @@ -147,7 +156,6 @@ cd .. rm -rf ${STAGE_DIR}/apex EOT -ENV MLNX_OFED_VERSION=5.8-4.1.5.0 RUN < ${STAGE_DIR}/sshd_config && \ -# sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config -# EOT +ENV SSH_PORT=2222 +RUN < ${STAGE_DIR}/sshd_config && \ +sed "0,/^Port 22/s//Port ${SSH_PORT}/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config +EOT # 29.78 Usage: install.sh [options...] # 29.78 @@ -384,6 +384,12 @@ else fi eval $INSTALL_CMD # compile deepspeed ops +cat <<'EOF' >> ~/.bashrc +source ~/micromamba/etc/profile.d/micromamba.sh +echo "alias mamba=micromamba" >> ~/.bashrc +echo "alias mba=mamba" >> ~/.bashrc +EOF +# 配置 .mambarc 文件 cat < ~/compile_deepspeed_ops.py import deepspeed @@ -454,17 +460,4 @@ pip3 install peft tiktoken \ zstandard -i https://pypi.org/simple/ --trusted-host pypi.org EOT -ARG DEEPSPEED_TRAIN='/data/train_data' -ENV DEEPSPEED_TRAIN=DEEPSPEED_TRAIN -ARG DEEPSPEED_VALIDATION='/data/validation_data' -ENV DEEPSPEED_VALIDATION=DEEPSPEED_VALIDATION -ARG NCCL_SOCKET_IFNAME='eth0' - -# RUN echo 'export CUDA_HOME=/usr/local/cuda' >> ~/.bashrc && \ -# echo 'export PATH=${CUDA_HOME}/bin:${PATH}' >> ~/.bashrc && \ -# echo 'export CUTLASS_PATH=/opt/cutlass' >> ~/.bashrc && \ -# echo 'export PATH=/opt/conda/bin:$PATH' >> ~/.bashrc && \ -# echo "source activate ${CONDA_ENV_NAME}" > ~/.bashrc - CMD ["/usr/sbin/sshd", "-D"] -# CMD ["/bin/bash", "-c", "/usr/sbin/sshd -D & while true; do sleep 1000; done"]