diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6fc826d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.tar +build_d/ \ No newline at end of file diff --git a/Dockerfile.base b/Dockerfile.base new file mode 100644 index 0000000..a9d0900 --- /dev/null +++ b/Dockerfile.base @@ -0,0 +1,78 @@ +# syntax=docker/dockerfile:1 +FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 +ARG DEBIAN_FRONTEND="noninteractive" +ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND} +ENV MAMBA_ROOT_PREFIX=~/micromamba +ARG CONDA_ENV_NAME="ldh" +ENV CONDA_ENV_NAME=${CONDA_ENV_NAME} +ARG PYTHON_VERSION=3.10 +ENV PYTHON_VERSION=${PYTHON_VERSION} +ARG ROOT_PASSWD="root" +ENV ROOT_PASSWD=${ROOT_PASSWD} +ENV PATH=/opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH +WORKDIR /root +SHELL ["/bin/bash", "-c"] +# base tools +RUN <> ~/.bashrc +echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc +# Write the conda channel list to ~/.condarc +cat <<EOF > ~/.condarc +channels: + - conda-forge + - bioconda + - pytorch + - pytorch-nightly + - nvidia + - defaults +show_channel_urls: true +EOF +# Install micromamba (answers the installer prompt with option 1) +echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh) +micromamba shell init -s bash -p ~/micromamba +cat <<'EOF' >> ~/.bashrc +source ~/micromamba/etc/profile.d/micromamba.sh +alias mamba=micromamba +alias mba=mamba +EOF +# Write the same channel list to ~/.mambarc for micromamba +cat <<EOF > ~/.mambarc +channels: + - conda-forge + - bioconda + - pytorch + - pytorch-nightly + - nvidia + - defaults +show_channel_urls: true +EOF +EOT \ No newline at end of file diff --git a/Dockerfile.bgpt b/Dockerfile.bgpt index f058204..d0ab752 100644 --- a/Dockerfile.bgpt +++ b/Dockerfile.bgpt @@ -21,9 +21,23 @@ echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config echo "Port 22" >> /etc/ssh/sshd_config mkdir /var/run/sshd echo 'root:cdcdocker' | chpasswd -# Install Micromamba +# Install micromamba and configure mambarc echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh) micromamba shell init -s bash -p 
~/micromamba +cat <<'EOF' >> ~/.bashrc +source ~/micromamba/etc/profile.d/micromamba.sh +alias mamba=micromamba +alias mba=mamba +EOF +# Write the micromamba channel list to ~/.mambarc +cat <<EOF > ~/.mambarc +channels: + - conda-forge + - bioconda + - pytorch + - pytorch-nightly + - nvidia +EOF mkdir -p ~/.pip echo " [global] diff --git a/finetune/Dockerfile b/finetune/Dockerfile index 532bdc1..562d0e1 100644 --- a/finetune/Dockerfile +++ b/finetune/Dockerfile @@ -1,163 +1,204 @@ -FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 - +FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 ARG DEBIAN_FRONTEND="noninteractive" ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND} ENV MAMBA_ROOT_PREFIX=~/micromamba +ARG CONDA_ENV_NAME="ldh" +ENV CONDA_ENV_NAME=${CONDA_ENV_NAME} +ARG PYTHON_VERSION=3.10 +ENV PYTHON_VERSION=${PYTHON_VERSION} +ARG ROOT_PASSWD="root" +ENV ROOT_PASSWD=${ROOT_PASSWD} +ENV PATH=/opt/conda/bin:/opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH WORKDIR /root SHELL ["/bin/bash", "-c"] - # base tools RUN <> /etc/ssh/sshd_config -echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config -echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config -echo "Port 22" >> /etc/ssh/sshd_config +sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config +sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config +sed -i 's/PubkeyAuthentication no/PubkeyAuthentication yes/' /etc/ssh/sshd_config +sed -i 's/^#Port 22/Port 22/' /etc/ssh/sshd_config +sed -i 's/^Port [0-9]*/Port 22/' /etc/ssh/sshd_config mkdir /var/run/sshd -echo 'root:root' | chpasswd +echo "root:${ROOT_PASSWD}" | chpasswd mkdir -p ~/.pip -echo " -[global] -index-url = https://mirrors.aliyun.com/pypi/simple/ - -[install] -trusted-host=mirrors.aliyun.com -" >> ~/.pip/pip.conf +# install miniconda into /opt/conda (installer is removed after use) +wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh +bash /tmp/miniconda.sh -b -p /opt/conda +rm /tmp/miniconda.sh +conda init bash +conda create -n ${CONDA_ENV_NAME} 
python=${PYTHON_VERSION} pyyaml ipython -y +conda run -n ${CONDA_ENV_NAME} python -m pip install open_clip_torch nvidia-ml-py3 opencv-contrib-python +conda clean -afy +ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh +echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc +echo "conda activate ${CONDA_ENV_NAME}" >> ~/.bashrc +# Write the conda channel list to ~/.condarc +cat <<EOF > ~/.condarc +channels: + - conda-forge + - bioconda + - pytorch + - pytorch-nightly + - nvidia + - defaults +show_channel_urls: true +EOF +# Install micromamba (answers the installer prompt with option 1) +echo 1 | bash <(curl -s https://cdn.jsdelivr.net/gh/hotwa/MicroMamba_Installer@main/install.sh) +micromamba shell init -s bash -p ~/micromamba +cat <<'EOF' >> ~/.bashrc +source ~/micromamba/etc/profile.d/micromamba.sh +alias mamba=micromamba +alias mba=mamba +EOF +# Write the same channel list to ~/.mambarc for micromamba +cat <<EOF > ~/.mambarc +channels: + - conda-forge + - bioconda + - pytorch + - pytorch-nightly + - nvidia + - defaults +show_channel_urls: true +EOF EOT -# deepspeed -ENV STAGE_DIR=/tmp - +# Build ninja from source and run its tests RUN <> /etc/ssh/sshd_config -cp /etc/ssh/sshd_config ${STAGE_DIR}/sshd_config -sed "0,/^#Port 22/s//Port 22/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config +# Activate the conda environment used for the ninja build +source /opt/conda/etc/profile.d/conda.sh +conda activate ${CONDA_ENV_NAME} +# Clone the ninja source tree and build inside it +git clone https://github.com/ninja-build/ninja.git +cd ninja +# Clone the GoogleTest source needed by the ninja tests +git clone https://github.com/google/googletest.git +conda run -n ${CONDA_ENV_NAME} python ./configure.py --bootstrap +# Configure and build the Ninja tests, adding the pthread compile/link flags +CXXFLAGS="-pthread" LDFLAGS="-pthread" ./configure.py --bootstrap --gtest-source-dir=$(pwd)/googletest +./ninja all +# Run the Ninja unit tests +./ninja_test EOT -# Mellanox OFED -ENV MLNX_OFED_VERSION=4.9-7.1.0.0 -RUN apt-get install -y libnuma-dev -RUN cd ${STAGE_DIR} && \ - wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf - && \ - cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 && \ - 
./mlnxofedinstall --user-space-only --without-fw-update --all -q && \ - cd ${STAGE_DIR} && \ - rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64* -# nv_peer_mem -ENV NV_PEER_MEM_VERSION=1.2 -ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 -RUN mkdir -p ${STAGE_DIR} && \ - git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory && \ - cd ${STAGE_DIR}/nv_peer_memory && \ - ./build_module.sh && \ - cd ${STAGE_DIR} && \ - tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz && \ - cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} && \ - apt-get update && \ - apt-get install -y dkms && \ - dpkg-buildpackage -us -uc && \ - dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb -# OPENMPI -ENV OPENMPI_BASEVERSION=4.1 -ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6 -RUN cd ${STAGE_DIR} && \ - wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf - && \ - cd openmpi-${OPENMPI_VERSION} && \ - ./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION} && \ - make -j"$(nproc)" install && \ - ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi && \ - # Sanity check: - test -f /usr/local/mpi/bin/mpic++ && \ - cd ${STAGE_DIR} && \ - rm -r ${STAGE_DIR}/openmpi-${OPENMPI_VERSION} -ENV PATH=/usr/local/mpi/bin:${PATH} \ - LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH} -# Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun && \ - chmod a+x /usr/local/mpi/bin/mpirun -# Python -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHON_VERSION=3 -RUN apt-get install -y python3 python3-dev && \ - rm -f /usr/bin/python && \ - ln 
-s /usr/bin/python3 /usr/bin/python && \ - curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py && \ - python get-pip.py && \ - rm get-pip.py && \ - pip install --upgrade pip && \ - # Print python an pip version - python -V && pip -V -RUN pip install pyyaml -RUN pip install ipython -# Some Packages -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - libsndfile-dev \ - libcupti-dev \ - libjpeg-dev \ - libpng-dev \ - screen \ - libaio-dev -RUN pip install psutil \ - yappi \ - cffi \ - ipdb \ - pandas \ - matplotlib \ - py3nvml \ - pyarrow \ - graphviz \ - astor \ - boto3 \ - tqdm \ - sentencepiece \ - msgpack \ - requests \ - pandas \ - sphinx \ - sphinx_rtd_theme \ - scipy \ - numpy \ - scikit-learn \ - nvidia-ml-py3 \ - mpi4py +# # deepspeed +# ENV STAGE_DIR=/tmp +# RUN <> /etc/ssh/sshd_config +# cp /etc/ssh/sshd_config ${STAGE_DIR}/sshd_config +# sed "0,/^#Port 22/s//Port 22/" ${STAGE_DIR}/sshd_config > /etc/ssh/sshd_config +# EOT + +# # Mellanox OFED +# WORKDIR ${STAGE_DIR} +# ENV MLNX_OFED_VERSION=4.9-7.1.0.0 +# RUN wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf - && \ +# cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 && \ +# ./mlnxofedinstall --user-space-only --without-fw-update --all -q && \ +# cd ${STAGE_DIR} && \ +# rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64* +# # nv_peer_mem +# ENV NV_PEER_MEM_VERSION=1.2 +# ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0 +# RUN mkdir -p ${STAGE_DIR} && \ +# git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory && \ +# cd ${STAGE_DIR}/nv_peer_memory && \ +# ./build_module.sh && \ +# cd ${STAGE_DIR} && \ +# tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz && \ +# cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} && \ +# apt-get update && \ +# apt-get install -y dkms 
&& \ +# dpkg-buildpackage -us -uc && \ +# dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb +# # OPENMPI +# ENV OPENMPI_BASEVERSION=4.1 +# ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6 +# RUN < /usr/local/mpi/bin/mpirun && \ +# echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun && \ +# chmod a+x /usr/local/mpi/bin/mpirun +# # Some Packages +# RUN <> /etc/sudoers -# # Change to non-root privilege -USER deepspeed -# DeepSpeed -RUN git clone https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed -RUN cd ${STAGE_DIR}/DeepSpeed && \ - git checkout . && \ - git checkout master && \ - ./install.sh --pip_sudo -RUN rm -rf ${STAGE_DIR}/DeepSpeed -RUN python -c "import deepspeed; print(deepspeed.__version__)" \ No newline at end of file +RUN <