# dockerfile_dp/pdf_clean/Dockerfile.mineru310_mpich_complie

# syntax=docker/dockerfile:1
# NOTE: Building this image requires Docker version >= 23.0.
#
# For reference:
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
# CUDA image tag; declared before the first FROM so every FROM line can use it.
ARG TAG_VERSION="12.4.1"

# Builder stage: Apptainer is compiled here on top of the CUDA/cuDNN devel image.
FROM nvidia/cuda:${TAG_VERSION}-cudnn-devel-ubuntu22.04 AS apptainerbuilder

# Expose the build-time proxy arguments as lowercase http_proxy/https_proxy
# environment variables for the commands run in this stage.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV http_proxy=${HTTP_PROXY} \
    https_proxy=${HTTPS_PROXY}

# apt frontend mode, overridable at build time.
ARG DEBIAN_FRONTEND="noninteractive"
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
# Install the required packages (one per line, alphabetically sorted), then
# drop the apt package lists in the same layer.
RUN apt-get update \
    && apt-get install -y \
        bash \
        gcc \
        git \
        libc-dev \
        libseccomp-dev \
        libssl-dev \
        libuuid1 \
        linux-headers-generic \
        make \
        pkg-config \
        uuid-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Go: download the release tarball for GO_VERSION, unpack it under
# /usr/local (creating /usr/local/go) and delete the downloaded archive.
ARG GO_VERSION="1.21.13"
RUN GO_TARBALL="go${GO_VERSION}.linux-amd64.tar.gz" \
    && wget "https://golang.org/dl/${GO_TARBALL}" \
    && tar -C /usr/local -xzf "${GO_TARBALL}" \
    && rm "${GO_TARBALL}"

# Put the Go toolchain on PATH.
ENV PATH="/usr/local/go/bin:${PATH}"

# Build Apptainer from source.
#   APPTAINER_COMMITISH  git ref that is checked out before building
#   MCONFIG_OPTIONS      extra options handed to ./mconfig
# The result is installed under the /usr/local/apptainer prefix.
ARG APPTAINER_COMMITISH="main"
ARG MCONFIG_OPTIONS="--with-suid"
WORKDIR /go/src/github.com/apptainer
RUN git clone https://github.com/apptainer/apptainer.git apptainer \
    && cd apptainer \
    && git checkout "${APPTAINER_COMMITISH}" \
    && ./mconfig ${MCONFIG_OPTIONS} -p /usr/local/apptainer \
    && make -C builddir \
    && make -C builddir install

# Clean up: remove wget, gcc and git from the builder stage, autoremove their
# now-unused dependencies and clear the apt cache.
# NOTE(review): the final stage only COPYs /usr/local/apptainer and
# /usr/local/go out of this stage, so this step presumably has no effect on the
# final image — confirm whether it is still needed.
RUN apt-get remove -y wget gcc git && \
    apt-get autoremove -y && \
    apt-get clean

# Final stage, based on the same CUDA/cuDNN devel image as the builder stage.
FROM nvidia/cuda:${TAG_VERSION}-cudnn-devel-ubuntu22.04

# Copy the Apptainer installation and the Go toolchain from the builder stage.
COPY --from=apptainerbuilder /usr/local/apptainer /usr/local/apptainer
COPY --from=apptainerbuilder /usr/local/go /usr/local/go

# GO_PATH must be set in its own ENV instruction: the PATH line below reads it.
ENV GO_PATH="/usr/local/go"
ENV PATH="/usr/local/apptainer/bin:${GO_PATH}/bin:${PATH}" \
    APPTAINER_TMPDIR="/tmp/tmp-apptainer"

# Expose the build-time proxy arguments as lowercase environment variables.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV http_proxy=${HTTP_PROXY} \
    https_proxy=${HTTPS_PROXY}

# apt frontend mode, overridable at build time.
ARG DEBIAN_FRONTEND="noninteractive"
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}

# Root password and SSH port consumed by the SSH setup step further down.
# NOTE(review): ROOT_PASSWD is persisted in the image environment — confirm
# that this is acceptable for the environments this image runs in.
ARG ROOT_PASSWD="root"
ENV ROOT_PASSWD=${ROOT_PASSWD} \
    SSH_PORT=2222

WORKDIR /root
SHELL ["/bin/bash", "-c"]

# base tools
# Installs the base OS packages, pipx/nvitop, configures the SSH client and
# server (root login, password + public-key auth, port ${SSH_PORT}), sets the
# root password from ROOT_PASSWD and installs pixi.
RUN <<EOT
#!/bin/bash
# Heredoc lines are not implicitly &&-chained: stop at the first failing
# command (or failing pipeline stage) instead of producing a half-configured image.
set -eo pipefail

apt-get update
apt-get install -y libgl1-mesa-glx bash-completion wget curl htop jq vim bash libaio-dev build-essential openssh-server openssh-client python3 python3-pip python3-venv bzip2
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
add-apt-repository ppa:git-core/ppa -y
apt-get install -y git libnuma-dev wget
pip install pipx
pipx install nvitop
pipx ensurepath
# Best effort, as before: whatever ~/.bashrc does must not abort the build.
. ~/.bashrc || true

# Configure SSH for password and public key authentication
mkdir -p ~/.ssh
# Create or overwrite the SSH client configuration file ~/.ssh/config
# - Host *: settings applied to every host
# - ForwardAgent yes: enable SSH agent forwarding
# - StrictHostKeyChecking no: disable host key checking and accept new host keys automatically
printf "Host * \n    ForwardAgent yes\nHost *\n    StrictHostKeyChecking no" > ~/.ssh/config
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.bak
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/^\(\s*\)GSSAPIAuthentication yes/\1GSSAPIAuthentication no/' /etc/ssh/ssh_config
sed -i "s/^#Port 22/Port ${SSH_PORT}/" /etc/ssh/sshd_config
# The build already runs as root, so no sudo is needed here.
sed -i "s/#   Port 22/Port ${SSH_PORT}/" /etc/ssh/ssh_config

# Generate a root key pair and authorize it for logins to this image.
# NOTE(review): the private key and the root password end up baked into the
# image layers — confirm this is intended.
ssh-keygen -t rsa -b 4096 -f /root/.ssh/id_rsa -N "" <<< y
# NOTE(review): ~/.ssh/auth is not referenced anywhere else in this file; kept
# for backward compatibility — confirm whether it is needed.
cat ~/.ssh/id_rsa.pub >> ~/.ssh/auth
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys2
chmod 600 /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys2
# -p: do not fail when the directory already exists.
mkdir -p /var/run/sshd
echo "root:${ROOT_PASSWD}" | chpasswd
mkdir -p ~/.pip

# install pixi
curl -fsSL https://pixi.sh/install.sh | bash
EOT

# install NVIDIA DOCA 2.7
# RUN <<EOT
# #!/bin/bash
# wget https://www.mellanox.com/downloads/DOCA/DOCA_v2.7.0/host/doca-host_2.7.0-209000-24.04-ubuntu2204_amd64.deb
# sudo dpkg -i doca-host_2.7.0-209000-24.04-ubuntu2204_amd64.deb
# sudo apt-get update
# sudo apt-get -y install doca-all
# EOT
# Build tooling, JDKs and the NVIDIA user-space compute libraries.
# - CUDA dependencies are provided by libnvidia-compute-${NV_DRIVER_VERSION}.
# - cuda-compat* from the nvidia/cuda x86_64 image is removed, because
#   libnvidia-compute* provides the CUDA dependencies instead.
# The removal is tolerant (`|| true`), since apt-get fails when a pattern
# matches no package. It is grouped on its own so that an install failure
# earlier in the chain is no longer masked, and `apt-get autoremove` now always
# runs afterwards instead of only when an earlier step had failed.
ARG NV_DRIVER_VERSION="535"
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata \
    && apt-get install -y \
        apt-file \
        automake \
        default-jdk \
        dh-make \
        g++ \
        git \
        openjdk-8-jdk \
        libcap2 \
        libnuma-dev \
        libtool \
        libnvidia-compute-${NV_DRIVER_VERSION} \
        make \
        maven \
        pkg-config \
        udev \
        wget \
        environment-modules \
    && { apt-get remove -y 'openjdk-11-*' 'cuda-compat*' || true; } \
    && apt-get autoremove -y

# https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/
# Installs the user-space part of Mellanox OFED for Ubuntu 22.04 x86_64.
ARG MLNX_OFED_VERSION="23.10-3.2.2.0"
ENV MLNX_OFED_VERSION=${MLNX_OFED_VERSION}
RUN <<EOT
#!/bin/bash
# Stop at the first failing command or failing pipeline stage (wget | tar).
set -eo pipefail

# STAGE_DIR was referenced but never defined: `mkdir -p` failed, `cd` fell back
# to $HOME and the final `rm -rf` pointed at /MLNX_OFED_LINUX-*, so the
# extracted installer was left behind in the image. Use an explicit scratch
# directory and remove it completely once the installer has run.
STAGE_DIR=/tmp/mlnx-ofed-stage
OFED_DIR="MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu22.04-x86_64"

apt-get update
apt-get install -y libnvidia-compute-${NV_DRIVER_VERSION}
apt-get install -y automake swig pciutils libltdl-dev libnl-3-dev libfuse2 chrpath graphviz libgfortran5 libusb-1.0-0 tcl debhelper libpci3 pkg-config udev autoconf lsof libmnl0 gfortran libnl-route-3-200 tk kmod ethtool bison flex m4 libnl-route-3-dev

# install Mellanox OFED
mkdir -p "${STAGE_DIR}"
cd "${STAGE_DIR}"
wget -q -O - "http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/${OFED_DIR}.tgz" | tar xzf -
cd "${OFED_DIR}"
./mlnxofedinstall --user-space-only --without-fw-update --skip-distro-check --without-ucx --without-hcoll --without-openmpi --without-mpich --without-sharp --all --force -q
cd /
rm -rf "${STAGE_DIR}"
EOT

# key=value form (the space-separated ENV form is legacy). ${VAR:+:${VAR}}
# appends the previous value only when one exists, so an unset variable no
# longer leaves a trailing ':' (an empty path entry) behind.
ENV CPATH=/usr/local/cuda/include${CPATH:+:${CPATH}}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
ENV LIBRARY_PATH=/usr/local/cuda/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}

# install ucx
# https://github.com/openucx/ucx
# OpenMPI and OpenSHMEM installation with UCX
# https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX
# https://openucx.readthedocs.io/en/master
# Running in Docker containers
# https://openucx.readthedocs.io/en/master/running.html#running-in-docker-containers
#
# Builds UCX from the upstream master branch with CUDA, Go and verbs/mlx5
# support and installs it under ${UCX_HOME}.
ENV UCX_HOME=/usr/local/ucx
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${UCX_HOME}/bin:$PATH
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${UCX_HOME}/lib:$LD_LIBRARY_PATH
RUN <<EOT
#!/bin/bash
# Heredoc lines are not implicitly &&-chained: abort as soon as a build step
# fails instead of continuing with a missing or partial UCX installation.
set -e

# Debugging tools and UCX build prerequisites. The build runs as root, so sudo
# is not needed; a single index refresh covers both installs.
apt-get update
apt-get install -y gdb valgrind
apt-get install -y build-essential libnuma-dev pkg-config libfuse3-dev
# apt install -y openmpi-bin openmpi-common openmpi-doc openmpi-debug libopenmpi-dev
# apt install -y libucx0-dbg libucs0-dbg libucm0-dbg libuct0-dbg libibverbs1-dbg librdmacm1-dbg libmlx5-1-dbg
git clone https://github.com/openucx/ucx.git
cd ucx
# git checkout v1.15.0
git checkout master
./autogen.sh
mkdir build
cd build
# make clean
# make distclean
# Performance-optimized configuration: ../contrib/configure-release --prefix=/usr/local/ucx --with-cuda=${CUDA_HOME}
# Debug/development configuration:     ../contrib/configure-devel --prefix=/usr/local/ucx --with-cuda=${CUDA_HOME}
# Default configuration:               ../configure --prefix=/usr/local/ucx --with-cuda=${CUDA_HOME}
# ../contrib/configure-release --prefix=${UCX_HOME} --with-cuda=${CUDA_HOME} --with-gdrcopy=/usr/local/gdrcopy
# ../contrib/configure-release --prefix=/usr/local/ucx \
# --with-cuda= /usr/local/cuda-12.5 \
# --with-mlx5 \
# --with-rc \
# --with-ud \
# --with-dc \
# --with-dm \
# --with-verbs
../contrib/configure-release --prefix=${UCX_HOME} \
    --with-cuda=/usr/local/cuda \
    --with-mlx5 \
    --with-go=/usr/local/go \
    --with-rc \
    --with-ud \
    --with-dc \
    --with-dm \
    --with-verbs
make -j$(nproc)
make install
# ucx_info -a
# Performance test
# ucx_perftest -d <device> -t bw -p <protocol> -n <num_iterations>
# Test reading the UCX profile
# ucx_read_profile
# Check UCX with an MPI run
# mpirun -np 2 -mca pml ucx -x UCX_NET_DEVICES=mlx5_0:1 ./your_mpi_program
# CUDA support check. These are diagnostics only, so they stay best effort and
# must not fail the build.
ucx_info -c || true
ucx_info -d || true
# ompi_info | grep ucx
EOT

# # mpich install with ucx
# # UCX is already embedded in the MPICH tarball, so you do not need to separately download UCX.
# ENV MPICH_HOME=/opt/mpich
# RUN <<EOT
# #!/bin/bash
# wget -c https://www.mpich.org/static/downloads/4.2.2/mpich-4.2.2.tar.gz
# cd mpich-4.2.2
# mkdir build
# cd build
# ../configure --prefix=${MPICH_HOME} --with-device=ch4:ucx --with-cuda=/usr/local/cuda
# make -j$(nproc)
# make install
# pip install mpi4py
# EOT

# Build and install Open MPI and OpenSHMEM
# Open MPI runtime tuning
# By default Open MPI enables its built-in byte transfer layers (BTLs), which may add software overhead. Some BTLs can be disabled:
# $ mpirun -np 2 -mca pml ucx --mca btl ^vader,tcp,openib,uct -x UCX_NET_DEVICES=mlx5_0:1 ./app
# Running UCX on Cray systems
# When running UCX on a Cray system, UCX unified mode has to be enabled explicitly:
# $ mpirun -np 2 -mca pml ucx --mca btl ^vader,tcp,openib,uct -x UCX_UNIFIED_MODE=1 ./app
# UCX is a communication library for high-performance computing; it optimizes performance by supporting multiple transports (such as IB, RoCE and TCP).
# Building Open MPI: https://docs.open-mpi.org/en/v5.0.x/tuning-apps/networking/cuda.html
# https://cuterwrite.top/p/openmpi-with-ucx/
# http://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX
#
# Open MPI is installed under MPI_HOME; its bin/lib/include directories are
# added to PATH, LD_LIBRARY_PATH and CPATH below.
ENV MPI_HOME=/usr/local/openmpi
ENV PATH=${MPI_HOME}/bin:/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${MPI_HOME}/lib:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
ENV CPATH=/usr/local/cuda/include:${MPI_HOME}/include:${CUDA_HOME}/include:$CPATH
# export C_INCLUDE_PATH=/usr/local/cuda/include:$C_INCLUDE_PATH
# export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
# export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Clones Open MPI (main branch, with submodules), configures it against UCX and
# CUDA, builds and installs it, then compiles a small MPI + CUDA test program.
# NOTE(review): the script does not use `set -e`, so a failing step does not
# stop the build — confirm whether that is intended.
RUN <<EOT
#!/bin/bash
apt update &&  apt install -y autoconf automake libtool flex
/usr/bin/python3 -m pip install cython
# git clone https://github.com/open-mpi/ompi.git
# git submodule update --init --recursive
git clone --recursive https://github.com/open-mpi/ompi.git
cd ompi
git checkout main
# make clean
# make distclean
./autogen.pl
mkdir build
cd build
# NOTE With OpenMPI 4.0 and above, there could be compilation errors from "btl_uct" component. This component is not critical for using UCX; so it could be disabled this way:
# Open MPI 4.0 and above may hit compilation errors from the "btl_uct" component; the component can be disabled:
# ./configure ... --enable-mca-no-build=btl-uct ...
# Disable btl/uct at run time:
# mpirun -np 2 -mca pml ucx -mca btl ^uct -x UCX_NET_DEVICES=mlx5_0:1 ./app
# With CUDA support provided through UCX (the preferred mechanism), --with-cuda is not needed when building Open MPI
# Check whether UCX supports CUDA
# ucx_info -v
# ../configure --prefix=${MPI_HOME} --with-ucx=${UCX_HOME} --with-cuda=${CUDA_HOME} --enable-mca-no-build=btl-uct
# --enable-python-bindings enables the Python bindings, usually for integration with libraries such as mpi4py
# --without-hcoll: disable HCOLL (the collective communication library).
# --enable-python-bindings: enable the Python bindings so MPI can be called from Python.
# --enable-mpirun-prefix-by-default: enable mpirun's prefix option by default.
# --prefix=${MPI_HOME}: installation path.
# --with-ucx=${UCX_HOME}: path to the UCX library.
# --with-cuda=${CUDA_HOME}: path to the CUDA library.
# --enable-mca-dso=btl-smcuda,rcache-rgpusm,rcache-gpusm,accelerator-cuda: build these MCA (Modular Component Architecture) components as dynamic shared objects.
# --enable-mca-no-build=btl-uct: do not build the UCT (UCX transport layer) module.
# -x UCX_LOG_LEVEL=debug turns on UCX debug logging
# NOTE: if UCX was built with CUDA support (--with-cuda), UCX already includes CUDA RDMA support. In that case --with-cuda may be omitted when building Open MPI, because Open MPI gets CUDA support through UCX automatically
../configure --with-cuda=/usr/local/cuda --without-hcoll --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-ucx=${UCX_HOME} --enable-mca-dso=btl-smcuda,rcache-rgpusm,rcache-gpusm,accelerator-cuda --enable-mca-no-build=btl-uct --with-python=/usr/bin/python3
make -j$(nproc)
make install
# Verify CUDA support
# ompi_info | grep "MPI extensions"
# ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
# Enable CUDA debug output when running MPI programs
# mpirun --mca opal_cuda_verbose 10 ...
# mpirun --mca mpi_common_cuda_verbose 10 ...
# ompi_info | grep cuda
# ucx_info -c
# Check Open MPI's UCX support with the following commands
# Seeing btl: smcuda and other CUDA-related extensions in `ompi_info | grep cuda` does not mean every CUDA-related feature works.
# ompi_info | grep ucx
# unit test in mpi cuda
# Writes the test source into the current directory (ompi/build) and compiles
# it with nvcc; the mpirun invocation below is left commented out.
cat <<EOF > ./test_mpi_cuda.cu
#include <mpi.h>
#include <cuda_runtime.h>
#include <stdio.h>

__global__ void hello_cuda() {
    printf("Hello from CUDA kernel! Thread id: %d\n", threadIdx.x);
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    printf("Hello from MPI process %d!\n", rank);

    // Launch CUDA kernel
    hello_cuda<<<1, 10>>>();
    cudaDeviceSynchronize();  // Wait for the CUDA kernel to finish

    MPI_Finalize();
    return 0;
}
EOF
nvcc -o test_mpi_cuda test_mpi_cuda.cu -I${CUDA_HOME}/include -I${MPI_HOME}/include -L${MPI_HOME}/lib -lcudart -lmpi
# mpirun --allow-run-as-root -np 2 ./test_mpi_cuda
EOT

# Name and Python version of the conda environment that hosts MinerU (magic-pdf).
ARG CONDA_ENV_NAME="mineru"
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
ARG PYTHON_VERSION="3.10"
ENV PYTHON_VERSION=${PYTHON_VERSION}
# https://github.com/opendatalab/PDF-Extract-Kit
# Installs Miniconda under /opt/conda, writes ~/.condarc, creates the
# ${CONDA_ENV_NAME} environment, installs magic-pdf / detectron2 /
# paddlepaddle-gpu into it and writes ~/magic-pdf.json.
RUN <<EOT
#!/bin/bash
# Heredoc lines are not implicitly &&-chained: stop at the first failure so a
# broken download or environment creation cannot silently produce an image
# without the Python stack.
set -eo pipefail

# install miniconda
# The original passed both `-O-` (stdout) and `-O /tmp/miniconda.sh`; only the
# last -O took effect, so state the intended target once.
wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh
bash /tmp/miniconda.sh -b -p /opt/conda
rm /tmp/miniconda.sh
# -f: do not fail if the link already exists.
ln -sf /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc
. /opt/conda/etc/profile.d/conda.sh
conda init bash

# Write the .condarc file. It already contains show_channel_urls, so the
# separate `conda config --set show_channel_urls true` call, whose result this
# file overwrote anyway, is dropped.
cat <<EOF > ~/.condarc
channels:
  - conda-forge
  - bioconda
  - pytorch
  - pytorch-nightly
  - nvidia
  - defaults
show_channel_urls: true
EOF

conda create -n "${CONDA_ENV_NAME}" python="${PYTHON_VERSION}" -y
conda activate "${CONDA_ENV_NAME}"
# python -m pip install magic-pdf[full-cpu] --index-url=http://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
# python -m pip install magic-pdf[full]==0.6.2b1 detectron2 --extra-index-url https://myhloli.github.io/wheels/ -i https://pypi.tuna.tsinghua.edu.cn/simple
# The requirement is quoted so the shell cannot treat "[full]" as a glob pattern.
python -m pip install 'magic-pdf[full]==0.7.0b1' detectron2 --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
# NOTE(review): this index serves cu118 wheels while the base image tag
# defaults to CUDA 12.4.1 — confirm the combination is intended.
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
# pip install detectron2 --extra-index-url https://myhloli.github.io/wheels/
# python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# python -m pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
# python -m pip install -U python-alist
cat <<EOF > ~/magic-pdf.json
{
    "temp-output-dir":"/results",
    "models-dir":"/models",
    "device-mode":"cuda"
}
EOF
mkdir -p /app
EOT

ENV PATH="/usr/local/cuda/bin:${PATH}"
# Add the "deepspeed" user (uid 1000, bash login shell, home directory
# created), put it into the sudo group and grant it passwordless sudo.
RUN <<EOT
#!/bin/bash
useradd -m -u 1000 -s /bin/bash deepspeed
usermod --append --groups sudo deepspeed
printf '%s\n' 'deepspeed ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers
EOT

# # # Change to non-root privilege
# USER deepspeed

# Drop the apt cache and package lists.
# NOTE(review): this runs in its own layer, so files created by earlier layers
# are hidden rather than reclaimed — confirm whether the size saving matters.
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Document the sshd port. It is taken from SSH_PORT (set to 2222 earlier in
# this stage) so it cannot drift from the port written into sshd_config.
EXPOSE ${SSH_PORT}

# Run sshd in the foreground (-D) as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]