# syntax=docker/dockerfile:1
# NOTE: Building this image requires docker version >= 23.0.
#
# For reference:
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
#
# FIXME(review): this copy of the file appears to have been damaged in transit:
#   * all line breaks and runs of whitespace were collapsed (spacing inside
#     quoted sed patterns may therefore differ from the original), and
#   * text between a "<" and the following ">" seems to have been dropped,
#     which removed the openings (and part of the bodies) of several heredocs.
# Heredoc openers below were restored only where the surviving EOT/EOF
# terminator determines them. Every place where commands are missing is marked
# FIXME(review) with the surviving residue quoted verbatim. Restore those
# commands from version control before relying on this image.

ARG TAG_VERSION="12.4.1"

# ---------------------------------------------------------------------------
# Stage 1: build Apptainer from source. Only /usr/local/apptainer and
# /usr/local/go are copied out of this stage.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:${TAG_VERSION}-cudnn-devel-ubuntu22.04 AS apptainerbuilder

# Lower-case proxy variables are exported for tools that only read that form.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV http_proxy=${HTTP_PROXY}
ENV https_proxy=${HTTPS_PROXY}

# ARG (not ENV): the value is visible to RUN steps of this stage without being
# persisted in the image environment.
ARG DEBIAN_FRONTEND="noninteractive"

# Install required packages
RUN apt-get update && apt-get install -y \
        bash \
        gcc \
        git \
        libc-dev \
        libseccomp-dev \
        libssl-dev \
        libuuid1 \
        linux-headers-generic \
        make \
        pkg-config \
        uuid-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Go
ARG GO_VERSION="1.21.13"
RUN wget https://golang.org/dl/go${GO_VERSION}.linux-amd64.tar.gz && \
    tar -C /usr/local -xzf go${GO_VERSION}.linux-amd64.tar.gz && \
    rm go${GO_VERSION}.linux-amd64.tar.gz

# Set Go environment variables
ENV PATH="/usr/local/go/bin:${PATH}"

# Build Apptainer
# NOTE(review): the default commit-ish is the moving "main" branch, so builds
# are not reproducible unless APPTAINER_COMMITISH is overridden with a tag/SHA.
ARG APPTAINER_COMMITISH="main"
ARG MCONFIG_OPTIONS="--with-suid"
WORKDIR /go/src/github.com/apptainer
RUN git clone https://github.com/apptainer/apptainer.git \
    && cd apptainer \
    && git checkout "$APPTAINER_COMMITISH" \
    && ./mconfig $MCONFIG_OPTIONS -p /usr/local/apptainer \
    && cd builddir \
    && make \
    && make install

# The former "apt-get remove ... && apt-get clean" step was dropped: it ran in
# its own layer of this builder stage, and nothing it touched is copied into
# the final image.

# ---------------------------------------------------------------------------
# Stage 2: runtime image.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:${TAG_VERSION}-cudnn-devel-ubuntu22.04

# Copy Apptainer and Go
COPY --from=apptainerbuilder /usr/local/apptainer /usr/local/apptainer
COPY --from=apptainerbuilder /usr/local/go /usr/local/go
# GO_PATH is only used here to compose PATH.
ENV GO_PATH="/usr/local/go"
ENV PATH="/usr/local/apptainer/bin:${GO_PATH}/bin:$PATH"
ENV APPTAINER_TMPDIR="/tmp/tmp-apptainer"

ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV http_proxy=${HTTP_PROXY}
ENV https_proxy=${HTTPS_PROXY}

# ARG (not ENV) so that "noninteractive" is not baked into running containers.
ARG DEBIAN_FRONTEND="noninteractive"

# NOTE(review): the root password defaults to "root" and is stored in the image
# environment; combined with "PermitRootLogin yes" below this is only suitable
# for trusted networks. Consider a BuildKit secret mount instead.
ARG ROOT_PASSWD="root"
ENV ROOT_PASSWD=${ROOT_PASSWD}

ENV SSH_PORT=2222

WORKDIR /root
# pipefail makes a step fail when the left-hand side of a pipe fails
# (e.g. the "curl ... | bash" installer below).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# base tools
RUN <<EOT
# FIXME(review): the beginning of this step is missing. Surviving residue:
#   "RUN < ~/.ssh/config"
# i.e. everything between the heredoc opener and a redirect into ~/.ssh/config
# was lost. Restore it before building.
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.bak
# Allow root login with password and public key on the custom port.
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/^\(\s*\)GSSAPIAuthentication yes/\1GSSAPIAuthentication no/' /etc/ssh/ssh_config
sed -i "s/^#Port 22/Port ${SSH_PORT}/" /etc/ssh/sshd_config
# No sudo here: the build already runs as root.
sed -i "s/# Port 22/Port ${SSH_PORT}/" /etc/ssh/ssh_config
# Generate a key pair for root and authorize it for logins to this image.
ssh-keygen -t rsa -b 4096 -f /root/.ssh/id_rsa -N "" <<< y
cat ~/.ssh/id_rsa.pub >> ~/.ssh/auth
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys2
chmod 600 /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys2
# -p: do not fail if the directory already exists.
mkdir -p /var/run/sshd
echo "root:${ROOT_PASSWD}" | chpasswd
mkdir -p ~/.pip
# install pixi
curl -fsSL https://pixi.sh/install.sh | bash
EOT

# install NVIDIA DOCA 2.7
# FIXME(review): text is missing after the residue on the next line. The lost
# span presumably held the rest of the commented-out DOCA step and the opening
# (and first commands) of the RUN heredoc that the EOT below closes.
# RUN < -t bw -p -n
RUN <<EOT
# Test reading the UCX configuration
# ucx_read_profile
# Check UCX processes
# mpirun -np 2 -mca pml ucx -x UCX_NET_DEVICES=mlx5_0:1 ./your_mpi_program
# CUDA support check
ucx_info -c
ucx_info -d
# ompi_info | grep ucx
EOT

# # mpich install with ucx
# # UCX is already embedded in the MPICH tarball, so you do not need to separately download UCX.
# ENV MPICH_HOME=/opt/mpich
# FIXME(review): text is missing after the residue on the next line, up to the
# redirect into ./test_mpi_cuda.cu. Whatever set up MPI (and MPI_HOME /
# CUDA_HOME, which the nvcc line reads) is not visible in this copy.
# RUN <
RUN <<EOT
# FIXME(review): the header names after each "#include" in the file written
# below were lost; the source will not compile until they are restored.
# (The program calls MPI_*, printf and cudaDeviceSynchronize.)
cat <<EOF > ./test_mpi_cuda.cu
#include
#include
#include

__global__ void hello_cuda() {
    printf("Hello from CUDA kernel! Thread id: %d\n", threadIdx.x);
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("Hello from MPI process %d!\n", rank);

    // Launch CUDA kernel
    hello_cuda<<<1, 10>>>();
    cudaDeviceSynchronize(); // Wait for the CUDA kernel to finish

    MPI_Finalize();
    return 0;
}
EOF
nvcc -o test_mpi_cuda test_mpi_cuda.cu -I${CUDA_HOME}/include -I${MPI_HOME}/include -L${MPI_HOME}/lib -lcudart -lmpi
# mpirun --allow-run-as-root -np 2 ./test_mpi_cuda
EOT

ARG CONDA_ENV_NAME="mineru"
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}
ARG PYTHON_VERSION="3.10"
ENV PYTHON_VERSION=${PYTHON_VERSION}

# https://github.com/opendatalab/PDF-Extract-Kit
RUN <<EOT
# FIXME(review): the beginning of this step is missing. Surviving residue:
#   "RUN <> ~/.bashrc"
# i.e. everything up to an append into ~/.bashrc was lost (the step that
# provides /opt/conda is not visible in this copy).
. /opt/conda/etc/profile.d/conda.sh
conda init bash
conda config --set show_channel_urls true
# Configure the .condarc file
cat <<EOF > ~/.condarc
channels:
  - conda-forge
  - bioconda
  - pytorch
  - pytorch-nightly
  - nvidia
  - defaults
show_channel_urls: true
EOF
source /opt/conda/etc/profile.d/conda.sh
conda create -n ${CONDA_ENV_NAME} python=${PYTHON_VERSION} -y
conda activate ${CONDA_ENV_NAME}
# python -m pip install magic-pdf[full-cpu] --index-url=http://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com
# python -m pip install magic-pdf[full]==0.6.2b1 detectron2 --extra-index-url https://myhloli.github.io/wheels/ -i https://pypi.tuna.tsinghua.edu.cn/simple
# The requirement is quoted so the shell never treats "[full]" as a glob;
# --no-cache-dir keeps pip's download cache out of the layer.
python -m pip install --no-cache-dir "magic-pdf[full]==0.7.0b1" detectron2 --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
# NOTE(review): this index is the cu118 one while the base image tag defaults
# to CUDA 12.4.1 - confirm the combination is intended.
python -m pip install --no-cache-dir paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
# pip install detectron2 --extra-index-url https://myhloli.github.io/wheels/
# python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# python -m pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
# python -m pip install -U python-alist
cat <<EOF > ~/magic-pdf.json
{
    "temp-output-dir":"/results",
    "models-dir":"/models",
    "device-mode":"cuda"
}
EOF
mkdir -p /app
EOT

ENV PATH=/usr/local/cuda/bin:$PATH

# Add the deepspeed user
# FIXME(review): the whole body of this step was lost. Surviving residue:
#   "RUN <> /etc/sudoers"
# i.e. the step ended with an append into /etc/sudoers. As written below the
# step does nothing; restore the user-creation commands before building.
RUN <<EOT
# [missing commands] >> /etc/sudoers
EOT

#
# # Change to non-root privilege
# USER deepspeed

RUN <