# GPU cluster development image: CUDA 12.1 / cuDNN 8 on Ubuntu 20.04 with
# Mellanox OFED user-space libraries, nv_peer_mem, OpenMPI, a Miniconda
# environment named "ldh", and an SSH daemon as the default process.
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04

# Run every RUN step under bash with pipefail so that a failed download in a
# `wget ... | tar ...` pipeline aborts the build instead of being masked by
# the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# NOTE(review): this stays an ENV (not ARG) so containers and derived images
# keep seeing it, as before; consider making it build-only if nothing relies
# on it at runtime.
ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /workspace

# Temporary installation directory
ENV STAGE_DIR=/tmp
RUN mkdir -p ${STAGE_DIR}

# Basic utilities. The apt package index is removed at the end of every layer
# that uses it, so each later apt layer runs its own `apt-get update`.
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        autotools-dev \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        emacs \
        g++ \
        gcc \
        htop \
        iftop \
        iotop \
        iputils-ping \
        less \
        llvm-dev \
        net-tools \
        nfs-common \
        openssh-client \
        openssh-server \
        pdsh \
        rsync \
        software-properties-common \
        sudo \
        tmux \
        unzip \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Git from the distribution archive (the git-core PPA is intentionally not
# enabled: `add-apt-repository ppa:git-core/ppa -y` was commented out).
RUN apt-get update && \
    apt-get install -y git && \
    git --version && \
    rm -rf /var/lib/apt/lists/*

# SSH daemon configuration:
#  - create the privilege-separation directory sshd expects at start-up
#    (harmless if the package already created it),
#  - keep SSH clients alive from the server side,
#  - uncomment the first "#Port 22" line.
RUN mkdir -p /run/sshd && \
    echo "ClientAliveInterval 30" >> /etc/ssh/sshd_config && \
    sed -i "0,/^#Port 22/s//Port 22/" /etc/ssh/sshd_config

# Mellanox OFED (user-space only, no firmware update).
# The apt index is refreshed in this same layer for libnuma-dev and stays
# available while the installer runs (presumably it may pull prerequisites
# through apt; verify), then it is removed again.
# NOTE(review): the archive is fetched over plain HTTP without a checksum;
# consider an HTTPS URL and a sha256 verification.
ENV MLNX_OFED_VERSION=4.9-7.1.0.0
RUN apt-get update && \
    apt-get install -y libnuma-dev && \
    cd ${STAGE_DIR} && \
    wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64.tgz | tar xzf - && \
    cd MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64 && \
    ./mlnxofedinstall --user-space-only --without-fw-update --all -q && \
    cd ${STAGE_DIR} && \
    rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu20.04-x86_64* && \
    rm -rf /var/lib/apt/lists/*

# nv_peer_mem: build the Debian package from the tagged source and install
# it. The source checkout and build artifacts are deleted in the same layer
# so they do not remain in the image.
ENV NV_PEER_MEM_VERSION=1.2
ENV NV_PEER_MEM_TAG=${NV_PEER_MEM_VERSION}-0
RUN mkdir -p ${STAGE_DIR} && \
    git clone https://github.com/Mellanox/nv_peer_memory.git --branch ${NV_PEER_MEM_TAG} ${STAGE_DIR}/nv_peer_memory && \
    cd ${STAGE_DIR}/nv_peer_memory && \
    ./build_module.sh && \
    cd ${STAGE_DIR} && \
    tar xzf ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_VERSION}.orig.tar.gz && \
    cd ${STAGE_DIR}/nvidia-peer-memory-${NV_PEER_MEM_VERSION} && \
    apt-get update && \
    apt-get install -y dkms && \
    dpkg-buildpackage -us -uc && \
    dpkg -i ${STAGE_DIR}/nvidia-peer-memory_${NV_PEER_MEM_TAG}_all.deb && \
    cd ${STAGE_DIR} && \
    rm -rf ${STAGE_DIR}/nv_peer_memory ${STAGE_DIR}/nvidia-peer-memory* && \
    rm -rf /var/lib/apt/lists/*

# OpenMPI built from source into /usr/local/openmpi-<version>, exposed through
# the stable symlink /usr/local/mpi. The `test -f` step is a sanity check that
# the C++ compiler wrapper was actually installed.
ENV OPENMPI_BASEVERSION=4.1
ENV OPENMPI_VERSION=${OPENMPI_BASEVERSION}.6
RUN cd ${STAGE_DIR} && \
    wget -q -O - https://download.open-mpi.org/release/open-mpi/v${OPENMPI_BASEVERSION}/openmpi-${OPENMPI_VERSION}.tar.gz | tar xzf - && \
    cd openmpi-${OPENMPI_VERSION} && \
    ./configure --prefix=/usr/local/openmpi-${OPENMPI_VERSION} && \
    make -j"$(nproc)" install && \
    ln -s /usr/local/openmpi-${OPENMPI_VERSION} /usr/local/mpi && \
    test -f /usr/local/mpi/bin/mpic++ && \
    cd ${STAGE_DIR} && \
    rm -r ${STAGE_DIR}/openmpi-${OPENMPI_VERSION}
ENV PATH=/usr/local/mpi/bin:${PATH} \
    LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}

# Wrap mpirun so that it runs as root by default: the real binary is renamed
# to mpirun.real and a small script forwards all arguments to it.
RUN mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun && \
    echo 'mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun && \
    chmod a+x /usr/local/mpi/bin/mpirun

# Additional development libraries and tools
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libaio-dev \
        libcupti-dev \
        libjpeg-dev \
        libpng-dev \
        libsndfile-dev \
        screen && \
    rm -rf /var/lib/apt/lists/*

# Miniconda installed into /opt/conda. The installer is downloaded to
# ${STAGE_DIR} and removed in the same layer.
# NOTE(review): "Miniconda3-latest" is unpinned and unverified; pin a specific
# installer version and check its sha256 for reproducible builds.
RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ${STAGE_DIR}/miniconda.sh && \
    bash ${STAGE_DIR}/miniconda.sh -b -p /opt/conda && \
    rm ${STAGE_DIR}/miniconda.sh && \
    /opt/conda/bin/conda init bash
ENV PATH=/opt/conda/bin:${PATH}

# Project conda environment "ldh"; package caches are dropped in the same
# layer to keep the image smaller.
COPY environment.yaml /workspace/environment.yaml
RUN conda env create -n ldh -f /workspace/environment.yaml && \
    conda clean -afy

# Use the Aliyun PyPI mirror over HTTPS (no trusted-host override is needed
# once the index is served over TLS) and upgrade pip inside the environment.
RUN conda run -n ldh pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
    conda run -n ldh pip install --no-cache-dir -U pip

# SSH login setup.
# SECURITY NOTE(review): the root password is hard-coded to "root" and
# password-based root login is enabled. This is kept for compatibility with
# existing usage, but the image must not be exposed on untrusted networks;
# prefer key-based authentication.
RUN echo 'root:root' | chpasswd && \
    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config

# Remove the distribution-provided PyYAML files from the system Python
# dist-packages directory.
RUN rm -rf /usr/lib/python3/dist-packages/yaml && \
    rm -rf /usr/lib/python3/dist-packages/PyYAML-*

# Documentation only: the SSH daemon listens on port 22.
EXPOSE 22

# Run sshd in the foreground directly (exec form, no intermediate shell) so it
# receives the stop signal from `docker stop`.
CMD ["/usr/sbin/sshd", "-D"]