This commit is contained in:
2024-09-09 20:36:26 +08:00
commit 247fc2baf0
4 changed files with 328 additions and 0 deletions

51
Dockerfile Normal file
View File

@@ -0,0 +1,51 @@
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.3.1-devel-ubuntu22.04

# Build-time variables (override with --build-arg).
# Root directory under which all software is installed.
ARG INSTALL_DIR="/opt"
# Upstream release versions that are downloaded and compiled.
ARG GROMACS_VERSION="2023.2"
ARG FFTW_VERSION="3.3.10"
# CUDA toolkit root; handed to CMake when GROMACS is configured.
ARG CUDA_TOOLKIT_PATH="/usr/local/cuda"
# Prefixes derived from INSTALL_DIR (which therefore has to be declared first).
# OPENMPI_PATH is declared but not referenced by any later instruction in this file.
ARG OPENMPI_PATH="${INSTALL_DIR}/software/openmpi"
ARG GMX_INSTALL_PREFIX="${INSTALL_DIR}/software/gmx"
# Install the build tooling and the distribution's OpenMPI/PMIx packages (heredoc form).
RUN <<EOT
#!/bin/bash
# Abort on the first failing command; without this a heredoc script only
# reports the exit status of its last line and earlier failures go unnoticed.
set -euo pipefail
# Keep apt from prompting during the build (scoped to this script only).
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get install -y cmake curl wget libpmix-dev openmpi-bin libopenmpi-dev
# Remove the package index in the same layer that created it so it does not
# end up in the image.
rm -rf /var/lib/apt/lists/*
mkdir -p "${INSTALL_DIR}/software"
EOT
# Build and install single-precision FFTW from source into ${INSTALL_DIR}/software/fftw.
WORKDIR ${INSTALL_DIR}/software
RUN <<EOF
#!/bin/bash
# Fail the build as soon as any step (download, configure, compile) fails.
set -euo pipefail
wget https://www.fftw.org/fftw-${FFTW_VERSION}.tar.gz
tar -xzf fftw-${FFTW_VERSION}.tar.gz
# The tarball is no longer needed once unpacked.
rm -f fftw-${FFTW_VERSION}.tar.gz
cd fftw-${FFTW_VERSION}
./configure --prefix=${INSTALL_DIR}/software/fftw --enable-sse2 --enable-avx --enable-float --enable-shared --enable-avx2
# Bound the parallelism to the number of CPUs; a bare "-j" spawns unlimited jobs.
make -j"$(nproc)"
make install
# INSTALL_DIR is a build-time ARG and is not defined in the running container,
# so it is expanded now (double quotes); \$PATH / \$LD_LIBRARY_PATH stay literal
# and are resolved when ~/.bashrc is sourced.
echo "export PATH=\$PATH:${INSTALL_DIR}/software/fftw/bin" >> ~/.bashrc
echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:${INSTALL_DIR}/software/fftw/lib" >> ~/.bashrc
EOF
# Build and install GROMACS with CUDA and MPI support into ${GMX_INSTALL_PREFIX}.
WORKDIR ${INSTALL_DIR}/software
RUN <<EOF
#!/bin/bash
# Fail the build as soon as any step (download, configure, compile) fails.
set -euo pipefail
wget https://ftp.gromacs.org/gromacs/gromacs-${GROMACS_VERSION}.tar.gz
tar xfz gromacs-${GROMACS_VERSION}.tar.gz
# The tarball is no longer needed once unpacked.
rm -f gromacs-${GROMACS_VERSION}.tar.gz
mkdir -p gromacs-${GROMACS_VERSION}/build
cd gromacs-${GROMACS_VERSION}/build
# INSTALL_DIR is a build-time ARG that does not exist at runtime, so expand it
# now instead of writing the literal "\${INSTALL_DIR}" into ~/.bashrc.
# NOTE(review): this only affects later interactive shells; the cmake call
# below uses GMX_BUILD_OWN_FFTW=ON and so builds its own FFTW.
echo "export CMAKE_PREFIX_PATH=${INSTALL_DIR}/software/fftw" >> ~/.bashrc
cmake .. \
    -DCMAKE_INSTALL_PREFIX=${GMX_INSTALL_PREFIX} \
    -DGMX_USE_RDTSCP=ON \
    -DGMX_GPU=CUDA \
    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_PATH} \
    -DGMX_BUILD_OWN_FFTW=ON \
    -DGMX_MPI=ON
# Compile in parallel (the compile step used to be serial), then install.
make -j"$(nproc)"
make install
EOF
# Put the GROMACS binaries first on PATH for every process in the container.
ENV PATH=${GMX_INSTALL_PREFIX}/bin:${PATH}
# Default command executed when the container starts.
CMD ["gmx_mpi"]

177
Dockerfile.gromacs Normal file
View File

@@ -0,0 +1,177 @@
# syntax=docker/dockerfile:1
# NOTE: Building this image requires Docker version >= 23.0.
#
# For reference:
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
ARG TAG_VERSION="12.4.1"
FROM nvidia/cuda:${TAG_VERSION}-cudnn-devel-ubuntu22.04
# Optional proxy settings, exported in lowercase for the tools run during the build.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV http_proxy=${HTTP_PROXY}
ENV https_proxy=${HTTPS_PROXY}
# Build-time only: ARG values are visible to every RUN step of this stage, so
# apt stays non-interactive during the build without leaking the setting into
# the environment of running containers.
ARG DEBIAN_FRONTEND="noninteractive"
# Root password applied with chpasswd during the build. It is deliberately NOT
# exported with ENV, so it does not show up in the container's environment.
# It is still recorded in the image history; override the default with
# --build-arg and prefer key-based logins.
ARG ROOT_PASSWD="root"
# Port the SSH daemon is configured to listen on.
ENV SSH_PORT=2222
WORKDIR /root
SHELL ["/bin/bash", "-c"]
# Base tools, SSH server configuration and user-level tooling (pipx, nvitop, pixi).
RUN <<EOT
#!/bin/bash
# Stop at the first failing command, including failures on the left-hand side
# of a pipeline (e.g. a failed download piped into bash). Without this only the
# exit status of the last line decides whether the layer "succeeds".
set -eo pipefail
apt-get update
apt-get install -y libgl1-mesa-glx bash-completion wget curl htop jq vim bash libaio-dev build-essential openssh-server openssh-client python3 python3-pip python3-venv bzip2
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
add-apt-repository ppa:git-core/ppa -y
apt-get install -y git libnuma-dev wget
pip install pipx
pipx install nvitop
pipx ensurepath
# Best effort: a problem inside ~/.bashrc must not abort the build.
. ~/.bashrc || true
# Configure SSH for password and public key authentication
mkdir -p ~/.ssh
chmod 700 ~/.ssh
# Create (or overwrite) the SSH client configuration ~/.ssh/config
# - Host *: settings that apply to every host
# - ForwardAgent yes: enable SSH agent forwarding so the local agent can authenticate
# - StrictHostKeyChecking no: skip host key verification and accept new host keys
#   automatically (intended for automated environments)
printf "Host * \n ForwardAgent yes\nHost *\n StrictHostKeyChecking no" > ~/.ssh/config
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.bak
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/^\(\s*\)GSSAPIAuthentication yes/\1GSSAPIAuthentication no/' /etc/ssh/ssh_config
sed -i "s/^#Port 22/Port ${SSH_PORT}/" /etc/ssh/sshd_config
# The build already runs as root, so no sudo is needed here.
sed -i "s/# Port 22/Port ${SSH_PORT}/" /etc/ssh/ssh_config
ssh-keygen -t rsa -b 4096 -f /root/.ssh/id_rsa -N "" <<< y
# Authorize the generated key for root logins. The former extra copy into
# ~/.ssh/auth was dropped: nothing in this file reads that path.
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys2
chmod 600 /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys2
# -p: do not fail if the directory already exists.
mkdir -p /var/run/sshd
echo "root:${ROOT_PASSWD}" | chpasswd
mkdir -p ~/.pip
# install pixi
curl -fsSL https://pixi.sh/install.sh | bash
# Drop the apt index in the layer that created it; later steps that need apt
# run their own update first.
rm -rf /var/lib/apt/lists/*
EOT
ARG FFTW_VERSION="3.3.10"
ENV FFTW_VERSION=${FFTW_VERSION}
# FFTW is installed with --prefix=/usr/local/fftw, so its executables end up in
# the bin/ subdirectory; that directory (not the prefix itself) belongs on PATH.
ENV PATH=/usr/local/fftw/bin:$PATH
# Build and install single-precision FFTW from source
RUN <<EOT
#!/bin/bash
# Fail the build as soon as any step (download, configure, compile) fails.
set -euo pipefail
wget https://www.fftw.org/fftw-${FFTW_VERSION}.tar.gz
tar zxf fftw-${FFTW_VERSION}.tar.gz
# The tarball is no longer needed once unpacked.
rm -f fftw-${FFTW_VERSION}.tar.gz
cd fftw-${FFTW_VERSION}
# If the CPU supports AVX-512 and has more than one AVX-512 FMA unit,
# --enable-avx512 can be added to improve performance further.
./configure --prefix=/usr/local/fftw --enable-sse2 --enable-avx --enable-float --enable-avx2 --enable-shared
make -j"$(nproc)"
make install
EOT
# Build and install OpenMPI from source with CUDA support
# Git ref (branch, tag or commit) of open-mpi/ompi to build. The default keeps
# the previous behaviour of building the moving "main" branch; pass a release
# tag via --build-arg OMPI_REF=... for a reproducible image.
ARG OMPI_REF="main"
# CUDA_HOME is referenced by CPATH and by the nvcc command below but was never
# defined in this file, which made "${CUDA_HOME}/include" collapse to "/include".
ENV CUDA_HOME=/usr/local/cuda
ENV MPI_HOME=/usr/local/openmpi
ENV PATH=${MPI_HOME}/bin:/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${MPI_HOME}/lib:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
ENV CPATH=/usr/local/cuda/include:${MPI_HOME}/include:${CUDA_HOME}/include:$CPATH
# export C_INCLUDE_PATH=/usr/local/cuda/include:$C_INCLUDE_PATH
# export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
# export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
RUN <<EOT
#!/bin/bash
# Fail the build as soon as any step fails, including the CUDA/MPI smoke-test
# compile at the end.
set -euo pipefail
apt-get update
apt-get install -y autoconf automake libtool flex
# Remove the package index in the same layer that created it.
rm -rf /var/lib/apt/lists/*
/usr/bin/python3 -m pip install cython
git clone --recursive https://github.com/open-mpi/ompi.git
cd ompi
git checkout "${OMPI_REF}"
# Make sure the submodules match the checked-out ref.
git submodule update --init --recursive
# make clean
# make distclean
./autogen.pl
mkdir build
cd build
../configure --with-cuda=/usr/local/cuda --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-python=/usr/bin/python3
make -j"$(nproc)"
make install
# Verify CUDA support: write a tiny MPI + CUDA program and compile it.
# The delimiter is quoted so the shell copies the source text verbatim.
cat <<'EOF' > ./test_mpi_cuda.cu
#include <mpi.h>
#include <cuda_runtime.h>
#include <stdio.h>
__global__ void hello_cuda() {
    printf("Hello from CUDA kernel! Thread id: %d\n", threadIdx.x);
}
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("Hello from MPI process %d!\n", rank);
    // Launch CUDA kernel
    hello_cuda<<<1, 10>>>();
    cudaDeviceSynchronize(); // Wait for the CUDA kernel to finish
    MPI_Finalize();
    return 0;
}
EOF
nvcc -o test_mpi_cuda test_mpi_cuda.cu -I${CUDA_HOME}/include -I${MPI_HOME}/include -L${MPI_HOME}/lib -lcudart -lmpi
# Running the binary is left commented out; it is only compiled here.
# mpirun --allow-run-as-root -np 2 ./test_mpi_cuda
EOT
# Build and install PLUMED from the release tarball into /usr/local/plumed
ARG PLUMED_VERSION="2.9.1"
ENV PLUMED_VERSION=${PLUMED_VERSION}
ENV LD_LIBRARY_PATH=/usr/local/plumed/lib:$LD_LIBRARY_PATH
ENV PATH=/usr/local/plumed:/usr/local/plumed/bin:$PATH
RUN <<EOT
#!/bin/bash
# Fail the build as soon as any step (download, configure, compile) fails.
set -euo pipefail
# Alternative: build from a git checkout instead of the release tarball.
# git clone https://github.com/plumed/plumed2
# cd plumed2
# git checkout v${PLUMED_VERSION}
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as the "tarball".
curl -fL -o plumed-${PLUMED_VERSION}.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_VERSION}/plumed-${PLUMED_VERSION}.tgz
tar zxf plumed-${PLUMED_VERSION}.tar.gz
# The tarball is no longer needed once unpacked.
rm -f plumed-${PLUMED_VERSION}.tar.gz
cd plumed-${PLUMED_VERSION}
./configure --prefix=/usr/local/plumed
make -j"$(nproc)"
make install
EOT
# Build and install PLUMED-patched GROMACS with CUDA and MPI support
ARG GROMACS_VERSION="2022.5"
ENV GROMACS_VERSION=${GROMACS_VERSION}
ENV GROMACS_HOME=/usr/local/gromacs-${GROMACS_VERSION}-plumed-${PLUMED_VERSION}
# Prepend the GROMACS binaries to PATH. The previous value started with a
# literal "PATH=", which produced the bogus entry "PATH=/usr/local/.../bin".
ENV PATH=$GROMACS_HOME/bin:$PATH
RUN <<EOT
#!/bin/bash
# Fail the build as soon as any step (download, patch, configure, compile) fails.
set -euo pipefail
wget -c https://ftp.gromacs.org/gromacs/gromacs-${GROMACS_VERSION}.tar.gz
tar zxf gromacs-${GROMACS_VERSION}.tar.gz
# The tarball is no longer needed once unpacked.
rm -f gromacs-${GROMACS_VERSION}.tar.gz
cd gromacs-${GROMACS_VERSION}
### Apply the PLUMED patch to the GROMACS source tree
# plumed-patch -p
plumed-patch -p -e gromacs-${GROMACS_VERSION}
mkdir build
cd build
# GROMACS_HOME is the same path that used to be spelled out in full here.
cmake .. -DCMAKE_INSTALL_PREFIX="${GROMACS_HOME}" \
    -DGMX_BUILD_OWN_FFTW=ON \
    -DREGRESSIONTEST_DOWNLOAD=ON \
    -DGMX_GPU=CUDA \
    -DGMX_MPI=ON
make -j"$(nproc)"
make install
# Load the GROMACS environment in root's interactive shells.
echo "source ${GROMACS_HOME}/bin/GMXRC.bash" >> /root/.bashrc
EOT
# Final apt housekeeping: clear the package cache and the downloaded index files.
# Note: files deleted here still exist in the earlier layers that created them,
# so this keeps the final filesystem tidy but does not shrink those layers.
RUN apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Document the SSH port. It follows SSH_PORT (2222 by default) so it cannot
# drift from the port written into sshd_config during the build.
EXPOSE ${SSH_PORT}
# Run the SSH daemon in the foreground as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]

45
README.md Normal file
View File

@@ -0,0 +1,45 @@
# gromacs_docker
在容器中使用 GPU 需要先安装 [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)。
运行命令:
```shell
docker run -it --net=host --gpus all --name 容器名 -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=all 镜像名
```
首次使用时可能出现如下报错:
```shell
docker: Error response from daemon: could not select device driver "" with capabilities: [[gpu]].
```
01 解决办法:安装 nvidia-container-runtime:
```shell
sudo curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
sudo curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
sudo apt-get update
sudo apt-get install nvidia-container-runtime
```
02 验证:执行下列命令:
```shell
which nvidia-container-runtime
```
若输出 `/usr/bin/nvidia-container-runtime`,则表示安装成功。
03 docker 使用:
```shell
docker run -it --gpus all **
```
其中 `--gpus all` 表示容器可以使用主机上的全部显卡。
注意:目前尚不支持 Debian 12。

55
docker-compose.yml Normal file
View File

@@ -0,0 +1,55 @@
# Compose definition for the GROMACS image built from Dockerfile.gromacs.
# The nesting below restores the service hierarchy; keys and values are unchanged.
version: '3.8'
services:
  gromacs:
    build:
      context: .
      dockerfile: Dockerfile.gromacs
      args:
        # NOTE(review): CACHEBUST is not declared as an ARG in Dockerfile.gromacs.
        CACHEBUST: 1
        TAG_VERSION: "12.4.1"
        PLUMED_VERSION: "2.9.1"
        FFTW_VERSION: "3.3.10"
        BUILDKIT_INLINE_CACHE: 1
    # env_file:
    #   - .env
    # volumes:
    #   - ./hostfile:/hostfile
    container_name: gromacs
    pull_policy: if_not_present
    # Unlimited locked memory.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    restart: unless-stopped
    image: hotwa/gromacs:test
    privileged: true
    cap_add:
      - ALL
      - CAP_SYS_PTRACE
    shm_size: '64gb'
    # devices:
    #   - /dev/infiniband/rdma_cm
    #   - /dev/infiniband/uverbs0
    #   - /dev/infiniband/uverbs1
    #   - /dev/infiniband/uverbs2
    #   - /dev/infiniband/uverbs3
    #   - /dev/infiniband/uverbs4
    #   - /dev/infiniband/uverbs5
    #   - /dev/infiniband/uverbs6
    #   - /dev/infiniband/uverbs7
    #   - /dev/infiniband/uverbs8
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
      # - UCX_NET_DEVICES=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1
    # network_mode: host
    # Same command as the image's default CMD: run sshd in the foreground.
    command: ["/usr/sbin/sshd", "-D"]
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU of the host for this service.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]