add
This commit is contained in:
51
Dockerfile
Normal file
51
Dockerfile
Normal file
@@ -0,0 +1,51 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
FROM nvidia/cuda:12.3.1-devel-ubuntu22.04
|
||||
|
||||
# Build-time variables
|
||||
ARG FFTW_VERSION=3.3.10
|
||||
ARG GROMACS_VERSION=2023.2
|
||||
ARG INSTALL_DIR=/opt
|
||||
ARG GMX_INSTALL_PREFIX=${INSTALL_DIR}/software/gmx
|
||||
ARG CUDA_TOOLKIT_PATH=/usr/local/cuda
|
||||
ARG OPENMPI_PATH=${INSTALL_DIR}/software/openmpi
|
||||
|
||||
# Install the build tooling and the distribution's Open MPI packages using a
# here-document, then create the directory later steps install software into.
RUN <<EOT
#!/bin/bash
# Heredoc lines are not implicitly chained with &&; abort on the first failure
# so a broken apt-get step cannot be masked by the final mkdir succeeding.
set -e
apt-get update
apt-get install -y cmake curl wget libpmix-dev openmpi-bin libopenmpi-dev
# Remove the apt package lists in the same layer that downloaded them.
rm -rf /var/lib/apt/lists/*
mkdir -p ${INSTALL_DIR}/software
EOT
|
||||
|
||||
# Build FFTW from source and install it under ${INSTALL_DIR}/software/fftw.
WORKDIR ${INSTALL_DIR}/software
RUN <<EOF
#!/bin/bash
# Stop at the first failing command instead of continuing with a broken tree.
set -e
wget http://www.fftw.org/fftw-${FFTW_VERSION}.tar.gz
tar -xzvf fftw-${FFTW_VERSION}.tar.gz
cd fftw-${FFTW_VERSION}
./configure --prefix=${INSTALL_DIR}/software/fftw --enable-sse2 --enable-avx --enable-float --enable-shared --enable-avx2
# Bound the job count: a bare -j lets make start an unlimited number of jobs.
make -j"$(nproc)" install
# INSTALL_DIR is a build ARG and does not exist at container run time, so it
# must be expanded now (double quotes). \$PATH and \$LD_LIBRARY_PATH stay
# literal so they are resolved when ~/.bashrc is sourced.
echo "export PATH=\$PATH:${INSTALL_DIR}/software/fftw/bin" >> ~/.bashrc
echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:${INSTALL_DIR}/software/fftw/lib" >> ~/.bashrc
EOF
|
||||
|
||||
# Build GROMACS with CUDA and MPI enabled and install it to ${GMX_INSTALL_PREFIX}.
WORKDIR ${INSTALL_DIR}/software
RUN <<EOF
#!/bin/bash
# Stop at the first failing command instead of continuing with a broken tree.
set -e
wget https://ftp.gromacs.org/gromacs/gromacs-${GROMACS_VERSION}.tar.gz
tar xfz gromacs-${GROMACS_VERSION}.tar.gz
cd gromacs-${GROMACS_VERSION}
mkdir build
cd build
# INSTALL_DIR is a build ARG and is not set at container run time, so expand it
# now (double quotes) rather than writing a literal ${INSTALL_DIR} to ~/.bashrc.
echo "export CMAKE_PREFIX_PATH=${INSTALL_DIR}/software/fftw" >> ~/.bashrc
cmake .. -DCMAKE_INSTALL_PREFIX=${GMX_INSTALL_PREFIX} -DGMX_USE_RDTSCP=ON -DGMX_GPU=CUDA -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_PATH} -DGMX_BUILD_OWN_FFTW=ON -DGMX_MPI=ON
# Compile in parallel; the parallel flag belongs on the build step, not only on
# the install step that runs after everything is already compiled.
make -j"$(nproc)"
make install
EOF
|
||||
|
||||
ENV PATH="${GMX_INSTALL_PREFIX}/bin:${PATH}"
|
||||
|
||||
# Default command run when the container starts
|
||||
CMD ["gmx_mpi"]
|
||||
177
Dockerfile.gromacs
Normal file
177
Dockerfile.gromacs
Normal file
@@ -0,0 +1,177 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
# NOTE: Building this image requires Docker version >= 23.0.
|
||||
#
|
||||
# For reference:
|
||||
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
|
||||
ARG TAG_VERSION="12.4.1"
|
||||
FROM nvidia/cuda:${TAG_VERSION}-cudnn-devel-ubuntu22.04
|
||||
ARG HTTP_PROXY
|
||||
ARG HTTPS_PROXY
|
||||
ENV http_proxy=${HTTP_PROXY}
|
||||
ENV https_proxy=${HTTPS_PROXY}
|
||||
ARG DEBIAN_FRONTEND="noninteractive"
|
||||
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND}
|
||||
ARG ROOT_PASSWD="root"
|
||||
ENV ROOT_PASSWD=${ROOT_PASSWD}
|
||||
ENV SSH_PORT=2222
|
||||
WORKDIR /root
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# base tools
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt-get update
|
||||
apt-get install -y libgl1-mesa-glx bash-completion wget curl htop jq vim bash libaio-dev build-essential openssh-server openssh-client python3 python3-pip python3-venv bzip2
|
||||
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
|
||||
add-apt-repository ppa:git-core/ppa -y
|
||||
apt-get install -y git libnuma-dev wget
|
||||
pip install pipx
|
||||
pipx install nvitop
|
||||
pipx ensurepath
|
||||
. ~/.bashrc
|
||||
# Configure SSH for password and public key authentication
|
||||
mkdir ~/.ssh
|
||||
# Create or overwrite the SSH client config file ~/.ssh/config
|
||||
# - Host *: settings that apply to every host
|
||||
# - ForwardAgent yes: enable SSH agent forwarding so the local SSH agent can be used for authentication
|
||||
# - StrictHostKeyChecking no: disable host key checking and accept new host keys automatically (intended for automated environments)
|
||||
printf "Host * \n ForwardAgent yes\nHost *\n StrictHostKeyChecking no" > ~/.ssh/config
|
||||
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.bak
|
||||
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config
|
||||
sed -i 's/^\(\s*\)GSSAPIAuthentication yes/\1GSSAPIAuthentication no/' /etc/ssh/ssh_config
|
||||
sed -i "s/^#Port 22/Port ${SSH_PORT}/" /etc/ssh/sshd_config
|
||||
sudo sed -i "s/# Port 22/Port ${SSH_PORT}/" /etc/ssh/ssh_config
|
||||
ssh-keygen -t rsa -b 4096 -f /root/.ssh/id_rsa -N "" <<< y
|
||||
cat ~/.ssh/id_rsa.pub >> ~/.ssh/auth
|
||||
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
|
||||
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys2
|
||||
chmod 600 /root/.ssh/authorized_keys
|
||||
chmod 600 /root/.ssh/authorized_keys2
|
||||
mkdir /var/run/sshd
|
||||
echo "root:${ROOT_PASSWD}" | chpasswd
|
||||
mkdir -p ~/.pip
|
||||
# install pixi
|
||||
curl -fsSL https://pixi.sh/install.sh | bash
|
||||
EOT
|
||||
|
||||
ARG FFTW_VERSION="3.3.10"
ENV FFTW_VERSION=${FFTW_VERSION}
# FFTW is configured with --prefix=/usr/local/fftw below, so its executables
# are installed in the bin/ subdirectory of that prefix.
ENV PATH=/usr/local/fftw/bin:$PATH
# Build and install FFTW from source
RUN <<EOT
#!/bin/bash
# Stop at the first failing command instead of continuing with a broken tree.
set -e
wget http://www.fftw.org/fftw-${FFTW_VERSION}.tar.gz
tar zxvf fftw-${FFTW_VERSION}.tar.gz
cd fftw-${FFTW_VERSION}
# If the CPU supports AVX-512 and has more than one AVX-512 FMA unit,
# --enable-avx512 can be added for a further performance gain.
./configure --prefix=/usr/local/fftw --enable-sse2 --enable-avx --enable-float --enable-avx2 --enable-shared
make -j$(nproc)
make install
EOT
|
||||
|
||||
# Install Open MPI: install location plus the search paths used by the
# compiler, linker and loader for CUDA and MPI.
ENV MPI_HOME=/usr/local/openmpi
# CUDA_HOME is referenced by CPATH below and by the nvcc test compile in the
# following RUN step, but nothing earlier in this Dockerfile defines it; left
# unset, ${CUDA_HOME}/include would expand to just /include.
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${MPI_HOME}/bin:/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${MPI_HOME}/lib:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
ENV CPATH=/usr/local/cuda/include:${MPI_HOME}/include:${CUDA_HOME}/include:$CPATH
|
||||
# export C_INCLUDE_PATH=/usr/local/cuda/include:$C_INCLUDE_PATH
|
||||
# export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
|
||||
# export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt update && apt install -y autoconf automake libtool flex
|
||||
/usr/bin/python3 -m pip install cython
|
||||
git clone --recursive https://github.com/open-mpi/ompi.git
|
||||
cd ompi
|
||||
git checkout main
|
||||
# make clean
|
||||
# make distclean
|
||||
./autogen.pl
|
||||
mkdir build
|
||||
cd build
|
||||
../configure --with-cuda=/usr/local/cuda --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-python=/usr/bin/python3
|
||||
make -j$(nproc)
|
||||
make install
|
||||
# Check CUDA support by compiling a small MPI + CUDA test program (the mpirun step below is commented out)
|
||||
cat <<EOF > ./test_mpi_cuda.cu
|
||||
#include <mpi.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdio.h>
|
||||
|
||||
__global__ void hello_cuda() {
|
||||
printf("Hello from CUDA kernel! Thread id: %d\n", threadIdx.x);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
MPI_Init(&argc, &argv);
|
||||
|
||||
int rank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
|
||||
printf("Hello from MPI process %d!\n", rank);
|
||||
|
||||
// Launch CUDA kernel
|
||||
hello_cuda<<<1, 10>>>();
|
||||
cudaDeviceSynchronize(); // Wait for the CUDA kernel to finish
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
nvcc -o test_mpi_cuda test_mpi_cuda.cu -I${CUDA_HOME}/include -I${MPI_HOME}/include -L${MPI_HOME}/lib -lcudart -lmpi
|
||||
# mpirun --allow-run-as-root -np 2 ./test_mpi_cuda
|
||||
EOT
|
||||
|
||||
# Install PLUMED
|
||||
ARG PLUMED_VERSION="2.9.1"
|
||||
ENV PLUMED_VERSION=${PLUMED_VERSION}
|
||||
ENV LD_LIBRARY_PATH=/usr/local/plumed/lib:$LD_LIBRARY_PATH
|
||||
ENV PATH=/usr/local/plumed:/usr/local/plumed/bin:$PATH
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
# git clone https://github.com/plumed/plumed2
|
||||
# cd plumed2
|
||||
# git checkout v${PLUMED_VERSION}
|
||||
curl -L -o plumed-${PLUMED_VERSION}.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_VERSION}/plumed-${PLUMED_VERSION}.tgz
|
||||
tar zxvf plumed-${PLUMED_VERSION}.tar.gz
|
||||
cd plumed-${PLUMED_VERSION}
|
||||
./configure --prefix=/usr/local/plumed
|
||||
make -j$(nproc)
|
||||
make install
|
||||
EOT
|
||||
|
||||
# Install GROMACS (patched with PLUMED)
|
||||
ARG GROMACS_VERSION="2022.5"
|
||||
ENV GROMACS_VERSION=${GROMACS_VERSION}
|
||||
ENV GROMACS_HOME=/usr/local/gromacs-${GROMACS_VERSION}-plumed-${PLUMED_VERSION}
|
||||
ENV PATH=$GROMACS_HOME/bin:$PATH
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
wget -c https://ftp.gromacs.org/gromacs/gromacs-${GROMACS_VERSION}.tar.gz
|
||||
tar zxvf gromacs-${GROMACS_VERSION}.tar.gz
|
||||
cd gromacs-${GROMACS_VERSION}
|
||||
### patch the plumed
|
||||
# plumed-patch -p
|
||||
plumed-patch -p -e gromacs-${GROMACS_VERSION}
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local/gromacs-${GROMACS_VERSION}-plumed-${PLUMED_VERSION} \
|
||||
-DGMX_BUILD_OWN_FFTW=ON \
|
||||
-DREGRESSIONTEST_DOWNLOAD=ON \
|
||||
-DGMX_GPU=CUDA \
|
||||
-DGMX_MPI=ON
|
||||
make -j$(nproc)
|
||||
make install
|
||||
echo "source /usr/local/gromacs-${GROMACS_VERSION}-plumed-${PLUMED_VERSION}/bin/GMXRC.bash" >> /root/.bashrc
|
||||
EOT
|
||||
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
EOT
|
||||
|
||||
EXPOSE ${SSH_PORT}
|
||||
|
||||
CMD ["/usr/sbin/sshd", "-D"]
|
||||
45
README.md
Normal file
45
README.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# gromacs_docker
|
||||
|
||||
在容器中使用gpu需要安装:[nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
|
||||
|
||||
运行命令:
|
||||
|
||||
```shell
|
||||
docker run -it --net=host --gpus all --name 容器名 -e NVIDIA_DRIVER_CAPABILITIES=compute,utility -e NVIDIA_VISIBLE_DEVICES=all 镜像名
|
||||
```
|
||||
|
||||
首次使用报错:
|
||||
|
||||
```shell
|
||||
docker: Error response from daemon: could not select device driver "" with capabilities: [[gpu]].
|
||||
```
|
||||
|
||||
解决办法:
|
||||
|
||||
```shell
|
||||
sudo curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
|
||||
sudo apt-key add -
|
||||
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
|
||||
sudo curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
|
||||
sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
|
||||
sudo apt-get update
|
||||
|
||||
sudo apt-get install nvidia-container-runtime
|
||||
```
|
||||
|
||||
02 验证执行下列命令:
|
||||
|
||||
```shell
|
||||
which nvidia-container-runtime
|
||||
```
|
||||
|
||||
输出 /usr/bin/nvidia-container-runtime,表示安装成功。
|
||||
03 docker 使用:
|
||||
|
||||
```shell
|
||||
docker run -it --gpus all **
|
||||
```
|
||||
|
||||
此时,设置使用设备上全部的显卡。
|
||||
|
||||
目前尚不支持debian12
|
||||
55
docker-compose.yml
Normal file
55
docker-compose.yml
Normal file
@@ -0,0 +1,55 @@
|
||||
# Compose definition that builds the image from Dockerfile.gromacs and runs it
# as a single "gromacs" service with NVIDIA GPU access.
version: '3.8'

services:
  gromacs:
    build:
      context: .
      dockerfile: Dockerfile.gromacs
      args:
        CACHEBUST: 1
        TAG_VERSION: "12.4.1"
        PLUMED_VERSION: "2.9.1"
        FFTW_VERSION: "3.3.10"
        BUILDKIT_INLINE_CACHE: 1
    # env_file:
    #   - .env
    # volumes:
    #   - ./hostfile:/hostfile
    container_name: gromacs
    pull_policy: if_not_present
    ulimits:
      memlock:
        soft: -1
        hard: -1
    restart: unless-stopped
    image: hotwa/gromacs:test
    privileged: true
    cap_add:
      - ALL
      - CAP_SYS_PTRACE
    shm_size: '64gb'
    # devices:
    #   - /dev/infiniband/rdma_cm
    #   - /dev/infiniband/uverbs0
    #   - /dev/infiniband/uverbs1
    #   - /dev/infiniband/uverbs2
    #   - /dev/infiniband/uverbs3
    #   - /dev/infiniband/uverbs4
    #   - /dev/infiniband/uverbs5
    #   - /dev/infiniband/uverbs6
    #   - /dev/infiniband/uverbs7
    #   - /dev/infiniband/uverbs8
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
      # - UCX_NET_DEVICES=mlx5_0:1,mlx5_1:1,mlx5_2:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1
    # network_mode: host
    command: ["/usr/sbin/sshd", "-D"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
|
||||
Reference in New Issue
Block a user