Files
gromacs_docker/Dockerfile.gromacs_amber
2024-10-23 21:06:48 +08:00

370 lines
14 KiB
Docker
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# syntax=docker/dockerfile:1.3-labs
# NOTE: Building this image requires Docker >= 23.0.
#
# For reference:
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel

# Base image tag of nvidia/cuda; override with --build-arg TAG_VERSION=<tag>.
ARG TAG_VERSION="12.4.1-cudnn-devel-ubuntu22.04"
FROM nvidia/cuda:${TAG_VERSION}

# CUDA toolkit location, prepended to the executable, loader, header and
# link-time search paths.
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH} \
    LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} \
    CPATH=${CUDA_HOME}/include:${CPATH} \
    LIBRARY_PATH=${CUDA_HOME}/lib64:${LIBRARY_PATH}

# Optional proxy, passed with --build-arg HTTP_PROXY=... / HTTPS_PROXY=...
# NOTE(review): copying these into ENV keeps the proxy settings in the final
# image environment as well, not only during the build.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ENV http_proxy=${HTTP_PROXY} \
    https_proxy=${HTTPS_PROXY}

# Non-interactive apt, and the root password applied later with chpasswd.
# NOTE(review): both values are persisted via ENV, so the root password is
# readable from the image metadata and from inside every container.
ARG DEBIAN_FRONTEND="noninteractive"
ARG ROOT_PASSWD="root"
ENV DEBIAN_FRONTEND=${DEBIAN_FRONTEND} \
    ROOT_PASSWD=${ROOT_PASSWD} \
    SSH_PORT=2222

WORKDIR /root
SHELL ["/bin/bash", "-c"]
# Replace the stock apt sources with the Aliyun mirror for Ubuntu 22.04 (jammy).
# The original list is kept next to it as sources.list_bak.
RUN <<EOT
#!/bin/bash
cp /etc/apt/sources.list /etc/apt/sources.list_bak
# The quoted delimiter writes the list literally, with no shell expansion.
cat > /etc/apt/sources.list << 'EOF'
deb https://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ jammy main restricted universe multiverse
deb https://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ jammy-security main restricted universe multiverse
deb https://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ jammy-updates main restricted universe multiverse
# deb https://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse
# deb-src https://mirrors.aliyun.com/ubuntu/ jammy-proposed main restricted universe multiverse
deb https://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse
deb-src https://mirrors.aliyun.com/ubuntu/ jammy-backports main restricted universe multiverse
EOF
EOT
# Base tools: system packages, SSH client/server configuration, root password
# and timezone.
RUN <<EOT
#!/bin/bash
# Stop at the first failing command; without this only the exit status of the
# last command in the script decides whether the layer "succeeded".
set -e
apt-get update
apt-get install -y libgl1-mesa-glx bash-completion wget curl htop jq vim bash libaio-dev build-essential openssh-server openssh-client python3 python3-pip python3-venv bzip2 screen
apt-get install -y --no-install-recommends software-properties-common build-essential autotools-dev nfs-common pdsh cmake g++ gcc curl wget vim tmux emacs less unzip htop iftop iotop ca-certificates openssh-client openssh-server rsync iputils-ping net-tools sudo llvm-dev re2c
# git is taken from the git-core PPA rather than the distribution archive.
add-apt-repository ppa:git-core/ppa -y
apt-get install -y git libnuma-dev wget
# nvitop is installed through pipx. Sourcing ~/.bashrc here was dropped: it
# only affected this build shell, which ends with this RUN step.
pip install pipx
pipx install nvitop
pipx ensurepath

# --- SSH: password and public-key authentication ---
mkdir -p ~/.ssh
chmod 700 ~/.ssh
# Create (or overwrite) the per-user client config ~/.ssh/config:
# - Host *: settings apply to every host
# - ForwardAgent yes: allow authentication through the local ssh agent
# - StrictHostKeyChecking no: accept new host keys automatically (automation)
printf "Host * \n ForwardAgent yes\nHost *\n StrictHostKeyChecking no" > ~/.ssh/config
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.bak
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/^\(\s*\)GSSAPIAuthentication yes/\1GSSAPIAuthentication no/' /etc/ssh/ssh_config
# Server listens on SSH_PORT ...
sed -i "s/^#Port 22/Port ${SSH_PORT}/" /etc/ssh/sshd_config
# ... and the client defaults to the same port. The commented default in the
# stock ssh_config is indented after the '#' (presumably "#   Port 22"), so any
# amount of whitespace is accepted; the former pattern "# Port 22" required
# exactly one space.
sed -i "s/^#[[:space:]]*Port 22/    Port ${SSH_PORT}/" /etc/ssh/ssh_config
# Passwordless key pair for root, authorised for logins to this same image.
# (The former append to ~/.ssh/auth was removed: nothing in this file reads it.)
ssh-keygen -t rsa -b 4096 -f /root/.ssh/id_rsa -N "" <<< y
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys2
chmod 600 /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys2
# -p: do not fail if the directory already exists.
mkdir -p /var/run/sshd
# NOTE(review): the root password comes from the ROOT_PASSWD build arg
# (default "root") and is also exported via ENV; override it for any image
# that is reachable by others.
echo "root:${ROOT_PASSWD}" | chpasswd
mkdir -p ~/.pip

# --- timezone ---
apt-get install -y tzdata
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo "Asia/Shanghai" > /etc/timezone

# install pixi
# curl -fsSL https://pixi.sh/install.sh | bash

# Remove the package index in the layer that created it; later steps run
# "apt-get update" again before installing anything.
rm -rf /var/lib/apt/lists/*
EOT
# Environment Modules 5.4.0, used to manage environment variables.
# Source tarball: https://modules.sourceforge.net/
# Usage notes: https://nscc.mrzhenggang.com/user-manual/config-env-with-module
ENV PATH=/opt/modules/bin:$PATH
ENV LIBRARY_PATH=/opt/modules/lib:$LIBRARY_PATH
COPY ./file/modules-5.4.0.tar.gz /root/
RUN <<EOT
#!/bin/bash
# The script ends with "echo", which always succeeds; without "set -e" a
# failed configure/make/install would go unnoticed.
set -e
apt-get update && apt-get install -y tcl
tar zxvf modules-5.4.0.tar.gz
cd modules-5.4.0
# Alternative: ./configure --prefix=/opt/modules --bindir=/opt/modules/bin --libdir=/opt/modules/lib --with-tclsh=/usr/bin/tclsh
./configure --prefix=/opt/modules --bindir=/opt/modules/bin --libdir=/opt/modules/lib --disable-libtclenvmodules
make -j$(nproc)
make install
# Make the "module" command available in login shells and in root's
# interactive shells.
echo "source /opt/modules/init/profile.sh" >> /etc/profile
echo "source /opt/modules/init/profile.sh" >> ~/.bashrc
# /opt/modules/bin/modulecmd
# Remove the package index in the layer that created it.
rm -rf /var/lib/apt/lists/*
EOT
# FFTW, built from source into /usr/local/fftw (single precision, shared libs).
ARG FFTW_VERSION="3.3.10"
ENV FFTW_VERSION=${FFTW_VERSION}
# Executables are installed under <prefix>/bin, so that is the directory that
# belongs on PATH (the prefix itself contains no executables).
ENV PATH=/usr/local/fftw/bin:$PATH
RUN <<EOT
#!/bin/bash
# Fail the layer as soon as download, configure or build fails.
set -e
wget https://www.fftw.org/fftw-${FFTW_VERSION}.tar.gz
tar zxvf fftw-${FFTW_VERSION}.tar.gz
cd fftw-${FFTW_VERSION}
# If the CPU supports AVX-512 and has more than one AVX-512 FMA unit,
# --enable-avx512 can be added for further speed-up.
./configure --prefix=/usr/local/fftw --enable-sse2 --enable-avx --enable-float --enable-avx2 --enable-shared
make -j$(nproc)
make install
EOT
# ---------------------------------------------------------------------------
# UCX, built from source with CUDA support.
# - Project: https://github.com/openucx/ucx
# - OpenMPI and OpenSHMEM installation with UCX:
#   https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX
# - Documentation: https://openucx.readthedocs.io/en/master
# - Running in Docker containers:
#   https://openucx.readthedocs.io/en/master/running.html#running-in-docker-containers
# ---------------------------------------------------------------------------
# UCX_PREFIX is the single source of truth for the install location: it is
# used for the search paths below, for UCX's own --prefix and for Open MPI's
# --with-ucx. (These two flags previously read ${UCX_HOME}, which is never
# defined, so both received an empty value.)
ENV UCX_PREFIX=/usr/local/ucx
ENV PATH=$UCX_PREFIX/bin:$PATH
ENV LD_LIBRARY_PATH=$UCX_PREFIX/lib:$LD_LIBRARY_PATH
ENV CPATH=$UCX_PREFIX/include:$CPATH
ENV LIBRARY_PATH=$UCX_PREFIX/lib:$LIBRARY_PATH
# Git ref to build. The default follows the moving development branch, as
# before; pass a release tag (e.g. --build-arg UCX_REF=v1.15.0) for a
# reproducible build.
ARG UCX_REF="master"
RUN <<EOT
#!/bin/bash
# Stop at the first failing command.
set -e
apt-get update
apt-get -y install gdb valgrind autoconf build-essential libnuma-dev pkg-config libfuse3-dev automake libtool flex gfortran libibverbs-dev rdma-core
# apt-get install -y openmpi-bin openmpi-common openmpi-doc openmpi-debug libopenmpi-dev
# Debug-symbol packages are a convenience only and may not be published for
# this Ubuntu release, so their absence must not abort the build.
apt-get install -y libucx0-dbg libucs0-dbg libucm0-dbg libuct0-dbg libibverbs1-dbg librdmacm1-dbg libmlx5-1-dbg \
  || echo "WARNING: optional UCX/RDMA debug packages could not be installed; continuing" >&2
git clone https://github.com/openucx/ucx.git
cd ucx
git checkout "${UCX_REF}"
./autogen.sh
mkdir build
cd build
# Alternative configurations:
#   release (optimised): ../contrib/configure-release --prefix=${UCX_PREFIX} --with-cuda=${CUDA_HOME}
#   devel (debugging):   ../contrib/configure-devel --prefix=${UCX_PREFIX} --with-cuda=${CUDA_HOME}
#   default:             ../configure --prefix=${UCX_PREFIX} --with-cuda=${CUDA_HOME}
#   with gdrcopy:        ../contrib/configure-release --prefix=${UCX_PREFIX} --with-cuda=${CUDA_HOME} --with-gdrcopy=/usr/local/gdrcopy
../contrib/configure-release --prefix=${UCX_PREFIX} \
--with-cuda=/usr/local/cuda \
--with-mlx5 \
--with-avx \
--with-rc \
--with-ud \
--with-dc \
--with-dm \
--with-verbs
make -j$(nproc)
make install
# Remove the package index in the layer that created it.
rm -rf /var/lib/apt/lists/*
# Handy checks once the image is running:
#   ucx_info -a                                   # full build/transport info
#   ucx_perftest -d <device> -t bw -p <protocol> -n <num_iterations>   # performance test
#   ucx_read_profile                              # read a UCX profile
#   mpirun -np 2 -mca pml ucx -x UCX_NET_DEVICES=mlx5_0:1 ./your_mpi_program
#   ucx_info -c ; ucx_info -d                     # configuration / devices (CUDA support check)
EOT

# ---------------------------------------------------------------------------
# Open MPI, built from source against CUDA and the UCX installed above.
# ---------------------------------------------------------------------------
ENV MPI_HOME=/usr/local/openmpi
ENV PATH=${MPI_HOME}/bin:/usr/bin:$PATH
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${MPI_HOME}/lib:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
ENV CPATH=/usr/local/cuda/include:${MPI_HOME}/include:${CUDA_HOME}/include:$CPATH
# Git ref to build. The default follows the development branch, as before;
# pass a release tag for a reproducible build.
ARG OMPI_REF="main"
RUN <<EOT
#!/bin/bash
# Stop at the first failing command.
set -e
apt-get update && apt-get install -y autoconf automake libtool flex gfortran
/usr/bin/python3 -m pip install cython
git clone --recursive https://github.com/open-mpi/ompi.git
cd ompi
git checkout "${OMPI_REF}"
# Keep submodules in step with the checked-out ref (no-op for the default).
git submodule update --init --recursive
./autogen.pl
mkdir build
cd build
# Simpler alternative:
#   ../configure --with-cuda=/usr/local/cuda --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-python=/usr/bin/python3 FC=gfortran
../configure FC=gfortran PYTHON=/usr/bin/python3 --with-cuda=/usr/local/cuda --with-cuda-libdir=/usr/local/cuda/lib64 --enable-python-bindings \
--enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-ucx=${UCX_PREFIX} --enable-mca-dso=btl-smcuda,rcache-rgpusm,rcache-gpusm,accelerator-cuda --enable-mca-no-build=btl-uct --without-hcoll
make -j$(nproc)
make install
# Smoke test: a program that uses both MPI and CUDA must compile and link.
# It is only built here; running it needs a GPU (see the mpirun line below).
cat <<'EOF' > ./test_mpi_cuda.cu
#include <mpi.h>
#include <cuda_runtime.h>
#include <stdio.h>
__global__ void hello_cuda() {
printf("Hello from CUDA kernel! Thread id: %d\n", threadIdx.x);
}
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("Hello from MPI process %d!\n", rank);
// Launch CUDA kernel
hello_cuda<<<1, 10>>>();
cudaDeviceSynchronize(); // Wait for the CUDA kernel to finish
MPI_Finalize();
return 0;
}
EOF
nvcc -o test_mpi_cuda test_mpi_cuda.cu -I${CUDA_HOME}/include -I${MPI_HOME}/include -L${MPI_HOME}/lib -lcudart -lmpi
# mpirun --allow-run-as-root -np 2 ./test_mpi_cuda
# Remove the package index in the layer that created it.
rm -rf /var/lib/apt/lists/*
# Print the MPI extensions line of the installed build; this also fails the
# layer if ompi_info is missing or reports no extensions line.
ompi_info | grep "MPI extensions"
EOT
# PLUMED, built from the release tarball into /usr/local/plumed.
ARG PLUMED_VERSION="2.9.2"
ENV PLUMED_VERSION=${PLUMED_VERSION}
ENV LD_LIBRARY_PATH=/usr/local/plumed/lib:$LD_LIBRARY_PATH
ENV PATH=/usr/local/plumed:/usr/local/plumed/bin:$PATH
RUN <<EOT
#!/bin/bash
# Stop at the first failing command.
set -e
# Alternative: build from git
#   git clone https://github.com/plumed/plumed2
#   cd plumed2
#   git checkout v${PLUMED_VERSION}
# -f makes curl fail on an HTTP error instead of saving the error page as the
# tarball; -L follows the redirect GitHub issues for release assets.
curl -fL -o plumed-${PLUMED_VERSION}.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_VERSION}/plumed-${PLUMED_VERSION}.tgz
tar zxvf plumed-${PLUMED_VERSION}.tar.gz
cd plumed-${PLUMED_VERSION}
./configure --prefix=/usr/local/plumed
make -j$(nproc)
make install
EOT
# GROMACS, patched with PLUMED and built with CUDA and MPI support.
ARG GROMACS_VERSION="2021.7"
ENV GROMACS_VERSION=${GROMACS_VERSION}
ENV GROMACS_HOME=/usr/local/gromacs-${GROMACS_VERSION}-plumed-${PLUMED_VERSION}
# Fixed: this used to read "PATH=PATH=...", which made the first PATH entry the
# literal string "PATH=<gromacs>/bin" and kept the GROMACS binaries off PATH.
ENV PATH=$GROMACS_HOME/bin:$PATH
# Semicolon-separated CUDA compute capabilities to generate code for;
# override with --build-arg CUDA_ARCH="80;90".
ARG CUDA_ARCH="75;86;89"
ENV CUDA_ARCH=${CUDA_ARCH}
RUN <<EOT
#!/bin/bash
# The script ends with "echo", which always succeeds; without "set -e" a
# failed download, patch, configure or build would go unnoticed.
set -e
wget -c https://ftp.gromacs.org/gromacs/gromacs-${GROMACS_VERSION}.tar.gz
tar zxvf gromacs-${GROMACS_VERSION}.tar.gz
cd gromacs-${GROMACS_VERSION}
# Apply the PLUMED patch for this GROMACS version.
# (Running "plumed-patch -p" without -e selects the engine interactively.)
plumed-patch -p -e gromacs-${GROMACS_VERSION}
mkdir build
cd build
# The install prefix and the CUDA targets come from GROMACS_HOME and CUDA_ARCH
# instead of repeating their values; CUDA_ARCH is quoted because it contains
# semicolons.
cmake .. -DCMAKE_INSTALL_PREFIX=${GROMACS_HOME} \
-DGMX_BUILD_OWN_FFTW=ON \
-DREGRESSIONTEST_DOWNLOAD=ON \
-DGMX_GPU=CUDA \
-DGMX_CUDA_TARGET_COMPUTE="${CUDA_ARCH}" \
-DGMX_CUDA_TARGET_SM="${CUDA_ARCH}" \
-DGMX_MPI=ON
make -j$(nproc)
make install
# Load the GROMACS environment in root's interactive shells.
echo "source ${GROMACS_HOME}/bin/GMXRC.bash" >> /root/.bashrc
EOT
# PyAutoFEP and its Python dependencies. PyAutoFEP supports GROMACS 2021.7.
# GROMACS 2022.5 and 2023 hit the "excluded atoms are further apart than the
# cut-off" problem; staying on 2021.7 keeps simulations stable and the free
# energy calculations converging.
# https://github.com/luancarvalhomartins/PyAutoFEP/blob/master/docs/Manual.pdf
# https://github.com/luancarvalhomartins/PyAutoFEP/tree/master/docs/tutorial01
ENV CPLUS_INCLUDE_PATH=/usr/include/openbabel3
ENV LIBRARY_PATH=/usr/lib:/usr/local/lib:${LIBRARY_PATH}
RUN <<EOT
#!/bin/bash
# Stop at the first failing command.
set -e
# The build already runs as root, so sudo is not needed.
apt-get update
apt-get install openbabel libopenbabel-dev -y
apt-get install swig -y
# -sfn: replace an existing link instead of failing on it.
ln -sfn /usr/include/openbabel3 /usr/local/include/openbabel3
# Every requirement that contains '<' or '>' must be quoted. Unquoted,
# "openbabel>3.0.0" is parsed by the shell as "openbabel" with stdout
# redirected to a file named "3.0.0", which drops the version constraint and
# hides pip's output.
/usr/bin/python3 -m pip install ipython rdkit "networkx>2.0,<3.0" alchemlyb==0.6.0 pymbar==3.0.5 "openbabel>3.0.0" matplotlib numpy biopython mdanalysis pytest packaging
# or openbabel use 2.4.1
git clone https://github.com/luancarvalhomartins/PyAutoFEP.git
# Remove the package index in the layer that created it.
rm -rf /var/lib/apt/lists/*
EOT
# Local archives and installers. With more than one source, the Dockerfile
# reference requires the destination to be a directory ending in "/".
# NOTE(review): these files stay in the image layers after use; a bind mount
# (RUN --mount=type=bind) would avoid that but changes what ends up in /root.
COPY file/Amber24.tar.bz2 file/AmberTools24.tar.bz2 /root/
COPY file/l_HPCKit_p_2024.2.1.79_offline.sh file/l_onemkl_p_2024.2.2.17_offline.sh /root/
COPY file/boost_1_86_0.tar.gz /root/
ENV DOWNLOAD_MINICONDA="False"
# Install Intel HPCKit and oneMKL from the offline installers.
RUN <<EOT
#!/bin/bash
# The script ends with "chmod"; without "set -e" a failed installer run would
# only surface indirectly, or not at all.
set -e
chmod +x l_HPCKit_p_2024.2.1.79_offline.sh
./l_HPCKit_p_2024.2.1.79_offline.sh -a --silent --eula accept --install-dir /opt/intel
chmod +x l_onemkl_p_2024.2.2.17_offline.sh
./l_onemkl_p_2024.2.2.17_offline.sh -a --silent --eula accept --install-dir /opt/intel/onemkl
# System-wide alternative:
#   echo "source /opt/intel/setvars.sh" >> /etc/profile
#   echo "source /opt/intel/onemkl/setvars.sh" >> /etc/profile
# Load the Intel environments in root's interactive shells.
echo "source /opt/intel/setvars.sh" >> ~/.bashrc
echo "source /opt/intel/onemkl/setvars.sh" >> ~/.bashrc
mkdir -p /opt/modulefiles/intel
chmod +x /opt/intel/setvars.sh
chmod +x /opt/intel/onemkl/setvars.sh
# Optional: modenv via cargo
#   curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
#   echo "source $HOME/.cargo/env" >> ~/.bashrc
#   cargo install modenv
EOT
# Boost (with MPI support) and the unpacked Amber / AmberTools sources.
# MODULEPATH points at the modulefile root, so the modulefile written below at
# /opt/modulefiles/boost/<version> is addressed as "boost/<version>", matching
# the documented "module load" command. (It used to point at
# /opt/modulefiles/boost, under which the module name would have been just
# "<version>".)
ENV MODULEPATH=/opt/modulefiles:$MODULEPATH
RUN <<EOT
#!/bin/bash
# Stop at the first failing command; the script ends with "tar", which says
# nothing about whether Boost was built.
set -e
python3 -m pip install numpy scipy matplotlib cython setuptools
# Boost sources: https://www.boost.org/users/download/
tar zxvf boost_1_86_0.tar.gz
cd boost_1_86_0
./bootstrap.sh --prefix=/opt/boost --with-libraries=all --with-toolset=gcc
# Build Boost.MPI against the Open MPI compiler wrapper.
echo "using mpi : /usr/local/openmpi/bin/mpicxx ;" >> project-config.jam
# Intel MPI alternative:
#   echo "using mpi : /opt/intel/mpi/2021.13/bin/mpicxx ;" >> project-config.jam
./b2 -j$(nproc) --layout=tagged link=static,shared threading=multi install
mkdir -p /opt/modulefiles/boost
# Load Boost through its modulefile with:
#   module load boost/1.86.0-openmpi-5.1.0a1
# (see also: module list, module avail)
cat << EOF > /opt/modulefiles/boost/1.86.0-openmpi-5.1.0a1
#%Module1.0
set prefix /opt/boost
# 设置库路径和头文件路径,方便编译器找到 Boost
prepend-path LD_LIBRARY_PATH \$prefix/lib
prepend-path CPATH \$prefix/include
prepend-path LIBRARY_PATH \$prefix/lib
prepend-path PATH \$prefix/bin
EOF
# Unpack Amber24
tar -xjvf Amber24.tar.bz2
# Unpack AmberTools24
tar -xjvf AmberTools24.tar.bz2
# Optionally remove the archives after unpacking:
# rm Amber24.tar.bz2 AmberTools24.tar.bz2
EOT
# Final apt cleanup. Deleting files in a separate layer keeps the final
# filesystem tidy but does not shrink the layers that created them.
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Document the port sshd was configured with (SSH_PORT, 2222 by default)
# instead of repeating the number.
EXPOSE ${SSH_PORT}

# Run sshd in the foreground as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]