add openucx and openmpi
This commit is contained in:
@@ -60,6 +60,7 @@ echo "Asia/Shanghai" > /etc/timezone
|
||||
# curl -fsSL https://pixi.sh/install.sh | bash
|
||||
EOT
|
||||
|
||||
ENV CUDA_HOME=/usr/local/cuda
|
||||
ENV PATH=/opt/modules/bin:$PATH
|
||||
ENV LIBRARY_PATH=/opt/modules/lib:$LIBRARY_PATH
|
||||
COPY ./file/modules-5.4.0.tar.gz /root
|
||||
@@ -74,6 +75,8 @@ cd modules-5.4.0
|
||||
./configure --prefix=/opt/modules --bindir=/opt/modules/bin --libdir=/opt/modules/lib --disable-libtclenvmodules
|
||||
make -j$(nproc)
|
||||
make install
|
||||
echo "source /opt/modules/init/profile.sh" >> /etc/profile
|
||||
echo "source /opt/modules/init/profile.sh" >> ~/.bashrc
|
||||
# /opt/modules/bin/modulecmd
|
||||
EOT
|
||||
|
||||
@@ -91,10 +94,74 @@ make -j$(nproc)
|
||||
make install
|
||||
EOT
|
||||
|
||||
# install ucx
|
||||
# https://github.com/openucx/ucx
|
||||
# OpenMPI and OpenSHMEM installation with UCX
|
||||
# https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX
|
||||
# https://openucx.readthedocs.io/en/master
|
||||
# Running in Docker containers
|
||||
# https://openucx.readthedocs.io/en/master/running.html#running-in-docker-containers
|
||||
ENV UCX_HOME=/usr/local/ucx
|
||||
ENV PATH=${CUDA_HOME}/bin:${UCX_HOME}/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${UCX_HOME}/lib:$LD_LIBRARY_PATH
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
# Install debugging tools (gdb, valgrind)
|
||||
sudo apt update
|
||||
sudo apt -y install gdb valgrind
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential libnuma-dev pkg-config libfuse3-dev
|
||||
# sudo apt install -y openmpi-bin openmpi-common openmpi-doc openmpi-debug libopenmpi-dev
|
||||
# sudo apt install -y libucx0-dbg libucs0-dbg libucm0-dbg libuct0-dbg libibverbs1-dbg librdmacm1-dbg libmlx5-1-dbg
|
||||
git clone https://github.com/openucx/ucx.git
|
||||
cd ucx
|
||||
# git checkout v1.15.0
|
||||
git checkout master
|
||||
./autogen.sh
|
||||
mkdir build
|
||||
cd build
|
||||
# make clean
|
||||
# make distclean
|
||||
# 性能优化配置 ../contrib/configure-release --prefix=/usr/local/ucx --with-cuda=${CUDA_HOME}
|
||||
# 调试/开发配置 ../contrib/configure-devel --prefix=/usr/local/ucx --with-cuda=${CUDA_HOME}
|
||||
# default ../configure --prefix=/usr/local/ucx --with-cuda=${CUDA_HOME}
|
||||
# ../contrib/configure-release --prefix=${UCX_HOME} --with-cuda=${CUDA_HOME} --with-gdrcopy=/usr/local/gdrcopy
|
||||
# ../contrib/configure-release --prefix=/usr/local/ucx \
|
||||
# --with-cuda=/usr/local/cuda-12.5 \
|
||||
# --with-mlx5 \
|
||||
# --with-rc \
|
||||
# --with-ud \
|
||||
# --with-dc \
|
||||
# --with-dm \
|
||||
# --with-verbs \
|
||||
# --with-go=/usr/local/go
|
||||
# --with-mlx5
|
||||
../contrib/configure-release --prefix=${UCX_HOME} \
|
||||
--with-cuda=/usr/local/cuda \
|
||||
--with-rc \
|
||||
--with-ud \
|
||||
--with-dc \
|
||||
--with-dm \
|
||||
--with-verbs
|
||||
make -j$(nproc)
|
||||
make install
|
||||
# ucx_info -a
|
||||
# 测试性能
|
||||
# ucx_perftest -d <device> -t bw -p <protocol> -n <num_iterations>
|
||||
# Read a UCX profiling dump file
|
||||
# ucx_read_profile
|
||||
# Run an MPI program through the UCX PML on a specific network device
|
||||
# mpirun -np 2 -mca pml ucx -x UCX_NET_DEVICES=mlx5_0:1 ./your_mpi_program
|
||||
# CUDA support check
|
||||
ucx_info -c
|
||||
ucx_info -d
|
||||
# ompi_info | grep ucx
|
||||
EOT
|
||||
|
||||
# 安装openmpi
|
||||
ENV MPI_HOME=/usr/local/openmpi
|
||||
ENV PATH=${MPI_HOME}/bin:/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${MPI_HOME}/lib:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${MPI_HOME}/lib:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
ENV LIBRARY_PATH=/usr/local/cuda/lib64:${LIBRARY_PATH}
|
||||
ENV CPATH=/usr/local/cuda/include:${MPI_HOME}/include:${CUDA_HOME}/include:$CPATH
|
||||
# export C_INCLUDE_PATH=/usr/local/cuda/include:$C_INCLUDE_PATH
|
||||
@@ -102,7 +169,7 @@ ENV CPATH=/usr/local/cuda/include:${MPI_HOME}/include:${CUDA_HOME}/include:$CPAT
|
||||
# export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
apt update && apt install -y autoconf automake libtool flex
|
||||
apt update && apt install -y autoconf automake libtool flex gfortran
|
||||
/usr/bin/python3 -m pip install cython
|
||||
git clone --recursive https://github.com/open-mpi/ompi.git
|
||||
cd ompi
|
||||
@@ -112,7 +179,10 @@ git checkout main
|
||||
./autogen.pl
|
||||
mkdir build
|
||||
cd build
|
||||
../configure --with-cuda=/usr/local/cuda --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-python=/usr/bin/python3
|
||||
# ../configure --with-cuda=/usr/local/cuda --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-python=/usr/bin/python3 FC=gfortran
|
||||
# ../configure FC=gfortran PYTHON=/usr/bin/python3 --with-cuda=/usr/local/cuda --with-cuda-libdir=/usr/local/cuda/lib64 --enable-python-bindings --enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-ucx=${UCX_HOME}
|
||||
../configure FC=gfortran PYTHON=/usr/bin/python3 --with-cuda=/usr/local/cuda --with-cuda-libdir=/usr/local/cuda/lib64 --enable-python-bindings \
|
||||
--enable-mpirun-prefix-by-default --prefix=${MPI_HOME} --with-ucx=${UCX_HOME} --enable-mca-dso=btl-smcuda,rcache-rgpusm,rcache-gpusm,accelerator-cuda --enable-mca-no-build=btl-uct
|
||||
make -j$(nproc)
|
||||
make install
|
||||
# 验证CUDA支持
|
||||
@@ -143,6 +213,7 @@ int main(int argc, char **argv) {
|
||||
EOF
|
||||
nvcc -o test_mpi_cuda test_mpi_cuda.cu -I${CUDA_HOME}/include -I${MPI_HOME}/include -L${MPI_HOME}/lib -lcudart -lmpi
|
||||
# mpirun --allow-run-as-root -np 2 ./test_mpi_cuda
|
||||
ompi_info | grep "MPI extensions"
|
||||
EOT
|
||||
|
||||
# 安装plumed
|
||||
@@ -213,25 +284,53 @@ COPY file/Amber24.tar.bz2 file/AmberTools24.tar.bz2 /root
|
||||
COPY file/l_HPCKit_p_2024.2.1.79_offline.sh file/l_onemkl_p_2024.2.2.17_offline.sh /root
|
||||
COPY file/boost_1_86_0.tar.gz /root
|
||||
ENV DOWNLOAD_MINICONDA="False"
|
||||
# install ambertools
|
||||
# install HPCKit and oneMKL
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
python3 -m pip install numpy scipy matplotlib
|
||||
chmod +x l_HPCKit_p_2024.2.1.79_offline.sh
|
||||
./l_HPCKit_p_2024.2.1.79_offline.sh -a --silent --eula accept --install-dir /opt/intel
|
||||
chmod +x l_onemkl_p_2024.2.2.17_offline.sh
|
||||
./l_onemkl_p_2024.2.2.17_offline.sh -a --silent --eula accept --install-dir /opt/intel/onemkl
|
||||
# echo "source /opt/intel/setvars.sh" >> /etc/profile
|
||||
# echo "source /opt/intel/onemkl/setvars.sh" >> /etc/profile
|
||||
echo "source /opt/intel/setvars.sh" >> ~/.bashrc
|
||||
echo "source /opt/intel/onemkl/setvars.sh" >> ~/.bashrc
|
||||
mkdir -p /opt/modulefiles/intel
|
||||
chmod +x /opt/intel/setvars.sh
|
||||
chmod +x /opt/intel/onemkl/setvars.sh
|
||||
# curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
# echo "source $HOME/.cargo/env" >> ~/.bashrc
|
||||
# cargo install modenv
|
||||
EOT
|
||||
# install ambertools
|
||||
ENV MODULEPATH=/opt/modulefiles/boost:$MODULEPATH
|
||||
RUN <<EOT
|
||||
#!/bin/bash
|
||||
python3 -m pip install numpy scipy matplotlib cython setuptools
|
||||
# install Boost from https://www.boost.org/users/download/
|
||||
tar zxvf boost_1_86_0.tar.gz
|
||||
cd boost_1_86_0
|
||||
./bootstrap.sh --prefix=/opt/boost --with-libraries=all --with-toolset=gcc
|
||||
echo "using mpi : /usr/local/openmpi/bin/mpicxx ;" >> project-config.jam
|
||||
# echo "using mpi : /opt/intel/mpi/2021.13/bin/mpicxx ;" >> project-config.jam
|
||||
# # echo "using mpi : /opt/intel/mpi/2021.13/bin/mpicxx ;" >> project-config.jam
|
||||
./b2 -j$(nproc) --layout=tagged link=static,shared threading=multi install
|
||||
mkdir -p /opt/modulefiles/boost
|
||||
# To load Boost through its modulefile, run the first command below; the other two list loaded and available modules:
|
||||
# module load boost/1.86.0-openmpi-5.1.0a1 ; module list ; module avail
|
||||
cat << EOF > /opt/modulefiles/boost/1.86.0-openmpi-5.1.0a1
|
||||
#%Module1.0
|
||||
set prefix /opt/boost
|
||||
|
||||
# 设置库路径和头文件路径,方便编译器找到 Boost
|
||||
prepend-path LD_LIBRARY_PATH \$prefix/lib
|
||||
prepend-path CPATH \$prefix/include
|
||||
prepend-path LIBRARY_PATH \$prefix/lib
|
||||
prepend-path PATH \$prefix/bin
|
||||
EOF
|
||||
# 解压 Amber24
|
||||
tar -xjf Amber24.tar.bz2
|
||||
tar -xjvf Amber24.tar.bz2
|
||||
# 解压 AmberTools24
|
||||
tar -xjf AmberTools24.tar.bz2
|
||||
tar -xjvf AmberTools24.tar.bz2
|
||||
# 清理解压后的 .tar.bz2 文件(可选)
|
||||
# rm Amber24.tar.bz2 AmberTools24.tar.bz2
|
||||
EOT
|
||||
|
||||
Reference in New Issue
Block a user