Files
shellscripts/nerdctl/setup_nvidia_docker_containerd_rootless.sh
2024-11-12 23:33:03 +08:00

82 lines
2.8 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Record the account that is running this script and show it to the operator.
CURRENT_USER="$(whoami)"
printf '%s\n' "当前用户:${CURRENT_USER}"
#######################################
# Install the NVIDIA Container Toolkit from NVIDIA's apt repository.
#
# Registers the libnvidia-container apt source and its signing key unless an
# apt source line mentioning nvidia-container-toolkit already exists, then
# installs the nvidia-container-toolkit package.
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout; error messages on stderr
# Returns:   0 on success, non-zero as soon as a step fails
#######################################
install_nvidia_docker() {
  local keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
  local list_file=/etc/apt/sources.list.d/nvidia-container-toolkit.list
  local repo_url=https://nvidia.github.io/libnvidia-container
  local tmp_dir
  local rc=0

  echo "正在安装 NVIDIA Docker..."
  sudo apt-get update || return 1
  sudo apt-get install -y curl gnupg lsb-release || return 1

  # Configure the NVIDIA apt source. grep -s keeps the check quiet when
  # sources.list is missing or the sources.list.d glob matches nothing.
  if ! grep -qs "^deb .\+nvidia-container-toolkit" \
    /etc/apt/sources.list /etc/apt/sources.list.d/*; then
    tmp_dir=$(mktemp -d) || return 1

    # Download into temporary files first: with -f an HTTP error makes curl
    # fail instead of handing an error page to gpg or to apt's source list.
    if ! curl -fsSL "${repo_url}/gpgkey" -o "${tmp_dir}/gpgkey" ||
      ! curl -fsSL "${repo_url}/stable/deb/nvidia-container-toolkit.list" \
        -o "${tmp_dir}/toolkit.list"; then
      echo "错误: 下载 NVIDIA 软件源密钥或源列表失败。" >&2
      rc=1
    # --batch --yes lets a re-run overwrite an existing keyring without a prompt.
    elif ! sudo gpg --batch --yes --dearmor -o "${keyring}" "${tmp_dir}/gpgkey"; then
      echo "错误: 安装 NVIDIA 软件源密钥失败。" >&2
      rc=1
    # Pin the source to the keyring installed above while writing the list.
    elif ! sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
      "${tmp_dir}/toolkit.list" | sudo tee "${list_file}"; then
      echo "错误: 写入 NVIDIA 软件源列表失败。" >&2
      rc=1
    fi

    rm -rf -- "${tmp_dir}"
    if [ "${rc}" -ne 0 ]; then
      return 1
    fi
  else
    echo "NVIDIA Docker 源已经配置,跳过此步骤。"
  fi

  sudo apt-get update || return 1
  sudo apt-get install -y nvidia-container-toolkit
}
#######################################
# Configure the NVIDIA Container Toolkit for a rootless containerd setup.
#
# Writes the NVIDIA runtime into the per-user containerd config.toml, runs the
# system-wide nvidia-ctk config step when its config file is missing, registers
# the library directory with the dynamic linker, and makes sure /usr/bin is a
# PATH entry.
# Globals:   HOME (read), PATH (read; extended when /usr/bin is missing)
# Arguments: none
# Outputs:   progress messages and tool output on stdout/stderr
# Returns:   non-zero if the config directory cannot be created or the
#            containerd runtime configuration fails; otherwise the status of
#            the last step
#######################################
configure_nvidia_ctk() {
  local containerd_dir="${HOME}/.config/containerd"
  local profile="${HOME}/.profile"
  # Single quotes are intentional: $PATH must be expanded when the profile is
  # read by a login shell, not when this line is written.
  # shellcheck disable=SC2016
  local path_line='export PATH=$PATH:/usr/bin'

  echo "正在配置 NVIDIA Container Toolkit..."

  # Create the containerd configuration directory used in rootless mode.
  mkdir -p "${containerd_dir}" || return 1

  # Let nvidia-ctk write its runtime settings into the rootless config.toml.
  nvidia-ctk runtime configure --runtime=containerd \
    --config="${containerd_dir}/config.toml" || return 1

  # Only run the system-wide config step when the file does not exist yet.
  # NOTE(review): the original comment says this creates the file and makes
  # nvidia the default runtime, but no --in-place/--output is passed and
  # `default-runtime` may not be a valid key; NVIDIA's rootless instructions
  # use `--set nvidia-container-cli.no-cgroups --in-place` instead. Confirm
  # against the nvidia-ctk documentation before relying on this step.
  if [ ! -f /etc/nvidia-container-runtime/config.toml ]; then
    sudo nvidia-ctk config --set default-runtime --config=/etc/nvidia-container-runtime/config.toml
  fi

  # Register the library directory with the dynamic linker and rebuild its
  # cache so the driver libraries can be located (x86_64 path is hard-coded).
  echo "/usr/lib/x86_64-linux-gnu" | sudo tee /etc/ld.so.conf.d/nvidia.conf
  sudo ldconfig

  # Make sure /usr/bin (where nvidia-container-cli is expected) is on PATH.
  # Compare whole PATH entries: a substring test would treat e.g.
  # /opt/tools/usr/bin as if it were /usr/bin.
  case ":${PATH}:" in
    *:/usr/bin:*) ;;
    *)
      # Append the export only once so repeated runs do not grow the profile.
      if [[ ! -f "${profile}" || "$(<"${profile}")" != *"${path_line}"* ]]; then
        printf '%s\n' "${path_line}" >>"${profile}"
      fi
      # Update this process directly instead of sourcing the whole profile,
      # which would re-run unrelated startup code inside the script.
      export PATH="${PATH:+${PATH}:}/usr/bin"
      ;;
  esac
}
#######################################
# Adjust cgroup and CNI permissions for the user running the script.
#
# Recursively sets mode 755 on /sys/fs/cgroup and hands it to the current
# user, then creates /etc/cni/tuning/allowlist.conf (mode 644) and hands
# /etc/cni to the current user.
# Globals:   none
# Arguments: none
# Outputs:   a progress message on stdout
# Returns:   non-zero if the current user cannot be determined; otherwise the
#            status of the final chown
#######################################
configure_cgroup_v2() {
  local owner

  echo "配置 cgroup v2 支持..."

  # Resolve the user once and keep it quoted so the name is always passed to
  # chown as a single argument.
  owner=$(whoami) || return 1

  # NOTE(review): recursively changing mode/owner of the whole cgroup tree is
  # broad; rootless setups are normally given access through systemd cgroup
  # delegation instead — confirm this is really intended.
  sudo chmod -R 755 /sys/fs/cgroup
  sudo chown -R "${owner}" /sys/fs/cgroup

  # Create /etc/cni/tuning/allowlist.conf and set its permissions.
  sudo mkdir -p /etc/cni/tuning
  sudo touch /etc/cni/tuning/allowlist.conf
  sudo chmod 644 /etc/cni/tuning/allowlist.conf
  sudo chown -R "${owner}" /etc/cni
}
#######################################
# Restart containerd: the per-user (systemctl --user) instance first, then the
# system-wide instance through sudo. Each restart is preceded by a
# daemon-reload of the matching systemd manager.
# Globals:   none
# Arguments: none
# Outputs:   a progress message on stdout
# Returns:   the status of the final system-wide restart command
#######################################
restart_containerd() {
  local -a user_ctl=(systemctl --user)
  local -a system_ctl=(sudo systemctl)

  printf '%s\n' "重启 containerd 服务..."

  # Per-user systemd manager.
  "${user_ctl[@]}" daemon-reload
  "${user_ctl[@]}" restart containerd

  # System-wide systemd manager.
  "${system_ctl[@]}" daemon-reload
  "${system_ctl[@]}" restart containerd
}
# Run every setup step in order. A failing step does not stop the later ones,
# but the success message is printed only when all of them succeeded;
# otherwise the failed steps are listed on stderr and the script exits 1.
failed_steps=()
for step in \
  install_nvidia_docker \
  configure_nvidia_ctk \
  configure_cgroup_v2 \
  restart_containerd; do
  "${step}" || failed_steps+=("${step}")
done

if [ "${#failed_steps[@]}" -gt 0 ]; then
  echo "以下步骤执行失败: ${failed_steps[*]}" >&2
  exit 1
fi
echo "所有步骤已完成NVIDIA Docker 和 containerd 配置已更新。"