Files
shellscripts/nerdctl/setup_nvidia_docker_containerd_rootless.sh
2024-11-12 23:31:14 +08:00

77 lines
2.6 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Remember which user is running this script and report it on stdout.
CURRENT_USER="$(whoami)"
printf '%s\n' "当前用户:${CURRENT_USER}"
#######################################
# Install the NVIDIA Container Toolkit from NVIDIA's apt repository.
#
# Adds the repository (signing key + source list) only when no existing
# apt source line mentions nvidia-container-toolkit, then installs the
# nvidia-container-toolkit package.
#
# Globals:   none
# Arguments: none
# Outputs:   progress messages and the written source list on stdout
# Returns:   0 on success; non-zero as soon as a required step fails
#######################################
install_nvidia_docker() {
  echo "正在安装 NVIDIA Docker..."

  local keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
  local list_file=/etc/apt/sources.list.d/nvidia-container-toolkit.list
  local repo_url=https://nvidia.github.io/libnvidia-container
  local gpg_key repo_list

  sudo apt-get update || return 1
  sudo apt-get install -y curl gnupg lsb-release || return 1

  # Configure the NVIDIA apt source unless one is already present.
  # -s keeps grep quiet when a listed file or the glob does not exist.
  if ! grep -qs "^deb .\+nvidia-container-toolkit" /etc/apt/sources.list /etc/apt/sources.list.d/*; then
    # Download both artifacts before touching the system so that a failed
    # or partial download (-f turns HTTP errors into failures) can never
    # leave an empty keyring or a garbage source list behind.
    gpg_key=$(curl -fsSL "${repo_url}/gpgkey") || return 1
    repo_list=$(curl -fsSL "${repo_url}/stable/deb/nvidia-container-toolkit.list") || return 1

    # --yes overwrites a keyring left over from an earlier, partial run
    # instead of failing on the existing file.
    printf '%s\n' "$gpg_key" \
      | sudo gpg --dearmor --yes -o "$keyring" || return 1

    # Pin every repository entry to the keyring installed above.
    printf '%s\n' "$repo_list" \
      | sed "s#deb https://#deb [signed-by=${keyring}] https://#g" \
      | sudo tee "$list_file" || return 1
  else
    echo "NVIDIA Docker 源已经配置,跳过此步骤。"
  fi

  sudo apt-get update || return 1
  sudo apt-get install -y nvidia-container-toolkit
}
#######################################
# Configure the NVIDIA Container Toolkit for a rootless containerd.
#
# Writes the NVIDIA runtime into the per-user containerd config, creates
# the system-wide nvidia-container-runtime config when it is missing,
# registers the multiarch library directory with the dynamic loader, and
# makes sure /usr/bin is on PATH (now and for future login shells).
#
# Globals:   HOME (read), PATH (read, may be extended)
# Arguments: none
# Outputs:   progress message and the loader path on stdout
# Returns:   0 on success; non-zero if the per-user containerd config
#            could not be created or updated, or ~/.profile not written
#######################################
configure_nvidia_ctk() {
  echo "正在配置 NVIDIA Container Toolkit..."

  local containerd_dir="$HOME/.config/containerd"
  local runtime_config=/etc/nvidia-container-runtime/config.toml
  local profile="$HOME/.profile"
  # Kept literal on purpose: $PATH must be expanded by the login shell,
  # not by this script.
  # shellcheck disable=SC2016
  local path_line='export PATH=$PATH:/usr/bin'
  local lib_dir

  # Create the per-user config directory and register the NVIDIA runtime
  # in the rootless containerd config.toml.
  mkdir -p "$containerd_dir" || return 1
  nvidia-ctk runtime configure --runtime=containerd \
    --config="$containerd_dir/config.toml" || return 1

  # Generate /etc/nvidia-container-runtime/config.toml when it is missing.
  # NOTE(review): confirm that `--set default-runtime` is a valid key and
  # that this invocation actually writes the file; the command is kept
  # exactly as it was and treated as best-effort.
  if [[ ! -f "$runtime_config" ]]; then
    sudo nvidia-ctk config --set default-runtime --config="$runtime_config"
  fi

  # Register the shared-library directory so the GPU driver libraries are
  # found. Prefer the directory matching this machine's architecture and
  # fall back to the x86_64 path when that directory does not exist.
  lib_dir="/usr/lib/$(uname -m)-linux-gnu"
  [[ -d "$lib_dir" ]] || lib_dir=/usr/lib/x86_64-linux-gnu
  echo "$lib_dir" | sudo tee /etc/ld.so.conf.d/nvidia.conf
  sudo ldconfig

  # Make nvidia-container-cli reachable through PATH. Append the export
  # line only once so repeated runs do not keep growing ~/.profile.
  if ! grep -qsxF -- "$path_line" "$profile"; then
    printf '%s\n' "$path_line" >>"$profile" || return 1
  fi

  # Update PATH for the current shell directly instead of sourcing the
  # whole profile, which would re-run unrelated user startup code.
  case ":$PATH:" in
    *:/usr/bin:*) ;;
    *) export PATH="$PATH:/usr/bin" ;;
  esac
}
#######################################
# Adjust cgroup v2 permissions and create the CNI tuning allowlist.
#
# Recursively sets mode 755 on /sys/fs/cgroup and hands ownership to the
# current user, then makes sure /etc/cni/tuning/allowlist.conf exists
# with mode 644.
#
# NOTE(review): the recursive chmod/chown covers the whole cgroup tree,
# not only the current user's subtree — confirm this breadth is intended.
#
# Globals:   none
# Arguments: none
# Outputs:   progress message on stdout, diagnostics on stderr
# Returns:   0 on success; 1 if the current user could not be determined
#            or the allowlist file could not be prepared
#######################################
configure_cgroup_v2() {
  echo "配置 cgroup v2 支持..."

  local status=0
  local owner
  owner=$(whoami) || owner=""

  # Permission changes on the cgroup tree are best-effort, as before:
  # their exit status does not affect the function's result.
  sudo chmod -R 755 /sys/fs/cgroup
  if [[ -n "$owner" ]]; then
    # Quoted so the user name is always passed as a single argument.
    sudo chown -R "$owner" /sys/fs/cgroup
  else
    # Without a user name chown would be called with a missing operand.
    echo "configure_cgroup_v2: cannot determine current user; skipping chown" >&2
    status=1
  fi

  # Ensure the CNI tuning allowlist exists and is world-readable.
  sudo mkdir -p /etc/cni/tuning || status=1
  sudo touch /etc/cni/tuning/allowlist.conf || status=1
  sudo chmod 644 /etc/cni/tuning/allowlist.conf || status=1

  return "$status"
}
#######################################
# Restart containerd: first the per-user (systemd --user) instance, then
# the system-wide instance via sudo. Each restart is preceded by a
# daemon-reload of the corresponding systemd manager.
#
# Globals:   none
# Arguments: none
# Outputs:   progress message on stdout
# Returns:   exit status of the final system-wide restart command
#######################################
restart_containerd() {
  printf '%s\n' "重启 containerd 服务..."

  # Command prefixes for the two systemd managers being addressed.
  local -a user_ctl=(systemctl --user)
  local -a system_ctl=(sudo systemctl)

  "${user_ctl[@]}" daemon-reload
  "${user_ctl[@]}" restart containerd

  "${system_ctl[@]}" daemon-reload
  "${system_ctl[@]}" restart containerd
}
# Run every setup stage in order, then print the completion message.
for stage in \
  install_nvidia_docker \
  configure_nvidia_ctk \
  configure_cgroup_v2 \
  restart_containerd; do
  "$stage"
done
printf '%s\n' "所有步骤已完成NVIDIA Docker 和 containerd 配置已更新。"