#!/bin/bash
#
# Installs the NVIDIA Container Toolkit and configures it for containerd.
# The steps are defined as functions below and invoked at the end of the file.

# Name of the user running this script.
CURRENT_USER=$(whoami)
echo "当前用户:$CURRENT_USER"

#######################################
# Install the NVIDIA Container Toolkit from NVIDIA's apt repository.
#
# Installs curl/gnupg/lsb-release, registers the NVIDIA repository (signing
# key plus sources list) unless an active "deb" entry mentioning
# nvidia-container-toolkit already exists, then installs the
# nvidia-container-toolkit package.
#
# Arguments: none
# Outputs:   progress messages and the written repository entry on stdout;
#            error messages on stderr
# Returns:   0 on success, 1 as soon as any step fails
#######################################
install_nvidia_docker() {
  local -r keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
  local -r repo_file=/etc/apt/sources.list.d/nvidia-container-toolkit.list
  local -r base_url=https://nvidia.github.io/libnvidia-container
  local gpg_key repo_list

  echo "正在安装 NVIDIA Docker..."
  sudo apt-get update || {
    echo "错误: apt-get update 失败。" >&2
    return 1
  }
  sudo apt-get install -y curl gnupg lsb-release || {
    echo "错误: 安装依赖包失败。" >&2
    return 1
  }

  # Configure the NVIDIA repository only when no active entry exists yet.
  # -s silences "No such file" / "Is a directory" noise from the glob.
  if ! grep -qs "^deb .\+nvidia-container-toolkit" \
    /etc/apt/sources.list /etc/apt/sources.list.d/*; then
    # Download before writing anything, so a failed transfer can never leave
    # an empty or garbage keyring / sources file behind.
    gpg_key=$(curl -fsSL "${base_url}/gpgkey") || {
      echo "错误: 下载 NVIDIA GPG 密钥失败。" >&2
      return 1
    }
    repo_list=$(curl -fsSL "${base_url}/stable/deb/nvidia-container-toolkit.list") || {
      echo "错误: 下载 NVIDIA 软件源列表失败。" >&2
      return 1
    }

    # --yes: overwrite a keyring left over from an earlier partial run instead
    # of stopping at gpg's interactive "Overwrite?" prompt.
    printf '%s\n' "$gpg_key" | sudo gpg --dearmor --yes -o "$keyring" || {
      echo "错误: 写入 NVIDIA 密钥环失败。" >&2
      return 1
    }

    # Pin every repository entry to the keyring that was just installed.
    printf '%s\n' "$repo_list" \
      | sed "s#deb https://#deb [signed-by=${keyring}] https://#g" \
      | sudo tee "$repo_file" || {
      echo "错误: 写入 NVIDIA 软件源配置失败。" >&2
      return 1
    }
  else
    echo "NVIDIA Docker 源已经配置,跳过此步骤。"
  fi

  sudo apt-get update || {
    echo "错误: apt-get update 失败。" >&2
    return 1
  }
  sudo apt-get install -y nvidia-container-toolkit || {
    echo "错误: 安装 nvidia-container-toolkit 失败。" >&2
    return 1
  }
}

#######################################
# Configure the NVIDIA Container Toolkit for the current user's containerd.
#
# Writes the NVIDIA runtime into $HOME/.config/containerd/config.toml,
# runs an nvidia-ctk config command when the system-wide runtime config is
# missing, registers /usr/lib/x86_64-linux-gnu with the dynamic linker, and
# makes sure ~/.profile appends /usr/bin to PATH.
#
# Globals:   HOME (read), PATH (modified by sourcing ~/.profile)
# Arguments: none
# Outputs:   progress messages on stdout; error messages on stderr
# Returns:   1 if the config directory or containerd config cannot be
#            written; otherwise the status of sourcing ~/.profile
#######################################
configure_nvidia_ctk() {
  local containerd_dir="$HOME/.config/containerd"
  local runtime_config=/etc/nvidia-container-runtime/config.toml
  local profile_file="$HOME/.profile"
  local path_export='export PATH=$PATH:/usr/bin'

  echo "正在配置 NVIDIA Container Toolkit..."

  # Create the per-user containerd configuration directory.
  mkdir -p "$containerd_dir" || {
    echo "错误: 无法创建目录 $containerd_dir。" >&2
    return 1
  }

  # Register the NVIDIA runtime in the user-level (rootless) config.toml.
  nvidia-ctk runtime configure --runtime=containerd \
    --config="$containerd_dir/config.toml" || {
    echo "错误: nvidia-ctk runtime configure 失败。" >&2
    return 1
  }

  # Only touch the system-wide runtime config when it does not exist yet.
  if [ ! -f "$runtime_config" ]; then
    # NOTE(review): "--set default-runtime" carries no "=value"; confirm this
    # is a valid nvidia-ctk invocation (generating a default config may have
    # been the intent). Left best-effort: a failure here does not abort.
    sudo nvidia-ctk config --set default-runtime --config="$runtime_config"
  fi

  # Add the library directory to the linker search path and rebuild the cache.
  echo "/usr/lib/x86_64-linux-gnu" | sudo tee /etc/ld.so.conf.d/nvidia.conf
  sudo ldconfig

  # Append the PATH export only once, so repeated runs do not keep growing
  # ~/.profile with duplicate lines.
  if ! grep -qxF -- "$path_export" "$profile_file" 2>/dev/null; then
    echo "$path_export" >> "$profile_file"
  fi
  source "$profile_file"
}

#######################################
# Adjust cgroup permissions and create the CNI tuning allowlist file.
#
# Recursively sets mode 755 on /sys/fs/cgroup and hands ownership to the
# invoking user, then ensures /etc/cni/tuning/allowlist.conf exists with
# mode 644. Every step is attempted even if an earlier one fails.
#
# Arguments: none
# Outputs:   progress message on stdout; warnings on stderr
# Returns:   0 if every step succeeded, 1 otherwise
#######################################
configure_cgroup_v2() {
  local owner
  local status=0

  echo "配置 cgroup v2 支持..."

  # NOTE(review): this gives the invoking user the whole cgroup tree. systemd
  # cgroup delegation is the usual mechanism for rootless containers —
  # confirm the recursive chmod/chown is really intended.
  sudo chmod -R 755 /sys/fs/cgroup || status=1

  # Resolve the user once and quote it so it is always passed to chown as a
  # single argument; skip chown entirely if the name cannot be determined.
  if owner=$(whoami) && [[ -n "$owner" ]]; then
    sudo chown -R "$owner" /sys/fs/cgroup || status=1
  else
    echo "警告: 无法确定当前用户,跳过 chown。" >&2
    status=1
  fi

  sudo mkdir -p /etc/cni/tuning || status=1
  sudo touch /etc/cni/tuning/allowlist.conf || status=1
  sudo chmod 644 /etc/cni/tuning/allowlist.conf || status=1

  return "$status"
}

#######################################
# Restart containerd in both the user and the system service manager.
#
# Reloads unit files and restarts the "containerd" unit first through
# "systemctl --user", then through "sudo systemctl". All four commands are
# always attempted; a failure of one does not skip the others.
#
# Arguments: none
# Outputs:   progress message on stdout; warnings on stderr
# Returns:   0 if every command succeeded, 1 if any of them failed
#######################################
restart_containerd() {
  local status=0

  echo "重启 containerd 服务..."

  # Per-user service manager.
  systemctl --user daemon-reload || status=1
  systemctl --user restart containerd || {
    echo "警告: 重启用户级 containerd 失败。" >&2
    status=1
  }

  # System-wide service manager.
  sudo systemctl daemon-reload || status=1
  sudo systemctl restart containerd || {
    echo "警告: 重启系统级 containerd 失败。" >&2
    status=1
  }

  return "$status"
}

#######################################
# Run every setup step in order.
#
# Stops at the first step that returns non-zero, so the success message is
# printed only when all steps actually succeeded.
#
# Arguments: none
# Outputs:   success message on stdout; error message on stderr
# Returns:   does not return on failure (exits the script with status 1)
#######################################
main() {
  local step
  for step in \
    install_nvidia_docker \
    configure_nvidia_ctk \
    configure_cgroup_v2 \
    restart_containerd; do
    if ! "$step"; then
      echo "错误: 步骤 $step 执行失败,脚本终止。" >&2
      exit 1
    fi
  done

  echo "所有步骤已完成,NVIDIA Docker 和 containerd 配置已更新。"
}

main "$@"