add
This commit is contained in:
@@ -5,8 +5,62 @@ docker-compose -f docker-compose_pytorch1.13.yml build
|
||||
docker-compose -f docker-compose_pytorch2.3.yml build
|
||||
```
|
||||
|
||||
## 英伟达显卡驱动的卸载与安装
|
||||
|
||||
卸载
|
||||
|
||||
```shell
|
||||
cd /usr/local/cuda
|
||||
ll
|
||||
cd ..
|
||||
cd cuda-12.3/
|
||||
ll
|
||||
cd bin/
|
||||
ll
|
||||
./cuda-uninstaller
|
||||
cd ~
|
||||
nvidia-uninstall
|
||||
sudo modprobe -r nvidia-drm nvidia-modeset nvidia-uvm nvidia
|
||||
sudo rm -rf /usr/lib64/nvidia /usr/lib/nvidia
|
||||
sudo apt autoremove 'nvidia*'
|
||||
sudo apt clean all
|
||||
sudo dracut --force
|
||||
sudo reboot
|
||||
```
|
||||
|
||||
安装
|
||||
|
||||
```shell
|
||||
wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2204/x86_64/nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||
dpkg -i nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||
wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda_12.5.1_555.42.06_linux.run
|
||||
ll
|
||||
sudo sh cuda_12.5.1_555.42.06_linux.run
|
||||
echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc && echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc && source ~/.bashrc
|
||||
nvcc -V
|
||||
nvidia-smi
|
||||
nvidia-smi -pm 1
|
||||
modprobe nvidia_peermem
|
||||
nvidia-smi
|
||||
modinfo nvidia_peermem
|
||||
lsmod | grep nvidia_peermem
|
||||
systemctl mask apt-daily-upgrade.service
|
||||
systemctl mask apt-daily-upgrade.timer
|
||||
systemctl disable apt-daily-upgrade.timer
|
||||
systemctl disable apt-daily-upgrade.service
|
||||
ll
|
||||
wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2204/x86_64/nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||
dpkg -i nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||
sudo systemctl start nvidia-fabricmanager
|
||||
sudo systemctl status nvidia-fabricmanager
|
||||
```
|
||||
|
||||
## 镜像测试命令
|
||||
|
||||
```shell
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all ldh/deepspeed:test
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/deepspeed:pt23_update
```
|
||||
|
||||
|
||||
```shell
|
||||
nvidia-smi
|
||||
nvcc -V
|
||||
@@ -58,6 +112,31 @@ EOF
|
||||
python compile_deepspeed_ops.py
|
||||
```
|
||||
|
||||
## 配置 VS Code 的 Docker 插件
|
||||
|
||||
[nerdctl配置](https://blog.csdn.net/margu_168/article/details/139822555)
|
||||
|
||||
|
||||
|
||||
```shell
|
||||
cat << 'EOF' > /usr/local/bin/docker
|
||||
#!/bin/bash
|
||||
exec nerdctl "$@"
|
||||
EOF
|
||||
chmod +x /usr/local/bin/docker
|
||||
```
|
||||
|
||||
配置 nerdctl 的 bash 自动补全
|
||||
|
||||
```shell
|
||||
apt update
|
||||
apt install bash-completion -y
|
||||
nerdctl completion bash > /etc/bash_completion.d/nerdctl
|
||||
nerdctl completion bash > /etc/bash_completion.d/docker
|
||||
source /etc/bash_completion.d/nerdctl
|
||||
source /etc/bash_completion.d/docker
|
||||
```
|
||||
|
||||
## 物理机更新内核
|
||||
|
||||
```shell
|
||||
|
||||
Reference in New Issue
Block a user