add
This commit is contained in:
@@ -5,8 +5,62 @@ docker-compose -f docker-compose_pytorch1.13.yml build
|
|||||||
docker-compose -f docker-compose_pytorch2.3.yml build
|
docker-compose -f docker-compose_pytorch2.3.yml build
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 英伟达显卡驱动的安装与卸载
|
||||||
|
|
||||||
|
卸载
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cd /usr/local/cuda
|
||||||
|
ll
|
||||||
|
cd ..
|
||||||
|
cd cuda-12.3/
|
||||||
|
ll
|
||||||
|
cd bin/
|
||||||
|
ll
|
||||||
|
./cuda-uninstaller
|
||||||
|
cd ~
|
||||||
|
nvidia-uninstall
|
||||||
|
sudo modprobe -r nvidia-drm nvidia-modeset nvidia-uvm nvidia
|
||||||
|
sudo rm -rf /usr/lib64/nvidia /usr/lib/nvidia
|
||||||
|
sudo apt autoremove 'nvidia*'
|
||||||
|
sudo apt clean all
|
||||||
|
sudo dracut --force
|
||||||
|
sudo reboot
|
||||||
|
```
|
||||||
|
|
||||||
|
安装
|
||||||
|
|
||||||
|
```shell
|
||||||
|
wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2204/x86_64/nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||||
|
dpkg -i nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||||
|
wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda_12.5.1_555.42.06_linux.run
|
||||||
|
ll
|
||||||
|
sudo sh cuda_12.5.1_555.42.06_linux.run
|
||||||
|
echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc && echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc && source ~/.bashrc
|
||||||
|
nvcc -V
|
||||||
|
nvidia-smi
|
||||||
|
nvidia-smi -pm 1
|
||||||
|
modprobe nvidia_peermem
|
||||||
|
nvidia-smi
|
||||||
|
modinfo nvidia_peermem
|
||||||
|
lsmod | grep nvidia_peermem
|
||||||
|
systemctl mask apt-daily-upgrade.service
|
||||||
|
systemctl mask apt-daily-upgrade.timer
|
||||||
|
systemctl disable apt-daily-upgrade.timer
|
||||||
|
systemctl disable apt-daily-upgrade.service
|
||||||
|
ll
|
||||||
|
wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2204/x86_64/nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||||
|
dpkg -i nvidia-fabricmanager-555_555.42.06-1_amd64.deb
|
||||||
|
sudo systemctl start nvidia-fabricmanager
|
||||||
|
sudo systemctl status nvidia-fabricmanager
|
||||||
|
```
|
||||||
|
|
||||||
## 镜像测试命令
|
## 镜像测试命令
|
||||||
|
|
||||||
|
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all ldh/deepspeed:test
|
||||||
|
docker run -it --rm --network=host --privileged --ipc=host --ulimit memlock=-1 --gpus all hotwa/deepspeed:pt23_update
|
||||||
|
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
nvcc -V
|
nvcc -V
|
||||||
@@ -58,6 +112,31 @@ EOF
|
|||||||
python compile_deepspeed_ops.py
|
python compile_deepspeed_ops.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 配置 VS Code 的 Docker 插件
|
||||||
|
|
||||||
|
[nerdctl配置](https://blog.csdn.net/margu_168/article/details/139822555)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cat << 'EOF' > /usr/local/bin/docker
|
||||||
|
#!/bin/bash
|
||||||
|
exec nerdctl "$@"
|
||||||
|
EOF
|
||||||
|
chmod +x /usr/local/bin/docker
|
||||||
|
```
|
||||||
|
|
||||||
|
nerdctl bash自动补全
|
||||||
|
|
||||||
|
```shell
|
||||||
|
apt update
|
||||||
|
apt install bash-completion -y
|
||||||
|
nerdctl completion bash > /etc/bash_completion.d/nerdctl
|
||||||
|
nerdctl completion bash > /etc/bash_completion.d/docker
|
||||||
|
source /etc/bash_completion.d/nerdctl
|
||||||
|
source /etc/bash_completion.d/docker
|
||||||
|
```
|
||||||
|
|
||||||
## 物理机更新内核
|
## 物理机更新内核
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|||||||
Reference in New Issue
Block a user