first add

This commit is contained in:
Your Name
2024-08-02 14:44:39 +08:00
parent 0b0edc3755
commit 968bc3dd24
35 changed files with 3693 additions and 65 deletions

28
docker-compose_nccl.yml Normal file
View File

@@ -0,0 +1,28 @@
version: '3.8'
# Single-service Compose file that runs the NCCL test image with access to
# the host's NVIDIA GPUs and InfiniBand devices.
# Image source: https://github.com/mayooot/build-nccl-tests-with-pytorch
services:
  nccl-test-container:
    image: mayooot/nccl-tests-with-pytorch:v0.0.2
    container_name: nccl-test-container
    # Share the host's network stack instead of an isolated bridge network.
    network_mode: host
    environment:
      # PORT and PASS are handed to the image as-is; presumably the SSH
      # listen port and login password -- TODO confirm against the image docs.
      - PORT=1998
      # NOTE(review): a plaintext password is committed in this file. Set
      # NCCL_TEST_PASS in the shell or an .env file to override it; the
      # original value is kept only as a backward-compatible default.
      - 'PASS=${NCCL_TEST_PASS:-P@88w0rd}'
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    volumes:
      # SSH key pair bind-mounted from the directory holding this file.
      - ./id_rsa_finetune:/root/.ssh/id_rsa
      - ./id_rsa.pub:/root/.ssh/id_rsa.pub
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Presumably needed so RDMA/NCCL can pin (lock) memory -- TODO confirm.
    cap_add:
      - IPC_LOCK
    # Pass the host's InfiniBand device nodes through to the container.
    devices:
      - /dev/infiniband:/dev/infiniband
    shm_size: '32gb'
    restart: unless-stopped