# File listing metadata: 29 lines, 753 B, YAML
---
# Docker Compose definition for one GPU-enabled service, `nccl-test-container`,
# running the mayooot/nccl-tests-with-pytorch image on the host network.
version: '3.8'

# https://github.com/mayooot/build-nccl-tests-with-pytorch
services:
  nccl-test-container:
    image: mayooot/nccl-tests-with-pytorch:v0.0.2
    container_name: nccl-test-container
    # Share the host's network stack instead of an isolated bridge network.
    network_mode: host
    environment:
      # PORT / PASS are consumed by the image itself; presumably the SSH
      # daemon port and login password -- TODO confirm against the image docs.
      - PORT=1998
      # NOTE(review): plaintext credential stored in the compose file; consider
      # supplying it from an untracked .env file or a secret store instead.
      - PASS=P@88w0rd
      # NVIDIA container runtime settings: expose every GPU, with the compute
      # and utility driver capabilities.
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    volumes:
      # Bind-mount a key pair from the compose directory into root's ~/.ssh.
      - ./id_rsa_finetune:/root/.ssh/id_rsa
      - ./id_rsa.pub:/root/.ssh/id_rsa.pub
    deploy:
      resources:
        reservations:
          # Reserve all NVIDIA GPUs on the host for this service.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    cap_add:
      # Allow the container to lock memory; presumably required for RDMA
      # memory registration over InfiniBand -- TODO confirm.
      - IPC_LOCK
    devices:
      # Pass the host's InfiniBand device nodes through to the container.
      - /dev/infiniband:/dev/infiniband
    # Size of the container's /dev/shm.
    shm_size: '32gb'
    restart: unless-stopped