---
# Docker Compose definition for a single GPU-enabled container running the
# mayooot/nccl-tests-with-pytorch image.
# Image source: https://github.com/mayooot/build-nccl-tests-with-pytorch
version: '3.8'

services:
  nccl-test-container:
    image: mayooot/nccl-tests-with-pytorch:v0.0.2
    container_name: nccl-test-container
    # The container shares the host's network stack, so no `ports:` mapping
    # is declared here.
    network_mode: host
    environment:
      # PORT and PASS are passed through to the image as-is.
      # NOTE(review): presumably the SSH daemon port and login password used
      # by the image's entrypoint -- confirm against the image documentation.
      - PORT=1998
      # NOTE(review): plaintext credential committed with the file. Consider
      # supplying it from an untracked `.env` file or a secret store.
      - PASS=P@88w0rd
      # Expose every host GPU to the NVIDIA container runtime with the
      # compute and utility driver capabilities.
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    volumes:
      # Bind-mount a key pair from the directory holding this file into
      # root's ~/.ssh inside the container. The private key is renamed from
      # id_rsa_finetune to id_rsa by the mount target.
      - ./id_rsa_finetune:/root/.ssh/id_rsa
      - ./id_rsa.pub:/root/.ssh/id_rsa.pub
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all NVIDIA GPUs on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    cap_add:
      # NOTE(review): IPC_LOCK permits locking memory; presumably needed for
      # RDMA memory registration over InfiniBand -- confirm.
      - IPC_LOCK
    devices:
      # Pass the host's InfiniBand device nodes through to the container.
      - /dev/infiniband:/dev/infiniband
    shm_size: '32gb'
    restart: unless-stopped