---
# Compose definition for a single service, ldh-megatron-deepspeed-test,
# started from the hotwa/magadeep:latest image with /usr/sbin/sshd -D as its
# command.
services:
  ldh-megatron-deepspeed-test:
    image: hotwa/magadeep:latest
    # Size of the container's /dev/shm.
    shm_size: '128gb'

    # Reserve every device exposed by the nvidia driver that offers the
    # "gpu" capability.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    # Disabled alternative to the device reservation above:
    # runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility

    # Interactive terminal settings, currently disabled:
    # stdin_open: true
    # tty: true

    privileged: true
    cap_add:
      - IPC_LOCK

    # Bind mounts, written as host path : container path.
    volumes:
      - /root/workspace:/root/data
      - /dev/infiniband:/dev/infiniband

    # Port publishing and the overlay network attachment are disabled;
    # the service uses the host's network stack instead (network_mode below).
    # ports:
    #   - "22242:22242"
    #   - "5000:5000"
    # networks:
    #   - ldh_overlay_network
    network_mode: host

    command:
      - '/usr/sbin/sshd'
      - '-D'

# Top-level declaration of the external overlay network, disabled together
# with the service-level "networks" entry above:
# networks:
#   ldh_overlay_network:
#     external: true