---
# Docker Compose definition for one GPU-enabled container, `megatron-test`,
# built on the NVIDIA PyTorch 24.02 image.
services:
  megatron-test:
    image: nvcr.io/nvidia/pytorch:24.02-py3
    # Size of the container's /dev/shm.
    shm_size: '560gb'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # - CUTLASS_PATH="/opt/cutlass"
      # - CUDA_HOME="/usr/local/cuda"
      # - PATH="${CUDA_HOME}/bin:${PATH}"
      # - LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
    # Keep STDIN open and allocate a TTY so the container can be used
    # interactively.
    stdin_open: true
    tty: true
    # NOTE(review): privileged mode already grants all capabilities, which
    # makes the explicit IPC_LOCK below redundant while it is enabled; it is
    # kept so the capability survives if `privileged` is ever dropped.
    privileged: true
    cap_add:
      - IPC_LOCK
    volumes:
      - /root/workspace:/mnt
      # Presumably exposes the host's InfiniBand devices to the container;
      # confirm the path exists on every target host.
      - /dev/infiniband:/dev/infiniband
      # - /mnt/local-nvme:/root/
    # Port publishing is disabled: it must not be combined with
    # `network_mode: host`. With host networking the container shares the
    # host's network stack, so anything listening on port 5000 inside the
    # container is reachable on host port 5000 without a mapping.
    # Re-enable this together with a non-host network if isolation is needed.
    # ports:
    #   - "5000:5000"
    # networks:
    #   - ldh_overlay_network
    network_mode: host

# networks:
#   ldh_overlay_network:
#     external: true