# Compose definition for the DeepSpeed test container: builds the image from
# the local Dockerfile, reserves every NVIDIA GPU on the host, and runs an SSH
# daemon as the container's main process.
services:
  ldh-deepspeed-test:
    # Build from ./Dockerfile; the resulting image is tagged by `image` below.
    build:
      context: .
      dockerfile: Dockerfile
      # Optional build-time proxy settings (disabled).
      # args:
      #   HTTP_PROXY: "http://127.0.0.1:15777"
      #   HTTPS_PROXY: "http://127.0.0.1:15777"
      # NOTE(review): Compose spells this build key `cache_from` and expects a
      # list - confirm the syntax before enabling.
      # cache-from: "type=local"
    image: ldh/deepspeed:test
    container_name: ldh-deepspeed-test

    # Size of /dev/shm inside the container.
    # NOTE(review): 1024gb is very large - confirm the hosts can back it.
    shm_size: '1024gb'

    # Reserve all GPUs through the NVIDIA device driver.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    # Legacy runtime selection (disabled).
    # runtime: nvidia

    # Host IPC namespace sharing (disabled).
    # ipc: host
    # Share the host's PID namespace with the container.
    pid: host

    # Settings read by the NVIDIA container runtime: expose every GPU with the
    # compute and utility driver capabilities.
    environment:
      NVIDIA_VISIBLE_DEVICES: 'all'
      NVIDIA_DRIVER_CAPABILITIES: 'compute,utility'

    # Interactive / privileged switches (disabled).
    # stdin_open: true
    # tty: true
    # privileged: true

    # Grant every Linux capability to the container.
    # NOTE(review): this is close to privileged mode - confirm it is required.
    cap_add:
      - ALL

    # Shared filesystems bind-mounted from the host.
    volumes:
      - /mnt/beegfs:/root/shared/beegfs
      - /mnt/yrfs:/root/shared/yrfs

    # Port publishing (disabled); it does not apply while the container uses
    # the host network stack.
    # ports:
    #   - "22242:22242"
    #   - "5000:5000"
    network_mode: host

    # Run sshd in the foreground (-D) as the container's main process.
    command:
      - '/usr/sbin/sshd'
      - '-D'
    # Alternative keep-alive command (disabled).
    # command: ["/bin/bash", "-c", "while true; do sleep 1000; done"]