---
# Docker Compose definition for a ColossalAI development container.
# Runs privileged with host networking/IPC and reserves all NVIDIA GPUs.
version: '3.1'

services:
  ubuntu-colossalai:
    build:
      context: .
      dockerfile: Dockerfile.colossalai
      args:
        # Correspondence table for PyTorch / Python / pytorch_lightning versions:
        # https://blog.csdn.net/qq_41813454/article/details/137421822
        # REGISTRY: "nvcr.io"
        # OWNER: "nvidia"
        # nvcr.io/nvidia/pytorch:24.07-py3
        # LABEL: "pytorch"
        # VERSION: "24.07-py3"
        # HTTP_PROXY: "http://127.0.0.1:15777"
        # HTTPS_PROXY: "http://127.0.0.1:15777"
        # Bump to invalidate the build cache; build args are strings,
        # so quote to avoid integer coercion.
        CACHEBUST: "1"
    # volumes:
    #   - ./workspace:/workspace
    #   - /tmp:/tmp
    container_name: ubuntu-colossalai
    # 'if_not_present' is a deprecated alias; 'missing' is the canonical value.
    pull_policy: missing
    ulimits:
      # Unlimited locked memory — required by RDMA/NCCL workloads.
      memlock:
        soft: -1
        hard: -1
    # tty: true
    # stdin_open: true
    restart: unless-stopped
    image: hotwa/colossalai:latest
    privileged: true
    # Share host IPC namespace (large shared-memory segments for training).
    ipc: host
    network_mode: host
    # Large /dev/shm for dataloader workers / inter-process tensors.
    shm_size: '1024gb'
    # ports:
    #   - "3228:2222"
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
    # networks:
    #   - network_finetune
    # command: ["/usr/sbin/sshd", "-D"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]