# Docker Compose definition for a single GPU-enabled service that builds and
# runs the hotwa/deepspeed:pt23 image.
version: '3.8'

services:
  ubuntu-finetune:
    build:
      context: .
      dockerfile: Dockerfile
      # Build arguments forwarded to the Dockerfile. All values are quoted so
      # that YAML never retypes them (e.g. 3.10 -> 3.1, or 1 -> integer).
      args:
        PYTHON_VERSION: "3.10"
        CUDA_VERSION: "12.1.0"
        PYTORCH_VERSION: "2.3.0"
        TORCHVISION_VERSION: "0.18.0"
        TORCHAUDIO_VERSION: "2.3.0"
        DS_BUILD_OPS: "1"
        USE_CUDA: "1"
        USE_ROCM: "0"
        USE_XPU: "0"
        CUDA: cu121
        CUDA_ARCH_LIST: "80;86;89;90"
        SETUPTOOLS_VERSION: "69.5.1"
        DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a"
        DEEPSPEED_VERSION: "master"
        DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
    # Bind-mount the local ./binbbt directory at /bbtft inside the container.
    volumes:
      - ./binbbt:/bbtft
    container_name: ubuntu-finetune
    # `if_not_present` is the legacy alias of `missing` in the Compose spec.
    pull_policy: if_not_present
    restart: unless-stopped
    image: hotwa/deepspeed:pt23
    shm_size: '40gb'
    # Publish container port 22 on host port 3228. Quoted because an unquoted
    # digits:digits scalar can be parsed as a base-60 integer by YAML 1.1.
    ports:
      - "3228:22"
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
    networks:
      - test-net
    # Reserve every NVIDIA GPU device for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

# Change this to the docker-swarm network.
networks:
  test-net:
    # The network is created outside this Compose file and must already exist.
    external: true