---
# Docker Compose definition for the `ubuntu-finetune` service: builds the
# image from the local Dockerfile with the build args below, starts sshd as
# the container command, and reserves the host's NVIDIA GPUs for it.
version: '3.8'

services:
  ubuntu-finetune:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Compatibility table for PyTorch, Python and pytorch_lightning
        # versions:
        # https://blog.csdn.net/qq_41813454/article/details/137421822
        #
        # Every build arg is quoted so YAML never re-types it (an unquoted
        # 3.10, for example, would be read as the float 3.1).
        PYTHON_VERSION: '3.9'
        CUDA_VERSION: '11.7.1'
        PYTORCH_VERSION: '1.13.1'
        TORCHVISION_VERSION: '0.14.1'
        TORCHAUDIO_VERSION: '0.13.1'
        DS_BUILD_OPS: '1'
        DS_BUILD_SPARSE_ATTN: '1'
        DS_BUILD_FUSED_ADAM: '1'
        DS_BUILD_CPU_ADAM: '1'
        USE_CUDA: '1'
        USE_ROCM: '0'
        USE_XPU: '0'
        CUDA: cu117
        # For RTX 4090; the full list would be "80;86;89;90". Needed to
        # compile the DeepSpeed kernels, and this value is checked strictly.
        CUDA_ARCH_LIST: '80;86'
        SETUPTOOLS_VERSION: '69.5.1'
        # NOTE(review): root password for the image; sshd is published on a
        # host port below, so override ROOT_PASSWD (shell environment or
        # .env file) instead of relying on the "root" default. Build args
        # may also remain visible in the image metadata - confirm how the
        # Dockerfile consumes this value.
        ROOT_PASSWD: '${ROOT_PASSWD:-root}'
        # 90a for H100; 89 for GeForce RTX 4090.
        DCUTLASS_NVCC_ARCHS: '90a'
    image: hotwa/deepspeed:pt113
    container_name: ubuntu-finetune
    pull_policy: if_not_present
    restart: unless-stopped
    tty: true
    shm_size: '32gb'
    volumes:
      - ./src:/bbtft
    ports:
      # HOST:CONTAINER. 2222 is presumably the port sshd listens on inside
      # the image - TODO confirm against the Dockerfile / sshd_config.
      - '3227:2222'
    # -D keeps sshd in the foreground so it stays the container's main
    # process.
    command: ['/usr/sbin/sshd', '-D']
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    networks:
      - network_finetune
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]

networks:
  network_finetune:
    name: network_finetune