# Docker Compose file (YAML) - original listing: 63 lines, 1.5 KiB
# Compose definition for a single service, `ubuntu-finetune`.
# The service image is built from the local Dockerfile with the build args
# listed below and tagged `hotwa/deepspeed:pt23`.
version: '3.8'

services:
  ubuntu-finetune:
    build:
      context: .
      dockerfile: Dockerfile
      # Build args are handed to the Dockerfile as strings; how each one is
      # consumed is defined there. All values are quoted so YAML never
      # re-types them (e.g. "3.10" must not become the float 3.1).
      args:
        PYTHON_VERSION: "3.10"
        CUDA_VERSION: "12.1.0"
        PYTORCH_VERSION: "2.3.0"
        TORCHVISION_VERSION: "0.18.0"
        TORCHAUDIO_VERSION: "2.3.0"
        DS_BUILD_OPS: "1"
        USE_CUDA: "1"
        USE_ROCM: "0"
        USE_XPU: "0"
        CUDA: cu121
        CUDA_ARCH_LIST: "80;86;89;90"
        SETUPTOOLS_VERSION: "69.5.1"
        DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a"
        DEEPSPEED_VERSION: "master"
        DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
    volumes:
      # In-memory mount over /dev/shm.
      # NOTE(review): `shm_size` below also sizes /dev/shm; confirm which of
      # the two settings is meant to take effect.
      - type: tmpfs
        target: /dev/shm
        tmpfs:
          size: 32000000000  # 32GB
      # - ./src:/bbtft
      # - ./id_rsa_finetune:/root/.ssh/id_rsa
      # - ./id_rsa.pub:/root/.ssh/id_rsa.pub
    # container_name: ubuntu-finetune
    image: hotwa/deepspeed:pt23
    shm_size: '32gb'
    ports:
      # Host port 3228 -> container port 22. Quoted because an unquoted
      # digits:digits scalar can be read as a base-60 integer by YAML 1.1
      # parsers.
      - "3228:22"
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
    # networks:
    #   - my-custom-bridge
    # NOTE(review): replicas, generic_resources and placement constraints look
    # Swarm-oriented; confirm how this file is deployed and whether four
    # replicas can share the single published host port above.
    deploy:
      replicas: 4
      resources:
        reservations:
          generic_resources:
            - discrete_resource_spec:
                kind: "NVIDIA-GPU"
                value: 8
            - discrete_resource_spec:
                kind: "SRIOV-VF"
                value: 1
      placement:
        constraints:
          - 'node.labels.gpu == true'
    # NOTE(review): `privileged: true` presumably already covers IPC_LOCK;
    # confirm whether both settings are required.
    cap_add:
      - IPC_LOCK
    privileged: true

# networks:
#   my-custom-bridge:
#     external: true