update
This commit is contained in:
@@ -18,10 +18,20 @@ services:
|
||||
kind: gpus
|
||||
value: 1
|
||||
placement:
|
||||
constraints: [node.platform.os == linux]
|
||||
constraints:
|
||||
- node.labels.gpu == true
|
||||
cap_add:
|
||||
- IPC_LOCK
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: overlay
|
||||
driver: overlay
|
||||
|
||||
# 为节点添加标签:
|
||||
# docker node ls
|
||||
|
||||
|
||||
# docker node update --label-add gpu=true node1
|
||||
|
||||
# docker stack deploy -c docker-compose.yml rdma_stack
|
||||
|
||||
|
||||
57
finetune/docker-compose_stack2.yml
Normal file
57
finetune/docker-compose_stack2.yml
Normal file
@@ -0,0 +1,57 @@
|
||||
# Compose file format version declared for this stack file.
version: '3.8'
|
||||
|
||||
services:
  # Fine-tuning container: one replica that reserves one `nvidia` generic
  # resource and is placed only on nodes labelled gpu=true.
  ubuntu-finetune:
    # NOTE(review): if this file is deployed with `docker stack deploy`,
    # options such as `build`, `container_name` and `devices` are documented
    # as unsupported/ignored in swarm mode — confirm the intended deploy path.
    build:
      context: .
      dockerfile: Dockerfile
      # Build arguments forwarded to the Dockerfile.
      args:
        PYTHON_VERSION: "3.10"
        CUDA_VERSION: "12.1.0"
        PYTORCH_VERSION: "2.3.0"
        TORCHVISION_VERSION: "0.18.0"
        TORCHAUDIO_VERSION: "2.3.0"
        DS_BUILD_OPS: 1
        USE_CUDA: 1
        USE_ROCM: 0
        USE_XPU: 0
        CUDA: cu121
        CUDA_ARCH_LIST: "80;86;89;90"
        SETUPTOOLS_VERSION: "69.5.1"
        DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a"
        DEEPSPEED_VERSION: "master"
        DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
    volumes:
      - ./src:/bbtft
      # NOTE(review): a private key file is bind-mounted from the project
      # directory; make sure it is never committed to version control.
      - ./id_rsa_finetune:/root/.ssh/id_rsa
      - ./id_rsa.pub:/root/.ssh/id_rsa.pub
    container_name: ubuntu-finetune
    image: hotwa/deepspeed:pt23
    shm_size: '32gb'
    ports:
      # Host port 3228 -> container port 22 (presumably SSH — confirm).
      # Quoted so that a YAML 1.1 parser cannot read the digits-and-colon
      # scalar as a base-60 integer.
      - "3228:22"
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
    networks:
      - my-custom-bridge
    deploy:
      replicas: 1
      resources:
        reservations:
          generic_resources:
            - discrete_resource_spec:
                kind: nvidia
                value: 1
      placement:
        constraints: [node.labels.gpu == true]
    runtime: nvidia
    cap_add:
      - IPC_LOCK
    devices:
      # Passes the host InfiniBand device directory into the container.
      - /dev/infiniband:/dev/infiniband
|
||||
|
||||
networks:
  # `external: true` means this file does not create the network; it must
  # already exist before the service is started.
  # NOTE(review): the name suggests a bridge network; swarm services need a
  # swarm-scoped network — confirm this matches the deployment target.
  my-custom-bridge:
    external: true
|
||||
Reference in New Issue
Block a user