This commit is contained in:
Your Name
2024-07-02 10:17:29 +00:00
parent 0067c17c9a
commit ef91582d78
3 changed files with 14 additions and 10 deletions

View File

@@ -42,7 +42,7 @@ services:
reservations: reservations:
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: NVIDIAGPU kind: "NVIDIA-GPU"
value: 1 value: 1
placement: placement:
constraints: [node.platform.os == linux] constraints: [node.platform.os == linux]

View File

@@ -15,7 +15,7 @@ services:
reservations: reservations:
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: NVIDIAGPU kind: "NVIDIA-GPU"
value: 1 value: 1
placement: placement:
constraints: constraints:

View File

@@ -22,10 +22,14 @@ services:
DEEPSPEED_VERSION: "master" DEEPSPEED_VERSION: "master"
DEEPSPEED_INSTALL_FLAGS: "--allow_sudo" DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
volumes: volumes:
- ./src:/bbtft - type: tmpfs
- ./id_rsa_finetune:/root/.ssh/id_rsa target: /dev/shm
- ./id_rsa.pub:/root/.ssh/id_rsa.pub tmpfs:
container_name: ubuntu-finetune size: 32000000000 # 32GB
# - ./src:/bbtft
# - ./id_rsa_finetune:/root/.ssh/id_rsa
# - ./id_rsa.pub:/root/.ssh/id_rsa.pub
# container_name: ubuntu-finetune
image: hotwa/deepspeed:pt23 image: hotwa/deepspeed:pt23
shm_size: '32gb' shm_size: '32gb'
ports: ports:
@@ -37,19 +41,19 @@ services:
networks: networks:
- my-custom-bridge - my-custom-bridge
deploy: deploy:
replicas: 1 replicas: 4
resources: resources:
reservations: reservations:
generic_resources: generic_resources:
- discrete_resource_spec: - discrete_resource_spec:
kind: NVIDIAGPU kind: "NVIDIA-GPU"
value: 1 value: 8
placement: placement:
constraints: [node.labels.gpu == true] constraints: [node.labels.gpu == true]
cap_add: cap_add:
- IPC_LOCK - IPC_LOCK
devices: devices:
- /dev/infiniband:/dev/infiniband - "/dev/infiniband:/dev/infiniband:rwm"
networks: networks:
my-custom-bridge: my-custom-bridge: