From 4bf09ad53880b2d26f1a9395404f41b824644d36 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Tue, 2 Jul 2024 07:43:07 +0000
Subject: [PATCH] update

---
 finetune/docker-compose_stack1.yml | 14 ++++++--
 finetune/docker-compose_stack2.yml | 57 ++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 finetune/docker-compose_stack2.yml

diff --git a/finetune/docker-compose_stack1.yml b/finetune/docker-compose_stack1.yml
index b8e4497..cfd3b0c 100644
--- a/finetune/docker-compose_stack1.yml
+++ b/finetune/docker-compose_stack1.yml
@@ -18,10 +18,20 @@ services:
                 kind: gpus
                 value: 1
       placement:
-        constraints: [node.platform.os == linux]
+        constraints:
+          - node.labels.gpu == true  # schedule only on nodes labelled gpu=true
     cap_add:
       - IPC_LOCK
 
 networks:
   default:
-    driver: overlay
\ No newline at end of file
+    driver: overlay
+
+# Add the gpu label to a node (required by the placement constraint above):
+# docker node ls
+
+
+# docker node update --label-add gpu=true node1
+
+# docker stack deploy -c docker-compose_stack1.yml rdma_stack
+
diff --git a/finetune/docker-compose_stack2.yml b/finetune/docker-compose_stack2.yml
new file mode 100644
index 0000000..0ba7bb3
--- /dev/null
+++ b/finetune/docker-compose_stack2.yml
@@ -0,0 +1,57 @@
+version: '3.8'
+
+services:
+  ubuntu-finetune:  # DeepSpeed fine-tuning container (image hotwa/deepspeed:pt23)
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:  # all values quoted so each build arg is passed as the exact string written
+        PYTHON_VERSION: "3.10"
+        CUDA_VERSION: "12.1.0"
+        PYTORCH_VERSION: "2.3.0"
+        TORCHVISION_VERSION: "0.18.0"
+        TORCHAUDIO_VERSION: "2.3.0"
+        DS_BUILD_OPS: "1"
+        USE_CUDA: "1"
+        USE_ROCM: "0"
+        USE_XPU: "0"
+        CUDA: "cu121"
+        CUDA_ARCH_LIST: "80;86;89;90"
+        SETUPTOOLS_VERSION: "69.5.1"
+        DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a"
+        DEEPSPEED_VERSION: "master"
+        DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
+    volumes:
+      - ./src:/bbtft  # host ./src is visible in the container at /bbtft
+      - ./id_rsa_finetune:/root/.ssh/id_rsa  # private SSH key comes from the host; keep it out of VCS
+      - ./id_rsa.pub:/root/.ssh/id_rsa.pub
+    container_name: ubuntu-finetune
+    image: hotwa/deepspeed:pt23
+    shm_size: '32gb'
+    ports:
+      - "3228:22"  # host 3228 -> container 22; quoted because YAML 1.1 reads bare 3228:22 as base-60
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - TMPDIR=/var/tmp
+    networks:
+      - my-custom-bridge
+    deploy:
+      replicas: 1
+      resources:
+        reservations:
+          generic_resources:
+            - discrete_resource_spec:
+                kind: nvidia  # NOTE(review): docker-compose_stack1.yml uses kind "gpus" - confirm
+                value: 1
+      placement:
+        constraints: [node.labels.gpu == true]  # only nodes labelled gpu=true
+    runtime: nvidia
+    cap_add:
+      - IPC_LOCK  # lets the container lock memory; presumably for RDMA buffers - confirm
+    devices:
+      - /dev/infiniband:/dev/infiniband  # expose the host's InfiniBand device nodes
+
+networks:
+  my-custom-bridge:
+    external: true  # not created by this file; the network must already exist