first add

2024-08-28 15:18:15 +08:00
commit 873429d4e6
57 changed files with 4892 additions and 0 deletions
--- a/finetune/docker-compose_pytorch1.13.yml
+++ b/finetune/docker-compose_pytorch1.13.yml
@@ -0,0 +1,52 @@
+version: '3.8'  # NOTE(review): the Compose Specification treats top-level `version` as obsolete (ignored with a warning) - confirm before removing
+
+services:
+  ubuntu-finetune:  # GPU fine-tuning container (PyTorch 1.13.1 / CUDA 11.7.1 build args); its main process is sshd
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:  # Compatibility table for PyTorch / Python / pytorch_lightning versions: https://blog.csdn.net/qq_41813454/article/details/137421822
+        PYTHON_VERSION: "3.9"  # quoted so that a future "3.10" is not parsed as the float 3.1
+        CUDA_VERSION: "11.7.1"
+        PYTORCH_VERSION: "1.13.1"
+        TORCHVISION_VERSION: "0.14.1"
+        TORCHAUDIO_VERSION: "0.13.1"
+        DS_BUILD_OPS: "1"  # DS_BUILD_* flags are passed to the Dockerfile as strings; presumably they pre-build DeepSpeed ops - verify in Dockerfile
+        DS_BUILD_SPARSE_ATTN: "1"
+        DS_BUILD_FUSED_ADAM: "1"
+        DS_BUILD_CPU_ADAM: "1"
+        USE_CUDA: "1"
+        USE_ROCM: "0"
+        USE_XPU: "0"
+        CUDA: cu117
+        CUDA_ARCH_LIST: "80;86"  # required to compile the DeepSpeed kernels and strictly checked; full list: "80;86;89;90". NOTE(review): the original note said "for RTX 4090", but that GPU is compute capability 8.9 - confirm
+        SETUPTOOLS_VERSION: "69.5.1"
+        ROOT_PASSWD: "${ROOT_PASSWD:-root}"  # override via shell env or .env file; the weak default "root" is kept only for backward compatibility (sshd is published on host port 3227)
+        DCUTLASS_NVCC_ARCHS: "90a"  # "90a" = H100, "89" = GeForce RTX 4090. NOTE(review): "90a" presumably needs a newer CUDA toolkit than 11.7.1 - confirm how the Dockerfile uses it
+    volumes:
+      - ./src:/bbtft  # bind-mount the local ./src directory at /bbtft inside the container
+    container_name: ubuntu-finetune
+    pull_policy: missing  # canonical spelling; "if_not_present" is only a backward-compatibility alias for this value
+    tty: true
+    restart: unless-stopped
+    image: hotwa/deepspeed:pt113  # tag given to the image produced by the build section above
+    shm_size: '32gb'
+    ports:
+      - "3227:2222"  # host 3227 -> container 2222 (presumably the port sshd listens on in the image); quoted per Compose guidance
+    command: ["/usr/sbin/sshd", "-D"]  # -D keeps sshd in the foreground so the container stays up
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    networks:
+      - network_finetune
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserve every NVIDIA GPU on the host for this service
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+networks:  # networks declared by this Compose file
+  network_finetune:  # joined by the ubuntu-finetune service
+    name: network_finetune  # explicit network name; NOTE(review): presumably set so Compose does not prefix it with the project name - confirm