first add

This commit is contained in:
2024-08-28 15:18:15 +08:00
commit 873429d4e6
57 changed files with 4892 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
---
# Docker Compose definition for a single GPU fine-tuning container.
# The service builds an image from the local Dockerfile, bind-mounts ./src
# into the container, and runs sshd in the foreground; container port 2222
# is published on host port 3227.
version: '3.8'

services:
  ubuntu-finetune:
    build:
      context: .
      dockerfile: Dockerfile
      # Compatibility table for PyTorch, Python and pytorch_lightning versions:
      # https://blog.csdn.net/qq_41813454/article/details/137421822
      args:
        # Version-like values are quoted so YAML never reads them as floats
        # (e.g. an unquoted 3.10 would become 3.1).
        PYTHON_VERSION: "3.9"
        CUDA_VERSION: "11.7.1"
        PYTORCH_VERSION: "1.13.1"
        TORCHVISION_VERSION: "0.14.1"
        TORCHAUDIO_VERSION: "0.13.1"
        # Build flags forwarded to the Dockerfile as strings.
        DS_BUILD_OPS: "1"
        DS_BUILD_SPARSE_ATTN: "1"
        DS_BUILD_FUSED_ADAM: "1"
        DS_BUILD_CPU_ADAM: "1"
        USE_CUDA: "1"
        USE_ROCM: "0"
        USE_XPU: "0"
        CUDA: cu117
        # For RTX 4090; the full list would be "80;86;89;90".
        # Building the DeepSpeed kernels is very strict about this value.
        # NOTE(review): the comment on DCUTLASS_NVCC_ARCHS below associates
        # 89 with the RTX 4090, and 89 is not in this list - confirm intent.
        CUDA_ARCH_LIST: "80;86"
        SETUPTOOLS_VERSION: "69.5.1"
        # NOTE(review): sshd is published on a host port, so a fixed password
        # is risky. The value can now be overridden through the ROOT_PASSWD
        # environment variable (or a .env file); it still defaults to "root".
        # Presumably the Dockerfile uses this arg to set the root password -
        # confirm against the Dockerfile.
        ROOT_PASSWD: "${ROOT_PASSWD:-root}"
        # 90a for H100; 89 for GeForce RTX 4090.
        DCUTLASS_NVCC_ARCHS: "90a"
    volumes:
      - ./src:/bbtft
    container_name: ubuntu-finetune
    pull_policy: if_not_present
    tty: true
    restart: unless-stopped
    image: hotwa/deepspeed:pt113
    shm_size: '32gb'
    ports:
      # host:container - quoted, as recommended for all port mappings.
      - "3227:2222"
    # Run sshd in the foreground (-D) as the container's main process.
    command: ["/usr/sbin/sshd", "-D"]
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    networks:
      - network_finetune
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]

networks:
  network_finetune:
    name: network_finetune