# Page-header residue from the file viewer this was copied from: 106 lines, 3.5 KiB, YAML
# Compose file format version. Quoted so it stays a string rather than a float.
# NOTE(review): Docker Compose v2 treats this key as obsolete and ignores it;
# it is kept for compatibility with older docker-compose releases.
version: '3.8'

# DeepSpeed ships a number of C++/CUDA extensions ("ops") that speed up
# deep-learning training and inference. The main ones are:
#
# FusedAdam             - fused Adam optimizer for GPUs.
# FusedLamb             - like FusedAdam, but for the LAMB optimizer; aimed at
#                         large-scale distributed training.
# SparseAttention       - efficient computation of sparse attention.
# Transformer           - optimized Transformer layer implementation.
# TransformerInference  - inference-specific Transformer optimizations.
# CPUAdam               - Adam optimizer optimized for CPU.
# CPULion               - Lion optimizer for CPU.
# Quantizer             - quantization support, to shrink models and speed up
#                         inference.
# RandomLTD             - kernels for Random Layerwise Token Dropping
#                         (random-LTD), a data-efficiency technique.
# StochasticTransformer - variant of the Transformer kernel built in
#                         stochastic mode.

# Detect total system memory (in GB). Disabled shell snippet, kept for
# reference; the echoed message reads "Docker Compose file generated,
# shm_size set to ${TOTAL_MEM}GB."
# TOTAL_MEM=$(awk '/MemTotal/ {printf "%.0f\n", $2/1024/1024}' /proc/meminfo)
# echo "Docker Compose 文件已生成,shm_size 设置为 ${TOTAL_MEM}GB。"

services:
  # Single GPU fine-tuning container; its command runs sshd in the foreground.
  ubuntu-finetune:
    build:
      context: .
      dockerfile: Dockerfile.zly
      # Compatibility table for PyTorch / Python / pytorch_lightning versions:
      # https://blog.csdn.net/qq_41813454/article/details/137421822
      # All values are quoted so they are passed as strings and version-like
      # values ("3.10", "1.2") are not re-typed as floats.
      args:
        PYTHON_VERSION: "3.10"
        GO_VERSION: "1.21.13"
        NV_PEER_MEM_VERSION: "1.2"
        TAG_VERSION: "12.1.1-cudnn8-devel-ubuntu22.04"
        PYTORCH_VERSION: "2.3.0"
        TORCHVISION_VERSION: "0.18.0"
        TORCHAUDIO_VERSION: "2.3.0"
        DS_BUILD_OPS: "1"
        USE_CUDA: "1"
        USE_ROCM: "0"
        USE_XPU: "0"
        CUDA: "cu121"
        # 89 covers the RTX 4090; the full list would be "80;86;89;90".
        CUDA_ARCH_LIST: "80;89;90"
        # Full list: "6.0;6.1;6.2;7.0;7.5;8.0;8.6;8.9;9.0"
        TORCH_CUDA_ARCH_LIST: "8.0;8.9;9.0+PTX"
        SETUPTOOLS_VERSION: "69.5.1"
        # Architecture codes by GPU generation:
        #   Hopper (H100): 90a
        #   Ada Lovelace (RTX 40 series): 89
        #   Ampere (A100): 80; Ampere (RTX 30 series): 86
        #   Turing (RTX 20 series, Titan RTX): 75
        #   Volta (V100): 70
        #   Pascal (P100, GTX 10 series): 60;61
        #   Maxwell (GTX 900 series): 50;52;53
        # 90a targets the H100; 89 targets the GeForce RTX 4090.
        # Semicolon-separated, matching CUDA_ARCH_LIST.
        # NOTE(review): assumes Dockerfile.zly consumes this as a
        # semicolon-separated list - confirm.
        DCUTLASS_NVCC_ARCHS: "80;89;90a"
        DEEPSPEED_VERSION: "master"
        DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"
        # HTTP_PROXY: "http://127.0.0.1:15777"
        # HTTPS_PROXY: "http://127.0.0.1:15777"
        CACHEBUST: "1"
        NV_DRIVER_VERSION: "535"
    env_file:
      - .env
    # volumes:
    #   - ../src:/work
    container_name: ubuntu-finetune
    pull_policy: if_not_present
    ulimits:
      # -1 means unlimited locked memory.
      memlock:
        soft: -1
        hard: -1
    # tty: true
    # stdin_open: true
    restart: unless-stopped
    image: hotwa/deepspeed:pt23_828
    privileged: true
    # NOTE(review): privileged mode already grants every capability, so this
    # list is likely redundant (and ALL already includes SYS_PTRACE) - confirm
    # before removing.
    cap_add:
      - ALL
      - CAP_SYS_PTRACE
    # NOTE(review): with `ipc: host` below the container shares the host's
    # /dev/shm, so this size presumably has no effect - confirm.
    shm_size: '32gb'
    # devices:
    #   - /dev/infiniband/rdma_cm
    #   - /dev/infiniband/uverbs0
    #   - /dev/infiniband/uverbs1
    #   - /dev/infiniband/uverbs2
    #   - /dev/infiniband/uverbs3
    #   - /dev/infiniband/uverbs4
    #   - /dev/infiniband/uverbs5
    #   - /dev/infiniband/uverbs6
    #   - /dev/infiniband/uverbs7
    #   - /dev/infiniband/uverbs8
    ipc: host
    # ports:
    #   - 3228:2222
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - TMPDIR=/var/tmp
    network_mode: host
    # networks:
    #   - network_finetune
    command: ["/usr/sbin/sshd", "-D"]
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this service.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

# networks:
#   network_finetune:
#     name: network_finetune