Files
nvidia_docker/docker-compose_ldh.yml
Your Name 968bc3dd24 first add
2024-08-02 14:44:39 +08:00

58 lines
1.6 KiB
YAML

# Compose definition for a single GPU-enabled DeepSpeed test container.
# The service builds from Dockerfile.ldh, reserves every NVIDIA GPU,
# shares the host network, and runs sshd in the foreground as its main process.
services:
  ldh-deepspeed-test:
    build:
      context: .
      dockerfile: Dockerfile.ldh
      args:
        # Optional build arguments, kept for reference. Uncomment to pass them
        # to the Dockerfile.
        # PYTHON_VERSION: "3.10"
        # CUDA_VERSION: "12.1.0"
        # PYTORCH_VERSION: "2.3.0"
        # TORCHVISION_VERSION: "0.18.0"
        # TORCHAUDIO_VERSION: "2.3.0"
        # DS_BUILD_OPS: 1
        # USE_CUDA: 1
        # USE_ROCM: 0
        # USE_XPU: 0
        # CUDA: cu121
        # CUDA_ARCH_LIST: "80;86;89;90"  # for RTX 4090, all: "80;86;89;90"
        # SETUPTOOLS_VERSION: "69.5.1"
        # DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a"  # 90a: H100, 89: GeForce RTX 4090
        # DEEPSPEED_VERSION: "master"
        # DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"

        # Proxy used while building the image.
        # NOTE(review): 127.0.0.1 is the build container's own loopback unless
        # the build runs with host networking; confirm the proxy is reachable.
        HTTP_PROXY: "http://127.0.0.1:15777"
        HTTPS_PROXY: "http://127.0.0.1:15777"
      # cache-from: "type=local"
    image: ldh/deepspeed:test
    # Size of /dev/shm inside the container.
    shm_size: '128gb'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all GPUs exposed by the NVIDIA driver.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    # stdin_open: true
    # tty: true
    privileged: true
    # NOTE(review): privileged mode already grants all capabilities, so this
    # entry looks redundant; presumably kept in case privileged is removed.
    cap_add:
      - IPC_LOCK
    volumes:
      - /root/workspace:/root/data
      # Expose the host's InfiniBand device nodes to the container.
      - /dev/infiniband:/dev/infiniband
    # Port publishing and the overlay network are disabled while the service
    # uses the host network directly.
    # ports:
    #   - "22242:22242"
    #   - "5000:5000"
    # networks:
    #   - ldh_overlay_network
    network_mode: host
    # Run sshd in the foreground (-D) so it stays the container's main process.
    command: ["/usr/sbin/sshd", "-D"]
# networks:
#   ldh_overlay_network:
#     external: true