first add
This commit is contained in:
57
docker-compose_ldh.yml
Normal file
57
docker-compose_ldh.yml
Normal file
@@ -0,0 +1,57 @@
|
||||
|
||||
# Compose definition for a single GPU-enabled DeepSpeed test container.
services:
  ldh-deepspeed-test:
    # Image is built from Dockerfile.ldh in the current directory and
    # tagged with the name given under `image` below.
    build:
      context: .
      dockerfile: Dockerfile.ldh
      args:
        # Optional build arguments, currently disabled:
        # PYTHON_VERSION: "3.10"
        # CUDA_VERSION: "12.1.0"
        # PYTORCH_VERSION: "2.3.0"
        # TORCHVISION_VERSION: "0.18.0"
        # TORCHAUDIO_VERSION: "2.3.0"
        # DS_BUILD_OPS: 1
        # USE_CUDA: 1
        # USE_ROCM: 0
        # USE_XPU: 0
        # CUDA: cu121
        # CUDA_ARCH_LIST: "80;86;89;90"  # for RTX 4090, all : "80;86;89;90"
        # SETUPTOOLS_VERSION: "69.5.1"
        # DCUTLASS_NVCC_ARCHS: "80;86;89;90;90a"  # 90a for H100 GPU 89:GeForce RTX 4090
        # DEEPSPEED_VERSION: "master"
        # DEEPSPEED_INSTALL_FLAGS: "--allow_sudo"

        # Proxy settings handed to the image build as build args.
        # NOTE(review): 127.0.0.1 is resolved inside the build environment;
        # confirm the proxy is actually reachable there.
        HTTP_PROXY: 'http://127.0.0.1:15777'
        HTTPS_PROXY: 'http://127.0.0.1:15777'
      # cache-from: "type=local"

    image: ldh/deepspeed:test

    # Size of /dev/shm inside the container.
    shm_size: '128gb'

    # Reserve all NVIDIA GPUs for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    # runtime: nvidia

    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility

    # stdin_open: true
    # tty: true

    # Privileged mode plus an explicit IPC_LOCK capability.
    # NOTE(review): presumably needed for pinned memory with the InfiniBand
    # devices mounted below — confirm.
    privileged: true
    cap_add:
      - IPC_LOCK

    volumes:
      # Host workspace, visible in the container as /root/data.
      - /root/workspace:/root/data
      # Host InfiniBand device nodes, exposed at the same path.
      - /dev/infiniband:/dev/infiniband

    # Port publishing and the overlay network are disabled; the service
    # uses the host network instead.
    # ports:
    #   - "22242:22242"
    #   - "5000:5000"
    # networks:
    #   - ldh_overlay_network
    network_mode: host

    # Container main process: sshd with -D (stay in the foreground).
    command:
      - '/usr/sbin/sshd'
      - '-D'

# networks:
#   ldh_overlay_network:
#     external: true
|
||||
Reference in New Issue
Block a user