first add

This commit is contained in:
Your Name
2024-08-02 14:44:39 +08:00
parent 0b0edc3755
commit 968bc3dd24
35 changed files with 3693 additions and 65 deletions

38
docker-compose_mega.yml Normal file
View File

@@ -0,0 +1,38 @@
# Compose definition for a single GPU-enabled container based on the
# NVIDIA PyTorch image.
# NOTE(review): the service name suggests Megatron test runs - confirm.
services:
  megatron-test:
    image: nvcr.io/nvidia/pytorch:24.02-py3
    # Size of the container's /dev/shm.
    shm_size: '560gb'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # runtime: nvidia
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # - CUTLASS_PATH="/opt/cutlass"
      # - CUDA_HOME="/usr/local/cuda"
      # - PATH="${CUDA_HOME}/bin:${PATH}"
      # - LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
    # Keep stdin open and allocate a TTY so the container can be used
    # interactively.
    stdin_open: true
    tty: true
    # NOTE(review): privileged mode already grants all capabilities, so the
    # explicit IPC_LOCK below only matters if `privileged` is later removed.
    privileged: true
    cap_add:
      - IPC_LOCK
    volumes:
      - /root/workspace:/mnt
      # Bind-mount of the host's /dev/infiniband device directory.
      - /dev/infiniband:/dev/infiniband
      # - /mnt/local-nvme:/root/
    # `ports` must not be combined with `network_mode: host`: Compose rejects
    # the combination or ignores the mapping, depending on the version. With
    # host networking the container shares the host's network stack, so
    # port 5000 is reachable on the host without an explicit mapping.
    # Re-enable this together with the `networks` entries below if the service
    # is moved off host networking.
    # ports:
    #   - "5000:5000"
    # networks:
    #   - ldh_overlay_network
    network_mode: host
# networks:
#   ldh_overlay_network:
#     external: true