Split config files
Этот коммит содержится в:
36
Services/llamacpp-multi/llamacpp-multi.container
Обычный файл
36
Services/llamacpp-multi/llamacpp-multi.container
Обычный файл
@@ -0,0 +1,36 @@
|
||||
[Container]
|
||||
ContainerName=llamacpp-multi
|
||||
Image=localhost/llamacpp:vulkan-multi-amd64
|
||||
# AutoUpdate intentionally disabled: the image is built locally (localhost/...),
# so registry-based auto-update does not apply.
#AutoUpdate=registry
|
||||
Network=internal.network
|
||||
PublishPort=8090:8090
|
||||
|
||||
# Production - models are cached from the Hugging Face Hub
# NOTE(review): original comment said "Lemonade", but this unit runs llama.cpp
# and the volume below is the llama.cpp model cache - verify the comment is not
# a stale copy-paste from another unit.
Volume=/srv/containers/aitools/models:/root/.cache/llama.cpp
|
||||
|
||||
# GPU device access (render node for the container's GPU backend)
# NOTE(review): header said "ROCm tuning", but the image tag is
# vulkan-multi-amd64 - confirm which backend is actually in use.
AddDevice=/dev/dri/renderD128
|
||||
PodmanArgs=--group-add=keep-groups --ipc=host
|
||||
SecurityLabelType=container_runtime_t
|
||||
|
||||
# Multi-instance configuration (throughput optimized)
|
||||
Environment=LLAMA_INSTANCES=4
|
||||
Environment=LLAMA_BASE_PORT=9000
|
||||
Environment=LLAMA_ARG_HOST=0.0.0.0
|
||||
Environment=LLAMA_ARG_PARALLEL=32
|
||||
Environment=LLAMA_ARG_THREADS=16
|
||||
Environment=LLAMA_ARG_BATCH_SIZE=2048
|
||||
Environment=LLAMA_ARG_CTX_SIZE=131072
|
||||
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-Next-GGUF:Q2_K_XL
|
||||
|
||||
# Hugging Face cache location and authentication
Environment=HF_HOME=/root/.cache/huggingface
# SECURITY: plaintext Hugging Face token committed to version control.
# Rotate this token immediately, then load it via EnvironmentFile= or a
# Podman Secret= instead of inlining it in the unit file.
Environment=HF_TOKEN=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
|
||||
|
||||
|
||||
[Service]
|
||||
Restart=on-failure
|
||||
TimeoutStartSec=15m
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target default.target
|
||||
Ссылка в новой задаче
Block a user