Split config files
Этот коммит содержится в:
36
Services/llamacpp-multi/llamacpp-multi.container
Обычный файл
36
Services/llamacpp-multi/llamacpp-multi.container
Обычный файл
@@ -0,0 +1,36 @@
|
||||
[Container]
|
||||
ContainerName=llamacpp-multi
|
||||
Image=localhost/llamacpp:vulkan-multi-amd64
|
||||
# AutoUpdate intentionally disabled: the image is built locally (localhost/...),
# so registry-based auto-update does not apply.
#AutoUpdate=registry
|
||||
Network=internal.network
|
||||
PublishPort=8090:8090
|
||||
|
||||
# Production - models are cached from the Hugging Face Hub
# NOTE(review): original comment said "Lemonade", but this unit runs llama.cpp
# and the volume below is the llama.cpp model cache - verify the comment is not
# a stale copy-paste from another unit.
Volume=/srv/containers/aitools/models:/root/.cache/llama.cpp
|
||||
|
||||
# GPU device access (render node for the container's GPU backend)
# NOTE(review): header said "ROCm tuning", but the image tag is
# vulkan-multi-amd64 - confirm which backend is actually in use.
AddDevice=/dev/dri/renderD128
|
||||
PodmanArgs=--group-add=keep-groups --ipc=host
|
||||
SecurityLabelType=container_runtime_t
|
||||
|
||||
# Multi-instance configuration (throughput optimized)
|
||||
Environment=LLAMA_INSTANCES=4
|
||||
Environment=LLAMA_BASE_PORT=9000
|
||||
Environment=LLAMA_ARG_HOST=0.0.0.0
|
||||
Environment=LLAMA_ARG_PARALLEL=32
|
||||
Environment=LLAMA_ARG_THREADS=16
|
||||
Environment=LLAMA_ARG_BATCH_SIZE=2048
|
||||
Environment=LLAMA_ARG_CTX_SIZE=131072
|
||||
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-Next-GGUF:Q2_K_XL
|
||||
|
||||
# Hugging Face cache location and authentication
Environment=HF_HOME=/root/.cache/huggingface
# SECURITY: plaintext Hugging Face token committed to version control.
# Rotate this token immediately, then load it via EnvironmentFile= or a
# Podman Secret= instead of inlining it in the unit file.
Environment=HF_TOKEN=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
|
||||
|
||||
|
||||
[Service]
|
||||
Restart=on-failure
|
||||
TimeoutStartSec=15m
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target default.target
|
||||
Ссылка в новой задаче
Block a user