# Podman Quadlet unit for the llama.cpp server container ("llamacpp").
# One directive per line; comments must sit on their own line because
# systemd-style unit files do not support trailing comments.

[Container]
ContainerName=llamacpp
Image=localhost/llamacpp:vulkan-amd64
# Uncomment to let podman-auto-update pull newer images from a registry.
#AutoUpdate=registry
Network=internal.network
PublishPort=8090:8090

# Production - Lemonade uses the Hugging Face Hub layout for models.
Volume=/srv/containers/aitools/models/hf:/root/.cache/huggingface/hub
# Old models directory (kept for reference).
#Volume=/srv/containers/aitools/models:/root/.cache/llama.cpp

# GPU access / tuning.
# NOTE(review): this group was originally labelled "ROCm tuning" but the image
# tag is "vulkan" - confirm which backend is actually in use.
AddDevice=/dev/dri/renderD128
PodmanArgs=--group-add=keep-groups --ipc=host
SecurityLabelType=container_runtime_t

# llama-server settings (passed through as LLAMA_ARG_* environment variables).
Environment=LLAMA_ARG_HOST=0.0.0.0
Environment=LLAMA_ARG_PORT=8090
Environment=LLAMA_ARG_NO_MMAP=true
Environment=LLAMA_ARG_CTX_SIZE=131072
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K

# MTP (speculative decoding draft settings).
Environment=LLAMA_ARG_SPEC_TYPE=draft-mtp
Environment=LLAMA_ARG_SPEC_DRAFT_N_MAX=6

# Hugging Face
Environment=HF_HOME=/root/.cache/huggingface
# The access token must NOT be stored in this file. It is injected as the
# HF_TOKEN environment variable from a Podman secret named "hf_token".
# Create it once (as the user that runs this unit) with:
#   printf '%s' "<token>" | podman secret create hf_token -
# SECURITY: a token was previously committed in plain text here; revoke it on
# huggingface.co and issue a new one before creating the secret.
Secret=hf_token,type=env,target=HF_TOKEN

# API key - protects access to the server (must also be set in
# Odoo > AI > API Key). Uncomment to enable authentication; prefer a Podman
# secret over a literal value for real deployments.
#Environment=LLAMA_API_KEY=your-secret-key

# Model alias - the name the server reports in the API "model" field.
# If unset, llama-server uses the .gguf file name as the alias.
#Environment=LLAMA_ARG_ALIAS=chat

# Boot override for debugging: start the container idle instead of the server.
#Entrypoint=/usr/bin/sleep
#Exec=infinity

[Service]
Restart=on-failure
# Generous start timeout: the first start may need to download the model.
TimeoutStartSec=15m

[Install]
WantedBy=multi-user.target default.target