# llamacpp — Podman Quadlet container unit for a ROCm-enabled llama.cpp server.
# Processed by the podman systemd generator; see podman-systemd.unit(5).
# Dialect: systemd INI — '#' full-line comments, '=' delimiter, fixed section names.

[Unit]
# FIX: the original used "Name=llamacpp", which is not a valid [Unit] directive
# (systemd ignores it with "Unknown key name 'Name' in section 'Unit'").
# Description= is the directive that carries the unit's human-readable name.
Description=llamacpp

[Container]
ContainerName=llamacpp
Image=localhost/llamacpp:rocm-amd64
Network=internal.network
# Disabled: traffic is expected to reach the server via internal.network.
# Uncomment to expose the HTTP API directly on the host.
#PublishPort=8080:8080

# ROCm GPU access: kernel fusion driver (/dev/kfd) and DRI render nodes (/dev/dri).
AddDevice=/dev/kfd
AddDevice=/dev/dri
# keep-id / keep-groups preserve the host UID/GID mapping inside the container
# so the container user retains group access to the GPU device nodes;
# --ipc=host shares host IPC namespaces with the container.
PodmanArgs=--userns=keep-id --group-add=keep-groups --ipc=host
SecurityLabelType=container_runtime_t

# ROCm tuning (disabled by default; uncomment to override autodetection)
#Environment=HSA_OVERRIDE_GFX_VERSION=11.5.1
#Environment=ROCR_VISIBLE_DEVICES=0
#Environment=GPU_TARGETS=gfx1151

# API key (commented out = no key injected into the environment)
#Environment=LLAMA_API_KEY=""

# Model selection. Each value is a "-m <path>" flag string; systemd strips the
# surrounding double quotes, so the variable content includes the embedded space.
# NOTE(review): presumably consumed by the image entrypoint and/or
# /app/config.yaml — confirm against the container image.
Environment=GENERAL_FAST_MODEL="-m models/gemma-3-1b-it-Q5_K_M.gguf"
Environment=GENERAL_MODEL="-m models/gpt-oss-20b-Q4_K_M.gguf"
Environment=CHAT_MODEL="-m models/Qwen3-VL-30B-A3B-Q4_K_S.gguf"
Environment=CODER_MODEL="-m models/Qwen3-Coder-30B-A3B-Instruct-Q6_K.gguf"
Environment=EMBEDDING_FAST_MODEL="-m models/embeddinggemma-300M-Q8_0.gguf"
Environment=EMBEDDING_MODEL="-m models/bge-code-v1-q6_k.gguf"

# Per-profile inference settings (context window, GPU offload layers, max output)
Environment=GENERAL_CONTEXT_SIZE=262144
Environment=GENERAL_GPU_LAYERS=99
Environment=GENERAL_MAX_TOKENS=512
Environment=CODER_CONTEXT_SIZE=131072
Environment=CODER_GPU_LAYERS=99
Environment=CODER_MAX_TOKENS=512

# Mount points: model cache, model directory, and server configuration.
# NOTE(review): the same host directory backs both the llama.cpp download cache
# and /app/models — confirm this aliasing is intentional.
Volume=/srv/containers/aitools/models/llamacpp:/home/ubuntu/.cache/llama.cpp
Volume=/srv/containers/aitools/models/llamacpp:/app/models
Volume=/srv/containers/aitools/llamacpp_config.yaml:/app/config.yaml

[Service]
Restart=on-failure
# Generous start timeout: loading large GGUF models can take several minutes.
TimeoutStartSec=15m

[Install]
# Space-separated list is valid systemd syntax for multiple targets.
WantedBy=multi-user.target default.target