# llamacpp.container — Podman Quadlet unit for a llama.cpp server with ROCm
# GPU acceleration. Processed by podman-systemd.unit(5) (Quadlet) into a
# systemd service at daemon-reload time.

[Unit]
# NOTE(review): original file had "Name=llamacpp", which is not a valid
# [Unit] key; Description= is the systemd equivalent.
Description=llamacpp

[Container]
ContainerName=llamacpp
Image=localhost/llamacpp:rocm-amd64
Network=internal.network

# Reachable only on the internal network; uncomment to publish on the host.
#PublishPort=8080:8080

# ROCm — GPU device access and runtime permissions.
AddDevice=/dev/kfd
AddDevice=/dev/dri
PodmanArgs=--userns=keep-id --group-add=keep-groups --ipc=host
SecurityLabelType=container_runtime_t

# ROCm tuning (disabled; enable per-host as needed).
#Environment=HSA_OVERRIDE_GFX_VERSION=11.5.1
#Environment=ROCR_VISIBLE_DEVICES=0
#Environment=GPU_TARGETS=gfx1151

# API Key (disabled — set to require authentication on the server).
#Environment=LLAMA_API_KEY=""

# Models — each value is a llama.cpp "-m <path>" argument fragment.
Environment=GENERAL_FAST_MODEL="-m models/gemma-3-1b-it-Q5_K_M.gguf"
Environment=GENERAL_MODEL="-m models/gpt-oss-20b-Q4_K_M.gguf"

Environment=CHAT_MODEL="-m models/Qwen3-VL-30B-A3B-Q4_K_S.gguf"

Environment=CODER_MODEL="-m models/Qwen3-Coder-30B-A3B-Instruct-Q6_K.gguf"

Environment=EMBEDDING_FAST_MODEL="-m models/embeddinggemma-300M-Q8_0.gguf"
Environment=EMBEDDING_MODEL="-m models/bge-code-v1-q6_k.gguf"

# General model runtime parameters.
Environment=GENERAL_CONTEXT_SIZE=262144
Environment=GENERAL_GPU_LAYERS=99
Environment=GENERAL_MAX_TOKENS=512

# Coder model runtime parameters.
Environment=CODER_CONTEXT_SIZE=131072
Environment=CODER_GPU_LAYERS=99
Environment=CODER_MAX_TOKENS=512

# Mount points
Volume=/srv/containers/aitools/models/llamacpp:/home/ubuntu/.cache/llama.cpp
Volume=/srv/containers/aitools/models/llamacpp:/app/models
Volume=/srv/containers/aitools/llamacpp_config.yaml:/app/config.yaml

[Service]
Restart=on-failure
# Model loading can be slow on first start; allow a generous startup window.
TimeoutStartSec=15m

[Install]
WantedBy=multi-user.target default.target