first commit
This commit is contained in:
54
Services/llamacpp/llamacpp.container
Normal file
54
Services/llamacpp/llamacpp.container
Normal file
@@ -0,0 +1,54 @@
|
||||
[Unit]
# Human-readable name of the generated service.
# NOTE: the original used "Name=llamacpp", which is not a valid [Unit]
# directive — systemd warns "Unknown key" and ignores it. Description= is
# the supported key for this purpose.
Description=llamacpp

[Container]
ContainerName=llamacpp
Image=localhost/llamacpp:rocm-amd64
Network=internal.network

# Uncomment to expose the llama.cpp HTTP server outside the internal network.
#PublishPort=8080:8080

# ROCm: pass through the AMD compute (kfd) and render (dri) device nodes.
AddDevice=/dev/kfd
AddDevice=/dev/dri
PodmanArgs=--userns=keep-id --group-add=keep-groups --ipc=host
SecurityLabelType=container_runtime_t

# ROCm tuning (disabled by default; enable per-GPU as needed).
#Environment=HSA_OVERRIDE_GFX_VERSION=11.5.1
#Environment=ROCR_VISIBLE_DEVICES=0
#Environment=GPU_TARGETS=gfx1151

# API key for the llama.cpp server (leave commented for no auth).
#Environment=LLAMA_API_KEY=""

# Model selection. Quotes are stripped by systemd, so each variable holds a
# "-m <path>" argument string consumed inside the container.
Environment=GENERAL_FAST_MODEL="-m models/gemma-3-1b-it-Q5_K_M.gguf"
Environment=GENERAL_MODEL="-m models/gpt-oss-20b-Q4_K_M.gguf"

Environment=CHAT_MODEL="-m models/Qwen3-VL-30B-A3B-Q4_K_S.gguf"

Environment=CODER_MODEL="-m models/Qwen3-Coder-30B-A3B-Instruct-Q6_K.gguf"

Environment=EMBEDDING_FAST_MODEL="-m models/embeddinggemma-300M-Q8_0.gguf"
Environment=EMBEDDING_MODEL="-m models/bge-code-v1-q6_k.gguf"

# General-model runtime limits (context tokens / GPU offload layers / output cap).
Environment=GENERAL_CONTEXT_SIZE=262144
Environment=GENERAL_GPU_LAYERS=99
Environment=GENERAL_MAX_TOKENS=512

# Coder-model runtime limits.
Environment=CODER_CONTEXT_SIZE=131072
Environment=CODER_GPU_LAYERS=99
Environment=CODER_MAX_TOKENS=512

# Mount points. The same host model directory is mounted at both the
# llama.cpp cache path and /app/models — presumably so downloads and explicit
# -m paths resolve to the same files; verify against the container image.
Volume=/srv/containers/aitools/models/llamacpp:/home/ubuntu/.cache/llama.cpp
Volume=/srv/containers/aitools/models/llamacpp:/app/models
Volume=/srv/containers/aitools/llamacpp_config.yaml:/app/config.yaml

[Service]
Restart=on-failure
# Large model loads can be slow; allow up to 15 minutes for startup.
TimeoutStartSec=15m

[Install]
WantedBy=multi-user.target default.target
Reference in New Issue
Block a user