Add llamacpp container

Cette révision appartient à :
2025-12-11 20:01:23 +01:00
Parent 05007461aa
révision 6ae56c9cc1
40 fichiers modifiés avec 108 ajouts et 2 suppressions

Voir le fichier

@@ -1,8 +1,8 @@
### LLaMACpp Builder Container with Vulkan for GPUs
### Multi-stage: download stage with pre-built binaries, runtime stage with only runtime libraries
###
### BUILD: podman build -t llamacpp:vulkan-amd64 -f llama-vulkan.Containerfile .
### Export: podman save -o /home/duckpage/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64
### BUILD: podman build -t llamacpp-swap:vulkan-amd64 -f llama-swap-vulkan.Containerfile .
### Export: podman save -o /home/duckpage/llamacpp-swap-vulkan-amd64.tar localhost/llamacpp-swap:vulkan-amd64
ARG UBUNTU_VERSION=24.04

Voir le fichier

@@ -0,0 +1,51 @@
### LLaMACpp runtime container with Vulkan for GPUs
### Downloads the latest pre-built llama.cpp Vulkan binaries from the GitHub releases page.
###
### BUILD: podman build -t llamacpp:vulkan-amd64 -f llamacpp.Containerfile .
### Export: podman save -o /home/badstorm/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64
FROM ubuntu:24.04
USER root
EXPOSE 8090
# Install runtime tooling in one layer and clean apt/pip caches in the SAME
# layer, so the deleted files never persist in an image layer.
# --no-install-recommends keeps the image small; ca-certificates is then
# needed explicitly for HTTPS downloads (curl only Recommends it).
# huggingface_hub needs the [hf_transfer] extra because
# HF_HUB_ENABLE_HF_TRANSFER=1 is set below — without the package installed,
# hub downloads fail at runtime.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
       ca-certificates \
       curl \
       ffmpeg \
       git \
       grep \
       nano \
       python3 \
       python3-pip \
       python3-wheel \
       sed \
       unzip \
  && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
  && pip install --no-cache-dir --break-system-packages -U "huggingface_hub[cli,hf_transfer]" \
  && if [ -f requirements.txt ]; then pip install --no-cache-dir --break-system-packages -r requirements.txt; fi \
  && apt-get autoremove -y \
  && apt-get clean \
  && rm -rf /tmp/* /var/tmp/* /var/lib/apt/lists/* \
  && find /var/cache -type f -delete
WORKDIR /app
# Resolve the latest release tag, then fetch and unpack the Vulkan x64 build.
# curl -f makes HTTP errors abort the build instead of saving an error page
# as llama.zip; chmod is merged into the same layer to avoid an extra one.
RUN VERSION=$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
  && echo "Last llama.cpp version: $VERSION" \
  && curl -fL https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
  && unzip -j llama.zip -d . \
  && rm llama.zip \
  && chmod +x /app/llama-server
ENV PATH=/app:$PATH
ENV LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH
# Fast parallel downloads from the Hugging Face Hub (requires hf_transfer, installed above).
ENV HF_HUB_ENABLE_HF_TRANSFER=1
#ENV HF_HOME=
#ENV HUGGING_FACE_HUB_TOKEN=
# Default llama-server configuration; all of these can be overridden at run
# time (the quadlet unit overrides CTX_SIZE with 131072, for example).
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_ARG_PORT=8090
ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
#ENV LLAMA_ARG_MMPROJ_URL=
ENV LLAMA_ARG_NO_MMAP=true
ENV LLAMA_ARG_CTX_SIZE=128000
#ENV LLAMA_API_KEY=""
ENTRYPOINT ["/app/llama-server"]
CMD ["--no-warmup"]

32
Services/llamacpp.container Fichier normal
Voir le fichier

@@ -0,0 +1,32 @@
[Container]
ContainerName=llamacpp
Image=localhost/llamacpp:vulkan-amd64
#AutoUpdate=registry
Network=internal.network
PublishPort=8090:8090
# Model cache shared on the host so downloaded GGUF files survive container rebuilds.
Volume=/srv/containers/aitools/models:/root/.cache/llama.cpp
# GPU access for Vulkan via the DRM render node; keep-groups preserves the
# host user's render/video group membership inside the rootless container.
AddDevice=/dev/dri/renderD128
PodmanArgs=--group-add=keep-groups --ipc=host
SecurityLabelType=container_runtime_t
Environment=LLAMA_ARG_HOST=0.0.0.0
Environment=LLAMA_ARG_PORT=8090
Environment=LLAMA_ARG_NO_MMAP=true
Environment=LLAMA_ARG_CTX_SIZE=131072
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
# Hugging Face Hub configuration
Environment=HF_HOME=/root/.cache/huggingface
# SECURITY: never commit a real token to the repository. The token previously
# hardcoded here must be revoked at https://huggingface.co/settings/tokens.
# Provide it instead as a Podman secret, created once on the host with:
#   podman secret create hf_token /path/to/token-file
Secret=hf_token,type=env,target=HF_TOKEN
[Service]
Restart=on-failure
TimeoutStartSec=15m
[Install]
WantedBy=multi-user.target default.target

20
Services/postgres.container Fichier normal
Voir le fichier

@@ -0,0 +1,20 @@
[Unit]
# systemd has no "Name=" directive in [Unit]; Description= is the valid key
# (the unit name itself comes from the quadlet file name).
Description=PostgreSQL 17 database server
[Container]
ContainerName=postgres
# Fully qualified image name so resolution does not depend on the host's
# unqualified-search registry configuration.
Image=docker.io/library/postgres:17
#AutoUpdate=registry
Network=internal.network
Environment=POSTGRES_USER=postgres
# SECURITY: default credentials — fine only because the port below stays
# unpublished and the container is reachable solely on internal.network.
# For anything exposed, supply the password via a Podman secret instead:
#   podman secret create pg_password /path/to/password-file
# and replace the line below with: Secret=pg_password,type=env,target=POSTGRES_PASSWORD
# NOTE: POSTGRES_PASSWORD only takes effect on first initialization of the
# data volume; changing it here does not update an existing database.
Environment=POSTGRES_PASSWORD=postgres
#PublishPort=5432:5432
Volume=/srv/containers/postgres:/var/lib/postgresql/data
[Service]
TimeoutStartSec=5m
Restart=on-failure
[Install]
WantedBy=multi-user.target default.target

Voir le fichier

@@ -14,6 +14,9 @@ sudo apt update && sudo apt upgrade -y
# 2. Aggiungere utente ai gruppi render e video
echo "Aggiungendo utente ai gruppi render e video..."
sudo usermod -a -G render,video $LOGNAME
sudo loginctl enable-linger $USER
sudo sh -c "echo 'net.ipv4.ip_unprivileged_port_start=80' >> /etc/sysctl.conf"
# 3. Installare podman
echo "Installando podman..."