### llama.cpp container with Vulkan support for GPUs
### Multi-stage: a download stage fetches the pre-built binaries, a runtime stage keeps only the runtime libraries
###
### BUILD:  podman build -t llamacpp:vulkan-amd64 -f llama-vulkan.Containerfile .
### EXPORT: podman save -o /home/duckpage/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64
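### RUN (illustrative; assumes an AMD/Intel GPU exposed via /dev/dri, and that
### entrypoint.sh listens on 8080 -- adjust both to your setup):
###   podman run --rm --device /dev/dri -p 8080:8080 localhost/llamacpp:vulkan-amd64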
ARG UBUNTU_VERSION=24.04
### Download stage
FROM ubuntu:${UBUNTU_VERSION} AS download
RUN apt-get update \
&& apt-get install -y curl unzip grep sed \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /tmp
RUN VERSION=$(curl -s -I https://github.com/ggml-org/llama.cpp/releases/latest | grep -i location | sed 's|.*/tag/||' | tr -d '\r') \
&& echo "Last llama.cpp version: $VERSION" \
&& curl -L https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
&& unzip llama.zip \
&& rm llama.zip \
&& if [ -d llama-* ]; then mv llama-*/* . && rmdir llama-*; elif [ -d build ]; then mv build/* . && rmdir build; fi \
&& if [ -d bin ]; then mv bin/* . && rmdir bin; fi # flatten further
RUN mkdir -p /app/lib /app/full \
&& find . -name "*.so" -exec cp {} /app/lib \; \
&& cp -r * /app/full 2>/dev/null || true \
&& ls -la /app/full # list contents
### Base stage
FROM ubuntu:${UBUNTU_VERSION} AS base
RUN apt-get update \
&& apt-get install -y libgomp1 curl nano ca-certificates wget\
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=download /app/lib/ /app
### Full stage
FROM base AS full
COPY --from=download /app/full /app
RUN chmod +x /app/llama-server
WORKDIR /app
RUN apt-get update \
&& apt-get install -y \
libvulkan-dev \
git \
python3-pip \
python3 \
python3-wheel \
&& pip install --break-system-packages --upgrade setuptools \
&& pip install --break-system-packages -U "huggingface_hub[cli]" \
&& if [ -f requirements.txt ]; then pip install --break-system-packages -r requirements.txt; fi \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
# -------- Model args (prefer Q6 to keep mmap on and avoid load issues) --------
ARG GENERAL_FAST_MODEL="-m models/gemma-3-1b-it-Q5_K_M.gguf"
ARG GENERAL_MODEL="-m models/gpt-oss-20b-Q4_K_M.gguf"
ARG CHAT_MODEL="-m models/Qwen3-VL-30B-A3B-Q4_K_S.gguf"
ARG CODER_MODEL="-m models/Qwen3-Coder-30B-A3B-Instruct-Q6_K.gguf"
ARG EMBEDDING_FAST_MODEL="-m models/embeddinggemma-300M-Q8_0.gguf"
ARG EMBEDDING_MODEL="-m models/bge-code-v1-q6_k.gguf"
# -------- Runtime defaults --------
ARG GENERAL_CONTEXT_SIZE=16384
ARG GENERAL_GPU_LAYERS=99
ARG GENERAL_MAX_TOKENS=512
ARG CODER_CONTEXT_SIZE=131072
ARG CODER_GPU_LAYERS=99
ARG CODER_MAX_TOKENS=512
ENV GENERAL_FAST_MODEL=${GENERAL_FAST_MODEL}
ENV GENERAL_MODEL=${GENERAL_MODEL}
ENV CHAT_MODEL=${CHAT_MODEL}
ENV CODER_MODEL=${CODER_MODEL}
ENV EMBEDDING_FAST_MODEL=${EMBEDDING_FAST_MODEL}
ENV EMBEDDING_MODEL=${EMBEDDING_MODEL}
ENV GENERAL_CONTEXT_SIZE=${GENERAL_CONTEXT_SIZE}
ENV GENERAL_GPU_LAYERS=${GENERAL_GPU_LAYERS}
ENV GENERAL_MAX_TOKENS=${GENERAL_MAX_TOKENS}
ENV CODER_CONTEXT_SIZE=${CODER_CONTEXT_SIZE}
ENV CODER_GPU_LAYERS=${CODER_GPU_LAYERS}
ENV CODER_MAX_TOKENS=${CODER_MAX_TOKENS}
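# Any of the ARGs above can be overridden at build time, e.g. (values are illustrative):
#   podman build --build-arg CODER_CONTEXT_SIZE=65536 \
#     --build-arg CHAT_MODEL="-m models/another-model-Q4_K_M.gguf" \
#     -t llamacpp:vulkan-amd64 -f llama-vulkan.Containerfile .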
# -------- llama-swap --------
RUN curl -L https://github.com/mostlygeek/llama-swap/releases/download/v165/llama-swap_165_linux_amd64.tar.gz -o /tmp/llama-swap.tar.gz \
&& tar -xzf /tmp/llama-swap.tar.gz -C /app \
&& rm /tmp/llama-swap.tar.gz
# -------- start/stop scripts --------
# Note: we use --threads -1 --threads-batch -1 to let llama.cpp auto-tune its threading
COPY ./Scripts/ /app/Scripts/
RUN chmod +x /app/Scripts/*.sh
# -------- Copy preset config file --------
COPY ./config.preset.yaml /app/config.preset.yaml
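# config.preset.yaml drives llama-swap's model switching. A minimal sketch of one entry,
# assuming the upstream llama-swap config format (${PORT} is substituted by llama-swap;
# the model flags mirror the CODER_* defaults above):
#   models:
#     "coder":
#       cmd: /app/llama-server -m models/Qwen3-Coder-30B-A3B-Instruct-Q6_K.gguf -c 131072 -ngl 99 --port ${PORT}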
# -------- Copy entrypoint script --------
COPY ./entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]