### LLaMACpp Server Container with Vulkan GPU support
### Single-stage: pulls the latest pre-built llama.cpp Vulkan binaries and installs only the runtime libraries they need
###
### Build:  podman build -t llamacpp:vulkan-amd64 -f llamacpp.Containerfile .
### Export: podman save -o /home/badstorm/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64
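###
### Run (sketch): the Vulkan backend needs the host GPU passed through; on a
### typical Linux host that is /dev/dri, but the exact device is host-specific.
###   podman run --rm -p 8090:8090 --device /dev/dri llamacpp:vulkan-amd64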

FROM ubuntu:24.04

USER root
EXPOSE 8090

RUN apt-get update \
    # libvulkan1 + mesa-vulkan-drivers supply the Vulkan loader and GPU ICDs the pre-built binaries need at run time
    && apt-get install -y curl unzip grep sed git ffmpeg nano python3-pip python3 python3-wheel libvulkan1 mesa-vulkan-drivers \
    && pip install --break-system-packages --upgrade setuptools \
    # the hf_transfer extra backs the HF_HUB_ENABLE_HF_TRANSFER=1 set below
    && pip install --break-system-packages -U "huggingface_hub[cli,hf_transfer]" \
    # no COPY precedes this, so the requirements.txt branch only fires in derived images that add one
    && if [ -f requirements.txt ]; then pip install --break-system-packages -r requirements.txt; fi \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && rm -rf /var/lib/apt/lists/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
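
# The huggingface_hub CLI installed above can also bake the model into the image
# at build time instead of fetching it on first start. A minimal sketch, reusing
# the repo referenced in LLAMA_ARG_HF_REPO below (the image then grows by the
# full model size):
# RUN huggingface-cli download unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF \
#     --include "*Q2_K*" --local-dir /models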

WORKDIR /app

RUN VERSION=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
    && echo "Latest llama.cpp version: $VERSION" \
    && curl -L https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
    && unzip -j llama.zip -d . \
    && rm llama.zip
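
# The lookup above always tracks the newest release, so rebuilds are not
# reproducible. A sketch of a pinned alternative, assuming the same asset
# naming (the tag below is only an example):
# ARG LLAMA_VERSION=b4689
# RUN curl -L https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
#     && unzip -j llama.zip -d . && rm llama.zip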

RUN chmod +x /app/llama-server

ENV PATH=/app:$PATH
ENV LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH
ENV HF_HUB_ENABLE_HF_TRANSFER=1
#ENV HF_HOME=
#ENV HUGGING_FACE_HUB_TOKEN=
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_ARG_PORT=8090
ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
#ENV LLAMA_ARG_MMPROJ_URL=
ENV LLAMA_ARG_NO_MMAP=true
ENV LLAMA_ARG_CTX_SIZE=128000
#ENV LLAMA_API_KEY=""
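
# Every LLAMA_ARG_* default above maps to a llama-server flag and can be
# overridden per container at run time, e.g. (values here are only examples):
#   podman run --rm -p 8090:8090 --device /dev/dri \
#     -e LLAMA_ARG_CTX_SIZE=32768 -e LLAMA_API_KEY=secret \
#     llamacpp:vulkan-amd64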

ENTRYPOINT ["/app/llama-server"]
CMD ["--no-warmup"]
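
# Smoke test once a container is up; llama-server exposes a /health endpoint:
#   curl http://localhost:8090/health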