### LLaMACpp Container with Vulkan for GPUs
### Single-stage: downloads the pre-built llama.cpp Vulkan binaries and installs
### only the runtime libraries they need on top of Ubuntu 24.04.
###
### BUILD:  podman build -t llamacpp:vulkan-amd64 -f llamacpp.Containerfile .
### EXPORT: podman save -o /home/badstorm/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64

FROM ubuntu:24.04

USER root

EXPOSE 8090

# libvulkan1 (Vulkan loader) and mesa-vulkan-drivers (ICDs for AMD/Intel GPUs)
# are required at runtime by the pre-built Vulkan binaries downloaded below.
# hf_transfer backs the HF_HUB_ENABLE_HF_TRANSFER=1 setting further down.
RUN apt-get update \
    && apt-get install -y curl unzip grep sed git ffmpeg nano \
        python3 python3-pip python3-wheel \
        libvulkan1 mesa-vulkan-drivers \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -U "huggingface_hub[cli,hf_transfer]" \
    && if [ -f requirements.txt ]; then pip install --break-system-packages -r requirements.txt; fi \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /tmp/* /var/tmp/* /var/lib/apt/lists/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

WORKDIR /app

# Resolve the latest release tag via the GitHub API, then fetch and unpack the
# matching pre-built Ubuntu Vulkan x64 archive (-j flattens the zip's paths).
RUN VERSION=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
    && echo "Latest llama.cpp version: $VERSION" \
    && curl -L https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
    && unzip -j llama.zip -d . \
    && rm llama.zip

RUN chmod +x /app/llama-server

ENV PATH=/app:$PATH
ENV LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH

# Fast parallel downloads from the Hugging Face Hub (needs hf_transfer, installed above)
ENV HF_HUB_ENABLE_HF_TRANSFER=1
#ENV HF_HOME=
#ENV HUGGING_FACE_HUB_TOKEN=

# llama-server reads its configuration from LLAMA_ARG_* environment variables
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_ARG_PORT=8090
ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
#ENV LLAMA_ARG_MMPROJ_URL=
ENV LLAMA_ARG_NO_MMAP=true
ENV LLAMA_ARG_CTX_SIZE=128000
#ENV LLAMA_API_KEY=""

ENTRYPOINT ["/app/llama-server"]
CMD ["--no-warmup"]
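
### RUN (a minimal sketch, assuming an AMD/Intel GPU exposed to the container
### via /dev/dri; adjust device and SELinux options for your host):
###   podman run --rm -p 8090:8090 --device /dev/dri --security-opt label=disable \
###     localhost/llamacpp:vulkan-amd64
###
### Quick checks against the server's HTTP API once the model has loaded:
###   curl http://localhost:8090/health
###   curl http://localhost:8090/v1/models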