diff --git a/Services/llamacpp.Containerfile b/Services/llamacpp-mistral.Containerfile
similarity index 100%
rename from Services/llamacpp.Containerfile
rename to Services/llamacpp-mistral.Containerfile
diff --git a/Services/llamacpp-vulkan.Containerfile b/Services/llamacpp-vulkan.Containerfile
new file mode 100644
index 0000000..ba0c731
--- /dev/null
+++ b/Services/llamacpp-vulkan.Containerfile
@@ -0,0 +1,81 @@
+### llama.cpp server container with the Vulkan backend for GPUs
+### Single-stage: downloads the pre-built upstream release binaries into a Debian slim image
+###
+### BUILD: podman build -t llamacpp:vulkan-amd64 -f llamacpp-vulkan.Containerfile .
+###        Optional: --build-arg LLAMA_CPP_VERSION=<release tag> pins a release (default: latest)
+### Export: podman save -o /home/badstorm/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64
+
+
+FROM debian:13-slim
+
+# Root is kept for the whole image, as before. NOTE(review): dropping privileges would need a
+# writable model cache and GPU device access for the new user - confirm before changing.
+USER root
+EXPOSE 8090
+
+# Tooling, the Hugging Face CLI and the runtime libraries the pre-built Vulkan binaries are expected
+# to need. libgomp1, libvulkan1 and mesa-vulkan-drivers are listed explicitly so the image does not
+# rely on them arriving as transitive or recommended packages. NOTE(review): confirm the set with ldd.
+# pip runs with --no-cache-dir because its cache lives under /root/.cache, which the cleanup
+# steps below do not touch.
+RUN apt-get update \
+    && apt-get install -y \
+        ca-certificates \
+        curl \
+        ffmpeg \
+        git \
+        grep \
+        libgomp1 \
+        libvulkan1 \
+        mesa-vulkan-drivers \
+        nano \
+        python3 \
+        python3-pip \
+        python3-wheel \
+        sed \
+        tar \
+    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
+    && pip install --no-cache-dir --break-system-packages -U "huggingface_hub[cli]" \
+    && apt-get autoremove -y \
+    && apt-get clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && rm -rf /var/lib/apt/lists/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+WORKDIR /app
+
+# Release tag to install; empty resolves the latest release through the GitHub API at build time.
+ARG LLAMA_CPP_VERSION=""
+
+# Download and unpack the release. curl -f turns HTTP errors into build failures, and the explicit
+# empty-tag check is needed because a failed lookup inside the pipeline would otherwise go unnoticed
+# (the pipeline's exit status is that of sed).
+RUN VERSION="${LLAMA_CPP_VERSION}" \
+    && if [ -z "$VERSION" ]; then \
+        VERSION=$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/'); \
+    fi \
+    && if [ -z "$VERSION" ]; then echo "Could not determine the llama.cpp release tag" >&2; exit 1; fi \
+    && echo "Last llama.cpp version: $VERSION" \
+    && curl -fL "https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.tar.gz" -o llama.tar.gz \
+    && tar -xzf llama.tar.gz -C . --strip-components=1 \
+    && rm llama.tar.gz \
+    && chmod +x /app/llama-server
+
+ENV PATH=/app:$PATH
+# Set to /app only: nothing earlier in this file defines LD_LIBRARY_PATH, so appending it produced
+# "/app:" whose empty entry makes the dynamic loader search the current directory.
+ENV LD_LIBRARY_PATH=/app
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+#ENV HF_HOME=
+#ENV HUGGING_FACE_HUB_TOKEN=
+ENV LLAMA_ARG_HOST=0.0.0.0
+ENV LLAMA_ARG_PORT=8090
+ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3.5-35B-A3B-GGUF:Q2_K_XL
+#ENV LLAMA_ARG_MMPROJ_URL=
+ENV LLAMA_ARG_NO_MMAP=true
+ENV LLAMA_ARG_CTX_SIZE=128000
+#ENV LLAMA_API_KEY=""
+
+ENTRYPOINT ["/app/llama-server"]
+CMD ["--no-warmup"]