### LLaMACpp Server Container with Vulkan GPU support
### Single-stage: pulls the latest pre-built llama.cpp Vulkan binaries and installs only the runtime libraries they need
###
### Build:  podman build -t llamacpp:vulkan-amd64 -f llamacpp.Containerfile .
### Export: podman save -o /home/badstorm/llamacpp-vulkan-amd64.tar localhost/llamacpp:vulkan-amd64
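###
### Run (sketch): the Vulkan backend needs the host GPU passed through; on a
### typical Linux host that is /dev/dri, but the exact device is host-specific.
###   podman run --rm -p 8090:8090 --device /dev/dri llamacpp:vulkan-amd64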

FROM ubuntu:24.04

USER root
EXPOSE 8090

RUN apt-get update \
    # libvulkan1 + mesa-vulkan-drivers supply the Vulkan loader and GPU ICDs the pre-built binaries need at run time
    && apt-get install -y curl unzip grep sed git ffmpeg nano python3-pip python3 python3-wheel libvulkan1 mesa-vulkan-drivers \
    && pip install --break-system-packages --upgrade setuptools \
    # the hf_transfer extra backs the HF_HUB_ENABLE_HF_TRANSFER=1 set below
    && pip install --break-system-packages -U "huggingface_hub[cli,hf_transfer]" \
    # no COPY precedes this, so the requirements.txt branch only fires in derived images that add one
    && if [ -f requirements.txt ]; then pip install --break-system-packages -r requirements.txt; fi \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && rm -rf /var/lib/apt/lists/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
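
# The huggingface_hub CLI installed above can also bake the model into the image
# at build time instead of fetching it on first start. A minimal sketch, reusing
# the repo referenced in LLAMA_ARG_HF_REPO below (the image then grows by the
# full model size):
# RUN huggingface-cli download unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF \
#     --include "*Q2_K*" --local-dir /models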

WORKDIR /app

RUN VERSION=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
    && echo "Latest llama.cpp version: $VERSION" \
    && curl -L https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
    && unzip -j llama.zip -d . \
    && rm llama.zip
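
# The lookup above always tracks the newest release, so rebuilds are not
# reproducible. A sketch of a pinned alternative, assuming the same asset
# naming (the tag below is only an example):
# ARG LLAMA_VERSION=b4689
# RUN curl -L https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-vulkan-x64.zip -o llama.zip \
#     && unzip -j llama.zip -d . && rm llama.zip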

RUN chmod +x /app/llama-server

ENV PATH=/app:$PATH
ENV LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH
ENV HF_HUB_ENABLE_HF_TRANSFER=1
#ENV HF_HOME=
#ENV HUGGING_FACE_HUB_TOKEN=
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_ARG_PORT=8090
ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
#ENV LLAMA_ARG_MMPROJ_URL=
ENV LLAMA_ARG_NO_MMAP=true
ENV LLAMA_ARG_CTX_SIZE=128000
#ENV LLAMA_API_KEY=""
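
# Every LLAMA_ARG_* default above maps to a llama-server flag and can be
# overridden per container at run time, e.g. (values here are only examples):
#   podman run --rm -p 8090:8090 --device /dev/dri \
#     -e LLAMA_ARG_CTX_SIZE=32768 -e LLAMA_API_KEY=secret \
#     llamacpp:vulkan-amd64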

ENTRYPOINT ["/app/llama-server"]
CMD ["--no-warmup"]
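
# Smoke test once a container is up; llama-server exposes a /health endpoint:
#   curl http://localhost:8090/health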