Split config files
Этот коммит содержится в:
62
Services/llamacpp-multi/llamacpp-multi.Containerfile
Обычный файл
62
Services/llamacpp-multi/llamacpp-multi.Containerfile
Обычный файл
@@ -0,0 +1,62 @@
|
||||
### LLaMACpp Multi-Instance Container with Nginx Load Balancer
### Based on llama-throughput-lab for maximum throughput
### Multiple llama-server instances + nginx for load balancing
###
### BUILD:  podman build -t llamacpp:vulkan-multi-amd64 -f llamacpp-multi.Containerfile .
### Export: podman save -o /home/badstorm/llamacpp-vulkan-multi-amd64.tar localhost/llamacpp:vulkan-multi-amd64

FROM ubuntu:24.04

USER root

# Documentation only (does not publish ports):
# 8090 is the nginx load-balancer front door, 9000-9003 the llama-server backends.
EXPOSE 8090 9000 9001 9002 9003

# Use bash with pipefail so a failure anywhere in the curl|grep|sed pipeline
# below aborts the build instead of being masked by /bin/sh's default behavior.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# OS packages + Python tooling in one layer, cleaned up in the same layer so the
# removed files never persist in the image. --no-install-recommends keeps the
# image lean; apt-get (not apt) is used because apt's CLI is not script-stable.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
        grep \
        nano \
        nginx \
        python3 \
        python3-pip \
        python3-wheel \
        sed \
        supervisor \
        tar \
    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
    # NOTE(review): HF_HUB_ENABLE_HF_TRANSFER=1 is set below, but the
    # hf_transfer package is not installed here — confirm, or install
    # "huggingface_hub[cli,hf_transfer]" instead.
    && pip install --no-cache-dir --break-system-packages -U "huggingface_hub[cli]" \
    # NOTE(review): nothing COPYs a requirements.txt into the build context,
    # so this branch is currently dead code; kept for forward compatibility.
    && if [ -f requirements.txt ]; then pip install --no-cache-dir --break-system-packages -r requirements.txt; fi \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* /var/lib/apt/lists/* \
    && find /var/cache -type f -delete

WORKDIR /app

# Fetch the latest prebuilt Vulkan x64 release of llama.cpp and unpack it into /app.
# curl -f makes HTTP errors fail the build instead of saving an error page.
# NOTE(review): "latest" is resolved at build time, so builds are not
# reproducible — consider pinning the tag via an ARG if determinism matters.
RUN VERSION=$(curl -fsSL https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
    && echo "Last llama.cpp version: $VERSION" \
    && curl -fL https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.tar.gz -o llama.tar.gz \
    && tar -xzf llama.tar.gz -C . --strip-components=1 \
    && rm llama.tar.gz \
    && chmod +x /app/llama-server

# Startup script that spawns the LLAMA_INSTANCES llama-server backends.
COPY start-multi-servers.sh /app/bin/
RUN chmod +x /app/bin/start-multi-servers.sh

# nginx upstream config: load-balances port 8090 across the backend instances.
COPY llama-upstream.conf /etc/nginx/conf.d/

# supervisord config: runs nginx and the llama-server launcher together.
COPY llama-multi.conf /etc/supervisor/conf.d/

# Runtime defaults — every value is overridable with `podman run -e ...`.
# LLAMA_ARG_* map to llama-server CLI flags; LLAMA_INSTANCES/LLAMA_BASE_PORT
# are consumed by start-multi-servers.sh.
ENV PATH=/app:/app/bin:$PATH \
    LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    LLAMA_INSTANCES=4 \
    LLAMA_BASE_PORT=9000 \
    LLAMA_ARG_PARALLEL=32 \
    LLAMA_ARG_THREADS=16 \
    LLAMA_ARG_BATCH_SIZE=2048 \
    LLAMA_ARG_CTX_SIZE=131072 \
    LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K \
    LLAMA_ARG_HOST=0.0.0.0 \
    LLAMA_READY_TIMEOUT=600

# Probe the nginx front door so orchestrators can detect a wedged container.
# Long start-period: model download/load can take minutes (cf. LLAMA_READY_TIMEOUT).
# NOTE(review): assumes nginx proxies llama-server's /health endpoint — confirm
# against llama-upstream.conf.
HEALTHCHECK --interval=30s --timeout=5s --start-period=300s --retries=5 \
    CMD curl -fsS http://localhost:8090/health || exit 1

# supervisord runs as PID 1 (exec form) and manages nginx + llama-server;
# root is retained because supervisord spawns and signals both services.
ENTRYPOINT ["/usr/bin/supervisord"]
CMD ["-c", "/etc/supervisor/conf.d/llama-multi.conf"]
Ссылка в новой задаче
Block a user