Split config files
Tá an tiomantas seo le fáil i:
@@ -1,61 +0,0 @@
|
|||||||
### LLaMACpp Multi-Instance Container with Nginx Load Balancer
### Based on llama-throughput-lab for maximum throughput
### Multiple llama-server instances + nginx for load balancing
###
### BUILD: podman build -t llamacpp:vulkan-multi-amd64 -f llamacpp-multi.Containerfile .
### Export: podman save -o /home/badstorm/llamacpp-vulkan-multi-amd64.tar localhost/llamacpp:vulkan-multi-amd64


FROM ubuntu:24.04

USER root
# 8090 = nginx load balancer; 9000-9003 = individual llama-server instances.
EXPOSE 8090 9000 9001 9002 9003

# Base tooling + nginx + supervisor.
# Use apt-get throughout: `apt` warns it has no stable CLI for scripting.
# Also pre-create /var/log/supervisor, which supervisord logs into below.
RUN apt-get update \
    && apt-get install -y curl tar grep sed git ffmpeg nano python3-pip python3 python3-wheel nginx supervisor \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -U "huggingface_hub[cli]" \
    && if [ -f requirements.txt ]; then pip install --break-system-packages -r requirements.txt; fi \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && rm -rf /var/lib/apt/lists/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete \
    && mkdir -p /var/log/supervisor

WORKDIR /app

# Fetch the latest prebuilt Vulkan release of llama.cpp.
# NOTE(review): parses the GitHub API with grep/sed and assumes the asset name
# stays "llama-<tag>-bin-ubuntu-vulkan-x64.tar.gz" — verify on upstream changes.
RUN VERSION=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
    && echo "Last llama.cpp version: $VERSION" \
    && curl -L https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.tar.gz -o llama.tar.gz \
    && tar -xzf llama.tar.gz -C . --strip-components=1 \
    && rm llama.tar.gz

RUN chmod +x /app/llama-server

# Create startup script for multiple instances.
# `curl -sf` (not plain -s): llama-server answers /health with HTTP 503 while
# the model is still loading, and without -f curl exits 0 on 503, marking an
# instance "ready" before it can serve requests.
RUN mkdir -p /app/bin /var/log && printf '#!/bin/bash\nset -e\n\nINSTANCES=${LLAMA_INSTANCES:-2}\nBASE_PORT=${LLAMA_BASE_PORT:-9000}\nREADY_TIMEOUT=${LLAMA_READY_TIMEOUT:-600}\n\necho "Starting $INSTANCES llama-server instances on ports $BASE_PORT-$((BASE_PORT+INSTANCES-1))"\n\nfor ((i=0; i<INSTANCES; i++)); do\n PORT=$((BASE_PORT + i))\n echo "Starting instance $((i+1))/$INSTANCES on port $PORT..."\n LLAMA_ARG_PORT=$PORT /app/llama-server \\\n > /var/log/llama-server-$PORT.log 2>&1 &\n sleep 3\ndone\n\necho "Waiting for servers to be ready..."\nfor ((i=0; i<INSTANCES; i++)); do\n PORT=$((BASE_PORT + i))\n elapsed=0\n while [ $elapsed -lt $READY_TIMEOUT ]; do\n if curl -sf http://127.0.0.1:$PORT/health > /dev/null 2>&1; then\n echo "Instance on port $PORT is ready"\n break\n fi\n sleep 5\n elapsed=$((elapsed + 5))\n done\n if [ $elapsed -ge $READY_TIMEOUT ]; then\n echo "ERROR: Server on port $PORT did not become ready after ${READY_TIMEOUT}s"\n fi\ndone\n\necho "All instances ready. Monitoring logs..."\ntail -f /var/log/llama-server-*.log &\nwait\n' > /app/bin/start-multi-servers.sh && chmod +x /app/bin/start-multi-servers.sh

# Create nginx config template (4 backends — must match LLAMA_INSTANCES below).
RUN mkdir -p /etc/nginx/conf.d && printf 'upstream llama_backend {\n least_conn;\n server 127.0.0.1:9000 max_fails=3 fail_timeout=30s;\n server 127.0.0.1:9001 max_fails=3 fail_timeout=30s;\n server 127.0.0.1:9002 max_fails=3 fail_timeout=30s;\n server 127.0.0.1:9003 max_fails=3 fail_timeout=30s;\n}\n\nserver {\n listen 8090;\n server_name _;\n \n client_max_body_size 512M;\n \n location / {\n proxy_pass http://llama_backend;\n proxy_http_version 1.1;\n proxy_set_header Upgrade $http_upgrade;\n proxy_set_header Connection "upgrade";\n proxy_set_header Host $host;\n proxy_set_header X-Real-IP $remote_addr;\n proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n proxy_set_header X-Forwarded-Proto $scheme;\n proxy_buffering off;\n proxy_request_buffering off;\n proxy_read_timeout 600s;\n proxy_connect_timeout 30s;\n }\n \n location /health {\n access_log off;\n return 200 "healthy\\n";\n add_header Content-Type text/plain;\n }\n}\n' > /etc/nginx/conf.d/llama-upstream.conf

# Create supervisor config for managing both nginx and servers
RUN mkdir -p /etc/supervisor/conf.d && printf '[supervisord]\nnodaemon=true\nlogfile=/var/log/supervisor/supervisord.log\n\n[program:nginx]\ncommand=/usr/sbin/nginx -g "daemon off;"\nautostart=true\nautorestart=true\nstderr_logfile=/var/log/nginx/error.log\nstdout_logfile=/var/log/nginx/access.log\n\n[program:llama-servers]\ncommand=/app/bin/start-multi-servers.sh\nautostart=true\nautorestart=false\nstderr_logfile=/var/log/llama-servers.log\nstdout_logfile=/var/log/llama-servers.log\n' > /etc/supervisor/conf.d/llama-multi.conf

WORKDIR /app

ENV PATH=/app:/app/bin:$PATH
ENV LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Must equal the number of backends in the nginx upstream above; with the old
# default of 2, nginx balanced half the traffic onto ports with no listener.
ENV LLAMA_INSTANCES=4
ENV LLAMA_BASE_PORT=9000
ENV LLAMA_ARG_PARALLEL=32
ENV LLAMA_ARG_THREADS=16
ENV LLAMA_ARG_BATCH_SIZE=2048
ENV LLAMA_ARG_CTX_SIZE=131072
ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_READY_TIMEOUT=600

ENTRYPOINT ["/usr/bin/supervisord"]
CMD ["-c", "/etc/supervisor/conf.d/llama-multi.conf"]
|
|
||||||
129
Services/llamacpp-multi/README.md
Comhad gnáth
129
Services/llamacpp-multi/README.md
Comhad gnáth
@@ -0,0 +1,129 @@
|
|||||||
|
# LLaMACpp Multi-Instance Setup
|
||||||
|
|
||||||
|
Guida per configurare e scalare il numero di istanze di llama-server con load balancing nginx.
|
||||||
|
|
||||||
|
## Struttura Attuale
|
||||||
|
|
||||||
|
- **4 istanze** di llama-server (porte 9000-9003)
|
||||||
|
- **Nginx** come load balancer (porta 8090)
|
||||||
|
- **Supervisor** per gestire tutti i processi
|
||||||
|
|
||||||
|
## Aggiungere Istanze
|
||||||
|
|
||||||
|
Se vuoi aumentare il numero di istanze, segui questi step:
|
||||||
|
|
||||||
|
### 1. Modifica il Containerfile
|
||||||
|
|
||||||
|
File: `llamacpp-multi.Containerfile`
|
||||||
|
|
||||||
|
Cambia:
|
||||||
|
```dockerfile
|
||||||
|
ENV LLAMA_INSTANCES=4
|
||||||
|
```
|
||||||
|
|
||||||
|
Con il numero di istanze desiderato (es. 6):
|
||||||
|
```dockerfile
|
||||||
|
ENV LLAMA_INSTANCES=6
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Aggiorna la Configurazione Nginx
|
||||||
|
|
||||||
|
File: `llama-upstream.conf`
|
||||||
|
|
||||||
|
Aggiungi i server sulle nuove porte nel blocco `upstream llama_backend`:
|
||||||
|
|
||||||
|
```nginx
|
||||||
|
upstream llama_backend {
|
||||||
|
least_conn;
|
||||||
|
server 127.0.0.1:9000 max_fails=3 fail_timeout=30s;
|
||||||
|
server 127.0.0.1:9001 max_fails=3 fail_timeout=30s;
|
||||||
|
server 127.0.0.1:9002 max_fails=3 fail_timeout=30s;
|
||||||
|
server 127.0.0.1:9003 max_fails=3 fail_timeout=30s;
|
||||||
|
server 127.0.0.1:9004 max_fails=3 fail_timeout=30s; # NUOVO
|
||||||
|
server 127.0.0.1:9005 max_fails=3 fail_timeout=30s; # NUOVO
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Aggiorna il Containerfile con le porte esposte
|
||||||
|
|
||||||
|
File: `llamacpp-multi.Containerfile`
|
||||||
|
|
||||||
|
Aggiungi le nuove porte:
|
||||||
|
```dockerfile
|
||||||
|
EXPOSE 8090 9000 9001 9002 9003 9004 9005
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Ricompila il Container
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /home/badstorm/Source/bdi/bdi_podman_serverconf/Services/llamacpp-multi
|
||||||
|
podman build -t llamacpp:vulkan-multi-amd64 -f llamacpp-multi.Containerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Riavvia il Servizio
|
||||||
|
|
||||||
|
```bash
|
||||||
|
systemctl restart llamacpp-multi
|
||||||
|
```
|
||||||
|
|
||||||
|
## Considerazioni di Risorse
|
||||||
|
|
||||||
|
Ogni istanza consuma:
|
||||||
|
- **~8GB VRAM** (dipende dal modello e da `LLAMA_ARG_CTX_SIZE`)
|
||||||
|
- **~1-2 CPU core** (dipende dal carico)
|
||||||
|
|
||||||
|
**Con GPU AMD Radeon (RENOIR):**
|
||||||
|
- 2 istanze: ✅ Stabile
|
||||||
|
- 4 istanze: ⚠️ Funziona ma monitorare memoria
|
||||||
|
- 6+ istanze: ❌ Probabile esaurimento della VRAM
|
||||||
|
|
||||||
|
Monitora con:
|
||||||
|
```bash
|
||||||
|
podman stats llamacpp-multi
|
||||||
|
```
|
||||||
|
|
||||||
|
## Variabili di Ambiente Modificabili
|
||||||
|
|
||||||
|
Nel file `.container` puoi sovrascrivere:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
Environment=LLAMA_ARG_PARALLEL=32
|
||||||
|
Environment=LLAMA_ARG_THREADS=16
|
||||||
|
Environment=LLAMA_ARG_BATCH_SIZE=2048
|
||||||
|
Environment=LLAMA_ARG_CTX_SIZE=131072
|
||||||
|
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-Next-GGUF:Q2_K_XL
|
||||||
|
Environment=LLAMA_READY_TIMEOUT=600
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
Una volta avviate le istanze, testa:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8090/v1/models
|
||||||
|
```
|
||||||
|
|
||||||
|
Dovresti vedere il modello listato se tutte le istanze sono pronte.
|
||||||
|
|
||||||
|
Test di carico (concurrent requests):
|
||||||
|
```bash
|
||||||
|
for i in {1..10}; do
|
||||||
|
  curl -X POST http://localhost:8090/completion \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"prompt": "Once upon a time", "n_predict": 64}' &
|
||||||
|
done
|
||||||
|
wait
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
**502 Bad Gateway:**
|
||||||
|
```bash
|
||||||
|
podman exec llamacpp-multi tail -f /var/log/llama-server-9000.log
|
||||||
|
```
|
||||||
|
|
||||||
|
**Timeout Ready:**
|
||||||
|
Aumenta `LLAMA_READY_TIMEOUT` se il modello impiega più di 10 minuti a caricare.
|
||||||
|
|
||||||
|
**Out of Memory:**
|
||||||
|
Riduci `LLAMA_ARG_PARALLEL`, `LLAMA_ARG_BATCH_SIZE`, o `LLAMA_ARG_CTX_SIZE`.
|
||||||
17
Services/llamacpp-multi/llama-multi.conf
Comhad gnáth
17
Services/llamacpp-multi/llama-multi.conf
Comhad gnáth
@@ -0,0 +1,17 @@
|
|||||||
|
; Supervisor layout for the multi-instance llama.cpp container:
; one nginx load balancer + one launcher script that spawns the backends.
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log

[program:nginx]
; Run nginx in the foreground so supervisord can supervise it.
command=/usr/sbin/nginx -g "daemon off;"
autostart=true
autorestart=true
; NOTE(review): nginx itself also writes its error/access logs into
; /var/log/nginx via its own config — confirm the duplication is intended.
stderr_logfile=/var/log/nginx/error.log
stdout_logfile=/var/log/nginx/access.log

[program:llama-servers]
command=/app/bin/start-multi-servers.sh
autostart=true
; The launcher stays in the foreground (tail -f on the server logs);
; do not restart it when it eventually exits.
autorestart=false
; Fold stderr into the stdout log. Pointing stderr_logfile and stdout_logfile
; at the same path creates two independent supervisord loggers on one file,
; which the supervisord docs warn against; redirect_stderr is the supported way
; to get a single combined log.
redirect_stderr=true
stdout_logfile=/var/log/llama-servers.log
|
||||||
35
Services/llamacpp-multi/llama-upstream.conf
Comhad gnáth
35
Services/llamacpp-multi/llama-upstream.conf
Comhad gnáth
@@ -0,0 +1,35 @@
|
|||||||
|
# Load-balancing front end for the llama-server instances (ports 9000-9003).

# Derive the proper Connection header from the client's Upgrade header:
# real WebSocket upgrades get "Connection: upgrade", plain HTTP requests get
# "Connection: close" instead of being unconditionally forced to upgrade
# (the previous hard-coded `Connection "upgrade"` broke ordinary keep-alive
# proxying per the nginx WebSocket proxying documentation).
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

upstream llama_backend {
    # Prefer the backend with the fewest active connections — a good fit for
    # long-running generation requests of very different durations.
    least_conn;
    server 127.0.0.1:9000 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:9001 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:9002 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:9003 max_fails=3 fail_timeout=30s;
}

server {
    listen 8090;
    server_name _;

    # Allow large prompt payloads / uploads.
    client_max_body_size 512M;

    location / {
        proxy_pass http://llama_backend;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Disable buffering so generated tokens stream to the client directly.
        proxy_buffering off;
        proxy_request_buffering off;
        # Generation can run for minutes; keep the read timeout generous.
        proxy_read_timeout 600s;
        proxy_connect_timeout 30s;
    }

    # Liveness probe answered by nginx itself (does not check the backends).
    location /health {
        access_log off;
        return 200 "healthy\n";
        add_header Content-Type text/plain;
    }
}
|
||||||
62
Services/llamacpp-multi/llamacpp-multi.Containerfile
Comhad gnáth
62
Services/llamacpp-multi/llamacpp-multi.Containerfile
Comhad gnáth
@@ -0,0 +1,62 @@
|
|||||||
|
### LLaMACpp Multi-Instance Container with Nginx Load Balancer
### Based on llama-throughput-lab for maximum throughput
### Multiple llama-server instances + nginx for load balancing
###
### BUILD: podman build -t llamacpp:vulkan-multi-amd64 -f llamacpp-multi.Containerfile .
### Export: podman save -o /home/badstorm/llamacpp-vulkan-multi-amd64.tar localhost/llamacpp:vulkan-multi-amd64


FROM ubuntu:24.04

USER root
# 8090 = nginx load balancer; 9000-9003 = individual llama-server instances.
EXPOSE 8090 9000 9001 9002 9003

# Base tooling + nginx + supervisor.
# Use apt-get throughout: `apt` warns it has no stable CLI for scripting.
# Also pre-create /var/log/supervisor, the logfile directory declared in
# llama-multi.conf, so supervisord can start regardless of package layout.
RUN apt-get update \
    && apt-get install -y curl tar grep sed git ffmpeg nano python3-pip python3 python3-wheel nginx supervisor \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -U "huggingface_hub[cli]" \
    && if [ -f requirements.txt ]; then pip install --break-system-packages -r requirements.txt; fi \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && rm -rf /var/lib/apt/lists/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete \
    && mkdir -p /var/log/supervisor

WORKDIR /app

# Fetch the latest prebuilt Vulkan release of llama.cpp.
# NOTE(review): parses the GitHub API with grep/sed and assumes the asset name
# stays "llama-<tag>-bin-ubuntu-vulkan-x64.tar.gz" — verify on upstream changes.
RUN VERSION=$(curl -s https://api.github.com/repos/ggml-org/llama.cpp/releases/latest | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": "\([^"]*\)".*/\1/') \
    && echo "Last llama.cpp version: $VERSION" \
    && curl -L https://github.com/ggml-org/llama.cpp/releases/download/${VERSION}/llama-${VERSION}-bin-ubuntu-vulkan-x64.tar.gz -o llama.tar.gz \
    && tar -xzf llama.tar.gz -C . --strip-components=1 \
    && rm llama.tar.gz

RUN chmod +x /app/llama-server

# Copy startup script for multiple instances
COPY start-multi-servers.sh /app/bin/
RUN chmod +x /app/bin/start-multi-servers.sh

# Copy nginx config
COPY llama-upstream.conf /etc/nginx/conf.d/

# Copy supervisor config
COPY llama-multi.conf /etc/supervisor/conf.d/

WORKDIR /app

ENV PATH=/app:/app/bin:$PATH
ENV LD_LIBRARY_PATH=/app:$LD_LIBRARY_PATH
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Must equal the number of backends listed in llama-upstream.conf.
ENV LLAMA_INSTANCES=4
ENV LLAMA_BASE_PORT=9000
ENV LLAMA_ARG_PARALLEL=32
ENV LLAMA_ARG_THREADS=16
ENV LLAMA_ARG_BATCH_SIZE=2048
ENV LLAMA_ARG_CTX_SIZE=131072
ENV LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
ENV LLAMA_ARG_HOST=0.0.0.0
ENV LLAMA_READY_TIMEOUT=600

ENTRYPOINT ["/usr/bin/supervisord"]
CMD ["-c", "/etc/supervisor/conf.d/llama-multi.conf"]
|
||||||
@@ -14,15 +14,14 @@ PodmanArgs=--group-add=keep-groups --ipc=host
|
|||||||
SecurityLabelType=container_runtime_t
|
SecurityLabelType=container_runtime_t
|
||||||
|
|
||||||
# Multi-instance configuration (throughput optimized)
|
# Multi-instance configuration (throughput optimized)
|
||||||
Environment=LLAMA_INSTANCES=2
|
Environment=LLAMA_INSTANCES=4
|
||||||
Environment=LLAMA_BASE_PORT=9000
|
Environment=LLAMA_BASE_PORT=9000
|
||||||
Environment=LLAMA_ARG_HOST=0.0.0.0
|
Environment=LLAMA_ARG_HOST=0.0.0.0
|
||||||
Environment=LLAMA_ARG_PARALLEL=32
|
Environment=LLAMA_ARG_PARALLEL=32
|
||||||
Environment=LLAMA_ARG_THREADS=16
|
Environment=LLAMA_ARG_THREADS=16
|
||||||
Environment=LLAMA_ARG_BATCH_SIZE=2048
|
Environment=LLAMA_ARG_BATCH_SIZE=2048
|
||||||
Environment=LLAMA_ARG_UBATCH=512
|
|
||||||
Environment=LLAMA_ARG_CTX_SIZE=131072
|
Environment=LLAMA_ARG_CTX_SIZE=131072
|
||||||
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q2_K
|
Environment=LLAMA_ARG_HF_REPO=unsloth/Qwen3-Coder-Next-GGUF:Q2_K_XL
|
||||||
|
|
||||||
# HF
|
# HF
|
||||||
Environment=HF_HOME=/root/.cache/huggingface
|
Environment=HF_HOME=/root/.cache/huggingface
|
||||||
37
Services/llamacpp-multi/start-multi-servers.sh
Comhad gnáth
37
Services/llamacpp-multi/start-multi-servers.sh
Comhad gnáth
@@ -0,0 +1,37 @@
|
|||||||
|
#!/bin/bash
# Launch N llama-server instances on consecutive ports, wait until each one
# answers its /health endpoint, then keep streaming their logs so supervisord
# has a foreground process to manage.
#
# Env vars: LLAMA_INSTANCES (count, default 2), LLAMA_BASE_PORT (default 9000),
#           LLAMA_READY_TIMEOUT (seconds per instance, default 600).
set -euo pipefail

INSTANCES=${LLAMA_INSTANCES:-2}
BASE_PORT=${LLAMA_BASE_PORT:-9000}
READY_TIMEOUT=${LLAMA_READY_TIMEOUT:-600}

echo "Starting $INSTANCES llama-server instances on ports $BASE_PORT-$((BASE_PORT + INSTANCES - 1))"

for ((i = 0; i < INSTANCES; i++)); do
  port=$((BASE_PORT + i))
  echo "Starting instance $((i + 1))/$INSTANCES on port $port..."
  # llama-server reads its listen port from LLAMA_ARG_PORT.
  LLAMA_ARG_PORT=$port /app/llama-server \
    > "/var/log/llama-server-$port.log" 2>&1 &
  # Stagger startups so the instances don't all load the model at once.
  sleep 3
done

echo "Waiting for servers to be ready..."
failed=0
for ((i = 0; i < INSTANCES; i++)); do
  port=$((BASE_PORT + i))
  elapsed=0
  ready=0
  while (( elapsed < READY_TIMEOUT )); do
    # -f is essential: llama-server answers /health with HTTP 503 while the
    # model is still loading, and plain `curl -s` exits 0 on 503, which would
    # mark the instance ready before it can serve requests.
    if curl -sf "http://127.0.0.1:$port/health" > /dev/null 2>&1; then
      echo "Instance on port $port is ready"
      ready=1
      break
    fi
    sleep 5
    elapsed=$((elapsed + 5))
  done
  if (( ready == 0 )); then
    echo "ERROR: Server on port $port did not become ready after ${READY_TIMEOUT}s" >&2
    failed=1
  fi
done

# Keep going even on timeout (matching previous behavior so logs stay visible),
# but don't claim success when an instance never came up.
if (( failed )); then
  echo "WARNING: one or more instances failed readiness checks. Monitoring logs..." >&2
else
  echo "All instances ready. Monitoring logs..."
fi
tail -f /var/log/llama-server-*.log &
wait
||||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir