文件
bdi_podman_serverconf/Services/llamacpp-multi/llama-upstream.conf
2026-02-05 23:27:16 +01:00

36 行
1022 B
Plaintext

# Pool of local llama.cpp server instances exposed under one upstream name.
# least_conn sends each new request to the backend with the fewest active
# connections — a reasonable choice when request durations vary widely.
upstream llama_backend {
least_conn;
# After max_fails (3) failed attempts within fail_timeout (30s), a backend
# is considered unavailable for the next fail_timeout window.
server 127.0.0.1:9000 max_fails=3 fail_timeout=30s;
server 127.0.0.1:9001 max_fails=3 fail_timeout=30s;
server 127.0.0.1:9002 max_fails=3 fail_timeout=30s;
server 127.0.0.1:9003 max_fails=3 fail_timeout=30s;
}
# Map the client's Upgrade header to the proper Connection value so that
# "Connection: upgrade" is only sent upstream when the client actually
# requested a protocol upgrade (WebSocket). The original config forced
# "Connection: upgrade" on every request, which is incorrect for plain
# HTTP traffic. This is the pattern documented in the nginx WebSocket
# proxying guide.
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

server {
    # Front-door listener for the llama.cpp pool.
    listen 8090;
    server_name _;

    # Allow large request bodies (long prompts / embedding batches).
    client_max_body_size 512M;

    location / {
        proxy_pass http://llama_backend;

        # HTTP/1.1 is required upstream for WebSocket upgrades and
        # chunked/streamed responses.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        # Conditional via the map above: "upgrade" for WebSocket
        # requests, "close" otherwise.
        proxy_set_header Connection $connection_upgrade;

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Disable buffering so streamed tokens reach the client
        # immediately instead of being held in nginx buffers.
        proxy_buffering off;
        proxy_request_buffering off;

        # Long timeouts in both directions for slow LLM generations;
        # proxy_send_timeout added to match proxy_read_timeout.
        proxy_read_timeout 600s;
        proxy_send_timeout 600s;
        proxy_connect_timeout 30s;
    }

    # Lightweight liveness probe answered by nginx itself (does not
    # check the backends).
    location /health {
        access_log off;
        # default_type sets the Content-Type of the generated response;
        # the original add_header could yield a duplicate Content-Type
        # header alongside the default one.
        default_type text/plain;
        return 200 "healthy\n";
    }
}