# Load-balance four local llama-server instances (ports 9000-9003).
# least_conn suits LLM inference: request durations vary widely, so
# round-robin would pile new requests onto an already-busy worker.
# A backend is marked unavailable for 30s after 3 consecutive failures.
upstream llama_backend {
    least_conn;
    server 127.0.0.1:9000 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:9001 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:9002 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:9003 max_fails=3 fail_timeout=30s;
}

# Send the hop-by-hop "Connection: upgrade" header only when the client
# actually asked for a protocol upgrade (WebSocket); otherwise "close".
# Hard-coding "upgrade" for every request (the previous behavior) is
# incorrect for plain HTTP traffic. This is the nginx-documented
# WebSocket-proxying pattern.
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

server {
    listen 8090;
    server_name _;

    # Large prompt payloads (long-context requests) exceed the 1M default.
    client_max_body_size 512M;

    location / {
        proxy_pass http://llama_backend;

        # HTTP/1.1 is required for WebSocket upgrades and chunked streaming.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $connection_upgrade;

        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Disable buffering so token-by-token (SSE) streaming reaches the
        # client immediately instead of accumulating inside nginx.
        proxy_buffering off;
        proxy_request_buffering off;

        # Long generations can keep a response open for many minutes;
        # match send to read so slow writes don't time out prematurely.
        proxy_read_timeout 600s;
        proxy_send_timeout 600s;
        proxy_connect_timeout 30s;

        # Fail over to the next backend on connection-level errors only;
        # nginx never retries once a response has started streaming.
        proxy_next_upstream error timeout;
    }

    # Lightweight liveness endpoint for external monitors; answered by
    # nginx itself, so it reports proxy health, not backend health.
    location /health {
        access_log off;
        # default_type sets the Content-Type of the generated response;
        # the previous add_header appended a SECOND Content-Type header
        # instead of replacing the default one.
        default_type text/plain;
        return 200 "healthy\n";
    }
}