Files
amd-strix-halo-vllm-toolboxes/scripts/start-vllm.sh
T

55 wiersze
1.6 KiB
Bash
Czysty Zwykły widok Historia

2025-09-03 22:37:26 +01:00
#!/usr/bin/env bash
set -euo pipefail
2025-09-04 13:58:51 +01:00
# Runtime settings. Each one keeps the value already present in the
# environment and falls back to the default below when unset or empty.
#   HOST          interface passed to `vllm serve --host`
#   PORT          port passed to `vllm serve --port`
#   DOWNLOAD_DIR  directory passed to `vllm serve --download-dir`
#   EXTRA_FLAGS   extra whitespace-separated flags appended to the command
: "${HOST:=0.0.0.0}"
: "${PORT:=8000}"
: "${DOWNLOAD_DIR:=$HOME/vllm-models}"
: "${EXTRA_FLAGS:=}"
# Model menu. One entry per model, formatted as "label|repo|flags":
#   label  text shown in the selection menu
#   repo   model identifier handed to `vllm serve`
#   flags  optional whitespace-separated per-model flags (may be empty)
# Only model entries belong inside the parentheses: every extra word in the
# literal would become its own array element and therefore a menu entry.
models=(
  "Llama 2 7B Chat|meta-llama/Llama-2-7b-chat-hf|"
  "Qwen2.5 7B Instruct|Qwen/Qwen2.5-7B-Instruct|"
  "Qwen3 30B A3B Instruct|Qwen/Qwen3-30B-A3B-Instruct-2507|"
  "Qwen3 14B AWQ|Qwen/Qwen3-14B-AWQ|--quantization awq --dtype float16 --enforce-eager"
  "Gemma 3 27B instruct|google/gemma-3-27b-it|"
  "Gemma 3 12B Instruct|google/gemma-3-12b-it|"
  "Gemma 3 4B Instruct|google/gemma-3-4b-it|"
)
2025-09-04 13:33:53 +01:00
#######################################
# Print the numbered model menu.
# Globals:   models (read) - entries of the form "label|repo|flags"
# Arguments: none
# Outputs:   a header line followed by one " [N] label" line per entry,
#            numbered from 1, on stdout
#######################################
print_model_menu() {
  local i
  echo "Select a model:"
  for i in "${!models[@]}"; do
    # The label is everything before the first '|'.
    printf " [%d] %s\n" "$((i + 1))" "${models[$i]%%|*}"
  done
}

print_model_menu
2025-09-04 13:33:53 +01:00
# Ask for a menu number. A failed read (EOF or closed stdin) is reported
# explicitly; otherwise `set -e` would end the script without any message.
if ! read -rp "Enter number: " choice; then
  echo "No selection received." >&2
  exit 1
fi

# Accept only 1..N. The pattern rejects leading zeros and caps the input at
# nine digits, which keeps the value far inside bash's integer range so an
# absurdly long number cannot wrap around into a valid index.
if ! [[ "$choice" =~ ^[1-9][0-9]{0,8}$ ]] || (( choice > ${#models[@]} )); then
  echo "Invalid choice." >&2
  exit 1
fi
idx=$((choice - 1))

# Split the chosen "label|repo|flags" entry. The label is only needed for
# the menu, so it is discarded here.
IFS='|' read -r _ repo flags <<< "${models[$idx]}"
2025-09-03 22:37:26 +01:00
2025-09-04 13:58:51 +01:00
#######################################
# Split a string on whitespace and append the resulting words to CMD.
# Unlike an unquoted expansion (CMD+=($var)), the words are NOT subject to
# pathname expansion, so a flag value such as "*" or "img[1]" is passed
# through literally instead of being replaced by matching file names.
# Quotes inside the string are not interpreted; words are split on spaces,
# tabs and newlines only.
# Globals:   CMD (written)
# Arguments: $1 - whitespace-separated flags (may be empty)
#######################################
append_cmd_words() {
  local -a words=()
  # -d '' reads the whole string (including embedded newlines); read then
  # returns non-zero at end of input, which is expected here.
  IFS=$' \t\n' read -r -d '' -a words <<< "$1" || true
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on
  # older bash releases.
  CMD+=(${words[@]+"${words[@]}"})
}

mkdir -p -- "$DOWNLOAD_DIR"
CMD=(vllm serve "$repo" --host "$HOST" --port "$PORT" --download-dir "$DOWNLOAD_DIR")

# Per-model flags
append_cmd_words "${flags:-}"

# Optional global extras: e.g. EXTRA_FLAGS="--gpu-memory-utilization 0.8"
append_cmd_words "${EXTRA_FLAGS:-}"
2025-09-03 22:37:26 +01:00
2025-09-04 13:58:51 +01:00
#######################################
# Show the command that is about to be launched, plus connection hints.
# Globals:   CMD (read) - argv array of the command
#            PORT (read) - port used in the example commands
# Arguments: none
# Outputs:   the banner on stdout, followed by a blank line
#######################################
print_launch_info() {
  local shown
  # %q shell-quotes every argument, so the printed line is unambiguous and
  # can be pasted back into a shell; plain arguments are printed unchanged.
  # printf also leaves backslashes inside arguments alone, unlike `echo -e`.
  printf -v shown '%q ' "${CMD[@]}"
  printf 'Running:\n\n %s\n\n' "${shown% }"
  echo "API test → curl -s http://localhost:${PORT}/v1/models | jq -r '.data[0].id'"
  echo "SSH tip → ssh -L ${PORT}:localhost:${PORT} user@host"
  echo
}

print_launch_info

# exec replaces this shell process with the assembled command.
exec "${CMD[@]}"