İşlemeleri karşılaştır
1 İşleme
| Yazar | SHA1 | Tarih | |
|---|---|---|---|
| 48a20990d3 |
@@ -13,7 +13,7 @@ on:
|
|||||||
default: ""
|
default: ""
|
||||||
|
|
||||||
env:
|
env:
|
||||||
IMAGE_REPO: kyuz0/vllm-therock-gfx1151
|
IMAGE_REPO: kyuz0/vllm-therock-gfx1150
|
||||||
DOCKER_BUILDKIT: "1"
|
DOCKER_BUILDKIT: "1"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
@@ -67,7 +67,7 @@ jobs:
|
|||||||
uses: dawidd6/action-download-artifact@v6
|
uses: dawidd6/action-download-artifact@v6
|
||||||
with:
|
with:
|
||||||
workflow: build-rccl.yml
|
workflow: build-rccl.yml
|
||||||
name: librccl-gfx1151
|
name: librccl-gfx1150
|
||||||
run_id: ${{ github.event.inputs.rccl_run_id }}
|
run_id: ${{ github.event.inputs.rccl_run_id }}
|
||||||
path: custom_libs
|
path: custom_libs
|
||||||
if_no_artifact_found: warn
|
if_no_artifact_found: warn
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ on:
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
ROCM_MAJOR_VER: 7
|
ROCM_MAJOR_VER: 7
|
||||||
GFX: gfx1151
|
GFX: gfx1150
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-rccl:
|
build-rccl:
|
||||||
@@ -24,17 +24,17 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
source /etc/profile.d/rocm-sdk.sh
|
source /etc/profile.d/rocm-sdk.sh
|
||||||
bash scripts/build_rccl_gfx1151.sh
|
bash scripts/build_rccl_gfx1150.sh
|
||||||
|
|
||||||
- name: Compress Artifact
|
- name: Compress Artifact
|
||||||
run: |
|
run: |
|
||||||
# Path determined from script logic: rocm-systems/projects/rccl/build_gfx1151/librccl.so.1
|
# Path determined from script logic: rocm-systems/projects/rccl/build_gfx1150/librccl.so.1
|
||||||
ls -lh rocm-systems/projects/rccl/build_gfx1151/librccl.so.1
|
ls -lh rocm-systems/projects/rccl/build_gfx1150/librccl.so.1
|
||||||
gzip -c rocm-systems/projects/rccl/build_gfx1151/librccl.so.1 > librccl.so.1.gz
|
gzip -c rocm-systems/projects/rccl/build_gfx1150/librccl.so.1 > librccl.so.1.gz
|
||||||
ls -lh librccl.so.1.gz
|
ls -lh librccl.so.1.gz
|
||||||
|
|
||||||
- name: Upload Artifact
|
- name: Upload Artifact
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: librccl-gfx1151
|
name: librccl-gfx1150
|
||||||
path: librccl.so.1.gz
|
path: librccl.so.1.gz
|
||||||
|
|||||||
+3
-1
@@ -1,3 +1,5 @@
|
|||||||
*.pyc
|
*.pyc
|
||||||
__pycache__/
|
__pycache__/
|
||||||
settings.json
|
settings.json
|
||||||
|
custom_libs/
|
||||||
|
rocm-systems/
|
||||||
+15
-15
@@ -1,4 +1,4 @@
|
|||||||
FROM registry.fedoraproject.org/fedora:43
|
FROM debian:12-slim
|
||||||
|
|
||||||
# 1. System Base & Build Tools
|
# 1. System Base & Build Tools
|
||||||
# Added 'gperftools-libs' for tcmalloc (fixes double-free)
|
# Added 'gperftools-libs' for tcmalloc (fixes double-free)
|
||||||
@@ -8,7 +8,7 @@ RUN sh /tmp/install_deps.sh
|
|||||||
# 2. Install "TheRock" ROCm SDK (Tarball Method)
|
# 2. Install "TheRock" ROCm SDK (Tarball Method)
|
||||||
WORKDIR /tmp
|
WORKDIR /tmp
|
||||||
ARG ROCM_MAJOR_VER=7
|
ARG ROCM_MAJOR_VER=7
|
||||||
ARG GFX=gfx1151
|
ARG GFX=gfx1150
|
||||||
# We pass ARGs to the script via ENV or rely on defaults.
|
# We pass ARGs to the script via ENV or rely on defaults.
|
||||||
# But let's be explicit and export them for the RUN command.
|
# But let's be explicit and export them for the RUN command.
|
||||||
COPY scripts/install_rocm_sdk.sh /tmp/install_rocm_sdk.sh
|
COPY scripts/install_rocm_sdk.sh /tmp/install_rocm_sdk.sh
|
||||||
@@ -18,7 +18,7 @@ RUN chmod +x /tmp/install_rocm_sdk.sh && \
|
|||||||
/tmp/install_rocm_sdk.sh
|
/tmp/install_rocm_sdk.sh
|
||||||
|
|
||||||
# 4. Python Venv Setup
|
# 4. Python Venv Setup
|
||||||
RUN /usr/bin/python3.12 -m venv /opt/venv
|
RUN /usr/bin/python3.11 -m venv /opt/venv
|
||||||
ENV VIRTUAL_ENV=/opt/venv
|
ENV VIRTUAL_ENV=/opt/venv
|
||||||
ENV PATH=/opt/venv/bin:$PATH
|
ENV PATH=/opt/venv/bin:$PATH
|
||||||
ENV PIP_NO_CACHE_DIR=1
|
ENV PIP_NO_CACHE_DIR=1
|
||||||
@@ -27,7 +27,7 @@ RUN python -m pip install --upgrade pip wheel packaging "setuptools<80.0.0"
|
|||||||
|
|
||||||
# 5. Install PyTorch (TheRock Nightly)
|
# 5. Install PyTorch (TheRock Nightly)
|
||||||
RUN python -m pip install \
|
RUN python -m pip install \
|
||||||
--index-url https://rocm.nightlies.amd.com/v2-staging/gfx1151/ \
|
--index-url https://rocm.nightlies.amd.com/v2-staging/gfx1150/ \
|
||||||
--pre torch torchaudio torchvision
|
--pre torch torchaudio torchvision
|
||||||
|
|
||||||
WORKDIR /opt
|
WORKDIR /opt
|
||||||
@@ -49,16 +49,16 @@ WORKDIR /opt/vllm
|
|||||||
# --- PATCHING ---
|
# --- PATCHING ---
|
||||||
COPY scripts/patch_strix.py /opt/vllm/patch_strix.py
|
COPY scripts/patch_strix.py /opt/vllm/patch_strix.py
|
||||||
RUN python /opt/vllm/patch_strix.py && \
|
RUN python /opt/vllm/patch_strix.py && \
|
||||||
sed -i 's/gfx1200;gfx1201/gfx1151/' CMakeLists.txt
|
sed -i 's/gfx1200;gfx1201/gfx1150/' CMakeLists.txt
|
||||||
|
|
||||||
# 7. Build vLLM (Wheel Method) with CLANG Host Compiler
|
# 7. Build vLLM (Wheel Method) with CLANG Host Compiler
|
||||||
RUN python -m pip install --upgrade cmake ninja packaging wheel numpy "setuptools-scm>=8" "setuptools<80.0.0" scikit-build-core pybind11
|
# Build-time Python dependencies for the vLLM wheel build.
# Every version specifier is quoted: an unquoted ">=" is parsed by the shell as
# an output redirection, which drops the constraint and writes a stray file.
RUN python -m pip install --upgrade cmake ninja packaging wheel numpy "setuptools-scm>=8" "setuptools<80.0.0" scikit-build-core pybind11 "amd-quark>=0.11"
|
||||||
ENV ROCM_HOME="/opt/rocm"
|
ENV ROCM_HOME="/opt/rocm"
|
||||||
ENV HIP_PATH="/opt/rocm"
|
ENV HIP_PATH="/opt/rocm"
|
||||||
ENV VLLM_TARGET_DEVICE="rocm"
|
ENV VLLM_TARGET_DEVICE="rocm"
|
||||||
ENV PYTORCH_ROCM_ARCH="gfx1151"
|
ENV PYTORCH_ROCM_ARCH="gfx1150"
|
||||||
ENV HIP_ARCHITECTURES="gfx1151"
|
ENV HIP_ARCHITECTURES="gfx1150"
|
||||||
ENV AMDGPU_TARGETS="gfx1151"
|
ENV AMDGPU_TARGETS="gfx1150"
|
||||||
ENV MAX_JOBS="4"
|
ENV MAX_JOBS="4"
|
||||||
|
|
||||||
# --- CRITICAL FIX FOR SEGFAULT ---
|
# --- CRITICAL FIX FOR SEGFAULT ---
|
||||||
@@ -69,7 +69,7 @@ ENV CXX="/opt/rocm/llvm/bin/clang++"
|
|||||||
|
|
||||||
RUN export HIP_DEVICE_LIB_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) && \
|
RUN export HIP_DEVICE_LIB_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) && \
|
||||||
echo "Compiling with Bitcode: $HIP_DEVICE_LIB_PATH" && \
|
echo "Compiling with Bitcode: $HIP_DEVICE_LIB_PATH" && \
|
||||||
export CMAKE_ARGS="-DROCM_PATH=/opt/rocm -DHIP_PATH=/opt/rocm -DAMDGPU_TARGETS=gfx1151 -DHIP_ARCHITECTURES=gfx1151" && \
|
export CMAKE_ARGS="-DROCM_PATH=/opt/rocm -DHIP_PATH=/opt/rocm -DAMDGPU_TARGETS=gfx1150 -DHIP_ARCHITECTURES=gfx1150" && \
|
||||||
python -m pip wheel --no-build-isolation --no-deps -w /tmp/dist -v . && \
|
python -m pip wheel --no-build-isolation --no-deps -w /tmp/dist -v . && \
|
||||||
python -m pip install /tmp/dist/*.whl
|
python -m pip install /tmp/dist/*.whl
|
||||||
|
|
||||||
@@ -86,8 +86,8 @@ ENV CMAKE_PREFIX_PATH="/opt/rocm"
|
|||||||
|
|
||||||
# Force CMake to use the System ROCm Compiler (/opt/rocm/llvm/bin/clang++)
|
# Force CMake to use the System ROCm Compiler (/opt/rocm/llvm/bin/clang++)
|
||||||
RUN cmake -S . \
|
RUN cmake -S . \
|
||||||
-DGPU_TARGETS="gfx1151" \
|
-DGPU_TARGETS="gfx1150" \
|
||||||
-DBNB_ROCM_ARCH="gfx1151" \
|
-DBNB_ROCM_ARCH="gfx1150" \
|
||||||
-DCOMPUTE_BACKEND=hip \
|
-DCOMPUTE_BACKEND=hip \
|
||||||
-DCMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
|
-DCMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
|
||||||
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
|
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
|
||||||
@@ -101,7 +101,7 @@ RUN chmod -R a+rwX /opt && \
|
|||||||
find /opt/venv -type f -name "*.so" -exec strip -s {} + 2>/dev/null || true && \
|
find /opt/venv -type f -name "*.so" -exec strip -s {} + 2>/dev/null || true && \
|
||||||
find /opt/venv -type d -name "__pycache__" -prune -exec rm -rf {} + && \
|
find /opt/venv -type d -name "__pycache__" -prune -exec rm -rf {} + && \
|
||||||
rm -rf /root/.cache/pip || true && \
|
rm -rf /root/.cache/pip || true && \
|
||||||
dnf clean all && rm -rf /var/cache/dnf/*
|
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY scripts/01-rocm-env-for-triton.sh /etc/profile.d/01-rocm-env-for-triton.sh
|
COPY scripts/01-rocm-env-for-triton.sh /etc/profile.d/01-rocm-env-for-triton.sh
|
||||||
COPY scripts/99-toolbox-banner.sh /etc/profile.d/99-toolbox-banner.sh
|
COPY scripts/99-toolbox-banner.sh /etc/profile.d/99-toolbox-banner.sh
|
||||||
@@ -128,7 +128,7 @@ RUN chmod +x /opt/start-vllm /opt/start-vllm-cluster /opt/vllm_cluster_bench.py
|
|||||||
RUN chmod 0644 /etc/profile.d/*.sh
|
RUN chmod 0644 /etc/profile.d/*.sh
|
||||||
RUN printf 'ulimit -S -c 0\n' > /etc/profile.d/90-nocoredump.sh && chmod 0644 /etc/profile.d/90-nocoredump.sh
|
RUN printf 'ulimit -S -c 0\n' > /etc/profile.d/90-nocoredump.sh && chmod 0644 /etc/profile.d/90-nocoredump.sh
|
||||||
|
|
||||||
# 9. Install Custom RCCL (gfx1151) - Replaces standard library with manually built one
|
# 9. Install Custom RCCL (gfx1150) - Replaces standard library with manually built one
|
||||||
COPY custom_libs/librccl.so.1.gz /tmp/librccl.so.1.gz
|
COPY custom_libs/librccl.so.1.gz /tmp/librccl.so.1.gz
|
||||||
RUN echo "Installing Custom RCCL..." && \
|
RUN echo "Installing Custom RCCL..." && \
|
||||||
gzip -d /tmp/librccl.so.1.gz && \
|
gzip -d /tmp/librccl.so.1.gz && \
|
||||||
@@ -146,4 +146,4 @@ RUN python -m pip install transformers==5.0.0
|
|||||||
|
|
||||||
RUN chmod -R a+rwX /opt
|
RUN chmod -R a+rwX /opt
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
@@ -0,0 +1,304 @@
|
|||||||
|
# Guida: Usare vLLM con Podman su Strix Halo
|
||||||
|
|
||||||
|
Questa guida ti spiega come buildare e usare il container vLLM con il modello `bullpoint/Qwen3-Coder-Next-AWQ-4bit` su Debian 13 con Podman.
|
||||||
|
|
||||||
|
## Prerequisiti
|
||||||
|
|
||||||
|
- Podman installato e funzionante
|
||||||
|
- AMD Ryzen AI Max "Strix Halo" (gfx1150) o GPU ROCm compatibile
|
||||||
|
- Accesso ai device `/dev/kfd` e `/dev/dri`
|
||||||
|
- Almeno 30GB di spazio disco per il modello e la cache
|
||||||
|
|
||||||
|
## 1. Buildare l'immagine
|
||||||
|
|
||||||
|
Dalla directory del progetto, esegui:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
podman build -t vllm:rocm .
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:**
|
||||||
|
- Il build richiede 30-60 minuti a seconda della macchina
|
||||||
|
- L'immagine compila vLLM, bitsandbytes e flash-attention da sorgente
|
||||||
|
- Se il build fallisce, verifica di avere abbastanza spazio disco e memoria
|
||||||
|
|
||||||
|
### Opzioni di build avanzate
|
||||||
|
|
||||||
|
Puoi passare argomenti personalizzati:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
podman build \
|
||||||
|
--build-arg ROCM_MAJOR_VER=7 \
|
||||||
|
--build-arg GFX=gfx1150 \
|
||||||
|
--network=host \
|
||||||
|
-t vllm:rocm .
|
||||||
|
```
|
||||||
|
|
||||||
|
- `--network=host` - Usare la rete dell'host per i download (utile se hai problemi di connessione)
|
||||||
|
- `--no-cache` - Ignorare la cache e ricompilare tutto
|
||||||
|
|
||||||
|
## 2. Preparare i filesystem locali
|
||||||
|
|
||||||
|
Crea le cartelle per modelli e cache:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p ~/models
|
||||||
|
mkdir -p ~/.cache/huggingface
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Lanciare il container con GPU
|
||||||
|
|
||||||
|
### Opzione A: Shell interattiva (Development)
|
||||||
|
|
||||||
|
Se vuoi esplorare il container e usare il TUI `start-vllm`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
podman run -it \
|
||||||
|
--device /dev/kfd \
|
||||||
|
--device /dev/dri \
|
||||||
|
--network host \
|
||||||
|
-v $HOME/models:/models \
|
||||||
|
-v $HOME/.cache/huggingface:/cache/huggingface \
|
||||||
|
-p 8000:8000 \
|
||||||
|
vllm:rocm \
|
||||||
|
/bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
Dentro il container:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
start-vllm
|
||||||
|
```
|
||||||
|
|
||||||
|
Oppure lancia direttamente:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
vllm serve bullpoint/Qwen3-Coder-Next-AWQ-4bit \
|
||||||
|
--tensor-parallel-size 1 \
|
||||||
|
--trust-remote-code \
|
||||||
|
--enforce-eager \
|
||||||
|
--gpu-memory-utilization 0.90
|
||||||
|
```
|
||||||
|
|
||||||
|
### Opzione B: Lanciare direttamente il servizio (Production)
|
||||||
|
|
||||||
|
Esegui vLLM in un unico comando senza shell interattiva:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
podman run -d \
|
||||||
|
--device /dev/kfd \
|
||||||
|
--device /dev/dri \
|
||||||
|
--network host \
|
||||||
|
-v $HOME/models:/models \
|
||||||
|
-v $HOME/.cache/huggingface:/cache/huggingface \
|
||||||
|
-p 8000:8000 \
|
||||||
|
--name vllm-server \
|
||||||
|
vllm:rocm \
|
||||||
|
vllm serve bullpoint/Qwen3-Coder-Next-AWQ-4bit \
|
||||||
|
--tensor-parallel-size 1 \
|
||||||
|
--trust-remote-code \
|
||||||
|
--enforce-eager \
|
||||||
|
--gpu-memory-utilization 0.90
|
||||||
|
```
|
||||||
|
|
||||||
|
**Opzioni spiegate:**
|
||||||
|
|
||||||
|
| Opzione | Significato |
|
||||||
|
|---------|------------|
|
||||||
|
| `-d` | Esegui in background |
|
||||||
|
| `--device /dev/kfd` | Accesso alla GPU ROCm (kernel compute queue) |
|
||||||
|
| `--device /dev/dri` | Accesso agli acceleratori DRI (render engine) |
|
||||||
|
| `--network host` | Usa la rete dell'host (migliore performance) |
|
||||||
|
| `-v $HOME/models:/models` | Monta la cartella modelli locale |
|
||||||
|
| `-v $HOME/.cache/huggingface:/cache/huggingface` | Monta la cache HuggingFace |
|
||||||
|
| `-p 8000:8000` | Espone la porta dell'API OpenAI-compatible |
|
||||||
|
| `--name vllm-server` | Nome del container |
|
||||||
|
| `--tensor-parallel-size 1` | Usa 1 GPU (no parallelismo) |
|
||||||
|
| `--trust-remote-code` | Permetti codice remoto da HuggingFace |
|
||||||
|
| `--enforce-eager` | Modalità eager (debug/stability) |
|
||||||
|
| `--gpu-memory-utilization 0.90` | Usa il 90% della memoria GPU |
|
||||||
|
|
||||||
|
## 4. Monitorare il container
|
||||||
|
|
||||||
|
Se lanciato in background (`-d`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Visualizza i log
|
||||||
|
podman logs -f vllm-server
|
||||||
|
|
||||||
|
# Visualizza le ultime 50 righe dei log
|
||||||
|
podman logs --tail 50 vllm-server
|
||||||
|
|
||||||
|
# Controlla lo stato
|
||||||
|
podman ps | grep vllm-server
|
||||||
|
|
||||||
|
# Entra nel container
|
||||||
|
podman exec -it vllm-server /bin/bash
|
||||||
|
```
|
||||||
|
|
||||||
|
## 5. Testare l'API
|
||||||
|
|
||||||
|
Una volta che il server è up, puoi testare con cURL:
|
||||||
|
|
||||||
|
### Chat Completion
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "bullpoint/Qwen3-Coder-Next-AWQ-4bit",
|
||||||
|
"messages": [{"role": "user", "content": "Write a Python function to sort a list"}],
|
||||||
|
"max_tokens": 200,
|
||||||
|
"temperature": 0.7
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Completamento testo
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8000/v1/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "bullpoint/Qwen3-Coder-Next-AWQ-4bit",
|
||||||
|
"prompt": "def fibonacci(",
|
||||||
|
"max_tokens": 100
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Listare modelli disponibili
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/models
|
||||||
|
```
|
||||||
|
|
||||||
|
## 6. Usare da un altro host (SSH Port Forwarding)
|
||||||
|
|
||||||
|
Se vLLM è su un server remoto:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh -L 0.0.0.0:8000:localhost:8000 user@remote-host
|
||||||
|
```
|
||||||
|
|
||||||
|
Poi da client locale:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/models
|
||||||
|
```
|
||||||
|
|
||||||
|
## 7. Stoppare il container
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Se lanciato in background
|
||||||
|
podman stop vllm-server
|
||||||
|
|
||||||
|
# Rimuovere il container
|
||||||
|
podman rm vllm-server
|
||||||
|
|
||||||
|
# Se in shell interattiva, usa Ctrl+C e poi
|
||||||
|
podman stop <container-id>
|
||||||
|
```
|
||||||
|
|
||||||
|
## 8. Usare con systemd (Quadlet)
|
||||||
|
|
||||||
|
Se hai già usato il file `vllm-rocm.container` generato:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p ~/.config/containers/systemd/
|
||||||
|
cp vllm-rocm.container ~/.config/containers/systemd/
|
||||||
|
systemctl --user daemon-reload
|
||||||
|
systemctl --user start vllm-rocm
|
||||||
|
systemctl --user status vllm-rocm
|
||||||
|
```
|
||||||
|
|
||||||
|
Visualizza i log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
journalctl --user -u vllm-rocm -n 50 -f
|
||||||
|
```
|
||||||
|
|
||||||
|
## Modello: bullpoint/Qwen3-Coder-Next-AWQ-4bit
|
||||||
|
|
||||||
|
### Caratteristiche
|
||||||
|
|
||||||
|
- **Quantizzazione:** AWQ (Activation-aware Weight Quantization) a 4-bit
|
||||||
|
- **Vantaggi:**
|
||||||
|
- Occupa ~15-20GB di memoria (vs 50-60GB full precision)
|
||||||
|
- Esecuzione molto veloce
|
||||||
|
- Qualità vicina al modello full precision
|
||||||
|
- **Caso d'uso:** Sviluppo code, task di programmazione
|
||||||
|
|
||||||
|
### Parametri consigliati
|
||||||
|
|
||||||
|
```bash
|
||||||
|
vllm serve bullpoint/Qwen3-Coder-Next-AWQ-4bit \
|
||||||
|
--tensor-parallel-size 1 \
|
||||||
|
--trust-remote-code \
|
||||||
|
--enforce-eager \
|
||||||
|
--gpu-memory-utilization 0.90 \
|
||||||
|
--max-model-len 4096 \
|
||||||
|
--max-num-seqs 16
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Errore: "Unable to locate package python3.13"
|
||||||
|
|
||||||
|
Il container usa Python 3.13, disponibile in Debian 13 (trixie). Verifica di usare `debian:trixie` o `debian:13-slim` nella base image.
|
||||||
|
|
||||||
|
### Errore: "No GPU detected"
|
||||||
|
|
||||||
|
Verifica che i device siano accessibili:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ls -la /dev/kfd /dev/dri
|
||||||
|
```
|
||||||
|
|
||||||
|
Se non ci sono, potrebbe essere un problema di driver. Su Strix Halo:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
rocm-smi
|
||||||
|
```
|
||||||
|
|
||||||
|
### Errore: "Out of memory"
|
||||||
|
|
||||||
|
Riduci `--gpu-memory-utilization` oppure `--max-model-len`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
vllm serve bullpoint/Qwen3-Coder-Next-AWQ-4bit \
|
||||||
|
--gpu-memory-utilization 0.80 \
|
||||||
|
--max-model-len 2048
|
||||||
|
```
|
||||||
|
|
||||||
|
### Il container si ferma subito
|
||||||
|
|
||||||
|
Controlla i log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
podman logs vllm-server
|
||||||
|
```
|
||||||
|
|
||||||
|
Se vedi errori di compilazione, il build potrebbe non essere completato correttamente. Riprova:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
podman build --no-cache -t vllm:rocm .
|
||||||
|
```
|
||||||
|
|
||||||
|
## Link Utili
|
||||||
|
|
||||||
|
- [vLLM Documentation](https://docs.vllm.ai/)
|
||||||
|
- [HuggingFace Qwen3 Models](https://huggingface.co/collections/Qwen/qwen3-coder-67a2e625ef1d5c6ba5a9c14c)
|
||||||
|
- [ROCm Documentation](https://rocmdocs.amd.com/)
|
||||||
|
|
||||||
|
## Domande Frequenti
|
||||||
|
|
||||||
|
**D: Posso usare più GPU con Tensor Parallelism?**
|
||||||
|
R: Sì, imposta `--tensor-parallel-size 2` se hai 2 GPU. Su Strix Halo single-GPU, usa `--tensor-parallel-size 1`.
|
||||||
|
|
||||||
|
**D: Come cambio modello senza riavviare il container?**
|
||||||
|
R: Devi stoppare e riavviare il container con un modello diverso.
|
||||||
|
|
||||||
|
**D: Posso usare questo con una Web UI?**
|
||||||
|
R: Sì, usa HuggingFace Chat UI o altre app che supportano endpoint OpenAI-compatible.
|
||||||
|
|
||||||
|
**D: Il modello viene scaricato ogni volta?**
|
||||||
|
R: No, viene cachato in `~/.cache/huggingface`. La prima volta richiede il download, le volte successive usa la cache.
|
||||||
+7
-7
@@ -1,13 +1,13 @@
|
|||||||
# AMD Strix Halo (gfx1151) — vLLM Toolbox/Container
|
# AMD Strix Halo (gfx1150) — vLLM Toolbox/Container
|
||||||
|
|
||||||
An **Fedora 43** Docker/Podman container that is **Toolbx-compatible** (usable as a Fedora toolbox) for serving LLMs with **vLLM** on **AMD Ryzen AI Max “Strix Halo” (gfx1151)**. Built on the **TheRock nightly builds** for ROCm.
|
A **Fedora 43** Docker/Podman container that is **Toolbx-compatible** (usable as a Fedora toolbox) for serving LLMs with **vLLM** on **AMD Ryzen AI Max “Strix Halo” (gfx1150)**. Built on the **TheRock nightly builds** for ROCm.
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🚀 High-Performance Clustering Support (New!)
|
## 🚀 High-Performance Clustering Support (New!)
|
||||||
|
|
||||||
**Update:** This toolbox now ships with a **custom build of ROCm/RCCL** that enables **native RDMA/RoCE v2 support for Strix Halo (gfx1151)**. This allows you to connect two nodes via a low-latency interconnect (e.g., Intel E810) and run vLLM with Tensor Parallelism (TP=2) effectively acting as a single 256GB Unified Memory GPU.
|
**Update:** This toolbox now ships with a **custom build of ROCm/RCCL** that enables **native RDMA/RoCE v2 support for Strix Halo (gfx1150)**. This allows you to connect two nodes via a low-latency interconnect (e.g., Intel E810) and run vLLM with Tensor Parallelism (TP=2) effectively acting as a single 256GB Unified Memory GPU.
|
||||||
|
|
||||||
👉 **[Read the Full RDMA Cluster Setup Guide](rdma_cluster/setup_guide.md)** for hardware requirements and configuration instructions.
|
👉 **[Read the Full RDMA Cluster Setup Guide](rdma_cluster/setup_guide.md)** for hardware requirements and configuration instructions.
|
||||||
|
|
||||||
@@ -58,7 +58,7 @@ View full benchmarks at: [https://kyuz0.github.io/amd-strix-halo-vllm-toolboxes/
|
|||||||
|
|
||||||
## 1) Toolbx vs Docker/Podman
|
## 1) Toolbx vs Docker/Podman
|
||||||
|
|
||||||
The `kyuz0/vllm-therock-gfx1151:latest` image can be used both as:
|
The `kyuz0/vllm-therock-gfx1150:latest` image can be used both as:
|
||||||
|
|
||||||
* **Fedora Toolbx (recommended for development):** Toolbx shares your **HOME** and user, so models/configs live on the host. Great for iterating quickly while keeping the host clean.
|
* **Fedora Toolbx (recommended for development):** Toolbx shares your **HOME** and user, so models/configs live on the host. Great for iterating quickly while keeping the host clean.
|
||||||
* **Docker/Podman (recommended for deployment/perf):** Use for running vLLM as a service (host networking, IPC tuning, etc.). Always **mount a host directory** for model weights so they stay outside the container.
|
* **Docker/Podman (recommended for deployment/perf):** Use for running vLLM as a service (host networking, IPC tuning, etc.). Always **mount a host directory** for model weights so they stay outside the container.
|
||||||
@@ -81,7 +81,7 @@ To manually create a toolbox that exposes the GPU and relaxes seccomp:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
toolbox create vllm \
|
toolbox create vllm \
|
||||||
--image docker.io/kyuz0/vllm-therock-gfx1151:latest \
|
--image docker.io/kyuz0/vllm-therock-gfx1150:latest \
|
||||||
-- --device /dev/dri --device /dev/kfd \
|
-- --device /dev/dri --device /dev/kfd \
|
||||||
--group-add video --group-add render --security-opt seccomp=unconfined
|
--group-add video --group-add render --security-opt seccomp=unconfined
|
||||||
```
|
```
|
||||||
@@ -112,7 +112,7 @@ Ubuntu’s toolbox package still breaks GPU access, so use Distrobox instead:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
distrobox create -n vllm \
|
distrobox create -n vllm \
|
||||||
--image docker.io/kyuz0/vllm-therock-gfx1151:latest \
|
--image docker.io/kyuz0/vllm-therock-gfx1150:latest \
|
||||||
--additional-flags "--device /dev/kfd --device /dev/dri --group-add video --group-add render --security-opt seccomp=unconfined"
|
--additional-flags "--device /dev/kfd --device /dev/dri --group-add video --group-add render --security-opt seccomp=unconfined"
|
||||||
|
|
||||||
distrobox enter vllm
|
distrobox enter vllm
|
||||||
@@ -218,6 +218,6 @@ This toolbox supports high-performance clustering of multiple Strix Halo nodes u
|
|||||||
**Detailed Documentation:** [RDMA Cluster Setup Guide](rdma_cluster/setup_guide.md)
|
**Detailed Documentation:** [RDMA Cluster Setup Guide](rdma_cluster/setup_guide.md)
|
||||||
|
|
||||||
**Key Features:**
|
**Key Features:**
|
||||||
* **Custom RCCL Patch:** Use of a custom-built `librccl.so` to support RDMA on `gfx1151`.
|
* **Custom RCCL Patch:** Use of a custom-built `librccl.so` to support RDMA on `gfx1150`.
|
||||||
* **Easy Setup:** `refresh_toolbox.sh` automatically detects and exposes RDMA devices.
|
* **Easy Setup:** `refresh_toolbox.sh` automatically detects and exposes RDMA devices.
|
||||||
* **Cluster Management:** Included `start-vllm-cluster` TUI for managing Ray and vLLM.
|
* **Cluster Management:** Included `start-vllm-cluster` TUI for managing Ray and vLLM.
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>AMD Strix Halo (gfx1151) vLLM Benchmarks</title>
|
<title>AMD Strix Halo (gfx1150) vLLM Benchmarks</title>
|
||||||
<style>
|
<style>
|
||||||
:root {
|
:root {
|
||||||
--bg-body: #f9fafb;
|
--bg-body: #f9fafb;
|
||||||
@@ -445,7 +445,7 @@
|
|||||||
|
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<header>
|
<header>
|
||||||
<h1>AMD Strix Halo (gfx1151) vLLM Benchmarks</h1>
|
<h1>AMD Strix Halo (gfx1150) vLLM Benchmarks</h1>
|
||||||
<p style="margin: 4px 0 0 0; font-size: 0.9rem;">
|
<p style="margin: 4px 0 0 0; font-size: 0.9rem;">
|
||||||
<a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/" target="_blank"
|
<a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/" target="_blank"
|
||||||
style="color: var(--primary); text-decoration: none;">View on GitHub →</a>
|
style="color: var(--primary); text-decoration: none;">View on GitHub →</a>
|
||||||
|
|||||||
@@ -221,7 +221,7 @@ The cluster management and verification scripts rely on SSH to execute commands
|
|||||||
|
|
||||||
### 5.2 Installation
|
### 5.2 Installation
|
||||||
|
|
||||||
The toolbox container provided in this repo includes a **critical patch**: a custom-built `librccl.so` that enables `gfx1151` (Strix Halo) support for RDMA (https://github.com/kyuz0/rocm-systems/tree/gfx1151-rccl), which is currently missing in upstream ROCm packages. This library is automatically compiled using the [`build-rccl`](../.github/workflows/build-rccl.yml) GitHub Action in this repository, which generates the artifact that is then bundled into the Docker container.
|
The toolbox container provided in this repo includes a **critical patch**: a custom-built `librccl.so` that enables `gfx1150` (Strix Halo) support for RDMA (https://github.com/kyuz0/rocm-systems/tree/gfx1150-rccl), which is currently missing in upstream ROCm packages. This library is automatically compiled using the [`build-rccl`](../.github/workflows/build-rccl.yml) GitHub Action in this repository, which generates the artifact that is then bundled into the Docker container.
|
||||||
|
|
||||||
To install the toolbox on **both nodes**, run:
|
To install the toolbox on **both nodes**, run:
|
||||||
|
|
||||||
@@ -230,7 +230,7 @@ To install the toolbox on **both nodes**, run:
|
|||||||
```
|
```
|
||||||
|
|
||||||
**What this does:**
|
**What this does:**
|
||||||
1. Pulls the latest `kyuz0/vllm-therock-gfx1151` image.
|
1. Pulls the latest `kyuz0/vllm-therock-gfx1150` image.
|
||||||
2. Detects if `/dev/infiniband` exists on your host.
|
2. Detects if `/dev/infiniband` exists on your host.
|
||||||
3. Creates the toolbox with flags to expose:
|
3. Creates the toolbox with flags to expose:
|
||||||
* **iGPU Access**: `/dev/dri`, `/dev/kfd` (Required for ROCm)
|
* **iGPU Access**: `/dev/dri`, `/dev/kfd` (Required for ROCm)
|
||||||
@@ -332,7 +332,7 @@ If you see link issues, ensure your Intel E810 firmware is up to date using the
|
|||||||
## 8. References & Acknowledgements
|
## 8. References & Acknowledgements
|
||||||
|
|
||||||
* **Reddit - Strix Halo Batching with Tensor Parallel**: [Thread by Hungry_Elk_3276](https://www.reddit.com/r/LocalLLaMA/comments/1p8nped/strix_halo_batching_with_tensor_parallel_and/)
|
* **Reddit - Strix Halo Batching with Tensor Parallel**: [Thread by Hungry_Elk_3276](https://www.reddit.com/r/LocalLLaMA/comments/1p8nped/strix_halo_batching_with_tensor_parallel_and/)
|
||||||
* Special thanks to user **Hungry_Elk_3276** for their initial experiments with vLLM RDMA, which highlighted the missing `gfx1151` support in upstream RCCL.
|
* Special thanks to user **Hungry_Elk_3276** for their initial experiments with vLLM RDMA, which highlighted the missing `gfx1150` support in upstream RCCL.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
# Issue Report: vLLM Tensor Parallelism over RDMA on AMD Strix Halo
|
# Issue Report: vLLM Tensor Parallelism over RDMA on AMD Strix Halo
|
||||||
|
|
||||||
> **✅ RESOLVED (Feb 2, 2026)**
|
> **✅ RESOLVED (Feb 2, 2026)**
|
||||||
> This issue is **SOLVED**. The root cause was indeed missing `gfx1151` support in the upstream RCCL library.
|
> This issue is **SOLVED**. The root cause was indeed missing `gfx1150` support in the upstream RCCL library.
|
||||||
>
|
>
|
||||||
> I have patched and built a custom version of RCCL with native `gfx1151` support. This patched library is **now included** in the toolbox container provided by this repository (`kyuz0/vllm-therock-gfx1151`).
|
> I have patched and built a custom version of RCCL with native `gfx1150` support. This patched library is **now included** in the toolbox container provided by this repository (`kyuz0/vllm-therock-gfx1150`).
|
||||||
>
|
>
|
||||||
> See the [RDMA Cluster Setup Guide](setup_guide.md) for instructions on how to run the cluster using the fixed container.
|
> See the [RDMA Cluster Setup Guide](setup_guide.md) for instructions on how to run the cluster using the fixed container.
|
||||||
|
|
||||||
@@ -12,8 +12,8 @@ I am attempting to run vLLM with Tensor Parallelism across two AMD Strix Halo (R
|
|||||||
|
|
||||||
- **Current Status:** RDMA communication is verified (low latency ~5us). Ray cluster is operational and can allocate tensors on both nodes.
|
- **Current Status:** RDMA communication is verified (low latency ~5us). Ray cluster is operational and can allocate tensors on both nodes.
|
||||||
- **Blocker:** vLLM fails with `HIP error: invalid kernel file` when initializing the distributed environment.
|
- **Blocker:** vLLM fails with `HIP error: invalid kernel file` when initializing the distributed environment.
|
||||||
- **Suspected Cause:** Possible missing support for `gfx1151` in the RCCL library included with the ROCm nightly build.
|
- **Suspected Cause:** Possible missing support for `gfx1150` in the RCCL library included with the ROCm nightly build.
|
||||||
- **Goal:** Solicit troubleshooting advice or confirmation if `gfx1151` support is indeed missing/required in RCCL.
|
- **Goal:** Solicit troubleshooting advice or confirmation if `gfx1150` support is indeed missing/required in RCCL.
|
||||||
|
|
||||||
## Table of Contents
|
## Table of Contents
|
||||||
1. [Context & Goal](#1-context--goal)
|
1. [Context & Goal](#1-context--goal)
|
||||||
@@ -24,7 +24,7 @@ I am attempting to run vLLM with Tensor Parallelism across two AMD Strix Halo (R
|
|||||||
4. [The Issue: Invalid Kernel File](#4-the-issue-invalid-kernel-file)
|
4. [The Issue: Invalid Kernel File](#4-the-issue-invalid-kernel-file)
|
||||||
- [4.1 Command & Configuration](#41-command--configuration)
|
- [4.1 Command & Configuration](#41-command--configuration)
|
||||||
- [4.2 Error Logs](#42-error-logs)
|
- [4.2 Error Logs](#42-error-logs)
|
||||||
- [4.3 Hypothesis: RCCL Support for gfx1151](#43-hypothesis-rccl-support-for-gfx1151)
|
- [4.3 Hypothesis: RCCL Support for gfx1150](#43-hypothesis-rccl-support-for-gfx1150)
|
||||||
5. [Request for Help](#5-request-for-help)
|
5. [Request for Help](#5-request-for-help)
|
||||||
|
|
||||||
## 1. Context & Goal
|
## 1. Context & Goal
|
||||||
@@ -70,7 +70,7 @@ The environment is created using `toolbox` (wrapping Podman) with specific flags
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
toolbox create vllm \
|
toolbox create vllm \
|
||||||
--image docker.io/kyuz0/vllm-therock-gfx1151:latest \
|
--image docker.io/kyuz0/vllm-therock-gfx1150:latest \
|
||||||
-- \
|
-- \
|
||||||
--device /dev/dri \
|
--device /dev/dri \
|
||||||
--device /dev/kfd \
|
--device /dev/kfd \
|
||||||
@@ -751,7 +751,7 @@ This results in an `HIP error: invalid kernel file` immediately upon engine init
|
|||||||
|
|
||||||
### 4.1 - Possible reasons
|
### 4.1 - Possible reasons
|
||||||
|
|
||||||
This invalid kernel file might be related to RCCL not supporting gfx1151. There was a PR that was never merged:
|
This invalid kernel file might be related to RCCL not supporting gfx1150. There was a PR that was never merged:
|
||||||
|
|
||||||
https://github.com/ROCm/rccl/pull/2075
|
https://github.com/ROCm/rccl/pull/2075
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
TOOLBOX_NAME="vllm"
|
TOOLBOX_NAME="vllm"
|
||||||
IMAGE="docker.io/kyuz0/vllm-therock-gfx1151:latest"
|
IMAGE="docker.io/kyuz0/vllm-therock-gfx1150:latest"
|
||||||
|
|
||||||
# Base options
|
# Base options
|
||||||
OPTIONS="--device /dev/dri --device /dev/kfd --group-add video --group-add render --security-opt seccomp=unconfined"
|
OPTIONS="--device /dev/dri --device /dev/kfd --group-add video --group-add render --security-opt seccomp=unconfined"
|
||||||
|
|||||||
@@ -83,13 +83,13 @@ cat <<'ASCII'
|
|||||||
v L L M
|
v L L M
|
||||||
ASCII
|
ASCII
|
||||||
echo
|
echo
|
||||||
printf 'AMD STRIX HALO — vLLM Toolbox (gfx1151, ROCm via TheRock)\n'
|
printf 'AMD STRIX HALO — vLLM Toolbox (gfx1150, ROCm via TheRock)\n'
|
||||||
[[ -n "$ROCM_VER" ]] && printf 'ROCm nightly: %s\n' "$ROCM_VER"
|
[[ -n "$ROCM_VER" ]] && printf 'ROCm nightly: %s\n' "$ROCM_VER"
|
||||||
echo
|
echo
|
||||||
printf 'Machine: %s\n' "$MACHINE"
|
printf 'Machine: %s\n' "$MACHINE"
|
||||||
printf 'GPU : %s\n\n' "$GPU"
|
printf 'GPU : %s\n\n' "$GPU"
|
||||||
printf 'Repo : https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes\n'
|
printf 'Repo : https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes\n'
|
||||||
printf 'Image : docker.io/kyuz0/vllm-therock-gfx1151:latest\n\n'
|
printf 'Image : docker.io/kyuz0/vllm-therock-gfx1150:latest\n\n'
|
||||||
printf 'Included:\n'
|
printf 'Included:\n'
|
||||||
printf ' - %-16s → %s\n' "start-vllm (TUI)" "Interactive launcher: Model select, Multi-GPU & Cache handling"
|
printf ' - %-16s → %s\n' "start-vllm (TUI)" "Interactive launcher: Model select, Multi-GPU & Cache handling"
|
||||||
printf ' - %-16s → %s\n' "start-vllm-cluster" "Cluster launcher: Setup Ray Head/Worker & Launch vLLM RCCL"
|
printf ' - %-16s → %s\n' "start-vllm-cluster" "Cluster launcher: Setup Ray Head/Worker & Launch vLLM RCCL"
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
set -e
|
||||||
# Configuration
|
# Configuration
|
||||||
REPO_URL="https://github.com/kyuz0/rocm-systems.git"
|
REPO_URL="https://code.badstorm.xyz/AI/rocm-systems.git"
|
||||||
BRANCH="gfx1151-rccl"
|
BRANCH="gfx1150-rccl"
|
||||||
BUILD_DIR="build_gfx1151"
|
BUILD_DIR="build_gfx1150"
|
||||||
ROCM_PATH=${ROCM_PATH:-/opt/rocm}
|
ROCM_PATH=${ROCM_PATH:-/opt/rocm}
|
||||||
# Project sub-directory
|
# Project sub-directory
|
||||||
PROJECT_DIR="projects/rccl"
|
PROJECT_DIR="projects/rccl"
|
||||||
echo "=== Building RCCL for gfx1151 ==="
|
echo "=== Building RCCL for gfx1150 ==="
|
||||||
echo "Repo: $REPO_URL"
|
echo "Repo: $REPO_URL"
|
||||||
echo "Branch: $BRANCH"
|
echo "Branch: $BRANCH"
|
||||||
echo "ROCm Path: $ROCM_PATH"
|
echo "ROCm Path: $ROCM_PATH"
|
||||||
@@ -28,14 +28,14 @@ echo "Entering project directory..."
|
|||||||
cd $PROJECT_DIR
|
cd $PROJECT_DIR
|
||||||
mkdir -p $BUILD_DIR
|
mkdir -p $BUILD_DIR
|
||||||
cd $BUILD_DIR
|
cd $BUILD_DIR
|
||||||
echo "Configuring CMake for gfx1151..."
|
echo "Configuring CMake for gfx1150..."
|
||||||
# We explicitly set GPU_TARGETS to gfx1151 to override the default list.
|
# We explicitly set GPU_TARGETS to gfx1150 to override the default list.
|
||||||
# We also set AMDGPU_TARGETS for standard rocm-cmake compliance.
|
# We also set AMDGPU_TARGETS for standard rocm-cmake compliance.
|
||||||
CXX=$ROCM_PATH/bin/hipcc cmake .. \
|
CXX=$ROCM_PATH/bin/hipcc cmake .. \
|
||||||
-DCMAKE_CXX_COMPILER=$ROCM_PATH/bin/hipcc \
|
-DCMAKE_CXX_COMPILER=$ROCM_PATH/bin/hipcc \
|
||||||
-DDEFAULT_GPUS="gfx1151" \
|
-DDEFAULT_GPUS="gfx1150" \
|
||||||
-DGPU_TARGETS="gfx1151" \
|
-DGPU_TARGETS="gfx1150" \
|
||||||
-DAMDGPU_TARGETS="gfx1151" \
|
-DAMDGPU_TARGETS="gfx1150" \
|
||||||
-DCMAKE_INSTALL_PREFIX=./install \
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
-DBUILD_TESTS=OFF \
|
-DBUILD_TESTS=OFF \
|
||||||
-DGENERATE_SYM_KERNELS=OFF \
|
-DGENERATE_SYM_KERNELS=OFF \
|
||||||
@@ -44,6 +44,15 @@ CXX=$ROCM_PATH/bin/hipcc cmake .. \
|
|||||||
# 3. Build
|
# 3. Build
|
||||||
echo "Building librccl.so..."
|
echo "Building librccl.so..."
|
||||||
make -j$(nproc)
|
make -j$(nproc)
|
||||||
|
|
||||||
|
# Comprimi il file reale (non il symlink)
|
||||||
|
cd /home/badstorm/Source/ai/amd-strix-halo-vllm-toolboxes
|
||||||
|
gzip -k rocm-systems/projects/rccl/build_gfx1150/librccl.so.1.0
|
||||||
|
|
||||||
|
# Copia i file .gz in custom_libs/
|
||||||
|
mkdir -p custom_libs/
|
||||||
|
cp rocm-systems/projects/rccl/build_gfx1150/librccl.so.1.0.gz custom_libs/librccl.so.1.gz
|
||||||
|
|
||||||
echo "=== Build Complete ==="
|
echo "=== Build Complete ==="
|
||||||
echo "Libraries are located in:"
|
echo "Libraries are located in:"
|
||||||
echo " $(pwd)/librccl.so"
|
echo " $(pwd)/librccl.so"
|
||||||
|
|||||||
@@ -2,11 +2,12 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# 1. System Base & Build Tools
|
# 1. System Base & Build Tools
|
||||||
# Added 'gperftools-libs' for tcmalloc (fixes double-free)
|
# Added 'libgoogle-perftools4' for tcmalloc (fixes double-free)
|
||||||
dnf -y install --setopt=install_weak_deps=False --nodocs \
|
apt-get update
|
||||||
python3.12 python3.12-devel git rsync libatomic bash ca-certificates curl \
|
apt-get install -y --no-install-recommends \
|
||||||
gcc gcc-c++ binutils make ffmpeg-free \
|
python3.11 python3.11-dev python3.11-venv git rsync bash ca-certificates curl \
|
||||||
cmake ninja-build aria2c tar xz vim nano dialog \
|
gcc g++ binutils make ffmpeg \
|
||||||
libdrm-devel zlib-devel openssl-devel pgrep \
|
cmake ninja-build aria2 tar xz-utils vim nano dialog \
|
||||||
numactl-devel gperftools-libs iproute libibverbs-utils patch perftest ping iperf3 perfquery \
|
libdrm-dev zlib1g-dev libssl-dev procps \
|
||||||
&& dnf clean all && rm -rf /var/cache/dnf/*
|
libnuma-dev libgoogle-perftools4 iproute2 ibverbs-utils patch perftest iputils-ping iperf3 infiniband-diags \
|
||||||
|
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ set -euo pipefail
|
|||||||
|
|
||||||
# Configuration with defaults matching Dockerfile ARGs
|
# Configuration with defaults matching Dockerfile ARGs
|
||||||
ROCM_MAJOR_VER="${ROCM_MAJOR_VER:-7}"
|
ROCM_MAJOR_VER="${ROCM_MAJOR_VER:-7}"
|
||||||
GFX="${GFX:-gfx1151}"
|
GFX="${GFX:-gfx1150}"
|
||||||
|
|
||||||
echo "=== Installing ROCm SDK ($GFX / $ROCM_MAJOR_VER) ==="
|
echo "=== Installing ROCm SDK ($GFX / $ROCM_MAJOR_VER) ==="
|
||||||
|
|
||||||
@@ -51,8 +51,9 @@ printf '%s\n' \
|
|||||||
"export VLLM_TARGET_DEVICE=rocm" \
|
"export VLLM_TARGET_DEVICE=rocm" \
|
||||||
"export HIP_FORCE_DEV_KERNARG=1" \
|
"export HIP_FORCE_DEV_KERNARG=1" \
|
||||||
"export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1" \
|
"export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1" \
|
||||||
"export LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4:/opt/rocm/lib/librocm_smi64.so.1.0" \
|
"export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/rocm/lib/librocm_smi64.so.1.0" \
|
||||||
> /etc/profile.d/rocm-sdk.sh
|
> /etc/profile.d/rocm-sdk.sh
|
||||||
|
|
||||||
chmod 0644 /etc/profile.d/rocm-sdk.sh
|
chmod 0644 /etc/profile.d/rocm-sdk.sh
|
||||||
echo "=== ROCm SDK Installation Complete ==="
|
echo "=== ROCm SDK Installation Complete ==="
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ set -e
|
|||||||
# Configuration
|
# Configuration
|
||||||
# Paths identified from your environment
|
# Paths identified from your environment
|
||||||
ROCM_LIB_PATH="/opt/rocm/lib/librccl.so.1.0"
|
ROCM_LIB_PATH="/opt/rocm/lib/librccl.so.1.0"
|
||||||
VENV_LIB_PATH="/opt/venv/lib/python3.13/site-packages/_rocm_sdk_libraries_gfx1151/lib/librccl.so.1"
|
VENV_LIB_PATH="/opt/venv/lib/python3.11/site-packages/_rocm_sdk_libraries_gfx1150/lib/librccl.so.1"
|
||||||
BACKUP_DIR="./rccl_backups_$(date +%Y%m%d_%H%M%S)"
|
BACKUP_DIR="./rccl_backups_$(date +%Y%m%d_%H%M%S)"
|
||||||
# Files to replace
|
# Files to replace
|
||||||
# We assume the new library is named 'librccl.so' or 'librccl.so.1' in the current directory or provided as arg
|
# We assume the new library is named 'librccl.so' or 'librccl.so.1' in the current directory or provided as arg
|
||||||
@@ -20,7 +20,7 @@ do_install() {
|
|||||||
echo "Please provide the path to the newly built librccl.so.1"
|
echo "Please provide the path to the newly built librccl.so.1"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "=== Installing Custom RCCL (gfx1151) ==="
|
echo "=== Installing Custom RCCL (gfx1150) ==="
|
||||||
echo "Creating backup directory: $BACKUP_DIR"
|
echo "Creating backup directory: $BACKUP_DIR"
|
||||||
mkdir -p "$BACKUP_DIR"
|
mkdir -p "$BACKUP_DIR"
|
||||||
# 1. Backup /opt/rocm location
|
# 1. Backup /opt/rocm location
|
||||||
|
|||||||
@@ -25,10 +25,10 @@ def patch_vllm():
|
|||||||
txt = p_rocm.read_text()
|
txt = p_rocm.read_text()
|
||||||
header = 'import sys\nfrom unittest.mock import MagicMock\nsys.modules["amdsmi"] = MagicMock()\n'
|
header = 'import sys\nfrom unittest.mock import MagicMock\nsys.modules["amdsmi"] = MagicMock()\n'
|
||||||
txt = header + txt
|
txt = header + txt
|
||||||
txt = txt.replace('def _get_gcn_arch() -> str:', 'def _get_gcn_arch() -> str:\n return "gfx1151"\n\ndef _old_get_gcn_arch() -> str:')
|
txt = txt.replace('def _get_gcn_arch() -> str:', 'def _get_gcn_arch() -> str:\n return "gfx1150"\n\ndef _old_get_gcn_arch() -> str:')
|
||||||
txt = re.sub(r'device_type = .*', 'device_type = "rocm"', txt)
|
txt = re.sub(r'device_type = .*', 'device_type = "rocm"', txt)
|
||||||
txt = re.sub(r'device_name = .*', 'device_name = "gfx1151"', txt)
|
txt = re.sub(r'device_name = .*', 'device_name = "gfx1150"', txt)
|
||||||
txt += '\n def get_device_name(self, device_id: int = 0) -> str:\n return "AMD-gfx1151"\n'
|
txt += '\n def get_device_name(self, device_id: int = 0) -> str:\n return "AMD-gfx1150"\n'
|
||||||
p_rocm.write_text(txt)
|
p_rocm.write_text(txt)
|
||||||
print(" -> Patched vllm/platforms/rocm.py")
|
print(" -> Patched vllm/platforms/rocm.py")
|
||||||
|
|
||||||
|
|||||||
Yeni konuda referans
Bir kullanıcı engelle