From b8678b08ba62cd87d626877624d05039efd46591 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Sun, 30 Nov 2025 17:49:29 +0000 Subject: [PATCH] Installing flash_attn, as this is now needed by vLLM --- Dockerfile | 9 +++++++++ scripts/01-rocm-env-for-triton.sh | 1 + 2 files changed, 10 insertions(+) diff --git a/Dockerfile b/Dockerfile index 07310ff..e2a6e70 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,6 +63,15 @@ RUN python -m pip install \ WORKDIR /opt +# Flash-Attention +ENV FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" + +RUN git clone https://github.com/ROCm/flash-attention.git &&\ + cd flash-attention &&\ + git checkout main_perf &&\ + python setup.py install && \ + cd /opt && rm -rf /opt/flash-attention + # 6. Clone vLLM RUN git clone https://github.com/vllm-project/vllm.git /opt/vllm WORKDIR /opt/vllm diff --git a/scripts/01-rocm-env-for-triton.sh b/scripts/01-rocm-env-for-triton.sh index 3ad1166..c1e1ec8 100644 --- a/scripts/01-rocm-env-for-triton.sh +++ b/scripts/01-rocm-env-for-triton.sh @@ -1,3 +1,4 @@ # Required for Strix Halo / RDNA3.5 on vLLM export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 +export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" export VLLM_TARGET_DEVICE=rocm \ No newline at end of file