From b8678b08ba62cd87d626877624d05039efd46591 Mon Sep 17 00:00:00 2001
From: Donato Capitella <donato.capitella@reversec.com>
Date: Sun, 30 Nov 2025 17:49:29 +0000
Subject: [PATCH] Installing flash_attn, as this is now needed by vLLM

---
 Dockerfile                        | 9 +++++++++
 scripts/01-rocm-env-for-triton.sh | 1 +
 2 files changed, 10 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 07310ff..e2a6e70 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,6 +63,15 @@ RUN python -m pip install \
 
 WORKDIR /opt
 
+# Flash-Attention: build ROCm's fork from its main_perf branch, then remove the checkout
+ENV FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
+
+RUN git clone https://github.com/ROCm/flash-attention.git && \
+    cd flash-attention && \
+    git checkout main_perf && \
+    python setup.py install && \
+    cd /opt && rm -rf /opt/flash-attention
+
 # 6. Clone vLLM
 RUN git clone https://github.com/vllm-project/vllm.git /opt/vllm
 WORKDIR /opt/vllm
diff --git a/scripts/01-rocm-env-for-triton.sh b/scripts/01-rocm-env-for-triton.sh
index 3ad1166..c1e1ec8 100644
--- a/scripts/01-rocm-env-for-triton.sh
+++ b/scripts/01-rocm-env-for-triton.sh
@@ -1,3 +1,4 @@
 # Required for Strix Halo / RDNA3.5 on vLLM
 export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
+export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"  # same value the Dockerfile sets via ENV before installing flash-attention
 export VLLM_TARGET_DEVICE=rocm
\ No newline at end of file