2
0

Installing flash_attn, as this is now needed by vLLM

Este cometimento está contido em:
Donato Capitella
2025-11-30 17:49:29 +00:00
ascendente 30bd06b1bd
cometimento b8678b08ba
2 ficheiros modificados com 10 adições e 0 eliminações
+9
Ver ficheiro
@@ -63,6 +63,15 @@ RUN python -m pip install \
WORKDIR /opt
# Flash-Attention
# Build and install ROCm's flash-attention fork from its main_perf branch.
# FLASH_ATTENTION_TRITON_AMD_ENABLE is set with ENV, so it is present both
# while setup.py runs below and in the resulting image.
# NOTE(review): presumably this flag selects the Triton-based AMD backend of
# flash-attention -- confirm against the ROCm/flash-attention README.
ENV FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
# The relative clone lands in the current WORKDIR; the checkout is deleted in
# the same RUN step so the source tree is not left behind in /opt.
RUN git clone https://github.com/ROCm/flash-attention.git \
 && cd flash-attention \
 && git checkout main_perf \
 && python setup.py install \
 && cd /opt \
 && rm -rf /opt/flash-attention
# 6. Clone vLLM
# Fetch the vLLM sources into /opt/vllm and make that checkout the working
# directory for the instructions that follow. No branch or tag is given, so
# the clone uses the repository's default branch.
RUN git clone https://github.com/vllm-project/vllm.git /opt/vllm
WORKDIR /opt/vllm
+1
Ver ficheiro
@@ -1,3 +1,4 @@
# Required for Strix Halo / RDNA3.5 on vLLM.
# Each setting is assigned first, then all three are exported together so
# they are inherited by child processes of the shell that loads this file.
TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
VLLM_TARGET_DEVICE=rocm
export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL \
  FLASH_ATTENTION_TRITON_AMD_ENABLE \
  VLLM_TARGET_DEVICE