2025-11-30 14:57:37 +00:00
|
|
|
# Base image: Fedora 43 from the Fedora project registry (pinned by tag only).
FROM registry.fedoraproject.org/fedora:43
|
|
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 1. System Base & Build Tools
#
# Installs Python 3.13 (with headers), the native toolchain and the helper
# tools in a single layer. Weak dependencies and package docs are skipped.
# 'gperftools-libs' provides tcmalloc (fixes double-free).
# NOTE(review): `aria2c` and `pgrep` are command names rather than package
# names; presumably dnf resolves them to the providing packages - confirm.
# Both the legacy (/var/cache/dnf) and the dnf5 (/var/cache/libdnf5) cache
# directories are emptied in the same layer so no cache data is committed.
RUN dnf -y install --setopt=install_weak_deps=False --nodocs \
        aria2c \
        bash \
        binutils \
        ca-certificates \
        cmake \
        curl \
        dialog \
        ffmpeg-free \
        gcc \
        gcc-c++ \
        git \
        gperftools-libs \
        libatomic \
        libdrm-devel \
        make \
        nano \
        ninja-build \
        numactl-devel \
        openssl-devel \
        pgrep \
        python3.13 \
        python3.13-devel \
        rsync \
        tar \
        vim \
        xz \
        zlib-devel \
    && dnf clean all \
    && rm -rf /var/cache/dnf/* /var/cache/libdnf5/*
|
|
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 2. Install "TheRock" ROCm SDK (Tarball Method)
#
# Lists the nightly tarball bucket, picks the highest-versioned tarball whose
# name starts with therock-dist-linux-<GFX>-<ROCM_MAJOR_VER>, and unpacks it
# into /opt/rocm. The downloaded archive is removed in the same layer.
WORKDIR /tmp

# Build-time knobs: ROCm major version and GPU architecture to select.
ARG ROCM_MAJOR_VER=7
ARG GFX=gfx1151

RUN set -euo pipefail; \
    BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \
    PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \
    # -f makes HTTP errors fatal and -S prints them instead of failing silently.
    LISTING="$(curl -fsS --retry 3 "${BASE}?list-type=2&prefix=${PREFIX}")"; \
    # Split the XML listing on '<' so each key sits on its own line, then keep
    # the newest matching key according to version sort.
    KEY="$(printf '%s\n' "${LISTING}" \
        | tr '<' '\n' \
        | grep -o "${PREFIX}\..*\.tar\.gz" \
        | sort -V | tail -n1 || true)"; \
    if [ -z "${KEY}" ]; then \
        echo "ERROR: no tarball found in ${BASE} for prefix ${PREFIX}" >&2; \
        exit 1; \
    fi; \
    echo "Downloading Latest Tarball: ${KEY}"; \
    aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz; \
    mkdir -p /opt/rocm; \
    tar xzf therock.tar.gz -C /opt/rocm --strip-components=1; \
    rm therock.tar.gz
|
2025-11-30 14:57:37 +00:00
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 3. Configure Global ROCm Environment
#
# Generates /etc/profile.d/rocm-sdk.sh, which exports the ROCm/HIP variables
# for login shells. We add LD_PRELOAD for tcmalloc here to fix the shutdown
# crash.
#
# $ROCM_PATH and $BITCODE_PATH are expanded at build time; the escaped
# \$PATH / \$LD_LIBRARY_PATH references are written literally and expanded
# when the profile script is sourced.
RUN export ROCM_PATH=/opt/rocm && \
    BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) && \
    # find exits 0 even without a match; fail here instead of exporting an
    # empty HIP_DEVICE_LIB_PATH.
    { [ -n "$BITCODE_PATH" ] || { echo "ERROR: no 'bitcode' directory found under /opt/rocm" >&2; exit 1; }; } && \
    printf '%s\n' \
        "export ROCM_PATH=/opt/rocm" \
        "export HIP_PLATFORM=amd" \
        "export HIP_PATH=/opt/rocm" \
        "export HIP_CLANG_PATH=/opt/rocm/llvm/bin" \
        "export HIP_DEVICE_LIB_PATH=$BITCODE_PATH" \
        "export PATH=$ROCM_PATH/bin:$ROCM_PATH/llvm/bin:\$PATH" \
        # Append the previous value only when it is non-empty: a trailing ':'
        # would add an empty entry, which the loader treats as the current
        # directory.
        "export LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib\${LD_LIBRARY_PATH:+:\$LD_LIBRARY_PATH}" \
        "export ROCBLAS_USE_HIPBLASLT=1" \
        "export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1" \
        "export VLLM_TARGET_DEVICE=rocm" \
        "export HIP_FORCE_DEV_KERNARG=1" \
        "export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1" \
        "export LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4" \
        > /etc/profile.d/rocm-sdk.sh && \
    chmod 0644 /etc/profile.d/rocm-sdk.sh
|
2025-11-30 14:57:37 +00:00
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 4. Python Venv Setup
#
# Creates the virtual environment at /opt/venv, puts it first on PATH for all
# later build steps and at runtime, and has login shells activate it.
RUN /usr/bin/python3.13 -m venv /opt/venv

ENV VIRTUAL_ENV=/opt/venv \
    PATH=/opt/venv/bin:$PATH \
    PIP_NO_CACHE_DIR=1

RUN printf '%s\n' 'source /opt/venv/bin/activate' > /etc/profile.d/venv.sh

# setuptools is held below 80.0.0.
RUN python -m pip install --upgrade \
        pip \
        wheel \
        packaging \
        "setuptools<80.0.0"
|
|
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 5. Install PyTorch (TheRock Nightly)
#
# Pre-release torch, torchaudio and torchvision builds are pulled from the
# gfx1151 staging index.
RUN python -m pip install --pre \
        --index-url https://rocm.nightlies.amd.com/v2-staging/gfx1151/ \
        torch \
        torchaudio \
        torchvision
|
|
|
|
|
|
|
|
|
|
WORKDIR /opt

# Flash-Attention
#
# Builds and installs the `main_perf` branch of ROCm/flash-attention into the
# venv, then deletes the checkout in the same layer.
ENV FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"

RUN git clone https://github.com/ROCm/flash-attention.git && \
    ( \
        cd flash-attention && \
        git checkout main_perf && \
        python setup.py install \
    ) && \
    rm -rf /opt/flash-attention
|
2025-11-30 17:49:29 +00:00
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 6. Clone vLLM
RUN git clone https://github.com/vllm-project/vllm.git /opt/vllm

WORKDIR /opt/vllm

# --- PATCHING ---
#
# Writes patch_strix.py line by line, runs it, then retargets CMakeLists.txt
# from "gfx1200;gfx1201" to "gfx1151". The generated script:
#   * vllm/platforms/__init__.py: comments out `import amdsmi`, forces
#     `is_rocm = True`, replaces the amdsmi device-count check with
#     `if True:`, and replaces amdsmi init/shutdown calls with `pass`.
#   * vllm/platforms/rocm.py: registers a MagicMock as the `amdsmi` module at
#     the top of the file, forces device_type/device_name, and appends a
#     get_device_name() method that returns "AMD-gfx1151".
#
# The appended method is written with 4/8-space indentation so that it forms
# a valid method body.
# NOTE(review): assumes rocm.py ends inside a class whose members are
# indented by 4 spaces - confirm against the vLLM revision being built.
RUN echo "import sys, re" > patch_strix.py && \
    echo "from pathlib import Path" >> patch_strix.py && \
    # Patch 1: __init__.py
    echo "p = Path('vllm/platforms/__init__.py')" >> patch_strix.py && \
    echo "txt = p.read_text()" >> patch_strix.py && \
    echo "txt = txt.replace('import amdsmi', '# import amdsmi')" >> patch_strix.py && \
    echo "txt = re.sub(r'is_rocm = .*', 'is_rocm = True', txt)" >> patch_strix.py && \
    echo "txt = re.sub(r'if len\(amdsmi\.amdsmi_get_processor_handles\(\)\) > 0:', 'if True:', txt)" >> patch_strix.py && \
    echo "txt = txt.replace('amdsmi.amdsmi_init()', 'pass')" >> patch_strix.py && \
    echo "txt = txt.replace('amdsmi.amdsmi_shut_down()', 'pass')" >> patch_strix.py && \
    echo "p.write_text(txt)" >> patch_strix.py && \
    # Patch 2: rocm.py
    echo "p = Path('vllm/platforms/rocm.py')" >> patch_strix.py && \
    echo "txt = p.read_text()" >> patch_strix.py && \
    echo "header = 'import sys\nfrom unittest.mock import MagicMock\nsys.modules[\"amdsmi\"] = MagicMock()\n'" >> patch_strix.py && \
    echo "txt = header + txt" >> patch_strix.py && \
    echo "txt = re.sub(r'device_type = .*', 'device_type = \"rocm\"', txt)" >> patch_strix.py && \
    echo "txt = re.sub(r'device_name = .*', 'device_name = \"gfx1151\"', txt)" >> patch_strix.py && \
    echo "txt += '\n    def get_device_name(self, device_id: int = 0) -> str:\n        return \"AMD-gfx1151\"\n'" >> patch_strix.py && \
    echo "p.write_text(txt)" >> patch_strix.py && \
    echo "print('Successfully patched vLLM for Strix Halo')" >> patch_strix.py && \
    python patch_strix.py && \
    sed -i 's/gfx1200;gfx1201/gfx1151/' CMakeLists.txt
|
2025-11-30 14:57:37 +00:00
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 7. Build vLLM (Wheel Method) with CLANG Host Compiler
#
# Python-side build requirements, installed up front because the wheel is
# later built with --no-build-isolation.
RUN python -m pip install --upgrade \
        cmake \
        ninja \
        packaging \
        wheel \
        numpy \
        "setuptools-scm>=8" \
        "setuptools<80.0.0" \
        scikit-build-core \
        pybind11

# ROCm location, target device and GPU architecture for the build; MAX_JOBS
# is set to 4.
ENV ROCM_HOME="/opt/rocm" \
    HIP_PATH="/opt/rocm" \
    VLLM_TARGET_DEVICE="rocm" \
    PYTORCH_ROCM_ARCH="gfx1151" \
    HIP_ARCHITECTURES="gfx1151" \
    AMDGPU_TARGETS="gfx1151" \
    MAX_JOBS="4"

# --- CRITICAL FIX FOR SEGFAULT ---
# The host compiler (CC/CXX) is forced to the ROCm Clang rather than Fedora's
# GCC, so the ABI of the compiled vLLM extensions lines up with PyTorch.
ENV CC="/opt/rocm/llvm/bin/clang" \
    CXX="/opt/rocm/llvm/bin/clang++"
|
|
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# Build the vLLM wheel from the patched checkout and install it.
# HIP_DEVICE_LIB_PATH is set to the first `bitcode` directory found under
# /opt/rocm, and CMAKE_ARGS pins the ROCm paths and the gfx1151 target.
# The wheel directory is deleted in the same layer after installation so the
# built wheel is not kept in the image in addition to the installed package.
RUN export HIP_DEVICE_LIB_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) && \
    echo "Compiling with Bitcode: $HIP_DEVICE_LIB_PATH" && \
    export CMAKE_ARGS="-DROCM_PATH=/opt/rocm -DHIP_PATH=/opt/rocm -DAMDGPU_TARGETS=gfx1151 -DHIP_ARCHITECTURES=gfx1151" && \
    python -m pip wheel --no-build-isolation --no-deps -w /tmp/dist -v . && \
    python -m pip install /tmp/dist/*.whl && \
    rm -rf /tmp/dist
|
|
|
|
|
|
|
|
|
|
# --- bitsandbytes (ROCm) ---
#
# Checks out the `rocm_enabled_multi_backend` branch of ROCm/bitsandbytes,
# builds its native library with the HIP backend for gfx1151, and installs
# the package into the venv.
RUN git clone --branch rocm_enabled_multi_backend \
        https://github.com/ROCm/bitsandbytes.git /opt/bitsandbytes

WORKDIR /opt/bitsandbytes

# HIP_PLATFORM is set as a Docker ENV here (not via /etc/profile), together
# with the CMake package search prefix.
ENV HIP_PLATFORM="amd" \
    CMAKE_PREFIX_PATH="/opt/rocm"

# Both the HIP and the C++ compiler are pinned to the ROCm clang++
# (/opt/rocm/llvm/bin/clang++).
RUN cmake -S . \
        -DGPU_TARGETS="gfx1151" \
        -DBNB_ROCM_ARCH="gfx1151" \
        -DCOMPUTE_BACKEND=hip \
        -DCMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
        -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
    && make -j"$(nproc)" \
    && python -m pip install --no-cache-dir --no-build-isolation --no-deps .
|
2025-11-30 14:57:37 +00:00
|
|
|
|
2025-11-30 15:37:12 +00:00
|
|
|
# 8. Final Cleanup & Runtime
WORKDIR /opt

# Three stages, grouped explicitly:
#   1. best effort: open up permissions on /opt, then strip shared objects
#      in the venv (strip's stderr is discarded); failures are ignored.
#   2. best effort: remove __pycache__ directories from the venv, then the
#      pip cache; failures are ignored.
#   3. clean dnf metadata and the dnf cache directory; the result of this
#      stage is the result of the whole step.
RUN { chmod -R a+rwX /opt && \
      find /opt/venv -type f -name "*.so" -exec strip -s {} + 2>/dev/null; } || true; \
    { find /opt/venv -type d -name "__pycache__" -prune -exec rm -rf {} + && \
      rm -rf /root/.cache/pip; } || true; \
    dnf clean all && rm -rf /var/cache/dnf/*
|
2025-11-30 14:57:37 +00:00
|
|
|
|
|
|
|
|
# Login-shell profile snippets.
COPY scripts/01-rocm-env-for-triton.sh \
     scripts/99-toolbox-banner.sh \
     scripts/zz-venv-last.sh \
     /etc/profile.d/

# Launcher script, installed under the command name `start-vllm`.
COPY scripts/start_vllm.py /usr/local/bin/start-vllm

# Benchmark results and the benchmark runner.
COPY benchmarks/max_context_results.json \
     benchmarks/run_vllm_bench.py \
     /opt/
|
|
|
|
|
# Write the profile snippet that sets the soft core-dump size limit to 0,
# then fix permissions in a single layer: profile scripts and the benchmark
# results file become 0644, and the start-vllm launcher becomes executable.
# The snippet is created first so the *.sh glob covers it as well.
RUN printf 'ulimit -S -c 0\n' > /etc/profile.d/90-nocoredump.sh && \
    chmod 0644 /etc/profile.d/*.sh /opt/max_context_results.json && \
    chmod +x /usr/local/bin/start-vllm
|
|
|
|
|
|
|
|
|
|
# Default command (exec form): start a bash shell.
CMD ["/bin/bash"]
|