Updating toolbox and pushing GitHub Action
这个提交包含在:
@@ -0,0 +1,98 @@
|
||||
name: build-and-publish
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "Image tag to publish (e.g. latest)"
|
||||
required: true
|
||||
default: "latest"
|
||||
|
||||
env:
|
||||
IMAGE_REPO: kyuz0/vllm-therock-gfx1151
|
||||
DOCKER_BUILDKIT: "1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Put Docker on /mnt
|
||||
run: |
|
||||
set -eux
|
||||
echo '{ "data-root": "/mnt/docker" }' | sudo tee /etc/docker/daemon.json
|
||||
sudo systemctl stop docker
|
||||
sudo rm -rf /var/lib/docker || true
|
||||
sudo mkdir -p /mnt/docker
|
||||
sudo systemctl start docker
|
||||
docker info | grep "Docker Root Dir"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Free disk space
|
||||
shell: bash
|
||||
run: |
|
||||
set -euxo pipefail
|
||||
echo "Disk BEFORE:"; df -h
|
||||
sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc || true
|
||||
sudo rm -rf /opt/hostedtoolcache/CodeQL /opt/hostedtoolcache/go || true
|
||||
docker system prune -af || true
|
||||
docker builder prune -af || true
|
||||
sudo apt-get clean
|
||||
sudo rm -rf /var/lib/apt/lists/*
|
||||
sudo rm -rf /opt/hostedtoolcache
|
||||
echo "Disk AFTER:"; df -h
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: BuildKit GC config (8GB cap)
|
||||
run: |
|
||||
cat > /tmp/buildkitd.toml <<'EOF'
|
||||
[worker.oci]
|
||||
gc = true
|
||||
gckeepstorage = "8GB" # unit-suffixed string: current BuildKit reads a bare integer as bytes, so 8000 would cap the cache at ~8 KB
|
||||
EOF
|
||||
|
||||
- name: Set up Buildx (with GC)
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
# Pass the host-side file through the action's buildkitd-config input so it is
# copied into the builder; a --config flag would be resolved inside the BuildKit
# container, where /tmp/buildkitd.toml written by the previous step is not present.
buildkitd-config: /tmp/buildkitd.toml
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: docker.io/${{ env.IMAGE_REPO }}
|
||||
tags: |
|
||||
type=raw,value=${{ github.event.inputs.tag }}
|
||||
type=sha
|
||||
type=raw,value={{date 'YYYYMMDD-HHmmss'}}
|
||||
labels: |
|
||||
org.opencontainers.image.source=https://github.com/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: linux/amd64
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
no-cache: true
|
||||
|
||||
- name: Prune buildx cache
|
||||
if: always()
|
||||
run: |
|
||||
docker buildx prune -af --verbose --min-free-space 4gb || true
|
||||
+129
@@ -0,0 +1,129 @@
|
||||
FROM registry.fedoraproject.org/fedora:43
|
||||
|
||||
# 1. System Base & Build Tools
|
||||
# Added 'gperftools-libs' for tcmalloc (fixes double-free)
|
||||
RUN dnf -y install --setopt=install_weak_deps=False --nodocs \
|
||||
python3.13 python3.13-devel git rsync libatomic bash ca-certificates curl \
|
||||
gcc gcc-c++ binutils make ffmpeg-free \
|
||||
cmake ninja-build aria2c tar xz vim nano \
|
||||
libdrm-devel zlib-devel openssl-devel \
|
||||
numactl-devel gperftools-libs \
|
||||
&& dnf clean all && rm -rf /var/cache/dnf/*
|
||||
|
||||
# 2. Install "TheRock" ROCm SDK (Tarball Method)
|
||||
WORKDIR /tmp
|
||||
ARG ROCM_MAJOR_VER=7
|
||||
ARG GFX=gfx1151
|
||||
RUN set -euo pipefail; \
|
||||
BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \
|
||||
PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \
|
||||
KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \
|
||||
| tr '<' '\n' \
|
||||
| grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \
|
||||
| sort -V | tail -n1)"; \
|
||||
echo "Downloading Latest Tarball: ${KEY}"; \
|
||||
aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz; \
|
||||
mkdir -p /opt/rocm; \
|
||||
tar xzf therock.tar.gz -C /opt/rocm --strip-components=1; \
|
||||
rm therock.tar.gz
|
||||
|
||||
# 3. Configure Global ROCm Environment
|
||||
# We add LD_PRELOAD for tcmalloc here to fix the shutdown crash
|
||||
RUN export ROCM_PATH=/opt/rocm && \
|
||||
BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) && \
|
||||
printf '%s\n' \
|
||||
"export ROCM_PATH=/opt/rocm" \
|
||||
"export HIP_PLATFORM=amd" \
|
||||
"export HIP_PATH=/opt/rocm" \
|
||||
"export HIP_CLANG_PATH=/opt/rocm/llvm/bin" \
|
||||
"export HIP_DEVICE_LIB_PATH=$BITCODE_PATH" \
|
||||
"export PATH=$ROCM_PATH/bin:$ROCM_PATH/llvm/bin:\$PATH" \
|
||||
"export LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib:\$LD_LIBRARY_PATH" \
|
||||
"export ROCBLAS_USE_HIPBLASLT=1" \
|
||||
"export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1" \
|
||||
"export VLLM_TARGET_DEVICE=rocm" \
|
||||
"export HIP_FORCE_DEV_KERNARG=1" \
|
||||
"export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1" \
|
||||
"export LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4" \
|
||||
> /etc/profile.d/rocm-sdk.sh && \
|
||||
chmod 0644 /etc/profile.d/rocm-sdk.sh
|
||||
|
||||
# 4. Python Venv Setup
|
||||
RUN /usr/bin/python3.13 -m venv /opt/venv
|
||||
ENV VIRTUAL_ENV=/opt/venv
|
||||
ENV PATH=/opt/venv/bin:$PATH
|
||||
ENV PIP_NO_CACHE_DIR=1
|
||||
RUN printf 'source /opt/venv/bin/activate\n' > /etc/profile.d/venv.sh
|
||||
RUN python -m pip install --upgrade pip wheel packaging "setuptools<80.0.0"
|
||||
|
||||
# 5. Install PyTorch (TheRock Nightly)
|
||||
RUN python -m pip install \
|
||||
--index-url https://rocm.nightlies.amd.com/v2-staging/gfx1151/ \
|
||||
--pre torch torchaudio torchvision
|
||||
|
||||
WORKDIR /opt
|
||||
|
||||
# 6. Clone vLLM
|
||||
RUN git clone https://github.com/vllm-project/vllm.git /opt/vllm
|
||||
WORKDIR /opt/vllm
|
||||
|
||||
# --- PATCHING ---
|
||||
RUN echo "import sys, re" > patch_strix.py && \
|
||||
echo "from pathlib import Path" >> patch_strix.py && \
|
||||
# Patch 1: __init__.py
|
||||
echo "p = Path('vllm/platforms/__init__.py')" >> patch_strix.py && \
|
||||
echo "txt = p.read_text()" >> patch_strix.py && \
|
||||
echo "txt = txt.replace('import amdsmi', '# import amdsmi')" >> patch_strix.py && \
|
||||
echo "txt = re.sub(r'is_rocm = .*', 'is_rocm = True', txt)" >> patch_strix.py && \
|
||||
echo "txt = re.sub(r'if len\(amdsmi\.amdsmi_get_processor_handles\(\)\) > 0:', 'if True:', txt)" >> patch_strix.py && \
|
||||
echo "txt = txt.replace('amdsmi.amdsmi_init()', 'pass')" >> patch_strix.py && \
|
||||
echo "txt = txt.replace('amdsmi.amdsmi_shut_down()', 'pass')" >> patch_strix.py && \
|
||||
echo "p.write_text(txt)" >> patch_strix.py && \
|
||||
# Patch 2: rocm.py
|
||||
echo "p = Path('vllm/platforms/rocm.py')" >> patch_strix.py && \
|
||||
echo "txt = p.read_text()" >> patch_strix.py && \
|
||||
echo "header = 'import sys\nfrom unittest.mock import MagicMock\nsys.modules[\"amdsmi\"] = MagicMock()\n'" >> patch_strix.py && \
|
||||
echo "txt = header + txt" >> patch_strix.py && \
|
||||
echo "txt = re.sub(r'device_type = .*', 'device_type = \"cuda\"', txt)" >> patch_strix.py && \
|
||||
echo "txt = re.sub(r'device_name = .*', 'device_name = \"cuda\"', txt)" >> patch_strix.py && \
|
||||
echo "txt += '\n def get_device_name(self, device_id: int = 0) -> str:\n return \"AMD-gfx1151\"\n'" >> patch_strix.py && \
|
||||
echo "p.write_text(txt)" >> patch_strix.py && \
|
||||
echo "print('Successfully patched vLLM for Strix Halo')" >> patch_strix.py && \
|
||||
python patch_strix.py && \
|
||||
sed -i 's/gfx1200;gfx1201/gfx1151;gfx1200;gfx1201/' CMakeLists.txt
|
||||
|
||||
# 7. Build vLLM (Wheel Method) with CLANG Host Compiler
|
||||
RUN python -m pip install --upgrade cmake ninja packaging wheel numpy "setuptools-scm>=8" "setuptools<80.0.0" scikit-build-core pybind11
|
||||
ENV ROCM_HOME="/opt/rocm"
|
||||
ENV HIP_PATH="/opt/rocm"
|
||||
ENV VLLM_TARGET_DEVICE="rocm"
|
||||
ENV PYTORCH_ROCM_ARCH="gfx1151"
|
||||
ENV MAX_JOBS="4"
|
||||
|
||||
# --- CRITICAL FIX FOR SEGFAULT ---
|
||||
# We force the Host Compiler (CC/CXX) to be the ROCm Clang, not Fedora GCC.
|
||||
# This aligns the ABI of the compiled vLLM extensions with PyTorch.
|
||||
ENV CC="/opt/rocm/llvm/bin/clang"
|
||||
ENV CXX="/opt/rocm/llvm/bin/clang++"
|
||||
|
||||
RUN export HIP_DEVICE_LIB_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) && \
|
||||
echo "Compiling with Bitcode: $HIP_DEVICE_LIB_PATH" && \
|
||||
export CMAKE_ARGS="-DROCM_PATH=/opt/rocm -DHIP_PATH=/opt/rocm" && \
|
||||
python -m pip wheel --no-build-isolation --no-deps -w /tmp/dist -v . && \
|
||||
python -m pip install /tmp/dist/*.whl
|
||||
|
||||
# 8. Final Cleanup & Runtime
|
||||
WORKDIR /opt
|
||||
RUN chmod -R a+rwX /opt && \
|
||||
find /opt/venv -type f -name "*.so" -exec strip -s {} + 2>/dev/null || true && \
|
||||
find /opt/venv -type d -name "__pycache__" -prune -exec rm -rf {} + && \
|
||||
rm -rf /root/.cache/pip || true && \
|
||||
dnf clean all && rm -rf /var/cache/dnf/*
|
||||
|
||||
COPY scripts/01-rocm-env-for-triton.sh /etc/profile.d/01-rocm-env-for-triton.sh
|
||||
COPY scripts/99-toolbox-banner.sh /etc/profile.d/99-toolbox-banner.sh
|
||||
COPY scripts/zz-venv-last.sh /etc/profile.d/zz-venv-last.sh
|
||||
RUN chmod 0644 /etc/profile.d/*.sh
|
||||
RUN printf 'ulimit -S -c 0\n' > /etc/profile.d/90-nocoredump.sh && chmod 0644 /etc/profile.d/90-nocoredump.sh
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
@@ -1,142 +0,0 @@
|
||||
FROM kyuz0/pytorch-therock-gfx1151-aotriton-builder:latest AS vllm-builder
|
||||
|
||||
# Clone vLLM repository (shallow clone)
|
||||
RUN git clone --depth 1 https://github.com/vllm-project/vllm.git
|
||||
|
||||
# Install vLLM build dependencies and build vLLM
|
||||
RUN source .venv/bin/activate && \
|
||||
cd vllm && \
|
||||
uv pip install ninja cmake wheel pybind11 && \
|
||||
uv pip install --upgrade numba scipy huggingface-hub[cli] "numpy<2" && \
|
||||
python use_existing_torch.py && \
|
||||
sed -i '/amdsmi==/d' requirements/rocm-build.txt && \
|
||||
sed -i '/pytorch-triton-rocm/d' requirements/rocm-build.txt && \
|
||||
sed -i '/triton==/d' requirements/rocm-build.txt && \
|
||||
uv pip install -r requirements/rocm-build.txt
|
||||
|
||||
# Apply gfx1151 fixes
|
||||
RUN cd vllm && \
|
||||
sed -i 's/gfx1200;gfx1201/gfx1151;gfx1200;gfx1201/' CMakeLists.txt && \
|
||||
sed -i '/torch == 2.8.0,/d' pyproject.toml && \
|
||||
sed -i 's/import torch/try:\n import torch\n from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME\n TORCH_AVAILABLE = True\nexcept ImportError:\n torch = None\n CUDA_HOME = None\n ROCM_HOME = None\n TORCH_AVAILABLE = False/' setup.py && \
|
||||
sed -i 's/from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME/# Moved to try block above/' setup.py && \
|
||||
sed -i 's/torch.version.cuda is None/TORCH_AVAILABLE and torch.version.cuda is None/' setup.py && \
|
||||
sed -i 's/has_cuda = torch.version.cuda is not None/has_cuda = TORCH_AVAILABLE and torch.version.cuda is not None/' setup.py && \
|
||||
sed -i 's/torch.version.hip is not None/TORCH_AVAILABLE and torch.version.hip is not None/' setup.py && \
|
||||
sed -i 's/rocm_version = get_rocm_version() or torch.version.hip/rocm_version = get_rocm_version() or (torch.version.hip if TORCH_AVAILABLE else None)/' setup.py && \
|
||||
sed -i 's/cuda_major, cuda_minor = torch.version.cuda.split(".")/cuda_major, cuda_minor = torch.version.cuda.split(".") if TORCH_AVAILABLE else ("0", "0")/' setup.py
|
||||
|
||||
# Fix ROCm platform detection
|
||||
RUN cd vllm && \
|
||||
git checkout HEAD -- vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/is_rocm = False/is_rocm = False/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/logger.debug("Checking if ROCm platform is available.")/logger.debug("Checking if ROCm platform is available.")\n \n # Skip amdsmi check due to segfault issues - default to ROCm for AMD systems/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/try:\n import amdsmi/try:\n import torch/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/amdsmi.amdsmi_init()/# amdsmi disabled - using torch detection/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/try:\n if len(amdsmi.amdsmi_get_processor_handles()) > 0:/if hasattr(torch, '\''version'\'') and hasattr(torch.version, '\''hip'\'') and torch.version.hip is not None:/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/is_rocm = True\n logger.debug("Confirmed ROCm platform is available.")/is_rocm = True\n logger.debug("ROCm platform detected via torch.version.hip")/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/else:\n logger.debug("ROCm platform is not available because"\n " no GPU is found.")/else:\n # Fallback: assume ROCm if we'\''re not CUDA and not other platforms\n logger.debug("Defaulting to ROCm platform (amdsmi disabled due to segfault)")\n is_rocm = True/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/finally:\n amdsmi.amdsmi_shut_down()/finally:\n # amdsmi disabled\n pass/' vllm/platforms/__init__.py && \
|
||||
sed -i '/def rocm_platform_plugin/,/return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None/s/logger.debug("ROCm platform is not available because: %s", str(e))/logger.debug("ROCm platform check failed: %s", str(e))\n # Still default to ROCm as fallback\n is_rocm = True/' vllm/platforms/__init__.py
|
||||
|
||||
# Also patch vllm/platforms/rocm.py to avoid amdsmi at runtime
|
||||
RUN cd vllm && python - <<'PY'
|
||||
from pathlib import Path
|
||||
p = Path("vllm/platforms/rocm.py")
|
||||
s = p.read_text()
|
||||
|
||||
# Add amdsmi stubs if missing
|
||||
if "def amdsmi_init" not in s:
|
||||
s += """
|
||||
|
||||
# --- vllm-therock patch: tolerate missing 'amdsmi' ---
|
||||
try:
|
||||
amdsmi
|
||||
except Exception:
|
||||
amdsmi = None
|
||||
|
||||
if 'amdsmi_init' not in globals():
|
||||
def amdsmi_init(): return None
|
||||
def amdsmi_shut_down(): return None
|
||||
"""
|
||||
|
||||
# Override get_device_name to avoid amdsmi
|
||||
if "vllm_therock_rocm_get_device_name" not in s:
|
||||
s += r"""
|
||||
|
||||
def vllm_therock_rocm_get_device_name(self, device_id: int = 0):
|
||||
try:
|
||||
import torch
|
||||
return torch.cuda.get_device_name(device_id)
|
||||
except Exception:
|
||||
return "AMD-gfx1151"
|
||||
|
||||
try:
|
||||
RocmPlatform.get_device_name = vllm_therock_rocm_get_device_name
|
||||
except Exception:
|
||||
pass
|
||||
"""
|
||||
p.write_text(s)
|
||||
print("Patched", p)
|
||||
PY
|
||||
|
||||
|
||||
# Build vLLM
|
||||
RUN source .venv/bin/activate && \
|
||||
cd vllm && \
|
||||
uv pip uninstall amdsmi || echo "amdsmi not installed" && \
|
||||
printf '#!/bin/bash\necho "gfx1151"\n' > /usr/local/bin/amdgpu-arch && \
|
||||
chmod +x /usr/local/bin/amdgpu-arch && \
|
||||
printf '#!/bin/bash\necho "gfx1151"\n' > /usr/bin/amdgpu-arch && \
|
||||
chmod +x /usr/bin/amdgpu-arch && \
|
||||
printf '#!/bin/bash\necho "gfx1151"\n' > /bin/amdgpu-arch && \
|
||||
chmod +x /bin/amdgpu-arch && \
|
||||
export PYTORCH_ROCM_ARCH="gfx1151" && \
|
||||
/torch-therock/.venv/bin/python -c "import torch; print('torch==' + torch.__version__)" > /tmp/constraints.txt && \
|
||||
/torch-therock/.venv/bin/python -c "import triton; print('pytorch-triton-rocm==' + getattr(triton, '__version__', 'unknown'))" >> /tmp/constraints.txt || echo "# triton version not found" >> /tmp/constraints.txt && \
|
||||
TORCH_CMAKE_PATH=$(/torch-therock/.venv/bin/python -c "import torch; print(torch.utils.cmake_prefix_path)") && \
|
||||
VLLM_TARGET_DEVICE=rocm CMAKE_PREFIX_PATH="$TORCH_CMAKE_PATH" Torch_DIR="$TORCH_CMAKE_PATH/Torch" CMAKE_ARGS="-DGPU_TARGETS=gfx1151 -DHIP_TARGETS=gfx1151 -DAMDGPU_TARGETS=gfx1151" /torch-therock/.venv/bin/pip install . --no-build-isolation --constraint /tmp/constraints.txt
|
||||
|
||||
# Runtime stage
|
||||
FROM archlinux:latest
|
||||
|
||||
# Install runtime dependencies + compilation tools
|
||||
RUN pacman -Syu --noconfirm && \
|
||||
pacman -S --noconfirm ca-certificates gcc make cmake ninja git && \
|
||||
pacman -Scc --noconfirm && \
|
||||
git clone --depth 1 https://github.com/pyenv/pyenv.git /opt/pyenv && \
|
||||
export PYENV_ROOT=/opt/pyenv && \
|
||||
export PATH=$PYENV_ROOT/bin:$PATH && \
|
||||
eval "$(pyenv init -)" && \
|
||||
pyenv install 3.12.9 && \
|
||||
pyenv global 3.12.9
|
||||
|
||||
# Copy complete environment from builder
|
||||
COPY --from=vllm-builder /opt/pyenv /opt/pyenv
|
||||
COPY --from=vllm-builder /torch-therock/.venv /torch-therock/.venv
|
||||
COPY --from=vllm-builder /torch-therock/*.sh /torch-therock/
|
||||
|
||||
# Set environment
|
||||
ENV PYENV_ROOT=/opt/pyenv
|
||||
ENV PYENV_VERSION=3.12.9
|
||||
ENV PATH="/opt/pyenv/versions/3.12.9/bin:/torch-therock/.venv/bin:$PATH"
|
||||
ENV PYTORCH_ROCM_ARCH=gfx1151
|
||||
|
||||
WORKDIR /torch-therock
|
||||
|
||||
# Test installation
|
||||
RUN /torch-therock/.venv/bin/python -c "import torch; print('PyTorch version:', torch.__version__)" && \
|
||||
/torch-therock/.venv/bin/python -c "import vllm; print('vLLM version:', vllm.__version__)"
|
||||
|
||||
# Toolbx compatibility - fix permissions and add environment setup
|
||||
RUN chmod -R a+rwX /torch-therock
|
||||
|
||||
# Copy toolbx scripts
|
||||
COPY scripts/vllm-env.sh /etc/profile.d/vllm-env.sh
|
||||
COPY scripts/vllm-banner.sh /etc/profile.d/vllm-banner.sh
|
||||
RUN chmod 644 /etc/profile.d/vllm-env.sh /etc/profile.d/vllm-banner.sh
|
||||
|
||||
COPY scripts/start-vllm.sh /usr/local/bin/start-vllm
|
||||
RUN chmod 755 /usr/local/bin/start-vllm
|
||||
|
||||
CMD ["bash", "-c", "source .venv/bin/activate && bash"]
|
||||
@@ -0,0 +1,3 @@
|
||||
# Required for Strix Halo / RDNA3.5 on vLLM
|
||||
export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
|
||||
export VLLM_TARGET_DEVICE=rocm
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Auto-activate vLLM environment for toolbx
|
||||
|
||||
# Activate PyTorch + vLLM environment
|
||||
source /torch-therock/.venv/bin/activate
|
||||
|
||||
# ROCm and performance environment variables
|
||||
export PYTORCH_ROCM_ARCH=gfx1151
|
||||
export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
|
||||
export VLLM_USE_TRITON_FLASH_ATTN=0
|
||||
export TORCH_COMPILE_DEBUG=1
|
||||
export VLLM_COMPILE_LEVEL=3
|
||||
|
||||
# Detect and export ROCm toolchain paths
|
||||
eval "$(
|
||||
python3 - <<'PY'
|
||||
try:
|
||||
import pathlib, _rocm_sdk_core as r
|
||||
base = pathlib.Path(r.__file__).parent / "lib" / "llvm" / "bin"
|
||||
lib = pathlib.Path(r.__file__).parent / "lib"
|
||||
print(f'export TRITON_HIP_LLD_PATH="{base / "ld.lld"}"')
|
||||
print(f'export TRITON_HIP_CLANG_PATH="{base / "clang++"}"')
|
||||
print(f'export LD_LIBRARY_PATH="{lib}:$LD_LIBRARY_PATH"')
|
||||
except ImportError:
|
||||
pass
|
||||
PY
|
||||
)" 2>/dev/null || true
|
||||
|
||||
# Enable flash attention
|
||||
export FLASH_ATTENTION_TRITON_AMD_ENABLE=TRUE
|
||||
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
# Ensure /opt/venv/bin is first even if ~/.local/bin or ~/.cargo/bin prepend themselves via user dotfiles.
|
||||
|
||||
_venv_path_fix() {
|
||||
# remove any existing /opt/venv/bin entries, then prepend one
|
||||
local newpath
|
||||
newpath="$(printf '%s' "$PATH" | awk -v RS=: -v ORS=: '$0!="/opt/venv/bin"{print}')"
|
||||
PATH="/opt/venv/bin:${newpath%:}"
|
||||
}
|
||||
|
||||
# register the hook once (it then runs before every prompt); don't duplicate
|
||||
case "$PROMPT_COMMAND" in
|
||||
*_venv_path_fix*) : ;;
|
||||
*) PROMPT_COMMAND="_venv_path_fix${PROMPT_COMMAND:+;$PROMPT_COMMAND}" ;;
|
||||
esac
|
||||
|
||||
在新工单中引用
屏蔽一个用户