33f0a41c7a
HIP supports compiling kernels from assembly into exec. The device assembly needs to be compiled into a fat binary object. This device object is embedded into a host object using llvm-mc directives. Then, any host linker may link the host and device objects together into an executable. A README is added. Change-Id: I59d3a8b5363073810ffc3aa0d57f21b0df272369
51 行
1.8 KiB
Makefile
51 行
1.8 KiB
Makefile
# Root of the HIP installation. A HIP_PATH that is already set (environment or
# command line) is kept; otherwise /opt/rocm/hip is used when that path exists.
HIP_PATH ?= $(wildcard /opt/rocm/hip)

# $(wildcard ...) expands to nothing when /opt/rocm/hip is absent, leaving
# HIP_PATH empty; in that case fall back to the relative path ../../..
ifeq ($(HIP_PATH),)
HIP_PATH = ../../..
endif
|
|
|
|
# Directory holding the LLVM tools, located relative to HIP_PATH.
# Kept recursive (=) so a later change of HIP_PATH is still picked up.
llvm_bin_dir = $(HIP_PATH)/../llvm/bin

# Compiler driver and LLVM utilities invoked by the recipes in this file.
HIPCC                 = $(HIP_PATH)/bin/hipcc
CLANG                 = $(llvm_bin_dir)/clang
LLVM_MC               = $(llvm_bin_dir)/llvm-mc
CLANG_OFFLOAD_BUNDLER = $(llvm_bin_dir)/clang-offload-bundler
|
|
|
|
# HIP source file the sample is built from.
SRCS = square.cpp

# The build first extracts assembly from the source, then creates an
# executable from the (possibly modified) assembly. These are the names of
# the files produced along the way.
SQ_HOST_ASM     = square_host.s
SQ_HOST_OBJ     = square_host.o
SQ_DEVICE_HIPFB = offload_bundle.hipfb
SQ_DEVICE_OBJ   = square_device.o
SQ_ASM_EXE      = square_asm.out
|
|
|
|
# Input file handed to llvm-mc to generate the device object.
MCIN_OBJ_GEN = hip_obj_gen.mcin

# The two GPU architectures the device code is built for.
GPU_ARCH1 = gfx900
GPU_ARCH2 = gfx906
|
|
|
|
# None of these targets names a file that its recipe creates, so declare them
# all phony; otherwise a stray file called e.g. "clean" or "all" would make
# the target look up to date and silently skip its recipe.
# "test" has no rule in this file; it stays listed so that `make test` keeps
# behaving as it did before this change.
.PHONY: all src_to_asm asm_to_exec clean test

# asm_to_exec consumes the .s files that src_to_asm writes, but it must not
# list src_to_asm as a prerequisite: users are expected to edit the assembly
# between the two stages, and a prerequisite would regenerate (overwrite) it.
# The only ordering is therefore the left-to-right order of the prerequisites
# of "all", which holds only for serial execution. Disable parallel execution
# so that `make -j` cannot start both stages at the same time.
.NOTPARALLEL:

# Default goal: run both stages, in this order.
all: src_to_asm asm_to_exec
|
|
|
|
# Stage 1: turn $(SRCS) into assembly.
#   - first command:  host-only compilation, written to $(SQ_HOST_ASM)
#   - second command: device-only compilation for both GPU architectures;
#     no -o is given, so the compiler chooses the output file names
src_to_asm:
	$(HIPCC) -c -S --cuda-host-only \
	    -target x86_64-linux-gnu \
	    -o $(SQ_HOST_ASM) $(SRCS)
	$(HIPCC) -c -S --cuda-device-only \
	    --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) \
	    $(SRCS)
|
|
|
|
# You may modify the .s assembly files before the next step
|
|
# By default, their names will be:
|
|
# square-hip-amdgcn-amd-amdhsa-gfx900.s
|
|
# square-hip-amdgcn-amd-amdhsa-gfx906.s
|
|
#
|
|
# Note: hipcc does not work to convert the device .s files to .o; use clang for those (the host .s is still assembled with hipcc below).
|
|
|
|
# Stage 2: build the executable from the assembly files.
#   1. hipcc assembles the host assembly into $(SQ_HOST_OBJ).
#   2. clang turns each per-architecture device .s into a matching .o.
#   3. clang-offload-bundler packs both device outputs (with /dev/null as the
#      host entry) into $(SQ_DEVICE_HIPFB).
#   4. llvm-mc assembles $(MCIN_OBJ_GEN) into $(SQ_DEVICE_OBJ)
#      (presumably this is what embeds the bundle -- confirm against the
#      .mcin file, which is not part of this Makefile).
#   5. hipcc links the host and device objects into $(SQ_ASM_EXE).
# Continuation lines break only between arguments, never inside the
# comma-separated -targets/-inputs lists.
asm_to_exec:
	$(HIPCC) -c $(SQ_HOST_ASM) -o $(SQ_HOST_OBJ)
	$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH1) \
	    square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).s \
	    -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o
	$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH2) \
	    square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).s \
	    -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o
	$(CLANG_OFFLOAD_BUNDLER) -type=o -bundle-align=4096 \
	    -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-$(GPU_ARCH1),hip-amdgcn-amd-amdhsa-$(GPU_ARCH2) \
	    -inputs=/dev/null,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o \
	    -outputs=$(SQ_DEVICE_HIPFB)
	$(LLVM_MC) $(MCIN_OBJ_GEN) -o $(SQ_DEVICE_OBJ) --filetype=obj
	$(HIPCC) $(SQ_HOST_OBJ) $(SQ_DEVICE_OBJ) -o $(SQ_ASM_EXE)
|
|
|
|
# Delete build products from the current directory by wildcard: objects,
# executables (*.out), offload bundles, assembly and LLVM IR/bitcode files.
# Note that *.s also matches assembly files that were edited by hand.
clean:
	rm -f *.o *.out \
	    *.hipfb \
	    *.s *.ll *.bc
|
|
|