SWDEV-284207 - Modify HIP samples to run on Navi and MI100 ASICs
Change-Id: I5b8d4f8df36a5be2ea4c381b88454832c59253e6
This commit is contained in:
@@ -41,6 +41,9 @@ SQ_ASM_EXE=square_asm.out
|
||||
MCIN_OBJ_GEN=hip_obj_gen.mcin
|
||||
GPU_ARCH1=gfx900
|
||||
GPU_ARCH2=gfx906
|
||||
GPU_ARCH3=gfx908
|
||||
GPU_ARCH4=gfx1010
|
||||
GPU_ARCH5=gfx1030
|
||||
|
||||
.PHONY: test
|
||||
|
||||
@@ -48,12 +51,15 @@ all: src_to_asm asm_to_exec
|
||||
|
||||
src_to_asm:
|
||||
$(HIPCC) -c -S --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_ASM) $(SRCS)
|
||||
$(HIPCC) -c -S --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) $(SRCS)
|
||||
$(HIPCC) -c -S --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) $(SRCS)
|
||||
|
||||
# You may modify the .s assembly files before running the next step (asm_to_exec).
|
||||
# By default, the generated assembly file names will be:
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx900.s
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx906.s
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx908.s
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx1010.s
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx1030.s
|
||||
#
|
||||
# Note: hipcc cannot convert .s files to .o files; use clang instead.
|
||||
|
||||
@@ -61,7 +67,10 @@ asm_to_exec:
|
||||
$(HIPCC) -c $(SQ_HOST_ASM) -o $(SQ_HOST_OBJ)
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH1) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).s -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH2) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).s -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o
|
||||
$(CLANG_OFFLOAD_BUNDLER) -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-$(GPU_ARCH1),hip-amdgcn-amd-amdhsa-$(GPU_ARCH2) -inputs=/dev/null,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o -outputs=$(SQ_DEVICE_HIPFB)
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH3) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).s -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).o
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH4) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).s -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).o
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH5) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).s -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).o
|
||||
$(CLANG_OFFLOAD_BUNDLER) -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-$(GPU_ARCH1),hip-amdgcn-amd-amdhsa-$(GPU_ARCH2),hip-amdgcn-amd-amdhsa-$(GPU_ARCH3),hip-amdgcn-amd-amdhsa-$(GPU_ARCH4),hip-amdgcn-amd-amdhsa-$(GPU_ARCH5) -inputs=/dev/null,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).o -outputs=$(SQ_DEVICE_HIPFB)
|
||||
$(LLVM_MC) $(MCIN_OBJ_GEN) -o $(SQ_DEVICE_OBJ) --filetype=obj
|
||||
$(HIPCC) $(SQ_HOST_OBJ) $(SQ_DEVICE_OBJ) -o $(SQ_ASM_EXE)
|
||||
|
||||
|
||||
@@ -44,6 +44,9 @@ SQ_IR_EXE=square_ir.out
|
||||
MCIN_OBJ_GEN=hip_obj_gen.mcin
|
||||
GPU_ARCH1=gfx900
|
||||
GPU_ARCH2=gfx906
|
||||
GPU_ARCH3=gfx908
|
||||
GPU_ARCH4=gfx1010
|
||||
GPU_ARCH5=gfx1030
|
||||
|
||||
.PHONY: test
|
||||
|
||||
@@ -51,16 +54,22 @@ all: src_to_ir bc_to_ll ll_to_bc ir_to_exec
|
||||
|
||||
src_to_ir:
|
||||
$(HIPCC) -c -emit-llvm --cuda-host-only -target x86_64-linux-gnu -o $(SQ_HOST_BC) $(SRCS)
|
||||
$(HIPCC) -c -emit-llvm --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) $(SRCS)
|
||||
$(HIPCC) -c -emit-llvm --cuda-device-only --offload-arch=$(GPU_ARCH1) --offload-arch=$(GPU_ARCH2) --offload-arch=$(GPU_ARCH3) --offload-arch=$(GPU_ARCH4) --offload-arch=$(GPU_ARCH5) $(SRCS)
|
||||
|
||||
# By default, the LLVM IR Bitcode file names will be:
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx900.bc
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx906.bc
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx908.bc
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx1010.bc
|
||||
# square-hip-amdgcn-amd-amdhsa-gfx1030.bc
|
||||
|
||||
bc_to_ll:
|
||||
$(LLVM_DIS) $(SQ_HOST_BC) -o $(SQ_HOST_LL)
|
||||
$(LLVM_DIS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).ll
|
||||
$(LLVM_DIS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).ll
|
||||
$(LLVM_DIS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).ll
|
||||
$(LLVM_DIS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).ll
|
||||
$(LLVM_DIS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).ll
|
||||
|
||||
# You may modify the .ll LLVM IR files before running the next step (ll_to_bc).
|
||||
#
|
||||
@@ -70,12 +79,18 @@ ll_to_bc:
|
||||
$(LLVM_AS) $(SQ_HOST_LL) -o $(SQ_HOST_BC)
|
||||
$(LLVM_AS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).ll -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).bc
|
||||
$(LLVM_AS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).ll -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).bc
|
||||
$(LLVM_AS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).ll -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).bc
|
||||
$(LLVM_AS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).ll -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).bc
|
||||
$(LLVM_AS) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).ll -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).bc
|
||||
|
||||
ir_to_exec:
|
||||
$(HIPCC) -c $(SQ_HOST_BC) -o $(SQ_HOST_OBJ)
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH1) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH2) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o
|
||||
$(CLANG_OFFLOAD_BUNDLER) -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-$(GPU_ARCH1),hip-amdgcn-amd-amdhsa-$(GPU_ARCH2) -inputs=/dev/null,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o -outputs=$(SQ_DEVICE_HIPFB)
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH3) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).o
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH4) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).o
|
||||
$(CLANG) -target amdgcn-amd-amdhsa -mcpu=$(GPU_ARCH5) square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).bc -o square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).o
|
||||
$(CLANG_OFFLOAD_BUNDLER) -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-$(GPU_ARCH1),hip-amdgcn-amd-amdhsa-$(GPU_ARCH2),hip-amdgcn-amd-amdhsa-$(GPU_ARCH3),hip-amdgcn-amd-amdhsa-$(GPU_ARCH4),hip-amdgcn-amd-amdhsa-$(GPU_ARCH5) -inputs=/dev/null,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH1).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH2).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH3).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH4).o,square-hip-amdgcn-amd-amdhsa-$(GPU_ARCH5).o -outputs=$(SQ_DEVICE_HIPFB)
|
||||
$(LLVM_MC) $(MCIN_OBJ_GEN) -o $(SQ_DEVICE_OBJ) --filetype=obj
|
||||
$(HIPCC) $(SQ_HOST_OBJ) $(SQ_DEVICE_OBJ) -o $(SQ_IR_EXE)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user