Fixed offline kernel compilation
1. Removed vcpy_isa.ptx, as it should be generated during make
2. Made argument padding specific to the hcc path
3. Renamed --gencodeobject to --genco
4. Changed the Makefile to work on both the nvcc and hcc paths
Change-Id: Ifd053d541085d9ce4fd37bc21b07674786c7163e
[ROCm/clr commit: f22fda1291]
This commit is contained in:
@@ -181,7 +181,7 @@ my $ISACMD="";
|
||||
if($HIP_PLATFORM eq "hcc"){
|
||||
$ISACMD .= "$HIP_PATH/bin/hipgenisa.sh ";
|
||||
$ISACMD .= $ROCM_PATH;
|
||||
if($ARGV[0] eq "--gencodeobject"){
|
||||
if($ARGV[0] eq "--genco"){
|
||||
foreach $isaarg (@ARGV[1..$#ARGV]){
|
||||
$ISACMD .= " ";
|
||||
$ISACMD .= $isaarg;
|
||||
@@ -196,7 +196,7 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
|
||||
if($HIP_PLATFORM eq "nvcc"){
|
||||
$ISACMD .= "$HIP_PATH/bin/hipcc -ptx ";
|
||||
if($ARGV[0] eq "--gencodeobject"){
|
||||
if($ARGV[0] eq "--genco"){
|
||||
foreach $isaarg (@ARGV[1..$#ARGV]){
|
||||
$ISACMD .= " ";
|
||||
$ISACMD .= $isaarg;
|
||||
|
||||
@@ -6,16 +6,28 @@ HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
|
||||
ifeq (${HIP_PLATFORM}, hcc)
|
||||
GENCODEOBJECT_FLAGS=--target-isa-fiji
|
||||
GENCODEOBJECT_FLAGS=--target-isa=fiji
|
||||
|
||||
vcpy_isa.compile: vcpy_isa.cpp
|
||||
$(HIPCC) --genco $(GENCODEOBJECT_FLAGS) vcpy_isa.cpp -o vcpy_isa.co
|
||||
|
||||
clean:
|
||||
rm -f *.co *.out
|
||||
|
||||
endif
|
||||
|
||||
ifeq (${HIP_PLATFORM}, nvcc)
|
||||
|
||||
vcpy_isa.compile: vcpy_isa.cu
|
||||
$(HIPCC) --genco vcpy_isa.cu -o vcpy_isa.ptx
|
||||
|
||||
clean:
|
||||
rm -f *.ptx *.out
|
||||
|
||||
endif
|
||||
|
||||
all: vcpy_isa.compile runKernel.hip.out
|
||||
|
||||
vcpy_isa.compile: vcpy_isa.cpp
|
||||
$(HIPCC) --gencodeobject $(GENCODEOBJECT_FLAGS) vcpy_isa.cpp -o vcpy_isa.co
|
||||
|
||||
runKernel.hip.out: runKernel.cpp
|
||||
$(HIPCC) runKernel.cpp -o runKernel.hip.out
|
||||
|
||||
clean:
|
||||
rm -f *.co *.out
|
||||
|
||||
@@ -66,8 +66,9 @@ int main(){
|
||||
hipModuleLoad(&Module, fileName);
|
||||
hipModuleGetFunction(&Function, Module, kernel_name);
|
||||
|
||||
uint32_t len = LEN;
|
||||
uint32_t one = 1;
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
uint32_t len = LEN;
|
||||
uint32_t one = 1;
|
||||
|
||||
std::vector<void*>argBuffer(5);
|
||||
uint32_t *ptr32_t = (uint32_t*)&argBuffer[0];
|
||||
@@ -79,7 +80,13 @@ int main(){
|
||||
memcpy(ptr32_t + 5, &one, sizeof(uint32_t));
|
||||
memcpy(&argBuffer[3], &Ad, sizeof(void*));
|
||||
memcpy(&argBuffer[4], &Bd, sizeof(void*));
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
std::vector<void*>argBuffer(2);
|
||||
memcpy(&argBuffer[0], &Ad, sizeof(void*));
|
||||
memcpy(&argBuffer[1], &Bd, sizeof(void*));
|
||||
#endif
|
||||
|
||||
|
||||
size_t size = argBuffer.size()*sizeof(void*);
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
//
|
||||
// Generated by NVIDIA NVVM Compiler
|
||||
//
|
||||
// Compiler Build ID: CL-19856038
|
||||
// Cuda compilation tools, release 7.5, V7.5.17
|
||||
// Based on LLVM 3.4svn
|
||||
//
|
||||
|
||||
.version 4.3
|
||||
.target sm_20
|
||||
.address_size 64
|
||||
|
||||
// .globl hello_world
|
||||
|
||||
.visible .entry hello_world(
|
||||
.param .u64 hello_world_param_0,
|
||||
.param .u64 hello_world_param_1
|
||||
)
|
||||
{
|
||||
.reg .f32 %f<2>;
|
||||
.reg .b32 %r<2>;
|
||||
.reg .b64 %rd<8>;
|
||||
|
||||
|
||||
ld.param.u64 %rd1, [hello_world_param_0];
|
||||
ld.param.u64 %rd2, [hello_world_param_1];
|
||||
cvta.to.global.u64 %rd3, %rd2;
|
||||
cvta.to.global.u64 %rd4, %rd1;
|
||||
mov.u32 %r1, %tid.x;
|
||||
mul.wide.s32 %rd5, %r1, 4;
|
||||
add.s64 %rd6, %rd4, %rd5;
|
||||
ld.global.f32 %f1, [%rd6];
|
||||
add.s64 %rd7, %rd3, %rd5;
|
||||
st.global.f32 [%rd7], %f1;
|
||||
ret;
|
||||
}
|
||||
|
||||
|
||||
Reference in a new issue
Block a user