From 73eb4c63302080a306f16075ee2830cbe82ddc89 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 14 May 2020 03:50:34 -0500 Subject: [PATCH 01/11] API tracing instrumentation Change-Id: I257409b9fe299b009ded3e3a43287322d5f93a70 [ROCm/hip commit: 5abb8e1a68e3b0d1c27593f56ce80ea4c20c8d54] --- .../include/hip/hcc_detail/hip_runtime_api.h | 2 +- projects/hip/rocclr/hip_device.cpp | 6 +- projects/hip/rocclr/hip_device_runtime.cpp | 6 +- projects/hip/rocclr/hip_memory.cpp | 2 +- projects/hip/rocclr/hip_module.cpp | 10 ++-- projects/hip/rocclr/hip_platform.cpp | 12 ++-- projects/hip/rocclr/hip_prof_gen.py | 60 ++++++++----------- 7 files changed, 44 insertions(+), 54 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h index 17c34b0ad5..744885923f 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h @@ -513,7 +513,7 @@ hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); * * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue */ -hipError_t hipGetDeviceFlags(unsigned *flags); +hipError_t hipGetDeviceFlags(unsigned int* flags); /** * @brief The bank width of shared memory on current device is set diff --git a/projects/hip/rocclr/hip_device.cpp b/projects/hip/rocclr/hip_device.cpp index 8695ef43e5..b8a5e65d29 100644 --- a/projects/hip/rocclr/hip_device.cpp +++ b/projects/hip/rocclr/hip_device.cpp @@ -99,7 +99,7 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device } hipError_t hipDeviceGetCount(int* count) { - HIP_INIT_API(NONE, count); + HIP_INIT_API(hipDeviceGetCount, count); HIP_RETURN(ihipDeviceGetCount(count)); } @@ -238,7 +238,7 @@ hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) } hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { - HIP_INIT_API(NONE, deviceId, acc); + HIP_INIT_API(hipHccGetAccelerator, deviceId, acc); assert(0 && "Unimplemented"); @@ -246,7 +246,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) { } hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) { - HIP_INIT_API(NONE, stream, av); + HIP_INIT_API(hipHccGetAcceleratorView, stream, av); assert(0 && "Unimplemented"); diff --git a/projects/hip/rocclr/hip_device_runtime.cpp b/projects/hip/rocclr/hip_device_runtime.cpp index 531f35c732..3823af5206 100644 --- a/projects/hip/rocclr/hip_device_runtime.cpp +++ b/projects/hip/rocclr/hip_device_runtime.cpp @@ -480,7 +480,7 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) { } hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) { - HIP_INIT_API(NONE, handle, event); + HIP_INIT_API(hipIpcGetEventHandle, handle, event); assert(0 && "Unimplemented"); @@ -488,7 +488,7 @@ hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event } hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) { - HIP_INIT_API(NONE, event, handle); + HIP_INIT_API(hipIpcOpenEventHandle, event, handle); assert(0 && "Unimplemented"); @@ -542,7 +542,7 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) { } hipError_t hipSetValidDevices ( int* device_arr, int len ) { - HIP_INIT_API(NONE, device_arr, len); + HIP_INIT_API(hipSetValidDevices, device_arr, len); assert(0 && "Unimplemented"); diff --git a/projects/hip/rocclr/hip_memory.cpp b/projects/hip/rocclr/hip_memory.cpp index 4bd75b8ffb..febf5cc700 100755 --- a/projects/hip/rocclr/hip_memory.cpp +++ b/projects/hip/rocclr/hip_memory.cpp @@ -2090,7 +2090,7 @@ hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) { - HIP_INIT_API(hipMemcpyParam2D, pCopy); + HIP_INIT_API(hipMemcpyParam2DAsync, pCopy); HIP_RETURN(ihipMemcpyParam2D(pCopy, stream, true)); } diff --git a/projects/hip/rocclr/hip_module.cpp b/projects/hip/rocclr/hip_module.cpp index 7cda6864c6..3a51001063 100755 --- a/projects/hip/rocclr/hip_module.cpp +++ b/projects/hip/rocclr/hip_module.cpp @@ -119,7 +119,7 @@ hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, void** optionsValues) { /* TODO: Pass options to Program */ - HIP_INIT_API(hipModuleLoadData, module, image); + HIP_INIT_API(hipModuleLoadDataEx, module, image); HIP_RETURN(ihipModuleLoadData(module, image, 0)); } @@ -369,7 +369,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) { - HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, + HIP_INIT_API(ihipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); hip::Function* function = hip::Function::asFunction(f); @@ -493,7 +493,7 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { - HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags); @@ -512,7 +512,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { - HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + HIP_INIT_API(hipHccModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); @@ -529,7 +529,7 @@ hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX, hipEvent_t startEvent, hipEvent_t stopEvent) { - HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, + HIP_INIT_API(hipModuleLaunchKernelExt, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent); diff --git a/projects/hip/rocclr/hip_platform.cpp b/projects/hip/rocclr/hip_platform.cpp index 8759ef47f1..b67c6ac90a 100755 --- a/projects/hip/rocclr/hip_platform.cpp +++ b/projects/hip/rocclr/hip_platform.cpp @@ -706,7 +706,7 @@ extern "C" hipError_t hipConfigureCall( size_t sharedMem, hipStream_t stream) { - HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream); PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); @@ -719,7 +719,7 @@ extern "C" hipError_t __hipPushCallConfiguration( size_t sharedMem, hipStream_t stream) { - HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream); PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); @@ -730,7 +730,7 @@ extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, dim3 *blockDim, size_t *sharedMem, hipStream_t *stream) { - HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream); + HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream); ihipExec_t exec; PlatformState::instance().popExec(exec); @@ -747,7 +747,7 @@ extern "C" hipError_t hipSetupArgument( size_t size, size_t offset) { - HIP_INIT_API(NONE, arg, size, offset); + HIP_INIT_API(hipSetupArgument, arg, size, offset); PlatformState::instance().setupArgument(arg, size, offset); @@ -756,7 +756,7 @@ extern "C" hipError_t hipSetupArgument( extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) { - HIP_INIT_API(NONE, hostFunction); + HIP_INIT_API(hipLaunchByPtr, hostFunction); ihipExec_t exec; PlatformState::instance().popExec(exec); @@ -1301,7 +1301,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, size_t sharedMemBytes, hipStream_t stream) { - HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes, + HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); hip::Stream* s = reinterpret_cast(stream); diff --git a/projects/hip/rocclr/hip_prof_gen.py b/projects/hip/rocclr/hip_prof_gen.py index c20df3c1aa..c508888140 100755 --- a/projects/hip/rocclr/hip_prof_gen.py +++ b/projects/hip/rocclr/hip_prof_gen.py @@ -68,8 +68,8 @@ def filtr_api_name(name): return name def filtr_api_decl(record): - record = re.sub("\s__dparm\([^\)]*\)", '', record); - record = re.sub("\(void\*\)", '', record); + record = re.sub("\s__dparm\([^\)]*\)", r'', record); + record = re.sub("\(void\*\)", r'', record); return record # Normalizing API arguments @@ -216,6 +216,8 @@ def parse_content(inp_file_p, api_map, out): api_name = "" # Valid public API found flag api_valid = 0 + # API overload (parameters mismatch) + api_overload = 0 # Input file patched content content = '' @@ -240,6 +242,7 @@ def parse_content(inp_file_p, api_map, out): # Looking for API begin if found == 0: + record = re.sub(r'\s*extern\s+"C"\s+', r'', record); if beg_pattern.match(record): found = 1 record = filtr_api_decl(record) @@ -255,6 +258,8 @@ def parse_content(inp_file_p, api_map, out): # Checking if complete API matched if m: found = 2 + api_valid = 0 + api_overload = 0 api_name = filtr_api_name(m.group(2)) # Checking if API name is in the API map if (private_check_mode == 0) or (api_name in api_map): @@ -280,48 +285,34 @@ def parse_content(inp_file_p, api_map, out): out[api_name] = filtr_api_opts(api_args) # Register missmatched API methods else: + api_overload = 1 # Warning about mismatched API, possible non public overloaded version api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types message("\t" + api_name + ' args mismatch:\n' + api_diff + '\n') - if hip_patch_mode != 0: - # Looking for INIT macro - m = init_pattern.match(line) - if m: - if api_valid == 0: api_name = 'NONE' - - if api_name == m.group(3): - if hip_patch_mode == 1: hip_patch_mode = 0 - else: fatal("patching failed") - else: - hip_patch_mode = 2 - init_args = m.group(2) - if init_args != '': init_args = ', ' + init_args - line = m.group(1) + '(' + api_name + init_args + m.group(5) + '\n' - non_public_api = 0 - # API found action if found == 2: - # Looking for INIT macro + if hip_patch_mode != 0: + # Looking for INIT macro + m = init_pattern.match(line) + if m: + init_name = api_name + if api_overload == 1: init_name = 'NONE' + init_args = m.group(4) + line = m.group(1) + '(' + init_name + init_args + m.group(5) + '\n' + m = init_pattern.match(line) if m: found = 0 - non_public_api = 0 - - if api_valid == 1: - api_valid = 0 - message("\t" + api_name) - else: - non_public_api = 1 - - if non_public_api == 1: + if api_valid == 1: message("\t" + api_name) + # Ignore if it is initialized as NONE + init_name = m.group(3) + if init_name != 'NONE': + # Check if init name matching API name + if init_name != api_name: + fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'") # Registering dummy API for non public API if the name in INIT is not NONE - init_name = m.group(3) - # Ignore if it is initialized as NONE - if init_name != 'NONE': - # Check if init name matching API name - if init_name != api_name: - fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'") + if api_valid == 0: # If init name is not in public API map then it is private API # else it was not identified and will be checked on finish if not init_name in api_map: @@ -333,7 +324,6 @@ def parse_content(inp_file_p, api_map, out): # Expect INIT macro for valid public API # Removing and registering non-conformant APIs with missing HIP_INIT macro if api_valid == 1: - api_valid = 0 if api_name in out: del out[api_name] del api_map[api_name] From 8760f751357885250b9d9ee174b7485573fe3997 Mon Sep 17 00:00:00 2001 From: agodavar Date: Thu, 14 May 2020 09:13:31 -0400 Subject: [PATCH 02/11] SWDEV-235875:Fix rocALUTION build error Change-Id: I2f2a15687e943a860ff6ca05f22422f791a36c0a [ROCm/hip commit: 07419c763559dd7eb0c7c10f8b33a29a075186ee] --- projects/hip/cmake/FindHIP/run_hipcc.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/hip/cmake/FindHIP/run_hipcc.cmake b/projects/hip/cmake/FindHIP/run_hipcc.cmake index 24b754c874..1d8f3b38ad 100644 --- a/projects/hip/cmake/FindHIP/run_hipcc.cmake +++ b/projects/hip/cmake/FindHIP/run_hipcc.cmake @@ -35,7 +35,8 @@ set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "@HIP_CLANG_PARALLEL_BUILD_COMPILE_ @_HIP_HCC_FLAGS@ @_HIP_CLANG_FLAGS@ @_HIP_NVCC_FLAGS@ -set(HIP_HIPCC_INCLUDE_ARGS "@HIP_HIPCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly) +#Needed to bring the HIP_HIPCC_INCLUDE_ARGS variable in scope +set(HIP_HIPCC_INCLUDE_ARGS @HIP_HIPCC_INCLUDE_ARGS@) # list set(cmake_dependency_file "@cmake_dependency_file@") # path set(source_file "@source_file@") # path From bfea5742714d62a6dacc26189bbb546ee4607b9e Mon Sep 17 00:00:00 2001 From: Payam Date: Thu, 14 May 2020 00:18:32 -0400 Subject: [PATCH 03/11] load functions when registered if lazy load is disabled Change-Id: I6f10a72f7643e476407af03b04ef52e59741be45 [ROCm/hip commit: a5b4f5f02df91a842d2768155290d70a6bdf28f2] --- projects/hip/rocclr/hip_platform.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/projects/hip/rocclr/hip_platform.cpp b/projects/hip/rocclr/hip_platform.cpp index b67c6ac90a..debe33b6f3 100755 --- a/projects/hip/rocclr/hip_platform.cpp +++ b/projects/hip/rocclr/hip_platform.cpp @@ -191,13 +191,6 @@ void PlatformState::init() for (auto& it : vars_) { it.second.rvars.resize(g_devices.size()); } - if (!HIP_ENABLE_LAZY_KERNEL_LOADING) { - for (size_t i = 0; i < g_devices.size(); ++i) { - for (auto& it: functions_) { - getFunc(it.first, i); - } - } - } } bool PlatformState::unregisterFunc(hipModule_t hmod) { @@ -613,9 +606,12 @@ extern "C" void __hipRegisterFunction( { PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; PlatformState::instance().registerFunction(hostFunction, func); -// for (size_t i = 0; i < g_devices.size(); ++i) { -// PlatformState::instance().getFunc(hostFunction, i); -// } + if (!HIP_ENABLE_LAZY_KERNEL_LOADING) { + HIP_INIT(); + for (size_t i = 0; i < g_devices.size(); ++i) { + PlatformState::instance().getFunc(hostFunction, i); + } + } } // Registers a device-side global variable. From 0becc2185e5fb4f8113c597282a9b0b36afee83d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 12 May 2020 20:47:42 -0400 Subject: [PATCH 04/11] Include interface directories from rocclr target This is overly painful because this is an object library. Change-Id: I618843b810549876053212bf24efff18aece9470 [ROCm/hip commit: f55e25369929dfa21a728b5a21e2eee666367c8e] --- projects/hip/rocclr/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/hip/rocclr/CMakeLists.txt b/projects/hip/rocclr/CMakeLists.txt index aaee0a4d0b..9d05de586d 100644 --- a/projects/hip/rocclr/CMakeLists.txt +++ b/projects/hip/rocclr/CMakeLists.txt @@ -132,6 +132,8 @@ target_include_directories(hip64 ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/amdocl ${PROJECT_SOURCE_DIR}/include/hip/hcc_detail/elfio + + # FIXME: Remove ROCclr_DIr explicit references ${ROCclr_DIR} ${ROCclr_DIR}/include ${ROCclr_DIR}/compiler/lib @@ -139,6 +141,7 @@ target_include_directories(hip64 ${ROCclr_DIR}/elf/utils/common ${ROCclr_DIR}/elf/utils/libelf ${ROCR_INCLUDES} + $ $) target_compile_definitions(hip64 PRIVATE From f0bd04856a06ce57824ee6194f7bff3685f42a8d Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Tue, 12 May 2020 15:51:52 -0700 Subject: [PATCH 05/11] [hip] Skip lang guess if `-x ` is specified. Change-Id: If5cbdd1e5d36d2d8b83f3eadd0598162691167aa [ROCm/hip commit: 96704ddd007f1fd64f12b2a585882bcfadf0793a] --- projects/hip/bin/hipcc | 55 +++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/projects/hip/bin/hipcc b/projects/hip/bin/hipcc index fc6834997e..1c858b070c 100755 --- a/projects/hip/bin/hipcc +++ b/projects/hip/bin/hipcc @@ -620,7 +620,17 @@ foreach $arg (@ARGV) $toolArgs = substr $toolArgs, 0, -8; chomp $toolArgs; } + } elsif ($arg eq 'c' and $prevArg eq '-x') { + $hasC = 1; + $hasCXX = 0; + $hasHIP = 0; + } elsif ($arg eq 'c++' and $prevArg eq '-x') { + $hasC = 0; + $hasCXX = 1; + $hasHIP = 0; } elsif ($arg eq 'hip' and $prevArg eq '-x') { + $hasC = 0; + $hasCXX = 0; $hasHIP = 1; } elsif ($arg =~ m/^-/) { # options start with - @@ -647,28 +657,35 @@ foreach $arg (@ARGV) #print "O: <$arg>\n"; } elsif ($prevArg ne '-o') { # input files and libraries - if ($arg =~ /\.c$/) { - $hasC = 1; + # Skip guessing if `-x {c|c++|hip}` is already specified. + if (not ($hasC or $hasCXX or $hasHIP)) { + if ($arg =~ /\.c$/) { + $hasC = 1; + $needCFLAGS = 1; + $toolArgs .= " -x c" + } elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) { + $needCXXFLAGS = 1; + if ($HIP_COMPILE_CXX_AS_HIP eq '0' or $HIP_COMPILER ne "clang") { + $hasCXX = 1; + } else { + $hasHIP = 1; + $toolArgs .= " -x hip"; + } + } elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) { + $needCXXFLAGS = 1; + if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { + $hasHIP = 1; + $toolArgs .= " -x hip"; + } else { + $hasCU = 1; + } + } + } elsif ($hasC) { $needCFLAGS = 1; - $toolArgs .= " -x c" - } - elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) { + } elsif ($hasCXX) { $needCXXFLAGS = 1; - if ($HIP_COMPILE_CXX_AS_HIP eq '0' or $HIP_COMPILER ne "clang") { - $hasCXX = 1; - } else { - $hasHIP = 1; - $toolArgs .= " -x hip"; - } - } - elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) { + } else { $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq "hcc" and $HIP_COMPILER eq "clang") { - $hasHIP = 1; - $toolArgs .= " -x hip"; - } else { - $hasCU = 1; - } } push (@inputs, $arg); #print "I: <$arg>\n"; From e5c6555eee2d3cea4d58f315c992511c3d14a7d3 Mon Sep 17 00:00:00 2001 From: agodavar Date: Tue, 12 May 2020 15:14:06 -0400 Subject: [PATCH 06/11] Fix hipMemset for unaligned ptr, correct offset required by ROCclr per aligment and testcase to validate unaligned ptr Change-Id: Ib1a0f9d1556d09cf72a12a90cad2e27c6d9c6a1c [ROCm/hip commit: 458a966aa1274744626855145d52b9d5ce2cba3a] --- projects/hip/rocclr/hip_memory.cpp | 40 +- .../tests/src/runtimeApi/memory/hipMemset.cpp | 344 +++++++----------- 2 files changed, 153 insertions(+), 231 deletions(-) diff --git a/projects/hip/rocclr/hip_memory.cpp b/projects/hip/rocclr/hip_memory.cpp index febf5cc700..df331ea9ad 100755 --- a/projects/hip/rocclr/hip_memory.cpp +++ b/projects/hip/rocclr/hip_memory.cpp @@ -1717,7 +1717,9 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt } size_t offset = 0; - amd::Memory* memory = getMemoryObject(dst, offset); + auto aligned_dst = amd::alignUp(reinterpret_cast
(dst), sizeof(uint64_t)); + + amd::Memory* memory = getMemoryObject(aligned_dst, offset); if (memory == nullptr) { // Host alloced memory memset(dst, value, sizeBytes); @@ -1727,44 +1729,52 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt hipError_t hip_error = hipSuccess; amd::HostQueue* queue = hip::getQueue(stream); + size_t n_head_bytes = 0; + size_t n_tail_bytes = 0; int64_t value64 = 0; - const size_t uint64ModSize = (sizeBytes % sizeof(int64_t)); - if (sizeBytes/sizeof(int64_t) > 0) { + n_head_bytes = static_cast(aligned_dst) - static_cast(dst); if (valueSize == sizeof(int8_t)) { value = value & 0xff; value64 = ((value << 56) | (value << 48) | (value << 40) | (value << 32) | (value << 24) | (value << 16) | (value << 8) | (value)); } else if (valueSize == sizeof(int16_t)) { value = value & 0xffff; - value64 = ((value << 48) | (value << 32) | (value<<16) | (value)); - } else if(valueSize == sizeof(int32_t)) { + value64 = ((value << 48) | (value << 32) | (value << 16) | (value)); + } else if (valueSize == sizeof(int32_t)) { value = value & 0xffffffff; - value64 = ((value<<32) | (value)); + value64 = ((value << 32) | (value)); } else if (valueSize == sizeof(int64_t)) { value64 = value; } else { LogPrintfError("Unsupported Pattern size: %u \n", valueSize); return hipErrorInvalidValue; } - // If uint64ModSize is != 0 then we will do a second fillBuffer Command + n_tail_bytes = ((sizeBytes - n_head_bytes) % sizeof(int64_t)); + // If n_tail_bytes is != 0 then we will do a second fillBuffer Command // on the same stream below, dont wait, do the first call async. hip_error = packFillMemoryCommand(memory, offset, value64, sizeof(int64_t), - sizeBytes - uint64ModSize, queue, - ((uint64ModSize != 0) || isAsync)); - if(hip_error != hipSuccess) { + sizeBytes - n_tail_bytes - n_head_bytes, queue, + ((n_head_bytes != 0) || (n_tail_bytes != 0) || isAsync)); + if (hip_error != hipSuccess) { return hip_error; } + } else { + n_head_bytes = sizeBytes; } - if (uint64ModSize != 0) { - void* new_dst = reinterpret_cast((reinterpret_cast
(dst) - + sizeBytes) - uint64ModSize); + if (n_head_bytes != 0) { + memory = getMemoryObject(dst, offset); + hip_error = packFillMemoryCommand(memory, offset, value, valueSize, + n_head_bytes , queue, isAsync); + } + + if (n_tail_bytes != 0) { + void* new_dst = (reinterpret_cast
(dst) + sizeBytes) - n_tail_bytes; memory = getMemoryObject(new_dst, offset); hip_error = packFillMemoryCommand(memory, offset, value, valueSize, - uint64ModSize, queue, isAsync); + n_tail_bytes, queue, isAsync); } - return hip_error; } diff --git a/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp b/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp index fac83ec003..f1fbbe8f8b 100644 --- a/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp +++ b/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp @@ -1,16 +1,13 @@ /* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -33,236 +30,151 @@ THE SOFTWARE. * TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE --memsetD16val 0xCAFE --memsetD8val 0xCA * HIT_END */ - +#define MAX_OFFSET 3 +// To test memset on unaligned pointer +#define loop(offset, offsetMax) for (int offset = offsetMax; offset >= 0; offset --) #include "hip/hip_runtime.h" #include "test_common.h" +enum MemsetType { + hipMemsetTypeDefault, + hipMemsetTypeD8, + hipMemsetTypeD16, + hipMemsetTypeD32 +}; -bool testhipMemset(int memsetval,int p_gpuDevice) +bool testhipMemsetSmallSize(int memsetval, int p_gpuDevice) { - size_t Nbytes = N*sizeof(char); - printf ("testhipMemset N=%zu memsetval=%2x device=%d\n", N, memsetval, p_gpuDevice); - char *A_d; - char *A_h; - bool testResult = true; + char *A_d; + char *A_h; + bool testResult = true; + for ( size_t iSize = 1; iSize < 4; iSize++ ) { + size_t Nbytes = iSize * sizeof(char); + HIPCHECK(hipMalloc(&A_d, Nbytes)); + A_h = reinterpret_cast (malloc(Nbytes)); + printf("testhipMemsetSmallSize N=%zu memsetval=%2x device=%d\n", + iSize, memsetval, p_gpuDevice); + HIPCHECK(hipMemset(A_d, memsetval, Nbytes)); + HIPCHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK ( hipMalloc(&A_d, Nbytes) ); - A_h = (char*)malloc(Nbytes); - HIPCHECK ( hipMemset(A_d, memsetval, Nbytes) ); - HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - - for (int i=0; i (A_h[i]), static_cast (memsetval)); + break; + } } - HIPCHECK(hipFree(A_d)); - free(A_h); - return testResult; + HIPCHECK(hipFree(A_d)); + free(A_h); + } + return testResult; } -bool testhipMemsetD32(int memsetD32val,int p_gpuDevice) -{ - size_t Nbytes = N*sizeof(int); - printf ("testhipMemsetD32 N=%zu memsetD32val=%8x device=%d\n", N, memsetD32val, p_gpuDevice); - int *A_d; - int *A_h; - bool testResult = true; - - HIPCHECK ( hipMalloc(&A_d, Nbytes) ); - A_h = (int*)malloc(Nbytes); - HIPCHECK ( hipMemsetD32((hipDeviceptr_t)A_d, memsetD32val, N) ); - HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - - for (int i=0; i +bool testhipMemset(T*A_h, T*A_d, T memsetval, enum MemsetType type, + int p_gpuDevice) { + size_t Nbytes = N * sizeof(T); + bool testResult = true; + HIPCHECK(hipMalloc(&A_d, Nbytes)); + A_h = reinterpret_cast (malloc(Nbytes)); + loop(offset, MAX_OFFSET) { + if (type == hipMemsetTypeDefault) { + printf("testhipMemset N=%zu memsetval=%2x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemset(A_d + offset, memsetval, N - offset)); + } else if (type == hipMemsetTypeD8) { + printf("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetD8((hipDeviceptr_t)(A_d + offset), memsetval, N - offset)); + } else if (type == hipMemsetTypeD16) { + printf("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetD16((hipDeviceptr_t)(A_d + offset), memsetval, N - offset)); + } else if (type == hipMemsetTypeD32) { + printf("testhipMemsetD32 N=%zu memsetD32val=%8x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetD32((hipDeviceptr_t)(A_d + offset), memsetval, N - offset)); } - HIPCHECK(hipFree(A_d)); - free(A_h); - return testResult; -} - -bool testhipMemsetD16(short memsetD16val,int p_gpuDevice) -{ - size_t Nbytes = N*sizeof(int); - printf ("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", N, memsetD16val, p_gpuDevice); - short *A_d; - short *A_h; - bool testResult = true; - - HIPCHECK ( hipMalloc(&A_d, Nbytes) ); - A_h = (short*)malloc(Nbytes); - HIPCHECK ( hipMemsetD16((hipDeviceptr_t)A_d, memsetD16val, N) ); - HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - - for (int i=0; i (A_h[i]), static_cast (memsetval)); + break; + } } - HIPCHECK(hipFree(A_d)); - free(A_h); - return testResult; + } + HIPCHECK(hipFree(A_d)); + free(A_h); + return testResult; } -bool testhipMemsetD8(char memsetD8val,int p_gpuDevice) -{ - size_t Nbytes = N*sizeof(int); - printf ("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n", N, memsetD8val, p_gpuDevice); - char *A_d; - char *A_h; - bool testResult = true; - - HIPCHECK ( hipMalloc(&A_d, Nbytes) ); - A_h = (char*)malloc(Nbytes); - HIPCHECK ( hipMemsetD8((hipDeviceptr_t)A_d, memsetD8val, N) ); - HIPCHECK ( hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - - for (int i=0; i +bool testhipMemsetAsync(T*A_h, T*A_d, T memsetval, enum MemsetType type, + int p_gpuDevice) { + size_t Nbytes = N * sizeof(T); + bool testResult = true; + HIPCHECK(hipMalloc(reinterpret_cast (&A_d), Nbytes)); + A_h = reinterpret_cast (malloc(Nbytes)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + loop(offset, MAX_OFFSET) { + if (type == hipMemsetTypeDefault) { + printf("testhipMemsetAsync N=%zu memsetval=%2x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetAsync(A_d+offset, memsetval, Nbytes-offset, stream)); + } else if (type == hipMemsetTypeD8) { + printf("testhipMemsetD8Async N=%zu memsetD8val=%2x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream)); + } else if (type == hipMemsetTypeD16) { + printf("testhipMemsetD16Async N=%zu memsetD16val=%8x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream)); + } else if (type == hipMemsetTypeD32) { + printf("testhipMemsetD32Async N=%zu memsetD32val=%8x device=%d\n", + (N - offset), memsetval, p_gpuDevice); + HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream)); } - HIPCHECK(hipFree(A_d)); - free(A_h); - return testResult; -} + HIPCHECK(hipStreamSynchronize(stream)); + HIPCHECK(hipMemcpy(A_h, reinterpret_cast (A_d), Nbytes, hipMemcpyDeviceToHost)); -bool testhipMemsetAsync(int memsetval,int p_gpuDevice) -{ - size_t Nbytes = N*sizeof(int); - printf ("testhipMemsetAsync N=%zu memsetval=%2x device=%d\n", N, memsetval, p_gpuDevice); - char *A_d; - char *A_h; - bool testResult = true; - - HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) ); - A_h = (char*)malloc(Nbytes); - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK ( hipMemsetAsync(A_d, memsetval, Nbytes, stream )); - HIPCHECK ( hipStreamSynchronize(stream)); - HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost)); - - for (int i=0; i (A_h[i])); + break; + } } - HIPCHECK(hipFree((void*)A_d)); - HIPCHECK(hipStreamDestroy(stream)); - free(A_h); - return testResult; + } + HIPCHECK(hipFree(reinterpret_cast (A_d)) ); + HIPCHECK(hipStreamDestroy(stream)); + free(A_h); + return testResult; } -bool testhipMemsetD32Async(int memsetD32val,int p_gpuDevice) -{ - size_t Nbytes = N*sizeof(int); - printf ("testhipMemsetD32Async N=%zu memsetval=%8x device=%d\n", N, memsetD32val, p_gpuDevice); - int *A_d; - int *A_h; - bool testResult = true; +int main(int argc, char *argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + bool testResult = true; + char * cA_d; + char * cA_h; + int16_t * siA_d; + int16_t * siA_h; + int32_t * iA_d; + int32_t * iA_h; + HIPCHECK(hipSetDevice(p_gpuDevice)); + testResult &= testhipMemsetSmallSize(memsetval, p_gpuDevice); - HIPCHECK ( hipMalloc((void**)&A_d, Nbytes) ); - A_h = (int*)malloc(Nbytes); - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK ( hipMemsetD32Async((hipDeviceptr_t)A_d, memsetD32val, N, stream )); - HIPCHECK ( hipStreamSynchronize(stream)); - HIPCHECK ( hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost)); + testResult &= testhipMemset(cA_h, cA_d, memsetval, hipMemsetTypeDefault, p_gpuDevice); + testResult &= testhipMemset(iA_h, iA_d, memsetD32val, hipMemsetTypeD32, p_gpuDevice); + testResult &= testhipMemset(siA_h, siA_d, memsetD16val, hipMemsetTypeD16, p_gpuDevice); + testResult &= testhipMemset(cA_h, cA_d, memsetD8val, hipMemsetTypeD8, p_gpuDevice); - for (int i=0; i Date: Fri, 15 May 2020 09:05:06 -0400 Subject: [PATCH 07/11] enable occupancy sample on HIP-Clang Change-Id: I236daad743bbbc7f8daa41e8157a4ff73058518b [ROCm/hip commit: 700474db6f92a0acaec2316648ff47b7cc9e28f3] --- projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp b/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp index e772e82b1d..7b32b6d0fa 100644 --- a/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ b/projects/hip/samples/2_Cookbook/13_occupancy/occupancy.cpp @@ -44,8 +44,6 @@ void multiplyCPU(float* C, float* A, float* B, int N){ } } -#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) - void launchKernel(float* C, float* A, float* B, bool manual){ hipDeviceProp_t devProp; @@ -95,10 +93,8 @@ void launchKernel(float* C, float* A, float* B, bool manual){ std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl; } } -#endif int main() { -#if defined(__HIP_PLATFORM_HCC__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__) float *A, *B, *C0, *C1, *cpuC; float *Ad, *Bd, *C0d, *C1d; int errors=0; @@ -177,8 +173,5 @@ int main() { free(C0); free(C1); free(cpuC); -#else - std::cout <<"hipOccupancyMaxPotentialBlockSize template not support for Clang compiler"< Date: Wed, 20 May 2020 12:12:49 -0400 Subject: [PATCH 08/11] Add a test case for async double memset Change-Id: I8a1df610c8d3d942651f258e7812e8697067c347 [ROCm/hip commit: 4503d44e12a6a019f316007af5ff97a84293786a] --- .../tests/src/runtimeApi/memory/hipMemset.cpp | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp b/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp index f1fbbe8f8b..f08b6c921f 100644 --- a/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp +++ b/projects/hip/tests/src/runtimeApi/memory/hipMemset.cpp @@ -33,6 +33,8 @@ THE SOFTWARE. #define MAX_OFFSET 3 // To test memset on unaligned pointer #define loop(offset, offsetMax) for (int offset = offsetMax; offset >= 0; offset --) + +#include #include "hip/hip_runtime.h" #include "test_common.h" enum MemsetType { @@ -154,6 +156,33 @@ bool testhipMemsetAsync(T*A_h, T*A_d, T memsetval, enum MemsetType type, return testResult; } +bool testhipMemset2AsyncOps() { + printf("testhipMemset2AsyncOps 2 memset operations at the same time\n"); + std::vector v; + v.resize(2048); + float* p2, *p3; + hipMalloc(reinterpret_cast(&p2), 4096 + 4096*2); + p3 = p2+2048; + hipStream_t s; + hipStreamCreate(&s); + hipMemsetAsync(p2, 0, 32*32*4, s); + hipMemsetD32Async(p3, 0x3fe00000, 32*32, s ); + hipStreamSynchronize(s); + for (int i = 0; i < 256; ++i) { + hipMemsetAsync(p2, 0, 32*32*4, s); + hipMemsetD32Async(p3, 0x3fe00000, 32*32, s ); + } + hipStreamSynchronize(s); + hipDeviceSynchronize(); + hipMemcpy(&v[0], p2, 1024, hipMemcpyDeviceToHost); + hipMemcpy(&v[1024], p3, 1024, hipMemcpyDeviceToHost); + if ((v[0] != 0) || (v[1024] != 1.75f)) { + printf("mismatch (%f != 0) or (%f != 1.75f)\n", v[0], v[1024]); + return false; + } + return true; +} + int main(int argc, char *argv[]) { HipTest::parseStandardArguments(argc, argv, true); bool testResult = true; @@ -175,6 +204,7 @@ int main(int argc, char *argv[]) { testResult &= testhipMemsetAsync(iA_h, iA_d, memsetD32val, hipMemsetTypeD32, p_gpuDevice); testResult &= testhipMemsetAsync(siA_h, siA_d, memsetD16val, hipMemsetTypeD16, p_gpuDevice); testResult &= testhipMemsetAsync(cA_h, cA_d, memsetD8val, hipMemsetTypeD8, p_gpuDevice); + testResult &= testhipMemset2AsyncOps(); if (testResult) passed(); failed("Output Mismatch\n"); } From 7836dfd3222a6a4c34aa0520640e40cb344fd189 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Wed, 20 May 2020 10:54:57 -0700 Subject: [PATCH 09/11] Fix elapsed time calc for hipEventElapsedTime If the start and stop events have same command internally then measure command end to command start Change-Id: Ie70cfa37c06c06573f0ed58dab2bbe4434c1724b [ROCm/hip commit: 50be95e169c8d0df66201db6334f60b8a29dc13b] --- projects/hip/rocclr/hip_event.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/projects/hip/rocclr/hip_event.cpp b/projects/hip/rocclr/hip_event.cpp index dddadd1bfd..2e4870834f 100644 --- a/projects/hip/rocclr/hip_event.cpp +++ b/projects/hip/rocclr/hip_event.cpp @@ -90,9 +90,15 @@ hipError_t Event::elapsedTime(Event& eStop, float& ms) { return hipErrorNotReady; } - ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - + // For certain HIP Api's that take start and stop event + // the command is the same + if (event_ == eStop.event_) { + ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - + event_->profilingInfo().start_))/1000000.f; + } else { + ms = static_cast(static_cast(eStop.event_->profilingInfo().end_ - event_->profilingInfo().end_))/1000000.f; - + } return hipSuccess; } From ca8de0f6c8c6f544f950e644022271e8c0ea6cfa Mon Sep 17 00:00:00 2001 From: Sarbojit Sarkar Date: Wed, 20 May 2020 03:50:36 -0400 Subject: [PATCH 10/11] added null check Change-Id: I7631965286bad5684bacb7e38c63031b5c691dc0 [ROCm/hip commit: 87d1101a33a7bf8c1d7f02dfeba87008363c3c81] --- projects/hip/rocclr/hip_memory.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/hip/rocclr/hip_memory.cpp b/projects/hip/rocclr/hip_memory.cpp index df331ea9ad..3aad7f57ba 100755 --- a/projects/hip/rocclr/hip_memory.cpp +++ b/projects/hip/rocclr/hip_memory.cpp @@ -110,6 +110,9 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin // Skip if nothing needs writing. return hipSuccess; } + if (dst == nullptr || src == nullptr) { + return hipErrorInvalidValue; + } amd::Command* command = nullptr; amd::Command::EventWaitList waitList; From 02d5a7c4b9fd75f3b6eb6b02005fd02206a28abd Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Fri, 15 May 2020 16:47:26 -0700 Subject: [PATCH 11/11] load functions when registered if lazy load is disabled Change-Id: I08c9684ca4ab86a6294ed9e670f3125c1a0cc6c4 [ROCm/hip commit: 22edde84d541c38c10024d6c21cd78ac68f6e9c1] --- projects/hip/rocclr/hip_platform.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/projects/hip/rocclr/hip_platform.cpp b/projects/hip/rocclr/hip_platform.cpp index debe33b6f3..0be928f9a4 100755 --- a/projects/hip/rocclr/hip_platform.cpp +++ b/projects/hip/rocclr/hip_platform.cpp @@ -592,6 +592,13 @@ void PlatformState::popExec(ihipExec_t& exec) { execStack_.pop(); } +namespace { +const int HIP_ENABLE_DEFERRED_LOADING{[] () { + char *var = getenv("HIP_ENABLE_DEFERRED_LOADING"); + return var ? atoi(var) : 1; +}()}; +} /* namespace */ + extern "C" void __hipRegisterFunction( std::vector >* modules, const void* hostFunction, @@ -606,7 +613,7 @@ extern "C" void __hipRegisterFunction( { PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector{g_devices.size()}}; PlatformState::instance().registerFunction(hostFunction, func); - if (!HIP_ENABLE_LAZY_KERNEL_LOADING) { + if (!HIP_ENABLE_DEFERRED_LOADING) { HIP_INIT(); for (size_t i = 0; i < g_devices.size(); ++i) { PlatformState::instance().getFunc(hostFunction, i);