From d5776751ca65b4d03c4dbd896007db7cbd07c4f2 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 17 Dec 2019 20:18:36 -0500 Subject: [PATCH] P4 to Git Change 2047424 by lmoriche@lmoriche_opencl_dev2 on 2019/12/17 20:13:01 SWDEV-216705 - [hipclang-vdi-rocm][FBA-80]Test crash when all GPUs are hidden by ROCR_VISIBLE_DEVICES Return an error instead of dereferencing a null pointer. This should address the issue described in the ticket, but more places need fixing in the runtime to avoid crashes for corner cases. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_context.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#92 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#58 edit [ROCm/hip commit: f6d38a725c64bc1b82ac9cce097ae6988d481d9f] --- projects/hip/api/hip/hip_context.cpp | 3 ++- projects/hip/api/hip/hip_device_runtime.cpp | 5 +++-- projects/hip/api/hip/hip_memory.cpp | 4 ++++ projects/hip/api/hip/hip_platform.cpp | 7 ++++++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/projects/hip/api/hip/hip_context.cpp b/projects/hip/api/hip/hip_context.cpp index cc590678c5..c4a68777ab 100644 --- a/projects/hip/api/hip/hip_context.cpp +++ b/projects/hip/api/hip/hip_context.cpp @@ -106,7 +106,8 @@ amd::HostQueue* getNullStream(amd::Context& context) { } amd::HostQueue* getNullStream() { - return getNullStream(*getCurrentContext()); + amd::Context* context = getCurrentContext(); + return context ? getNullStream(*context) : nullptr; } }; diff --git a/projects/hip/api/hip/hip_device_runtime.cpp b/projects/hip/api/hip/hip_device_runtime.cpp index d3d0b10b87..a36cad8c3b 100644 --- a/projects/hip/api/hip/hip_device_runtime.cpp +++ b/projects/hip/api/hip/hip_device_runtime.cpp @@ -437,7 +437,6 @@ int ihipGetDevice() { return i; } } - assert(0 && "Current device not found?!"); return -1; } @@ -446,7 +445,9 @@ hipError_t hipGetDevice ( int* deviceId ) { if (deviceId != nullptr) { int dev = ihipGetDevice(); - assert(dev != -1); + if (dev == -1) { + HIP_RETURN(hipErrorNoDevice); + } *deviceId = dev; HIP_RETURN(hipSuccess); } else { diff --git a/projects/hip/api/hip/hip_memory.cpp b/projects/hip/api/hip/hip_memory.cpp index e6f3f2f622..49895e67e9 100644 --- a/projects/hip/api/hip/hip_memory.cpp +++ b/projects/hip/api/hip/hip_memory.cpp @@ -69,6 +69,10 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)? hip::host_context : hip::getCurrentContext(); + if (amdContext == nullptr) { + return hipErrorMemoryAllocation; + } + if (amdContext->devices()[0]->info().maxMemAllocSize_ < sizeBytes) { return hipErrorMemoryAllocation; } diff --git a/projects/hip/api/hip/hip_platform.cpp b/projects/hip/api/hip/hip_platform.cpp index 05c8d6fce7..1aa356cd36 100644 --- a/projects/hip/api/hip/hip_platform.cpp +++ b/projects/hip/api/hip/hip_platform.cpp @@ -469,7 +469,9 @@ extern "C" void __hipUnregisterFatBinary(std::vector< std::pair(module.first))->release(); } }); - PlatformState::instance().unregisterVar((*modules)[0].first); + if (modules->size() > 0) { + PlatformState::instance().unregisterVar((*modules)[0].first); + } PlatformState::instance().removeFatBinary(modules); } @@ -532,6 +534,9 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) HIP_INIT_API(NONE, hostFunction); int deviceId = ihipGetDevice(); + if (deviceId == -1) { + HIP_RETURN(hipErrorNoDevice); + } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { HIP_RETURN(hipErrorInvalidDeviceFunction);