From cd7de89fc3a4196b41ac9a1ea67c536b6544f8de Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Thu, 2 Apr 2020 22:13:26 -0400 Subject: [PATCH] SWDEV-229840 - Improve Error Codes. RocKernel and RocProgram Change-Id: I8f785308e0562a50924f8bdd02e88c92a759f01a --- rocclr/device/rocm/rockernel.cpp | 74 ++++++++++++++++++++++--------- rocclr/device/rocm/rocprogram.cpp | 8 ++++ 2 files changed, 61 insertions(+), 21 deletions(-) mode change 100644 => 100755 rocclr/device/rocm/rockernel.cpp mode change 100644 => 100755 rocclr/device/rocm/rocprogram.cpp diff --git a/rocclr/device/rocm/rockernel.cpp b/rocclr/device/rocm/rockernel.cpp old mode 100644 new mode 100755 index 893966c441..28fd20c42c --- a/rocclr/device/rocm/rockernel.cpp +++ b/rocclr/device/rocm/rockernel.cpp @@ -49,6 +49,7 @@ bool LightningKernel::init() { hsa_agent_t hsaDevice = program()->hsaDevice(); if (!GetAttrCodePropMetadata()) { + LogError("[ROC][Kernel] Could not get Code Prop Meta Data \n"); return false; } @@ -75,6 +76,8 @@ bool LightningKernel::init() { } if (!SetAvailableSgprVgpr(targetIdent)) { + LogError(("[ROC][Kernel] Cannot set available SGPR/VGPR for target Ident" + targetIdent).c_str()); + LogError("\n"); return false; } @@ -85,11 +88,17 @@ bool LightningKernel::init() { hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(), symbolName().c_str(), &agent, &symbol); - if (hsaStatus == HSA_STATUS_SUCCESS) { - hsaStatus = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &kernelCodeHandle_); - } if (hsaStatus != HSA_STATUS_SUCCESS) { + LogError(("[ROC][Kernel] Cannot Get Symbol : " + symbolName()).c_str()); + LogError("\n"); + return false; + } + + hsaStatus = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &kernelCodeHandle_); + if (hsaStatus != HSA_STATUS_SUCCESS) { + LogError(("[ROC][Kernel] Cannot Get Symbol Info : " + symbolName()).c_str()); + LogError("\n"); return false; } @@ -106,28 +115,38 @@ bool LightningKernel::init() { hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(), RuntimeHandle().c_str(), &agent, &kernelSymbol); - if (hsaStatus == HSA_STATUS_SUCCESS) { - hsaStatus = hsa_executable_symbol_get_info(kernelSymbol, - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, - &variable_size); - } - if (hsaStatus == HSA_STATUS_SUCCESS) { - hsaStatus = hsa_executable_symbol_get_info(kernelSymbol, - HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, - &variable_address); + if (hsaStatus != HSA_STATUS_SUCCESS) { + LogError(("[ROC][Kernel] Cannot get Kernel Symbol by name" + RuntimeHandle()).c_str()); + LogError("\n"); + return false; } - if (hsaStatus == HSA_STATUS_SUCCESS) { - const struct RuntimeHandle runtime_handle = { - kernelCodeHandle_, - WorkitemPrivateSegmentByteSize(), - WorkgroupGroupSegmentByteSize() - }; - hsaStatus = hsa_memory_copy(reinterpret_cast(variable_address), - &runtime_handle, variable_size); + hsaStatus = hsa_executable_symbol_get_info(kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, + &variable_size); + if (hsaStatus != HSA_STATUS_SUCCESS) { + LogError("[ROC][Kernel] Cannot get Kernel Symbol Info \n"); + return false; } + hsaStatus = hsa_executable_symbol_get_info(kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &variable_address); + if (hsaStatus != HSA_STATUS_SUCCESS) { + LogError("[ROC][Kernel] Cannot get Kernel Address \n"); + return false; + } + + const struct RuntimeHandle runtime_handle = { + kernelCodeHandle_, + WorkitemPrivateSegmentByteSize(), + WorkgroupGroupSegmentByteSize() + }; + hsaStatus = hsa_memory_copy(reinterpret_cast(variable_address), + &runtime_handle, variable_size); + if (hsaStatus != HSA_STATUS_SUCCESS) { + LogError("[ROC][Kernel] HSA Memory copy failed \n"); return false; } } @@ -135,6 +154,7 @@ bool LightningKernel::init() { uint32_t wavefront_size = 0; if (hsa_agent_get_info(program()->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) != HSA_STATUS_SUCCESS) { + LogError("[ROC][Kernel] Cannot get Wavefront Size \n"); return false; } assert(wavefront_size > 0); @@ -175,12 +195,16 @@ bool HSAILKernel::init() { errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY, openClKernelName.c_str(), nullptr, &sizeOfArgList); if (errorCode != ACL_SUCCESS) { + LogError("[ROC][Kernel] Query Info failed with error code: " + itoa(errorCode)); + LogError("\n"); return false; } std::unique_ptr argList(new char[sizeOfArgList]); errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_ARGUMENT_ARRAY, openClKernelName.c_str(), argList.get(), &sizeOfArgList); if (errorCode != ACL_SUCCESS) { + LogError("[ROC][Kernel] Query Info failed with error code: " + itoa(errorCode)); + LogError("\n"); return false; } @@ -197,18 +221,24 @@ bool HSAILKernel::init() { errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_WORK_GROUP_SIZE, openClKernelName.c_str(), nullptr, &sizeOfWorkGroupSize); if (errorCode != ACL_SUCCESS) { + LogError("[ROC][Kernel] Query Info failed with error code: " + itoa(errorCode)); + LogError("\n"); return false; } errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_WORK_GROUP_SIZE, openClKernelName.c_str(), workGroupInfo_.compileSize_, &sizeOfWorkGroupSize); if (errorCode != ACL_SUCCESS) { + LogError("[ROC][Kernel] Query Info failed with error code: " + itoa(errorCode)); + LogError("\n"); return false; } uint32_t wavefront_size = 0; if (HSA_STATUS_SUCCESS != hsa_agent_get_info(program()->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size)) { + LogError("[ROC][Kernel] Could not get Wave Info Size: " + itoa(errorCode)); + LogError("\n"); return false; } assert(wavefront_size > 0); @@ -247,6 +277,8 @@ bool HSAILKernel::init() { errorCode = aclQueryInfo(compileHandle, program()->binaryElf(), RT_GPU_PRINTF_ARRAY, openClKernelName.c_str(), nullptr, &sizeOfPrintfList); if (errorCode != ACL_SUCCESS) { + LogError("[ROC][Kernel] Query Info failed with error code: " + itoa(errorCode)); + LogError("\n"); return false; } diff --git a/rocclr/device/rocm/rocprogram.cpp b/rocclr/device/rocm/rocprogram.cpp old mode 100644 new mode 100755 index d6aac9cb0f..3a14a82908 --- a/rocclr/device/rocm/rocprogram.cpp +++ b/rocclr/device/rocm/rocprogram.cpp @@ -104,6 +104,8 @@ bool Program::initClBinary(char* binaryIn, size_t size) { char* decryptedBin; size_t decryptedSize; if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) { + buildLog_ += "Decrypting ELF Failed "; + buildLog_ += "\n"; return false; } if (decryptedBin != nullptr) { @@ -118,6 +120,8 @@ bool Program::initClBinary(char* binaryIn, size_t size) { if (decryptedBin != nullptr) { delete[] decryptedBin; } + buildLog_ += "Elf Magic failed"; + buildLog_ += "\n"; return false; } @@ -419,6 +423,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_ workitemPrivateSegmentByteSize, kernargSegmentByteSize, kernargSegmentAlignment); if (!aKernel->init()) { + buildLog_ += "Error: Kernel Init Failed "; + buildLog_ += "\n"; return false; } aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize); @@ -476,6 +482,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s #if defined(USE_COMGR_LIBRARY) // Find the size of global variables from the binary if (!FindGlobalVarSize(binary, binSize)) { + buildLog_ += "Error: Cannot Global Var Sizes "; + buildLog_ += "\n"; return false; }