From da0ca9421937941a97ae272c748cf320b2dbba7c Mon Sep 17 00:00:00 2001 From: jie1zhan Date: Fri, 22 Jul 2022 11:51:45 +0800 Subject: [PATCH] Free the executable memory when it is no longer used Fix the rocrtst test failure - The runtime failed to allocate the necessary resources Change-Id: Ie4ffeb939fb322db068f3132a7973a359c204176 [ROCm/ROCR-Runtime commit: 8a0fe6a83271c0c9297404919a828043aa50eaca] --- .../rocr-runtime/rocrtst/common/base_rocr.h | 13 +++ .../rocrtst/common/base_rocr_utils.cc | 81 +++++-------------- .../rocrtst/suites/functional/cu_masking.h | 1 - 3 files changed, 34 insertions(+), 61 deletions(-) diff --git a/projects/rocr-runtime/rocrtst/common/base_rocr.h b/projects/rocr-runtime/rocrtst/common/base_rocr.h index 74e76d318a..c044f668ee 100755 --- a/projects/rocr-runtime/rocrtst/common/base_rocr.h +++ b/projects/rocr-runtime/rocrtst/common/base_rocr.h @@ -56,6 +56,7 @@ #include "common/hsatimer.h" #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" +#include "common/rocr.h" namespace rocrtst { @@ -147,6 +148,16 @@ class BaseRocR { return main_queue_; } + void clear_code_object() { + for(std::vector<CodeObject*>::iterator it = objs_.begin(); it != objs_.end(); ++it) { + delete *it; + } + objs_.clear(); + } + void set_code_object(CodeObject* obj) { + objs_.push_back(obj); + } + hsa_kernel_dispatch_packet_t& aql(void) { return aql_; } @@ -242,6 +253,8 @@ class BaseRocR { hsa_queue_t* main_queue_; ///< AQL queue used for packets + std::vector<CodeObject*> objs_; ///< CodeObject vector + hsa_agent_t gpu_device1_; ///< Handle to first GPU found hsa_agent_t cpu_device_; ///< Handle to CPU diff --git a/projects/rocr-runtime/rocrtst/common/base_rocr_utils.cc b/projects/rocr-runtime/rocrtst/common/base_rocr_utils.cc index e16e3b4d75..73dbe00eed 100755 --- a/projects/rocr-runtime/rocrtst/common/base_rocr_utils.cc +++ b/projects/rocr-runtime/rocrtst/common/base_rocr_utils.cc @@ -81,7 +81,6 @@ namespace rocrtst { return (ret); \ } \ } - // Clean up some of the common handles and memory used 
by BaseRocR code, then // shut down hsa. Restore HSA_ENABLE_INTERRUPT to original value, if necessary hsa_status_t CommonCleanUp(BaseRocR* test) { @@ -106,6 +105,7 @@ hsa_status_t CommonCleanUp(BaseRocR* test) { RET_IF_HSA_UTILS_ERR(err); } + test->clear_code_object(); err = hsa_shut_down(); RET_IF_HSA_UTILS_ERR(err); @@ -318,74 +318,35 @@ std::string LocateKernelFile(std::string filename, hsa_agent_t agent) { // -kernarg_align() hsa_status_t LoadKernelFromObjFile(BaseRocR* test, hsa_agent_t* agent) { hsa_status_t err; - hsa_code_object_reader_t code_obj_rdr = {0}; - hsa_executable_t executable = {0}; + Kernel kern; + std::string kern_name; + char agent_name[64]; + std::string obj_file; + CodeObject* obj; assert(test != nullptr); if (agent == nullptr) { agent = test->gpu_device1(); // Assume GPU agent for now } - std::string filename = LocateKernelFile(test->kernel_file_name(), *agent); + obj_file = LocateKernelFile(test->kernel_file_name(), *agent); + Device *gpu = (Device*)(agent - offsetof(Device, agent)); + obj = new CodeObject(obj_file, *gpu); + test->set_code_object(obj); + kern_name = test->kernel_name() + ".kd"; - hsa_file_t file_handle = open(filename.c_str(), O_RDONLY); - if (file_handle == -1) { - std::cout << "failed to open " << filename.c_str() << " at line " - << __LINE__ << ", file: " << __FILE__ << std::endl; - - return (hsa_status_t) errno; + if(!obj->GetKernel(kern_name, kern)) { + ADD_FAILURE(); + return HSA_STATUS_ERROR; } - err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); - RET_IF_HSA_UTILS_ERR(err); - close(file_handle); - - err = hsa_executable_create_alt(HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - NULL, &executable); - RET_IF_HSA_UTILS_ERR(err); - err = hsa_executable_load_agent_code_object(executable, *agent, code_obj_rdr, - NULL, NULL); - RET_IF_HSA_UTILS_ERR(err); - err = hsa_executable_freeze(executable, NULL); - RET_IF_HSA_UTILS_ERR(err); - - std::string kern_name = test->kernel_name(); - 
hsa_executable_symbol_t kern_sym; - err = hsa_executable_get_symbol(executable, NULL, (kern_name + ".kd").c_str(), *agent, - 0, &kern_sym); - RET_IF_HSA_UTILS_ERR(err); - - uint64_t codeHandle; - err = hsa_executable_symbol_get_info(kern_sym, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle); - RET_IF_HSA_UTILS_ERR(err); - test->set_kernel_object(codeHandle); - - uint32_t val; - err = hsa_executable_symbol_get_info(kern_sym, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &val); - RET_IF_HSA_UTILS_ERR(err); - test->set_private_segment_size(val); - - err = hsa_executable_symbol_get_info(kern_sym, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &val); - RET_IF_HSA_UTILS_ERR(err); - test->set_group_segment_size(val); - - // Remaining queries only supported on code object v3. - err = hsa_executable_symbol_get_info(kern_sym, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &val); - RET_IF_HSA_UTILS_ERR(err); - test->set_kernarg_size(val); - - err = hsa_executable_symbol_get_info(kern_sym, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &val); - RET_IF_HSA_UTILS_ERR(err); - assert(val >= 16 && "Reported kernarg size is too small."); - val = (val == 0) ? 16 : val; - test->set_kernarg_align(val); - + test->set_kernel_object(kern.handle); + test->set_private_segment_size(kern.scratch); + test->set_group_segment_size(kern.group); + test->set_kernarg_size(kern.kernarg_size); + assert(kern.kernarg_align >= 16 && "Reported kernarg size is too small."); + kern.kernarg_size = (kern.kernarg_size == 0) ? 
16 : kern.kernarg_size; + test->set_kernarg_align(kern.kernarg_size); return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/rocrtst/suites/functional/cu_masking.h b/projects/rocr-runtime/rocrtst/suites/functional/cu_masking.h index 4eac1d3c54..0ea1ca8972 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/cu_masking.h +++ b/projects/rocr-runtime/rocrtst/suites/functional/cu_masking.h @@ -50,7 +50,6 @@ #include "suites/test_common/test_base.h" #include "common/base_rocr.h" #include "common/common.h" -#include "common/rocr.h" // @Brief: This class is defined to measure the mean latency of enqueuing // the packets to an empty kernel