From 2d42e93cdf7f8ae2ca88bd942db6e29c97c9e8ff Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 20 Aug 2020 04:25:39 -0500 Subject: [PATCH] kernel objects dumping Change-Id: I5a16e05b7df438efa903948701b65a9ced99e5f3 initial codeobj event implementation Change-Id: Ia7fac3c2b9897a004cfe88c4de82ba8c18284196 update - codeobj event implementation Change-Id: I2b91b6e689875af03f0086f5a0872a97a629fd83 update2 - codeobj event implementation Change-Id: Icff75f14fd21963e40db95373fa74880957a9e32 fix - codeobj event implementation Change-Id: I76c33c875cb429fb12a974bb408b217f187b4536 URI buffer fix - codeobj event implementation Change-Id: I7ce1a758e021455da3fe5b8a6e4ae3ab46e9760e HSA events exposing Change-Id: I3664ab4e5111c4ccedaf068dcb19f48055f0ef9b HSA events data struct normalizing Change-Id: I365ef0db45e0a9314bd2a1a4d29dd4eb4e91297d [ROCm/rocprofiler commit: 8850e46071b7e12b9a9f1497a27f8edc20760461] --- projects/rocprofiler/CMakeLists.txt | 10 +- projects/rocprofiler/inc/rocprofiler.h | 19 ++- projects/rocprofiler/src/CMakeLists.txt | 2 +- projects/rocprofiler/src/core/activity.cpp | 64 ++++++++ projects/rocprofiler/src/core/activity.h | 26 ++++ .../rocprofiler/src/core/hsa_interceptor.h | 142 +++++++++++++----- projects/rocprofiler/test/tool/tool.cpp | 95 +++++++++++- 7 files changed, 317 insertions(+), 41 deletions(-) create mode 100644 projects/rocprofiler/src/core/activity.h diff --git a/projects/rocprofiler/CMakeLists.txt b/projects/rocprofiler/CMakeLists.txt index e6765e47b0..c5de434d7f 100644 --- a/projects/rocprofiler/CMakeLists.txt +++ b/projects/rocprofiler/CMakeLists.txt @@ -135,8 +135,14 @@ add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} # Install header and library install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h DESTINATION ${DEST_NAME}/include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h DESTINATION include/${DEST_NAME} ) 
+install ( FILES + ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h + DESTINATION ${DEST_NAME}/include ) +install ( FILES + ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h + DESTINATION include/${DEST_NAME} ) # rpl_run.sh tblextr.py txt2xml.sh install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/bin/rpl_run.sh diff --git a/projects/rocprofiler/inc/rocprofiler.h b/projects/rocprofiler/inc/rocprofiler.h index b176cadf4b..4a9661905c 100644 --- a/projects/rocprofiler/inc/rocprofiler.h +++ b/projects/rocprofiler/inc/rocprofiler.h @@ -74,6 +74,7 @@ typedef struct { uint32_t hsa_intercepting; uint32_t k_concurrent; uint32_t opt_mode; + uint32_t obj_dumping; } rocprofiler_settings_t; //////////////////////////////////////////////////////////////////////////////// @@ -481,7 +482,8 @@ typedef enum { ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback - ROCPROFILER_HSA_CB_ID_KSYMBOL = 4 // Loading/unloading of kernel symbol + ROCPROFILER_HSA_CB_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol + ROCPROFILER_HSA_CB_ID_CODEOBJ = 5 // Loading/unloading of code object } rocprofiler_hsa_cb_id_t; // HSA callback data type @@ -516,8 +518,20 @@ typedef struct { uint64_t object; // kernel symbol object const char* name; // kernel symbol name uint32_t name_length; // kernel symbol name length - int destroy; // symbol executable destroy + int unload; // symbol executable destroy } ksymbol; + struct { + uint32_t storage_type; // code object storage type + int storage_file; // origin file descriptor + uint64_t memory_base; // origin memory base + uint64_t memory_size; // origin memory size + uint64_t load_base; // codeobj load base + uint64_t load_size; // codeobj load size + uint64_t load_delta; // codeobj load delta + uint32_t uri_length; // URI string length + char* uri; // 
URI string + int unload; // unload flag + } codeobj; }; } rocprofiler_hsa_callback_data_t; @@ -534,6 +548,7 @@ typedef struct { rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback rocprofiler_hsa_callback_fun_t submit; // packet submit callback rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback + rocprofiler_hsa_callback_fun_t codeobj; // codeobject load/unload callback } rocprofiler_hsa_callbacks_t; // Set callbacks. If the callback is NULL then it is disabled. diff --git a/projects/rocprofiler/src/CMakeLists.txt b/projects/rocprofiler/src/CMakeLists.txt index 4c97ea6f51..ccbe31cbbf 100644 --- a/projects/rocprofiler/src/CMakeLists.txt +++ b/projects/rocprofiler/src/CMakeLists.txt @@ -35,4 +35,4 @@ set ( LIB_SRC ) add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_KMT_LIB_PATH}/.. ) -target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++) +target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ atomic ) diff --git a/projects/rocprofiler/src/core/activity.cpp b/projects/rocprofiler/src/core/activity.cpp index c72977e127..d777a3a8a9 100644 --- a/projects/rocprofiler/src/core/activity.cpp +++ b/projects/rocprofiler/src/core/activity.cpp @@ -20,6 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*******************************************************************************/ +#define ROCP_INTERNAL_BUILD +#include "activity.h" + #include #include #include @@ -168,4 +171,65 @@ PUBLIC_API bool EnableActivityCallback(uint32_t op, bool enable) { } return true; } + +struct evt_cb_entry_t { + void* callback; + void* arg; +}; +typedef std::atomic evt_cb_entry_atomic_t; +evt_cb_entry_atomic_t evt_cb_table[HSA_EVT_ID_NUMBER]{}; + +hsa_status_t codeobj_evt_callback( + rocprofiler_hsa_cb_id_t id, + const rocprofiler_hsa_callback_data_t* cb_data, + void* arg) +{ + evt_cb_entry_t evt = evt_cb_table[id].load(std::memory_order_relaxed); + activity_rtapi_callback_t evt_callback = (activity_rtapi_callback_t)evt.callback; + + if (evt_callback != NULL) { + evt_callback(ACTIVITY_DOMAIN_HSA_EVT, id, cb_data, evt.arg); + } + + return HSA_STATUS_SUCCESS; +} + +PUBLIC_API const char* GetEvtName(uint32_t op) { return strdup("CODEOBJ"); } + +PUBLIC_API bool RegisterEvtCallback(uint32_t op, void* callback, void* arg) { + evt_cb_table[op].store(evt_cb_entry_t{callback, arg}, std::memory_order_relaxed); + + rocprofiler_hsa_callbacks_t ocb{}; + switch (op) { + case HSA_EVT_ID_ALLOCATE: + ocb.allocate = codeobj_evt_callback; + break; + case HSA_EVT_ID_DEVICE: + ocb.device = codeobj_evt_callback; + break; + case HSA_EVT_ID_MEMCOPY: + ocb.memcopy = codeobj_evt_callback; + break; + case HSA_EVT_ID_SUBMIT: + ocb.submit = codeobj_evt_callback; + break; + case HSA_EVT_ID_KSYMBOL: + ocb.ksymbol = codeobj_evt_callback; + break; + case HSA_EVT_ID_CODEOBJ: + ocb.codeobj = codeobj_evt_callback; + break; + default: + fatal("invalid activity opcode"); + } + rocprofiler_set_hsa_callbacks(ocb, NULL); + + return true; +} + +PUBLIC_API bool RemoveEvtCallback(uint32_t op) { + rocprofiler_hsa_callbacks_t ocb{}; + rocprofiler_set_hsa_callbacks(ocb, NULL); + return true; +} } // extern "C" diff --git a/projects/rocprofiler/src/core/activity.h b/projects/rocprofiler/src/core/activity.h new file mode 100644 
index 0000000000..ad64c0faa0 --- /dev/null +++ b/projects/rocprofiler/src/core/activity.h @@ -0,0 +1,26 @@ +#ifndef _SRC_CORE_ACTIVITY_H +#define _SRC_CORE_ACTIVITY_H + +#ifdef ROCP_INTERNAL_BUILD +#include "inc/rocprofiler.h" +#else +#include +#endif + +#include + +// HSA EVT ID enumeration +enum hsa_evt_id_t { + HSA_EVT_ID_ALLOCATE = ROCPROFILER_HSA_CB_ID_ALLOCATE, + HSA_EVT_ID_DEVICE = ROCPROFILER_HSA_CB_ID_DEVICE, + HSA_EVT_ID_MEMCOPY = ROCPROFILER_HSA_CB_ID_MEMCOPY, + HSA_EVT_ID_SUBMIT = ROCPROFILER_HSA_CB_ID_SUBMIT, + HSA_EVT_ID_KSYMBOL = ROCPROFILER_HSA_CB_ID_KSYMBOL, + HSA_EVT_ID_CODEOBJ = ROCPROFILER_HSA_CB_ID_CODEOBJ, + HSA_EVT_ID_NUMBER +}; + +// HSA EVT callback data type +typedef rocprofiler_hsa_callback_data_t hsa_evt_data_t; + +#endif // _SRC_CORE_ACTIVITY_H diff --git a/projects/rocprofiler/src/core/hsa_interceptor.h b/projects/rocprofiler/src/core/hsa_interceptor.h index 9207730b79..ed20da96d6 100644 --- a/projects/rocprofiler/src/core/hsa_interceptor.h +++ b/projects/rocprofiler/src/core/hsa_interceptor.h @@ -51,7 +51,8 @@ SOFTWARE. (ID == ROCPROFILER_HSA_CB_ID_DEVICE) ? callbacks_.device: \ (ID == ROCPROFILER_HSA_CB_ID_MEMCOPY) ? callbacks_.memcopy: \ (ID == ROCPROFILER_HSA_CB_ID_SUBMIT) ? callbacks_.submit: \ - callbacks_.ksymbol; \ + (ID == ROCPROFILER_HSA_CB_ID_KSYMBOL) ? 
callbacks_.ksymbol: \ + callbacks_.codeobj; \ if ((__callback != NULL) && (recursion_ == false)) #define DO_HSA_CALLBACK \ @@ -230,12 +231,12 @@ class HsaInterceptor { rocprofiler_hsa_callback_data_t data{}; data.allocate.ptr = *ptr; data.allocate.size = size; - + HSA_RT(hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment)); HSA_RT(hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag)); - + DO_HSA_CALLBACK; - + IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_DEVICE) { // Scan the pool assigned devices agent_callback_data_t callback_data{pool, *ptr}; @@ -303,44 +304,116 @@ class HsaInterceptor { void* arg) { const int free_flag = reinterpret_cast(arg); - rocprofiler_hsa_callback_data_t data{}; + hsa_ven_amd_loader_code_object_storage_type_t storage_type = + HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE; + int storage_fd = -1; + uint64_t memory_base = 0; + uint64_t memory_size = 0; + uint64_t load_base = 0; + uint64_t load_size = 0; + uint64_t load_delta = 0; + uint32_t uri_len = 0; + char* uri_str = NULL; + + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE, + &storage_type)); + + if (storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) { + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE, + &storage_fd)); + if (storage_fd == -1) { + printf("CodeObjectCallback: fd == -1\n"); fflush(stdout); + abort(); + } + } else if (storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) { + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE, + &memory_base)); + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + 
loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE, + &memory_size)); + } HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, - &data.allocate.ptr)); + &load_base)); + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, + &load_size)); + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, + &load_delta)); - if (free_flag == 0) { - HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, - HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, - &data.allocate.size)); - } else { - data.allocate.size = 0; + // Getting URI + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, + &uri_len)); + + uri_str = (char*)calloc(uri_len + 1, sizeof(char)); + if (!uri_str) EXC_ABORT(HSA_STATUS_ERROR, "URI allocation"); + + HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, + uri_str)); + + if (storage_type != HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE) { + IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_CODEOBJ) { + rocprofiler_hsa_callback_data_t data{}; + data.codeobj.storage_type = storage_type; + data.codeobj.storage_file = storage_fd; + data.codeobj.memory_base = memory_base; + data.codeobj.memory_size = memory_size; + data.codeobj.load_base = load_base; + data.codeobj.load_size = load_size; + data.codeobj.load_delta = load_delta; + data.codeobj.uri_length = uri_len; + data.codeobj.uri = uri_str; + data.codeobj.unload = free_flag; + + DO_HSA_CALLBACK; + } } - // Local GPU memory - // GLOBAL; FLAGS: COARSE GRAINED - data.allocate.segment 
= HSA_AMD_SEGMENT_GLOBAL; - data.allocate.global_flag = HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED; - data.allocate.is_code = 1; + { + IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE) { + // Local GPU memory + // GLOBAL; FLAGS: COARSE GRAINED + rocprofiler_hsa_callback_data_t data{}; + data.allocate.ptr = reinterpret_cast(load_base); + data.allocate.size = (free_flag == 0) ? load_size : 0; + data.allocate.segment = HSA_AMD_SEGMENT_GLOBAL; + data.allocate.global_flag = HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED; + data.allocate.is_code = 1; - ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE); + DO_HSA_CALLBACK; + } + } - if (free_flag == 0) { + if (free_flag != 0) { IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_DEVICE) { hsa_amd_pointer_info_t pointer_info{}; uint32_t num_agents = 0; hsa_agent_t* agents = NULL; pointer_info.size = sizeof(hsa_amd_pointer_info_t); HSA_RT(hsa_amd_pointer_info( - const_cast(data.allocate.ptr), + reinterpret_cast(load_base), &pointer_info, malloc, &num_agents, &agents)); - - DeviceCallback(num_agents, agents, data.allocate.ptr); + + DeviceCallback(num_agents, agents, reinterpret_cast(load_base)); } } @@ -372,7 +445,7 @@ class HsaInterceptor { data.ksymbol.object = obj; data.ksymbol.name = name; data.ksymbol.name_length = len; - data.ksymbol.destroy = free_flag; + data.ksymbol.unload = free_flag; ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL); } @@ -388,22 +461,23 @@ class HsaInterceptor { HSA_RT(hsa_executable_freeze_fn(executable, options)); - IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE) { + IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) { + HSA_RT(hsa_executable_iterate_symbols( + executable, + KernelSymbolCallback, + reinterpret_cast(0))); + } + + unsigned is_codeobj_cb = 0; + { IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_CODEOBJ) is_codeobj_cb |= 1; } + { IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE) is_codeobj_cb |= 1; } + if (is_codeobj_cb) { LoaderApiTable.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( executable, 
CodeObjectCallback, reinterpret_cast(0)); } - { - IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) { - HSA_RT(hsa_executable_iterate_symbols( - executable, - KernelSymbolCallback, - reinterpret_cast(0))); - } - } - return status; } diff --git a/projects/rocprofiler/test/tool/tool.cpp b/projects/rocprofiler/test/tool/tool.cpp index 6cb184166f..1bc69196b1 100644 --- a/projects/rocprofiler/test/tool/tool.cpp +++ b/projects/rocprofiler/test/tool/tool.cpp @@ -87,6 +87,7 @@ struct kernel_properties_t { uint32_t sgpr_count; uint32_t fbarrier_count; hsa_signal_t signal; + uint64_t object; }; // Context stored entry type @@ -524,7 +525,7 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name); const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent); - fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), kernel-name(\"%s\")", + fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", index, agent_info->dev_index, entry->data.queue_id, @@ -539,6 +540,7 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { (entry->kernel_properties.sgpr_count + agent_info->sgpr_block_dflt) * agent_info->sgpr_block_size, entry->kernel_properties.fbarrier_count, entry->kernel_properties.signal.handle, + entry->kernel_properties.object, nik_name.c_str()); if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)", record->dispatch, @@ -780,6 +782,7 @@ void set_kernel_properties(const rocprofiler_callback_data_t* callback_data, kernel_properties_ptr->sgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, 
AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT); kernel_properties_ptr->fbarrier_count = kernel_code->workgroup_fbarrier_count; kernel_properties_ptr->signal = callback_data->completion_signal; + kernel_properties_ptr->object = callback_data->packet->kernel_object; } // Kernel disoatch callback @@ -1113,6 +1116,7 @@ rocprofiler_hsa_callbacks_t hsa_callbacks { hsa_unified_callback, hsa_unified_callback, hsa_unified_callback, + NULL, NULL }; @@ -1121,7 +1125,86 @@ hsa_status_t hsa_ksymbol_cb(rocprofiler_hsa_cb_id_t id, const rocprofiler_hsa_callback_data_t* data, void* arg) { - HsaRsrcFactory::SetKernelNameRef(data->ksymbol.object, data->ksymbol.name, data->ksymbol.destroy); + HsaRsrcFactory::SetKernelNameRef(data->ksymbol.object, data->ksymbol.name, data->ksymbol.unload); + return HSA_STATUS_SUCCESS; +} + +// code object callback +hsa_status_t codeobj_callback( + rocprofiler_hsa_cb_id_t id, + const rocprofiler_hsa_callback_data_t* data, + void* arg) +{ + static std::atomic codeobj_counter{}; + static FILE* codeobj_csv_file = NULL; + + if (data == NULL) { + printf("codeobj_callback error, data == 0\n"); fflush(stdout); + abort(); + } + + if (id == ROCPROFILER_HSA_CB_ID_CODEOBJ) { + const uint64_t codeobj_index = codeobj_counter.fetch_add(1, std::memory_order_relaxed); + const uint64_t ts = HsaRsrcFactory::Instance().TimestampNs(); + const int unload = data->codeobj.unload; + const uint64_t load_base = data->codeobj.load_base; + const uint64_t load_size = data->codeobj.load_size; + const int fd1 = data->codeobj.storage_file; + const uint64_t count = (fd1 != -1) ? lseek(fd1, 0, SEEK_END) : data->codeobj.memory_size; + void* buf = (fd1 != -1) ? 
malloc(count) : reinterpret_cast(data->codeobj.memory_base); + + if (fd1 != -1) { + ssize_t ret = read(fd1, buf, count); + if (ret == -1) { + perror("codeobj_callback::read()"); + abort(); + } + const uint64_t rcount = (uint64_t)ret; + if (rcount != count) { + printf("codeobj_callback::read() ret(%lu) != count(%lu)\n", rcount, count); + abort(); + } + //close(fd1); + } + + std::ostringstream oss; + oss << "codeobj/" << codeobj_index << ".obj" << std::dec; + const char* codeobj_data_name = oss.str().c_str(); + const char* codeobj_csv_name = "codeobj/index.csv"; + + if (codeobj_csv_file == NULL) { + codeobj_csv_file = fopen(codeobj_csv_name, "w"); + if (codeobj_csv_file == NULL) { + fprintf(stderr, "file(\"%s\")\n", codeobj_csv_name); fflush(stderr); + perror("codeobj_callback::fopen"); fflush(stderr); + abort(); + } + fprintf(codeobj_csv_file, "file,ts,base,size,unload\n"); + } + fprintf(codeobj_csv_file, "%s,%lu,0x%lx,0x%lx,%d\n", codeobj_data_name, ts, load_base, load_size, unload); + fflush(codeobj_csv_file); + + int fd2 = open(codeobj_data_name, O_RDWR|O_CREAT, 0777); + if (fd2 == -1) { + fprintf(stderr, "file(\"%s\")\n", codeobj_data_name); fflush(stderr); + perror("codeobj_callback::open()"); fflush(stderr); + abort(); + } + + ssize_t ret = write(fd2, buf, count); + if (ret == -1) { + perror("codeobj_callback::write()"); + abort(); + } + const uint64_t wcount = (uint64_t)ret; + if (wcount != count) { + printf("codeobj_callback::write() ret(%lu) != count(%lu)\n", wcount, count); + abort(); + } + + close(fd2); + } + return HSA_STATUS_SUCCESS; } @@ -1212,6 +1295,14 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) // Set HSA intercepting check_env_var("ROCP_HSA_INTERC", settings->hsa_intercepting); if (settings->hsa_intercepting) rocprofiler_set_hsa_callbacks(hsa_callbacks, (void*)14); + // Enable code objects dumping + check_env_var("ROCP_OBJ_DUMPING", settings->obj_dumping); + rocprofiler_hsa_callbacks_t ocb{}; + ocb.codeobj = 
codeobj_callback; + if (settings->obj_dumping) { + rocprofiler_set_hsa_callbacks(ocb, (void*)1); + settings->hsa_intercepting = 1; + } // Enable concurrent SQTT check_env_var("ROCP_K_CONCURRENT", settings->k_concurrent); // Enable optmized mode