kernel objects dumping
Change-Id: I5a16e05b7df438efa903948701b65a9ced99e5f3
initial codeobj event implementation
Change-Id: Ia7fac3c2b9897a004cfe88c4de82ba8c18284196
update - codeobj event implementation
Change-Id: I2b91b6e689875af03f0086f5a0872a97a629fd83
update2 - codeobj event implementation
Change-Id: Icff75f14fd21963e40db95373fa74880957a9e32
fix - codeobj event implementation
Change-Id: I76c33c875cb429fb12a974bb408b217f187b4536
URI buffer fix - codeobj event implementation
Change-Id: I7ce1a758e021455da3fe5b8a6e4ae3ab46e9760e
HSA events exposing
Change-Id: I3664ab4e5111c4ccedaf068dcb19f48055f0ef9b
HSA events data struct normalizing
Change-Id: I365ef0db45e0a9314bd2a1a4d29dd4eb4e91297d
[ROCm/rocprofiler commit: 8850e46071]
Tá an tiomantas seo le fáil i:
@@ -135,8 +135,14 @@ add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
|
||||
# Install header and library
|
||||
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h DESTINATION ${DEST_NAME}/include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h DESTINATION include/${DEST_NAME} )
|
||||
install ( FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h
|
||||
DESTINATION ${DEST_NAME}/include )
|
||||
install ( FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h
|
||||
DESTINATION include/${DEST_NAME} )
|
||||
# rpl_run.sh tblextr.py txt2xml.sh
|
||||
install ( FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bin/rpl_run.sh
|
||||
|
||||
@@ -74,6 +74,7 @@ typedef struct {
|
||||
uint32_t hsa_intercepting;
|
||||
uint32_t k_concurrent;
|
||||
uint32_t opt_mode;
|
||||
uint32_t obj_dumping;
|
||||
} rocprofiler_settings_t;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -481,7 +482,8 @@ typedef enum {
|
||||
ROCPROFILER_HSA_CB_ID_DEVICE = 1, // Device assign callback
|
||||
ROCPROFILER_HSA_CB_ID_MEMCOPY = 2, // Memcopy callback
|
||||
ROCPROFILER_HSA_CB_ID_SUBMIT = 3, // Packet submit callback
|
||||
ROCPROFILER_HSA_CB_ID_KSYMBOL = 4 // Loading/unloading of kernel symbol
|
||||
ROCPROFILER_HSA_CB_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol
|
||||
ROCPROFILER_HSA_CB_ID_CODEOBJ = 5 // Loading/unloading of code object
|
||||
} rocprofiler_hsa_cb_id_t;
|
||||
|
||||
// HSA callback data type
|
||||
@@ -516,8 +518,20 @@ typedef struct {
|
||||
uint64_t object; // kernel symbol object
|
||||
const char* name; // kernel symbol name
|
||||
uint32_t name_length; // kernel symbol name length
|
||||
int destroy; // symbol executable destroy
|
||||
int unload; // symbol executable destroy
|
||||
} ksymbol;
|
||||
struct {
|
||||
uint32_t storage_type; // code object storage type
|
||||
int storage_file; // origin file descriptor
|
||||
uint64_t memory_base; // origin memory base
|
||||
uint64_t memory_size; // origin memory size
|
||||
uint64_t load_base; // codeobj load base
|
||||
uint64_t load_size; // codeobj load size
|
||||
uint64_t load_delta; // codeobj load delta
|
||||
uint32_t uri_length; // URI string length
|
||||
char* uri; // URI string
|
||||
int unload; // unload flag
|
||||
} codeobj;
|
||||
};
|
||||
} rocprofiler_hsa_callback_data_t;
|
||||
|
||||
@@ -534,6 +548,7 @@ typedef struct {
|
||||
rocprofiler_hsa_callback_fun_t memcopy; // memory copy callback
|
||||
rocprofiler_hsa_callback_fun_t submit; // packet submit callback
|
||||
rocprofiler_hsa_callback_fun_t ksymbol; // kernel symbol callback
|
||||
rocprofiler_hsa_callback_fun_t codeobj; // codeobject load/unload callback
|
||||
} rocprofiler_hsa_callbacks_t;
|
||||
|
||||
// Set callbacks. If the callback is NULL then it is disabled.
|
||||
|
||||
@@ -35,4 +35,4 @@ set ( LIB_SRC
|
||||
)
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_KMT_LIB_PATH}/.. )
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++)
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ atomic )
|
||||
|
||||
@@ -20,6 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#define ROCP_INTERNAL_BUILD
|
||||
#include "activity.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -168,4 +171,65 @@ PUBLIC_API bool EnableActivityCallback(uint32_t op, bool enable) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
struct evt_cb_entry_t {
|
||||
void* callback;
|
||||
void* arg;
|
||||
};
|
||||
typedef std::atomic<evt_cb_entry_t> evt_cb_entry_atomic_t;
|
||||
evt_cb_entry_atomic_t evt_cb_table[HSA_EVT_ID_NUMBER]{};
|
||||
|
||||
// Trampoline installed into the rocprofiler HSA callbacks table.
// Looks up the client callback registered for event `id` and, when one is present,
// forwards the event to it under the ACTIVITY_DOMAIN_HSA_EVT domain together with the
// user argument stored at registration time. The `arg` parameter itself is not used.
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t codeobj_evt_callback(
    rocprofiler_hsa_cb_id_t id,
    const rocprofiler_hsa_callback_data_t* cb_data,
    void* arg)
{
  const evt_cb_entry_t entry = evt_cb_table[id].load(std::memory_order_relaxed);

  // Nothing registered for this event id: nothing to forward.
  if (entry.callback == NULL) return HSA_STATUS_SUCCESS;

  activity_rtapi_callback_t handler = (activity_rtapi_callback_t)entry.callback;
  handler(ACTIVITY_DOMAIN_HSA_EVT, id, cb_data, entry.arg);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Returns the name of HSA event `op` as a strdup()-allocated C string.
// NOTE(review): `op` is ignored, so "CODEOBJ" is returned for every event id, and each
// call allocates a fresh copy; presumably the caller is expected to free() it - confirm.
PUBLIC_API const char* GetEvtName(uint32_t op) { return strdup("CODEOBJ"); }
|
||||
|
||||
// Registers `callback` (with its opaque user argument `arg`) for the HSA event `op`.
//   op       - event id, one of the HSA_EVT_ID_* values
//   callback - client callback, later invoked through codeobj_evt_callback()
//   arg      - user argument passed back to `callback`
// Returns true on success. An unknown `op` is reported through fatal(); false is returned
// in case fatal() hands control back.
// The callbacks structure passed to rocprofiler_set_hsa_callbacks() has only the slot for
// `op` set; all its other slots are NULL.
PUBLIC_API bool RegisterEvtCallback(uint32_t op, void* callback, void* arg) {
  rocprofiler_hsa_callbacks_t ocb{};
  switch (op) {
    case HSA_EVT_ID_ALLOCATE:
      ocb.allocate = codeobj_evt_callback;
      break;
    case HSA_EVT_ID_DEVICE:
      ocb.device = codeobj_evt_callback;
      break;
    case HSA_EVT_ID_MEMCOPY:
      ocb.memcopy = codeobj_evt_callback;
      break;
    case HSA_EVT_ID_SUBMIT:
      ocb.submit = codeobj_evt_callback;
      break;
    case HSA_EVT_ID_KSYMBOL:
      ocb.ksymbol = codeobj_evt_callback;
      break;
    case HSA_EVT_ID_CODEOBJ:
      ocb.codeobj = codeobj_evt_callback;
      break;
    default:
      fatal("invalid activity opcode");
      return false;
  }

  // `op` is validated by the switch above, so indexing the table cannot go out of bounds.
  // The entry is published before the trampoline is installed so the trampoline never
  // observes an empty slot for a freshly registered event.
  evt_cb_table[op].store(evt_cb_entry_t{callback, arg}, std::memory_order_relaxed);
  rocprofiler_set_hsa_callbacks(ocb, NULL);

  return true;
}
|
||||
|
||||
// Removes event callbacks by installing an all-NULL callbacks structure.
// NOTE(review): `op` is not consulted, so every HSA event slot is reset to NULL,
// not only the one for `op` - confirm this is intended.
// Always returns true.
PUBLIC_API bool RemoveEvtCallback(uint32_t op) {
  const rocprofiler_hsa_callbacks_t no_callbacks{};
  rocprofiler_set_hsa_callbacks(no_callbacks, NULL);
  return true;
}
|
||||
} // extern "C"
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
#ifndef _SRC_CORE_ACTIVITY_H
|
||||
#define _SRC_CORE_ACTIVITY_H
|
||||
|
||||
#ifdef ROCP_INTERNAL_BUILD
|
||||
#include "inc/rocprofiler.h"
|
||||
#else
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// HSA EVT ID enumeration
|
||||
// Event ids of the HSA EVT activity domain.
// Each id takes the value of the matching ROCPROFILER_HSA_CB_ID_* callback id.
enum hsa_evt_id_t {
|
||||
HSA_EVT_ID_ALLOCATE = ROCPROFILER_HSA_CB_ID_ALLOCATE,
|
||||
HSA_EVT_ID_DEVICE = ROCPROFILER_HSA_CB_ID_DEVICE,
|
||||
HSA_EVT_ID_MEMCOPY = ROCPROFILER_HSA_CB_ID_MEMCOPY,
|
||||
HSA_EVT_ID_SUBMIT = ROCPROFILER_HSA_CB_ID_SUBMIT,
|
||||
HSA_EVT_ID_KSYMBOL = ROCPROFILER_HSA_CB_ID_KSYMBOL,
|
||||
HSA_EVT_ID_CODEOBJ = ROCPROFILER_HSA_CB_ID_CODEOBJ,
|
||||
// One past the last event id; used as the number of event slots.
HSA_EVT_ID_NUMBER
|
||||
};
|
||||
|
||||
// HSA EVT callback data type
|
||||
typedef rocprofiler_hsa_callback_data_t hsa_evt_data_t;
|
||||
|
||||
#endif // _SRC_CORE_ACTIVITY_H
|
||||
@@ -51,7 +51,8 @@ SOFTWARE.
|
||||
(ID == ROCPROFILER_HSA_CB_ID_DEVICE) ? callbacks_.device: \
|
||||
(ID == ROCPROFILER_HSA_CB_ID_MEMCOPY) ? callbacks_.memcopy: \
|
||||
(ID == ROCPROFILER_HSA_CB_ID_SUBMIT) ? callbacks_.submit: \
|
||||
callbacks_.ksymbol; \
|
||||
(ID == ROCPROFILER_HSA_CB_ID_KSYMBOL) ? callbacks_.ksymbol: \
|
||||
callbacks_.codeobj; \
|
||||
if ((__callback != NULL) && (recursion_ == false))
|
||||
|
||||
#define DO_HSA_CALLBACK \
|
||||
@@ -230,12 +231,12 @@ class HsaInterceptor {
|
||||
rocprofiler_hsa_callback_data_t data{};
|
||||
data.allocate.ptr = *ptr;
|
||||
data.allocate.size = size;
|
||||
|
||||
|
||||
HSA_RT(hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment));
|
||||
HSA_RT(hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag));
|
||||
|
||||
|
||||
DO_HSA_CALLBACK;
|
||||
|
||||
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_DEVICE) {
|
||||
// Scan the pool assigned devices
|
||||
agent_callback_data_t callback_data{pool, *ptr};
|
||||
@@ -303,44 +304,116 @@ class HsaInterceptor {
|
||||
void* arg)
|
||||
{
|
||||
const int free_flag = reinterpret_cast<long>(arg);
|
||||
rocprofiler_hsa_callback_data_t data{};
|
||||
hsa_ven_amd_loader_code_object_storage_type_t storage_type =
|
||||
HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE;
|
||||
int storage_fd = -1;
|
||||
uint64_t memory_base = 0;
|
||||
uint64_t memory_size = 0;
|
||||
uint64_t load_base = 0;
|
||||
uint64_t load_size = 0;
|
||||
uint64_t load_delta = 0;
|
||||
uint32_t uri_len = 0;
|
||||
char* uri_str = NULL;
|
||||
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE,
|
||||
&storage_type));
|
||||
|
||||
if (storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) {
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE,
|
||||
&storage_fd));
|
||||
if (storage_fd == -1) {
|
||||
printf("CodeObjectCallback: fd == -1\n"); fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
} else if (storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) {
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE,
|
||||
&memory_base));
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE,
|
||||
&memory_size));
|
||||
}
|
||||
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE,
|
||||
&data.allocate.ptr));
|
||||
&load_base));
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE,
|
||||
&load_size));
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA,
|
||||
&load_delta));
|
||||
|
||||
if (free_flag == 0) {
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE,
|
||||
&data.allocate.size));
|
||||
} else {
|
||||
data.allocate.size = 0;
|
||||
// Getting URI
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH,
|
||||
&uri_len));
|
||||
|
||||
uri_str = (char*)calloc(uri_len + 1, sizeof(char));
|
||||
if (!uri_str) EXC_ABORT(HSA_STATUS_ERROR, "URI allocation");
|
||||
|
||||
HSA_RT(LoaderApiTable.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI,
|
||||
uri_str));
|
||||
|
||||
if (storage_type != HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE) {
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_CODEOBJ) {
|
||||
rocprofiler_hsa_callback_data_t data{};
|
||||
data.codeobj.storage_type = storage_type;
|
||||
data.codeobj.storage_file = storage_fd;
|
||||
data.codeobj.memory_base = memory_base;
|
||||
data.codeobj.memory_size = memory_size;
|
||||
data.codeobj.load_base = load_base;
|
||||
data.codeobj.load_size = load_size;
|
||||
data.codeobj.load_delta = load_delta;
|
||||
data.codeobj.uri_length = uri_len;
|
||||
data.codeobj.uri = uri_str;
|
||||
data.codeobj.unload = free_flag;
|
||||
|
||||
DO_HSA_CALLBACK;
|
||||
}
|
||||
}
|
||||
|
||||
// Local GPU memory
|
||||
// GLOBAL; FLAGS: COARSE GRAINED
|
||||
data.allocate.segment = HSA_AMD_SEGMENT_GLOBAL;
|
||||
data.allocate.global_flag = HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED;
|
||||
data.allocate.is_code = 1;
|
||||
{
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE) {
|
||||
// Local GPU memory
|
||||
// GLOBAL; FLAGS: COARSE GRAINED
|
||||
rocprofiler_hsa_callback_data_t data{};
|
||||
data.allocate.ptr = reinterpret_cast<void*>(load_base);
|
||||
data.allocate.size = (free_flag == 0) ? load_size : 0;
|
||||
data.allocate.segment = HSA_AMD_SEGMENT_GLOBAL;
|
||||
data.allocate.global_flag = HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED;
|
||||
data.allocate.is_code = 1;
|
||||
|
||||
ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE);
|
||||
DO_HSA_CALLBACK;
|
||||
}
|
||||
}
|
||||
|
||||
if (free_flag == 0) {
|
||||
if (free_flag != 0) {
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_DEVICE) {
|
||||
hsa_amd_pointer_info_t pointer_info{};
|
||||
uint32_t num_agents = 0;
|
||||
hsa_agent_t* agents = NULL;
|
||||
pointer_info.size = sizeof(hsa_amd_pointer_info_t);
|
||||
HSA_RT(hsa_amd_pointer_info(
|
||||
const_cast<void*>(data.allocate.ptr),
|
||||
reinterpret_cast<void*>(load_base),
|
||||
&pointer_info,
|
||||
malloc,
|
||||
&num_agents,
|
||||
&agents));
|
||||
|
||||
DeviceCallback(num_agents, agents, data.allocate.ptr);
|
||||
|
||||
DeviceCallback(num_agents, agents, reinterpret_cast<void*>(load_base));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -372,7 +445,7 @@ class HsaInterceptor {
|
||||
data.ksymbol.object = obj;
|
||||
data.ksymbol.name = name;
|
||||
data.ksymbol.name_length = len;
|
||||
data.ksymbol.destroy = free_flag;
|
||||
data.ksymbol.unload = free_flag;
|
||||
|
||||
ISSUE_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL);
|
||||
}
|
||||
@@ -388,22 +461,23 @@ class HsaInterceptor {
|
||||
|
||||
HSA_RT(hsa_executable_freeze_fn(executable, options));
|
||||
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE) {
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) {
|
||||
HSA_RT(hsa_executable_iterate_symbols(
|
||||
executable,
|
||||
KernelSymbolCallback,
|
||||
reinterpret_cast<void*>(0)));
|
||||
}
|
||||
|
||||
unsigned is_codeobj_cb = 0;
|
||||
{ IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_CODEOBJ) is_codeobj_cb |= 1; }
|
||||
{ IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_ALLOCATE) is_codeobj_cb |= 1; }
|
||||
if (is_codeobj_cb) {
|
||||
LoaderApiTable.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
||||
executable,
|
||||
CodeObjectCallback,
|
||||
reinterpret_cast<void*>(0));
|
||||
}
|
||||
|
||||
{
|
||||
IS_HSA_CALLBACK(ROCPROFILER_HSA_CB_ID_KSYMBOL) {
|
||||
HSA_RT(hsa_executable_iterate_symbols(
|
||||
executable,
|
||||
KernelSymbolCallback,
|
||||
reinterpret_cast<void*>(0)));
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -87,6 +87,7 @@ struct kernel_properties_t {
|
||||
uint32_t sgpr_count;
|
||||
uint32_t fbarrier_count;
|
||||
hsa_signal_t signal;
|
||||
uint64_t object;
|
||||
};
|
||||
|
||||
// Context stored entry type
|
||||
@@ -524,7 +525,7 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) {
|
||||
const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name);
|
||||
const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent);
|
||||
|
||||
fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), kernel-name(\"%s\")",
|
||||
fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")",
|
||||
index,
|
||||
agent_info->dev_index,
|
||||
entry->data.queue_id,
|
||||
@@ -539,6 +540,7 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) {
|
||||
(entry->kernel_properties.sgpr_count + agent_info->sgpr_block_dflt) * agent_info->sgpr_block_size,
|
||||
entry->kernel_properties.fbarrier_count,
|
||||
entry->kernel_properties.signal.handle,
|
||||
entry->kernel_properties.object,
|
||||
nik_name.c_str());
|
||||
if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)",
|
||||
record->dispatch,
|
||||
@@ -780,6 +782,7 @@ void set_kernel_properties(const rocprofiler_callback_data_t* callback_data,
|
||||
kernel_properties_ptr->sgpr_count = AMD_HSA_BITS_GET(kernel_code->compute_pgm_rsrc1, AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT);
|
||||
kernel_properties_ptr->fbarrier_count = kernel_code->workgroup_fbarrier_count;
|
||||
kernel_properties_ptr->signal = callback_data->completion_signal;
|
||||
kernel_properties_ptr->object = callback_data->packet->kernel_object;
|
||||
}
|
||||
|
||||
// Kernel dispatch callback
|
||||
@@ -1113,6 +1116,7 @@ rocprofiler_hsa_callbacks_t hsa_callbacks {
|
||||
hsa_unified_callback,
|
||||
hsa_unified_callback,
|
||||
hsa_unified_callback,
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
|
||||
@@ -1121,7 +1125,86 @@ hsa_status_t hsa_ksymbol_cb(rocprofiler_hsa_cb_id_t id,
|
||||
const rocprofiler_hsa_callback_data_t* data,
|
||||
void* arg)
|
||||
{
|
||||
HsaRsrcFactory::SetKernelNameRef(data->ksymbol.object, data->ksymbol.name, data->ksymbol.destroy);
|
||||
HsaRsrcFactory::SetKernelNameRef(data->ksymbol.object, data->ksymbol.name, data->ksymbol.unload);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// code object callback
|
||||
hsa_status_t codeobj_callback(
|
||||
rocprofiler_hsa_cb_id_t id,
|
||||
const rocprofiler_hsa_callback_data_t* data,
|
||||
void* arg)
|
||||
{
|
||||
static std::atomic<uint64_t> codeobj_counter{};
|
||||
static FILE* codeobj_csv_file = NULL;
|
||||
|
||||
if (data == NULL) {
|
||||
printf("codeobj_callback error, data == 0\n"); fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
|
||||
if (id == ROCPROFILER_HSA_CB_ID_CODEOBJ) {
|
||||
const uint64_t codeobj_index = codeobj_counter.fetch_add(1, std::memory_order_relaxed);
|
||||
const uint64_t ts = HsaRsrcFactory::Instance().TimestampNs();
|
||||
const int unload = data->codeobj.unload;
|
||||
const uint64_t load_base = data->codeobj.load_base;
|
||||
const uint64_t load_size = data->codeobj.load_size;
|
||||
const int fd1 = data->codeobj.storage_file;
|
||||
const uint64_t count = (fd1 != -1) ? lseek(fd1, 0, SEEK_END) : data->codeobj.memory_size;
|
||||
void* buf = (fd1 != -1) ? malloc(count) : reinterpret_cast<void*>(data->codeobj.memory_base);
|
||||
|
||||
if (fd1 != -1) {
|
||||
ssize_t ret = read(fd1, buf, count);
|
||||
if (ret == -1) {
|
||||
perror("codeobj_callback::read()");
|
||||
abort();
|
||||
}
|
||||
const uint64_t rcount = (uint64_t)ret;
|
||||
if (rcount != count) {
|
||||
printf("codeobj_callback::read() ret(%lu) != count(%lu)\n", rcount, count);
|
||||
abort();
|
||||
}
|
||||
//close(fd1);
|
||||
}
|
||||
|
||||
std::ostringstream oss;
|
||||
oss << "codeobj/" << codeobj_index << ".obj" << std::dec;
|
||||
const char* codeobj_data_name = oss.str().c_str();
|
||||
const char* codeobj_csv_name = "codeobj/index.csv";
|
||||
|
||||
if (codeobj_csv_file == NULL) {
|
||||
codeobj_csv_file = fopen(codeobj_csv_name, "w");
|
||||
if (codeobj_csv_file == NULL) {
|
||||
fprintf(stderr, "file(\"%s\")\n", codeobj_csv_name); fflush(stderr);
|
||||
perror("codeobj_callback::fopen"); fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
fprintf(codeobj_csv_file, "file,ts,base,size,unload\n");
|
||||
}
|
||||
fprintf(codeobj_csv_file, "%s,%lu,0x%lx,0x%lx,%d\n", codeobj_data_name, ts, load_base, load_size, unload);
|
||||
fflush(codeobj_csv_file);
|
||||
|
||||
int fd2 = open(codeobj_data_name, O_RDWR|O_CREAT, 0777);
|
||||
if (fd2 == -1) {
|
||||
fprintf(stderr, "file(\"%s\")\n", codeobj_data_name); fflush(stderr);
|
||||
perror("codeobj_callback::open()"); fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
|
||||
ssize_t ret = write(fd2, buf, count);
|
||||
if (ret == -1) {
|
||||
perror("codeobj_callback::write()");
|
||||
abort();
|
||||
}
|
||||
const uint64_t wcount = (uint64_t)ret;
|
||||
if (wcount != count) {
|
||||
printf("codeobj_callback::write() ret(%lu) != count(%lu)\n", wcount, count);
|
||||
abort();
|
||||
}
|
||||
|
||||
close(fd2);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1212,6 +1295,14 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
|
||||
// Set HSA intercepting
|
||||
check_env_var("ROCP_HSA_INTERC", settings->hsa_intercepting);
|
||||
if (settings->hsa_intercepting) rocprofiler_set_hsa_callbacks(hsa_callbacks, (void*)14);
|
||||
// Enable code objects dumping
|
||||
check_env_var("ROCP_OBJ_DUMPING", settings->obj_dumping);
|
||||
rocprofiler_hsa_callbacks_t ocb{};
|
||||
ocb.codeobj = codeobj_callback;
|
||||
if (settings->obj_dumping) {
|
||||
rocprofiler_set_hsa_callbacks(ocb, (void*)1);
|
||||
settings->hsa_intercepting = 1;
|
||||
}
|
||||
// Enable concurrent SQTT
|
||||
check_env_var("ROCP_K_CONCURRENT", settings->k_concurrent);
|
||||
// Enable optimized mode
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir