constructor/destructor changes, OnLoad/OnUnload functions, removed hsa_init()/hsa_shut_down()
[ROCm/rocprofiler commit: a9a5119399]
This commit is contained in:
@@ -329,8 +329,14 @@ class Context {
|
||||
const profile_vector_t profile_vector = GetProfiles(group_index);
|
||||
for (auto& tuple : profile_vector) {
|
||||
// Wait for stop packet to complete
|
||||
hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
const uint64_t timeout = UINT64_MAX;
|
||||
bool complete = false;
|
||||
while (!complete) {
|
||||
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, timeout,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
complete = (signal_value == 0);
|
||||
if (!complete) printf("ROCProfiler: Signal timeout, signal(%d) timeout(%lx)\n", (int)signal_value, timeout);
|
||||
}
|
||||
for (rocprofiler_feature_t* rinfo : *(tuple.info_vector)) rinfo->data.kind = ROCPROFILER_DATA_KIND_UNINIT;
|
||||
callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL};
|
||||
const hsa_status_t status =
|
||||
|
||||
@@ -9,7 +9,6 @@ void InterceptQueue::HsaIntercept(HsaApiTable* table) {
|
||||
InterceptQueue::mutex_t InterceptQueue::mutex_;
|
||||
rocprofiler_callback_t InterceptQueue::on_dispatch_cb_ = NULL;
|
||||
void* InterceptQueue::on_dispatch_cb_data_ = NULL;
|
||||
const char* InterceptQueue::tool_lib_ = NULL;
|
||||
void* InterceptQueue::tool_handle_ = NULL;
|
||||
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
|
||||
const char* InterceptQueue::kernel_none_ = "";
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -26,30 +26,13 @@ class InterceptQueue {
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static void SetTool(const char* tool) { tool_lib_ = tool; }
|
||||
|
||||
static void UnloadTool() {
|
||||
if (tool_handle_) dlclose(tool_handle_);
|
||||
}
|
||||
|
||||
static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (tool_lib_) {
|
||||
tool_handle_ = dlopen(tool_lib_, RTLD_NOW);
|
||||
if (tool_handle_ == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: can't load tool library \"%s\"\n", tool_lib_);
|
||||
fprintf(stderr, "%s\n", dlerror());
|
||||
exit(1);
|
||||
}
|
||||
tool_lib_ = NULL;
|
||||
}
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
|
||||
@@ -152,7 +135,7 @@ class InterceptQueue {
|
||||
return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask;
|
||||
}
|
||||
|
||||
static char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
const amd_kernel_code_t* kernel_code = NULL;
|
||||
hsa_status_t status =
|
||||
util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
|
||||
@@ -167,13 +150,15 @@ class InterceptQueue {
|
||||
|
||||
// Kernel name is mangled name
|
||||
// apply __cxa_demangle() to demangle it
|
||||
char* funcname = NULL;
|
||||
const char* funcname = NULL;
|
||||
if (kernel_name != NULL) {
|
||||
size_t funcnamesize = 0;
|
||||
int status;
|
||||
char* ret = abi::__cxa_demangle(kernel_name, NULL, &funcnamesize, &status);
|
||||
const char* ret = abi::__cxa_demangle(kernel_name, NULL, &funcnamesize, &status);
|
||||
funcname = (ret != 0) ? ret : strdup(kernel_name);
|
||||
}
|
||||
if (funcname == NULL) funcname = strdup(kernel_none_);
|
||||
|
||||
return funcname;
|
||||
}
|
||||
|
||||
@@ -181,9 +166,8 @@ class InterceptQueue {
|
||||
static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
|
||||
static rocprofiler_callback_t on_dispatch_cb_;
|
||||
static void* on_dispatch_cb_data_;
|
||||
static const char* tool_lib_;
|
||||
static void* tool_handle_;
|
||||
static obj_map_t* obj_map_;
|
||||
static const char* kernel_none_;
|
||||
|
||||
ProxyQueue* const proxy_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
|
||||
@@ -99,14 +99,53 @@ void RestoreHsaApi() {
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef void (*tool_handler_t)();
|
||||
void * kTtoolHandle = NULL;
|
||||
|
||||
void LoadTool(const char* tool_lib) {
|
||||
if (tool_lib) {
|
||||
kTtoolHandle = dlopen(tool_lib, RTLD_NOW);
|
||||
if (kTtoolHandle == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: can't load tool library \"%s\"\n", tool_lib);
|
||||
fprintf(stderr, "%s\n", dlerror());
|
||||
exit(1);
|
||||
}
|
||||
tool_handler_t handler = reinterpret_cast<tool_handler_t>(dlsym(kTtoolHandle, "OnLoadTool"));
|
||||
if (handler == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: tool library corrupted, OnLoadTool() method is expected\n");
|
||||
fprintf(stderr, "%s\n", dlerror());
|
||||
exit(1);
|
||||
}
|
||||
tool_handler_t on_unload_handler = reinterpret_cast<tool_handler_t>(dlsym(kTtoolHandle, "OnUnloadTool"));
|
||||
if (on_unload_handler == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: tool library corrupted, OnUnloadTool() method is expected\n");
|
||||
fprintf(stderr, "%s\n", dlerror());
|
||||
exit(1);
|
||||
}
|
||||
handler();
|
||||
}
|
||||
}
|
||||
|
||||
void UnloadTool() {
|
||||
if (kTtoolHandle) {
|
||||
tool_handler_t handler = reinterpret_cast<tool_handler_t>(dlsym(kTtoolHandle, "OnUnloadTool"));
|
||||
if (handler == NULL) {
|
||||
fprintf(stderr, "ROCProfiler error: tool library corrupted, OnUnloadTool() method is expected\n");
|
||||
fprintf(stderr, "%s\n", dlerror());
|
||||
exit(1);
|
||||
}
|
||||
handler();
|
||||
dlclose(kTtoolHandle);
|
||||
}
|
||||
}
|
||||
|
||||
// Library constructor hook: creates the logger first, then the HSA resources factory.
// NOTE(review): CONSTRUCTOR_API is defined outside this view; presumably it marks the
// function to run when the library is loaded - confirm.
CONSTRUCTOR_API void constructor() {
|
||||
util::Logger::Create();
|
||||
util::HsaRsrcFactory::Create();
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
rocprofiler::MetricsDict::Destroy();
|
||||
util::HsaRsrcFactory::Destroy();
|
||||
rocprofiler::MetricsDict::Destroy();
|
||||
util::Logger::Destroy();
|
||||
}
|
||||
|
||||
@@ -139,19 +178,23 @@ extern "C" {
|
||||
// HSA-runtime tool on-load method
|
||||
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
const bool intercept_mode = (getenv("ROCP_HSA_INTERCEPT") != NULL);
|
||||
rocprofiler::SaveHsaApi(table);
|
||||
rocprofiler::ProxyQueue::InitFactory();
|
||||
rocprofiler::InterceptQueue::SetTool(getenv("ROCP_TOOL_LIB"));
|
||||
// HSA intercepting
|
||||
if (getenv("ROCP_HSA_INTERCEPT") != NULL) {
|
||||
if (intercept_mode) {
|
||||
rocprofiler::InterceptQueue::HsaIntercept(table);
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
}
|
||||
rocprofiler::LoadTool(getenv("ROCP_TOOL_LIB"));
|
||||
return true;
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-unload method
|
||||
PUBLIC_API void OnUnload() { rocprofiler::RestoreHsaApi(); }
|
||||
PUBLIC_API void OnUnload() {
|
||||
rocprofiler::UnloadTool();
|
||||
rocprofiler::RestoreHsaApi();
|
||||
}
|
||||
|
||||
// Returns the library major version (ROCPROFILER_VERSION_MAJOR)
|
||||
PUBLIC_API uint32_t rocprofiler_version_major() { return ROCPROFILER_VERSION_MAJOR; }
|
||||
|
||||
@@ -141,7 +141,10 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
queue_mask_(0),
|
||||
submit_index_(0),
|
||||
on_submit_cb_(0),
|
||||
on_submit_cb_data_(0) {}
|
||||
on_submit_cb_data_(0)
|
||||
{
|
||||
printf("ROCProfiler: SimpleProxyQueue is enabled\n");
|
||||
}
|
||||
|
||||
~SimpleProxyQueue() {}
|
||||
|
||||
|
||||
@@ -81,10 +81,13 @@ hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* d
|
||||
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory() {
|
||||
hsa_status_t status;
|
||||
#if 0
|
||||
// Initialize the Hsa Runtime
|
||||
hsa_status_t status = hsa_init();
|
||||
printf("ROCProfiler: HSA init\n");
|
||||
status = hsa_init();
|
||||
CHECK_STATUS("Error in hsa_init", status);
|
||||
|
||||
#endif
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
@@ -108,9 +111,11 @@ HsaRsrcFactory::HsaRsrcFactory() {
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
for (auto p : cpu_list_) delete p;
|
||||
for (auto p : gpu_list_) delete p;
|
||||
|
||||
#if 0
|
||||
printf("ROCProfiler: HSA shutdown\n");
|
||||
hsa_status_t status = hsa_shut_down();
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
#endif
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
|
||||
|
||||
@@ -114,9 +114,7 @@ class HsaRsrcFactory {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
|
||||
static HsaRsrcFactory* Create() { return NULL; }
|
||||
|
||||
static HsaRsrcFactory* CreateInstance() {
|
||||
static HsaRsrcFactory* Create() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_ == NULL) {
|
||||
instance_ = new HsaRsrcFactory();
|
||||
@@ -125,9 +123,9 @@ class HsaRsrcFactory {
|
||||
}
|
||||
|
||||
static HsaRsrcFactory& Instance() {
|
||||
CreateInstance();
|
||||
if (instance_ == NULL) instance_ = Create();
|
||||
hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
CHECK_STATUS("HsaRsrcFactory::Instance() is not found", status);
|
||||
CHECK_STATUS("HsaRsrcFactory::Instance() failed", status);
|
||||
return *instance_;
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,6 @@ int main(int argc, char** argv) {
|
||||
const int diter = (diter_s != NULL) ? atol(diter_s) : 1;
|
||||
TestHsa::HsaInstantiate();
|
||||
for (int i = 0; i < kiter; ++i) RunKernel<SimpleConvolution, TestAql>(argc, argv, diter);
|
||||
//TestHsa::HsaShutdown();
|
||||
TestHsa::HsaShutdown();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -69,7 +69,10 @@ HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
|
||||
}
|
||||
|
||||
void TestHsa::HsaShutdown() {
|
||||
if (hsa_queue_ != NULL) hsa_queue_destroy(hsa_queue_);
|
||||
if (hsa_queue_ != NULL) {
|
||||
hsa_queue_destroy(hsa_queue_);
|
||||
hsa_queue_ = NULL;
|
||||
}
|
||||
if (hsa_rsrc_) hsa_rsrc_->Destroy();
|
||||
}
|
||||
|
||||
|
||||
@@ -338,7 +338,7 @@ static hsa_status_t info_callback(const rocprofiler_info_data_t info, void * arg
|
||||
}
|
||||
|
||||
// Tool constructor
|
||||
CONSTRUCTOR_API void constructor()
|
||||
extern "C" PUBLIC_API void OnLoadTool()
|
||||
{
|
||||
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] =
|
||||
@@ -487,23 +487,21 @@ CONSTRUCTOR_API void constructor()
|
||||
}
|
||||
|
||||
// Tool destructor
|
||||
DESTRUCTOR_API void destructor() {
|
||||
const bool result_file_opened = (result_prefix != NULL) && (result_file_handle != NULL);
|
||||
extern "C" PUBLIC_API void OnUnloadTool() {
|
||||
// Unregister dispatch callback
|
||||
rocprofiler_remove_dispatch_callback();
|
||||
|
||||
// Dump stored profiling output data
|
||||
const bool result_file_opened = (result_prefix != NULL) && (result_file_handle != NULL);
|
||||
printf("\nROCPRofiler: %u contexts collected", context_count);
|
||||
if (result_file_opened) printf(", output directory %s", result_prefix);
|
||||
printf("\n");
|
||||
|
||||
// Dump stored profiling output data
|
||||
dump_context_array();
|
||||
if (result_file_opened) fclose(result_file_handle);
|
||||
|
||||
// Unregister dispatch callback and free callback data
|
||||
rocprofiler_remove_dispatch_callback();
|
||||
// Cleanup
|
||||
if (dispatch_data != NULL) {
|
||||
delete[] dispatch_data->features;
|
||||
delete dispatch_data;
|
||||
}
|
||||
|
||||
// Close output file
|
||||
if (result_file_opened) fclose(result_file_handle);
|
||||
}
|
||||
|
||||
@@ -213,14 +213,14 @@
|
||||
# VALUBusy The percentage of GPUTime vector ALU instructions are processed. Value range: 0% (bad) to 100% (optimal).
|
||||
<metric
|
||||
name="VALUBusy"
|
||||
expr=100*SQ_ACTIVE_INST_VALU*4/NUM_SIMDS/GRBM_GUI_ACTIVE
|
||||
expr=100*SQ_ACTIVE_INST_VALU*4/SIMD_NUM/GRBM_GUI_ACTIVE
|
||||
descr="The percentage of GPUTime vector ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."
|
||||
></metric>
|
||||
|
||||
# SALUBusy The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal).
|
||||
<metric
|
||||
name="SALUBusy"
|
||||
expr=100*SQ_INST_CYCLES_SALU*4/NUM_SIMDS/GRBM_GUI_ACTIVE
|
||||
expr=100*SQ_INST_CYCLES_SALU*4/SIMD_NUM/GRBM_GUI_ACTIVE
|
||||
descr="The percentage of GPUTime scalar ALU instructions are processed. Value range: 0% (bad) to 100% (optimal)."
|
||||
></metric>
|
||||
|
||||
@@ -248,14 +248,14 @@
|
||||
# MemUnitBusy The percentage of GPUTime the memory unit is active. The result includes the stall time (MemUnitStalled). This is measured with all extra fetches and writes and any cache or memory effects taken into account. Value range: 0% to 100% (fetch-bound).
|
||||
<metric
|
||||
name="MemUnitBusy"
|
||||
expr=100*max(TA_BUSY,16)/GRBM_GUI_ACTIVE/NUM_SHADER_ENGINES
|
||||
expr=100*max(TA_BUSY,16)/GRBM_GUI_ACTIVE/SE_NUM
|
||||
descr="The percentage of GPUTime the memory unit is active. The result includes the stall time (MemUnitStalled). This is measured with all extra fetches and writes and any cache or memory effects taken into account. Value range: 0% to 100% (fetch-bound)."
|
||||
></metric>
|
||||
|
||||
# MemUnitStalled The percentage of GPUTime the memory unit is stalled. Try reducing the number or size of fetches and writes if possible. Value range: 0% (optimal) to 100% (bad).
|
||||
<metric
|
||||
name="MemUnitStalled"
|
||||
expr=100*max(TCP_TA_DATA_STALL_CYCLES,16)/GRBM_GUI_ACTIVE/NUM_SHADER_ENGINES
|
||||
expr=100*max(TCP_TA_DATA_STALL_CYCLES,16)/GRBM_GUI_ACTIVE/SE_NUM
|
||||
descr="The percentage of GPUTime the memory unit is stalled. Try reducing the number or size of fetches and writes if possible. Value range: 0% (optimal) to 100% (bad)."
|
||||
></metric>
|
||||
|
||||
@@ -276,7 +276,7 @@
|
||||
# LDSBankConflict The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad).
|
||||
<metric
|
||||
name="LDSBankConflict"
|
||||
expr=100*SQ_LDS_BANK_CONFLICT/GRBM_GUI_ACTIVE/NUM_SIMDS
|
||||
expr=100*SQ_LDS_BANK_CONFLICT/GRBM_GUI_ACTIVE/CU_NUM
|
||||
descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."
|
||||
></metric>
|
||||
|
||||
|
||||
Referens i nytt ärende
Block a user