diff --git a/projects/rocprofiler/CMakeLists.txt b/projects/rocprofiler/CMakeLists.txt
index f3cb061570..5222c836d5 100644
--- a/projects/rocprofiler/CMakeLists.txt
+++ b/projects/rocprofiler/CMakeLists.txt
@@ -70,6 +70,30 @@ set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
 set ( LIB_DIR "${ROOT_DIR}/src" )
 set ( TEST_DIR "${ROOT_DIR}/test" )
 
+## Enable tracing API by default; an explicit -DUSE_PROF_API=0 is honoured and skips the lookup below
+if (NOT DEFINED USE_PROF_API)
+  set(USE_PROF_API 1)
+endif()
+
+# Protocol header lookup: PROF_API_HEADER_PATH is used as a hint, /opt/rocm/roctracer as the fallback
+if(USE_PROF_API EQUAL 1)
+  find_path(PROF_API_HEADER_DIR prof_protocol.h
+    HINTS
+      ${PROF_API_HEADER_PATH}
+    PATHS
+      /opt/rocm/roctracer
+    PATH_SUFFIXES
+      include/ext
+  )
+  if(NOT PROF_API_HEADER_DIR)
+    MESSAGE(WARNING "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=")
+  else()
+    add_definitions(-DUSE_PROF_API=1)
+    include_directories(${PROF_API_HEADER_DIR})
+    MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
+  endif()
+endif()
+
 ## Build library
 include ( ${LIB_DIR}/CMakeLists.txt )
 
diff --git a/projects/rocprofiler/test/CMakeLists.txt b/projects/rocprofiler/test/CMakeLists.txt
index 2ab6f0bc3e..fcefbb3f34 100644
--- a/projects/rocprofiler/test/CMakeLists.txt
+++ b/projects/rocprofiler/test/CMakeLists.txt
@@ -83,7 +83,7 @@ target_link_libraries ( ${INEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} $
 
 ## Building ctrl test executable
 add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
-target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
+target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_LIB_PATH}/../include )
 target_link_libraries ( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
 execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
 execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/tool/*.xml ${PROJECT_BINARY_DIR}" )
diff --git a/projects/rocprofiler/test/app/intercept_test.cpp
b/projects/rocprofiler/test/app/intercept_test.cpp index 876b31020f..a24d5530d5 100644 --- a/projects/rocprofiler/test/app/intercept_test.cpp +++ b/projects/rocprofiler/test/app/intercept_test.cpp @@ -228,7 +228,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, unsigned metrics_input(rocprofiler_feature_t** ret) { // Profiling feature objects - const unsigned feature_count = 9; + const unsigned feature_count = 6; rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; memset(features, 0, feature_count * sizeof(rocprofiler_feature_t)); @@ -245,12 +245,12 @@ unsigned metrics_input(rocprofiler_feature_t** ret) { features[4].name = "SQ_INSTS_VALU"; features[5].kind = ROCPROFILER_FEATURE_KIND_METRIC; features[5].name = "VALUInsts"; - features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC; - features[6].name = "TCC_HIT_sum"; - features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC; - features[7].name = "TCC_MISS_sum"; - features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; - features[8].name = "WRITE_SIZE"; +// features[6].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// features[6].name = "TCC_HIT_sum"; +// features[7].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// features[7].name = "TCC_MISS_sum"; +// features[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// features[8].name = "WRITE_SIZE"; *ret = features; return feature_count; diff --git a/projects/rocprofiler/test/app/standalone_test.cpp b/projects/rocprofiler/test/app/standalone_test.cpp index b173c4d38c..fbd1b1c8fd 100644 --- a/projects/rocprofiler/test/app/standalone_test.cpp +++ b/projects/rocprofiler/test/app/standalone_test.cpp @@ -82,7 +82,7 @@ int main() { rocprofiler_properties_t properties; // Profiling feature objects - const unsigned feature_count = 9; + const unsigned feature_count = 6; rocprofiler_feature_t feature[feature_count]; // PMC events memset(feature, 0, sizeof(feature)); @@ -98,12 +98,12 @@ int main() { feature[4].name = "SQ_INSTS_VALU"; feature[5].kind = 
ROCPROFILER_FEATURE_KIND_METRIC; feature[5].name = "VALUInsts"; - feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC; - feature[6].name = "TCC_HIT_sum"; - feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC; - feature[7].name = "TCC_MISS_sum"; - feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; - feature[8].name = "WRITE_SIZE"; +// feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// feature[6].name = "TCC_HIT_sum"; +// feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// feature[7].name = "TCC_MISS_sum"; +// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; +// feature[8].name = "WRITE_SIZE"; // feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC; // feature[8].name = "TCC_EA_WRREQ_sum"; // feature[9].kind = ROCPROFILER_FEATURE_KIND_METRIC; diff --git a/projects/rocprofiler/test/app/test.cpp b/projects/rocprofiler/test/app/test.cpp index 5406797300..f386f5f937 100644 --- a/projects/rocprofiler/test/app/test.cpp +++ b/projects/rocprofiler/test/app/test.cpp @@ -20,9 +20,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *******************************************************************************/ +#include #include +#include #include #include + +#include #include #include @@ -31,6 +35,64 @@ THE SOFTWARE. 
#include "dummy_kernel/dummy_kernel.h"
 #include "simple_convolution/simple_convolution.h"
 
+// Returns the index of the first KFD topology node that reports compute cores
+// (treated here as a GPU node), or -1 if the query fails or no such node exists.
+int get_gpu_node_id() {
+  int gpu_node = -1;
+
+#if 0
+  // find a valid gpu node from /sys/class/kfd/kfd/topology/nodes
+  std::string path = "/sys/class/kfd/kfd/topology/nodes";
+  DIR *dir;
+  struct dirent *ent;
+
+  if ((dir = opendir(path.c_str())) != NULL) {
+    while ((ent = readdir(dir)) != NULL) {
+      std::string dir = ent->d_name;
+      if (dir.find_first_not_of("0123456789") == std::string::npos) {
+        std::string file = path + "/" + ent->d_name + "/gpu_id";
+        std::ifstream infile(file);
+        int id;
+        infile >> id;
+        if (id != 0) {
+          gpu_node = atoi(ent->d_name);
+          break;
+        }
+      }
+    }
+    closedir (dir);
+  }
+#else
+  HsaSystemProperties m_SystemProperties;
+  memset(&m_SystemProperties, 0, sizeof(m_SystemProperties));
+
+  HSAKMT_STATUS status = hsaKmtAcquireSystemProperties(&m_SystemProperties);
+  if (status != HSAKMT_STATUS_SUCCESS) {
+    std::cerr << "Error in hsaKmtAcquireSystemProperties" << std::endl;
+    // Fail with the -1 sentinel main() checks for; 1 would be taken as a node id
+    return -1;
+  }
+
+  // Traverse all CPU and GPU nodes and break when a GPU node is found
+  for (unsigned i = 0; i < m_SystemProperties.NumNodes; ++i) {
+    HsaNodeProperties nodeProperties;
+    memset(&nodeProperties, 0, sizeof(HsaNodeProperties));
+
+    status = hsaKmtGetNodeProperties(i, &nodeProperties);
+    if (status != HSAKMT_STATUS_SUCCESS) {
+      std::cerr << "Error in hsaKmtGetNodeProperties" << std::endl;
+      break;
+    } else if (nodeProperties.NumFComputeCores) {
+      gpu_node = i;
+      break;
+    }
+  }
+#endif
+
+  printf("GPU node id(%d)\n", gpu_node);
+  return gpu_node;
+}
+
 void thread_fun(const int kiter, const int diter, const uint32_t agents_number) {
   const AgentInfo* agent_info[agents_number];
   hsa_queue_t* queue[agents_number];
@@ -65,12 +127,31 @@ int main(int argc, char** argv) {
   const char* diter_s = getenv("ROCP_DITER");
   const char* agents_s = getenv("ROCP_AGENTS");
   const char* thrs_s = getenv("ROCP_THRS");
+  const char* spm_enabled = getenv("ROCP_SPM");
+  int gpu_node_id
= -1; const int kiter = (kiter_s != NULL) ? atol(kiter_s) : 1; const int diter = (diter_s != NULL) ? atol(diter_s) : 1; const uint32_t agents_number = (agents_s != NULL) ? (uint32_t)atol(agents_s) : 1; const int thrs = (thrs_s != NULL) ? atol(thrs_s) : 1; + if (spm_enabled != NULL) { + if (hsa_init() != HSA_STATUS_SUCCESS) { + std::cerr << "Error in hsa_init()" << std::endl; + return 1; + } + gpu_node_id = get_gpu_node_id(); + if (gpu_node_id == -1) { + std::cerr << "Error in get_gpu_node_id()" << std::endl; + return 1; + } + HSAKMT_STATUS status = hsaKmtEnableDebugTrap(gpu_node_id, INVALID_QUEUEID); + if (status != HSAKMT_STATUS_SUCCESS) { + std::cerr << "Error in enabling debug trap" << std::endl; + return 1; + } + } + TestHsa::HsaInstantiate(); std::vector t(thrs); @@ -81,6 +162,18 @@ int main(int argc, char** argv) { t[n].join(); } + if (spm_enabled != NULL) { + if (gpu_node_id == -1) { + std::cerr << "Invalid GPU node id" << std::endl; + return 1; + } + HSAKMT_STATUS status = hsaKmtDisableDebugTrap(gpu_node_id); + if (status != HSAKMT_STATUS_SUCCESS) { + std::cerr << "Error in disabling debug" << std::endl; + return 1; + } + } + TestHsa::HsaShutdown(); return 0; } diff --git a/projects/rocprofiler/test/run.sh b/projects/rocprofiler/test/run.sh index 3169f5dcca..20be268b0d 100755 --- a/projects/rocprofiler/test/run.sh +++ b/projects/rocprofiler/test/run.sh @@ -116,12 +116,14 @@ export ROCP_INPUT=input1.xml eval_test "'rocprof' libtool test n-threads" ./test/ctrl ## SPM test -export ROCP_KITER=3 -export ROCP_DITER=3 -export ROCP_AGENTS=1 -export ROCP_THRS=1 -export ROCP_INPUT=spm_input.xml -eval_test "libtool test, SPM trace test" ./test/ctrl +# export ROCP_KITER=3 +# export ROCP_DITER=3 +# export ROCP_AGENTS=1 +# export ROCP_THRS=1 +# export ROCP_INPUT=spm_input.xml +# export ROCP_SPM=1 +# eval_test "libtool test, SPM trace test" ./test/ctrl +# unset ROCP_SPM ## Libtool test, counter sets # Memcopies tracking diff --git 
a/projects/rocprofiler/test/tool/input.xml b/projects/rocprofiler/test/tool/input.xml index b49513cfa9..f8016ebc03 100644 --- a/projects/rocprofiler/test/tool/input.xml +++ b/projects/rocprofiler/test/tool/input.xml @@ -10,7 +10,7 @@ # List of metrics # SQTT trace with parameters diff --git a/projects/rocprofiler/test/tool/input1.xml b/projects/rocprofiler/test/tool/input1.xml index f4ecd1786f..9fff096c96 100644 --- a/projects/rocprofiler/test/tool/input1.xml +++ b/projects/rocprofiler/test/tool/input1.xml @@ -10,5 +10,5 @@ # List of metrics