From 85e5cf8a93c811da86d80e7f409d0e824a1e2d63 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 3 Sep 2020 04:01:28 -0500 Subject: [PATCH] SWDEV-213367 : codeobj event implementation Change-Id: Ibcaca6869ce96d8802c5fa8ba241f43834d6f2a7 update - codeobj event implementation Change-Id: I4c12f26a19f2b31d9ac2211c3426a0e587a332b3 update2 - codeobj event implementation Change-Id: Ic877549a83542ae00352503471d881e847ebac9c test - codeobj event implementation Change-Id: I0618d3a93de94c3d7467372ba4a3d4ea5520bfc7 URI reference test - codeobj event implementation Change-Id: I6cf7e8a648cf012cb0708058b118a75e58f992b9 adding test/app - codeobj event implementation Change-Id: Idf4c197c7b9116ccde5ec50ff47a26a858bfab32 uri test fix - codeobj event implementation Change-Id: I7c385f82f516d9d8f2cd726366f00be3664006e3 uri test cleanup - codeobj event implementation Change-Id: I542d5baf88c048c8b4717af843b803cd93e8f3bc URI buffer fix - codeobj event implementation Change-Id: Iac65e04c03a0939935c10f53c6b580a2e33878f5 HSA events tests trace-check disabled Change-Id: I0f4d13aeeceb1d1a6e2191673eacbf9c7ae2ae52 [ROCm/roctracer commit: 29c63c52816432de0418bec51e27b91998ac5f76] --- projects/roctracer/cmake_modules/env.cmake | 7 +- projects/roctracer/inc/roctracer_hsa.h | 3 +- projects/roctracer/src/CMakeLists.txt | 9 +- projects/roctracer/src/core/loader.h | 9 + projects/roctracer/src/core/roctracer.cpp | 20 +- projects/roctracer/test/CMakeLists.txt | 19 +- projects/roctracer/test/app/codeobj_test.cpp | 89 ++ projects/roctracer/test/app/hsaco_test.cpp | 134 ++ .../MatrixTranspose_hip_flush_trace.txt | 1315 +++++++++++++---- .../golden_traces/tests_trace_cmp_levels.txt | 2 + projects/roctracer/test/run.sh | 7 + 11 files changed, 1297 insertions(+), 317 deletions(-) create mode 100644 projects/roctracer/test/app/codeobj_test.cpp create mode 100644 projects/roctracer/test/app/hsaco_test.cpp diff --git a/projects/roctracer/cmake_modules/env.cmake b/projects/roctracer/cmake_modules/env.cmake index 405f2665cf..f9fbb436c8 100644 --- a/projects/roctracer/cmake_modules/env.cmake +++ b/projects/roctracer/cmake_modules/env.cmake @@ -151,10 +151,10 @@ get_filename_component ( HSA_RUNTIME_LIB_PATH "${HSA_RUNTIME_LIB}" DIRECTORY ) find_library ( HSA_KMT_LIB "libhsakmt.so" ) get_filename_component ( HSA_KMT_LIB_PATH "${HSA_KMT_LIB}" DIRECTORY ) -get_filename_component ( ROCM_ROOT_DIR "${HSA_KMT_LIB_PATH}" DIRECTORY ) - set ( HSA_KMT_INC_PATH "${HSA_KMT_LIB_PATH}/../include" ) -set ( ROCM_INC_PATH "${HSA_KMT_INC_PATH}" ) + +get_filename_component ( ROCM_ROOT_DIR "${HSA_KMT_LIB_PATH}" DIRECTORY ) +set ( ROCM_INC_PATH "${ROCM_ROOT_DIR}/include" ) ## Basic Tool Chain Information message ( "----------------NBit: ${NBIT}" ) @@ -167,6 +167,7 @@ message ( "-----HSA-Runtime-Inc: ${HSA_RUNTIME_INC_PATH}" ) message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" ) message ( "----HSA_KMT_LIB_PATH: ${HSA_KMT_LIB_PATH}" ) message ( "-------ROCM_ROOT_DIR: ${ROCM_ROOT_DIR}" ) +message ( "-------ROCM_INC_PATH: ${ROCM_INC_PATH}" ) message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" ) message ( "-------------HIP-Inc: ${HIP_INC_DIR}" ) message ( "-------------HIP-VDI: ${HIP_VDI}" ) diff --git a/projects/roctracer/inc/roctracer_hsa.h b/projects/roctracer/inc/roctracer_hsa.h index b9b0cf9866..d9daa5e550 100644 --- a/projects/roctracer/inc/roctracer_hsa.h +++ b/projects/roctracer/inc/roctracer_hsa.h @@ -27,6 +27,7 @@ THE SOFTWARE. #include #include +#include // HSA OP ID enumeration enum hsa_op_id_t { @@ -34,7 +35,7 @@ enum hsa_op_id_t { HSA_OP_ID_COPY = 1, HSA_OP_ID_BARRIER = 2, HSA_OP_ID_RESERVED1 = 3, - HSA_OP_ID_NUMBER = 4 + HSA_OP_ID_NUMBER }; #ifdef __cplusplus diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index c794c49177..e9c72f8422 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -11,6 +11,9 @@ execute_process ( COMMAND sh -xc "${CMAKE_C_COMPILER} -E ${HIP_PATH}/include/hip execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${GEN_INC_DIR}/hip_runtime_api_pp.h -out ${GEN_INC_DIR}/hip_ostream_ops.h" ) execute_process ( COMMAND sh -xc "${CMAKE_C_COMPILER} -E ${HSA_RUNTIME_INC_PATH}/hsa.h > ${GEN_INC_DIR}/hsa_pp.h" ) execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${GEN_INC_DIR}/hsa_pp.h -out ${GEN_INC_DIR}/hsa_ostream_ops.h" ) +execute_process ( COMMAND sh -xc "mkdir ${GEN_INC_DIR}/rocprofiler" ) +execute_process ( COMMAND sh -xc "ln -s ${ROOT_DIR}/../rocprofiler/inc/rocprofiler.h ${GEN_INC_DIR}/rocprofiler/rocprofiler.h" ) +execute_process ( COMMAND sh -xc "ln -s ${ROOT_DIR}/../rocprofiler/src/core/activity.h ${GEN_INC_DIR}/rocprofiler/activity.h" ) # Build dynamic Library object set ( TARGET_LIB ${TARGET_NAME} ) @@ -22,14 +25,14 @@ set ( LIB_SRC ${LIB_DIR}/util/hsa_rsrc_factory.cpp ) add_library ( ${TARGET_LIB} ${LIBRARY_TYPE} ${LIB_SRC} ) -target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} ${GEN_INC_DIR} ) +target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HIP_INC_DIR} ${HSA_KMT_INC_PATH} ${ROCM_INC_PATH} ${GEN_INC_DIR} ) target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ) # Build KFD/Thunk tracing library set ( KFD_LIB "kfdwrapper64" ) set ( KFD_LIB_SRC ${GEN_SRC_DIR}/kfd_wrapper.cpp) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) -target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ${GEN_INC_DIR} ) +target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_KMT_INC_PATH} ${GEN_INC_DIR} ) target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) # Build ROCTX tracing library @@ -39,5 +42,5 @@ set ( ROCTX_LIB_SRC ${LIB_DIR}/roctx/roctx_intercept.cpp ) add_library ( ${ROCTX_LIB} SHARED ${ROCTX_LIB_SRC} ) -target_include_directories ( ${ROCTX_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${GEN_INC_DIR} ) +target_include_directories ( ${ROCTX_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${GEN_INC_DIR} ) target_link_libraries( ${ROCTX_LIB} PRIVATE c stdc++ ) diff --git a/projects/roctracer/src/core/loader.h b/projects/roctracer/src/core/loader.h index d1f2ef01c6..946521d130 100644 --- a/projects/roctracer/src/core/loader.h +++ b/projects/roctracer/src/core/loader.h @@ -94,6 +94,10 @@ class RocpApi { EnableCallback_t* EnableActivityCallback; NameCallback_t* GetOpName; + RegisterCallback_t* RegisterEvtCallback; + OperateCallback_t* RemoveEvtCallback; + NameCallback_t* GetEvtName; + protected: void init(Loader* loader) { RegisterApiCallback = loader->GetFun("RegisterApiCallback"); @@ -101,6 +105,10 @@ class RocpApi { InitActivityCallback = loader->GetFun("InitActivityCallback"); EnableActivityCallback = loader->GetFun("EnableActivityCallback"); GetOpName = loader->GetFun("GetOpName"); + + RegisterEvtCallback = loader->GetFun("RegisterEvtCallback"); + RemoveEvtCallback = loader->GetFun("RemoveEvtCallback"); + GetEvtName = loader->GetFun("GetEvtName"); } }; @@ -290,6 +298,7 @@ typedef HipLoaderShared HipLoader; template bool roctracer::BaseLoader::to_check_open_ = true; \ template bool roctracer::BaseLoader::to_check_symb_ = true; \ template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ + template<> bool roctracer::RocpLoader::to_load_ = true; \ template<> const char* roctracer::HccLoader::lib_name_ = "libamdhip64.so"; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 21f4b6674d..2754b97173 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -686,6 +686,8 @@ PUBLIC_API const char* roctracer_op_string( switch (domain) { case ACTIVITY_DOMAIN_HSA_API: return roctracer::hsa_support::GetApiName(op); + case ACTIVITY_DOMAIN_HSA_EVT: + return roctracer::RocpLoader::Instance().GetEvtName(op); case ACTIVITY_DOMAIN_HSA_OPS: return roctracer::RocpLoader::Instance().GetOpName(op); case ACTIVITY_DOMAIN_HCC_OPS: @@ -731,6 +733,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; + case ACTIVITY_DOMAIN_HSA_EVT: return HSA_EVT_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; @@ -760,13 +763,18 @@ static roctracer_status_t roctracer_enable_callback_fun( #if 0 if (op == HSA_API_ID_DISPATCH) { const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error(" << op << ") failed"); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RegisterApiCallback error(" << op << ") failed"); break; } #endif roctracer::hsa_support::cb_table.set(op, callback, user_data); break; } + case ACTIVITY_DOMAIN_HSA_EVT: { + const bool succ = roctracer::RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RegisterEvtCallback error(" << op << ") failed"); + break; + } case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { if (roctracer::HipLoader::Instance().Enabled() == false) break; @@ -875,6 +883,11 @@ static roctracer_status_t roctracer_disable_callback_fun( } break; } + case ACTIVITY_DOMAIN_HSA_EVT: { + const bool succ = roctracer::RocpLoader::Instance().RemoveEvtCallback(op); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveEvtCallback error(" << op << ") failed"); + break; + } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op); @@ -984,6 +997,7 @@ static roctracer_status_t roctracer_enable_activity_fun( break; } case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_HSA_EVT: break; case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { const bool init_phase = (roctracer::HccLoader::GetRef() == NULL); @@ -1080,6 +1094,7 @@ static roctracer_status_t roctracer_disable_activity_fun( break; } case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_HSA_EVT: break; case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { if (roctracer::HccLoader::Instance().Enabled() == false) break; @@ -1250,6 +1265,9 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( roctracer::kfd_support::intercept_KFDApiTable(); break; } + case ACTIVITY_DOMAIN_HSA_EVT: { + break; + } case ACTIVITY_DOMAIN_HSA_API: { // HSA API properties HsaApiTable* table = reinterpret_cast(properties); diff --git a/projects/roctracer/test/CMakeLists.txt b/projects/roctracer/test/CMakeLists.txt index ce003b7cc9..148c60b0b0 100644 --- a/projects/roctracer/test/CMakeLists.txt +++ b/projects/roctracer/test/CMakeLists.txt @@ -50,7 +50,8 @@ set ( HSA_REV "19b1191" ) set ( RUN_SCRIPT "${TEST_DIR}/run.sh" ) ## build HIP tests -set ( INC_PATH "${INC_PATH} ${PROJECT_BINARY_DIR}/inc" ) +set ( GEN_INC_DIR ${PROJECT_BINARY_DIR}/inc ) +set ( INC_PATH "${INC_PATH} ${GEN_INC_DIR}" ) set ( TEST_ENV HIP_VDI=${HIP_VDI} ROCM_PATH=${ROCM_ROOT_DIR} HSA_PATH=${ROCM_ROOT_DIR}/hsa INC_PATH=${INC_PATH} LIB_PATH=${LIB_PATH} HIPCC_VERBOSE=3 ) add_custom_target( mytest COMMAND ${TEST_ENV} make -C "${TEST_DIR}/MatrixTranspose" @@ -76,10 +77,24 @@ if ( DEFINED ROCTRACER_TARGET ) set ( TEST_LIB "tracer_tool" ) set ( TEST_LIB_SRC ${TEST_DIR}/tool/tracer_tool.cpp ${UTIL_SRC} ) add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} ) - target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} ${PROJECT_BINARY_DIR}/inc ) + target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HIP_INC_DIR} ${HSA_KMT_INC_PATH} ${GEN_INC_DIR} ) target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt ) endif () +## Build hsaco_test.cpp referenc test +set ( CO_LIB_NAME "hsaco_test" ) +set ( CO_LIB_SRC ${TEST_DIR}/app/hsaco_test.cpp ) +add_library ( ${CO_LIB_NAME} SHARED ${CO_LIB_SRC} ) +target_include_directories ( ${CO_LIB_NAME} PRIVATE ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries ( ${CO_LIB_NAME} ${HSA_RUNTIME_LIB} c stdc++ ) + +## Build codeobj event test +set ( CO_LIB_NAME "codeobj_test" ) +set ( CO_LIB_SRC ${TEST_DIR}/app/codeobj_test.cpp ) +add_library ( ${CO_LIB_NAME} SHARED ${CO_LIB_SRC} ) +target_include_directories ( ${CO_LIB_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${GEN_INC_DIR} ${HSA_RUNTIME_INC_PATH} ${ROCM_INC_PATH} ) +target_link_libraries ( ${CO_LIB_NAME} ${ROCTRACER_TARGET} c stdc++ ) + ## Build HSA test execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout ${HSA_REV}; fi" ) diff --git a/projects/roctracer/test/app/codeobj_test.cpp b/projects/roctracer/test/app/codeobj_test.cpp new file mode 100644 index 0000000000..086bcfb696 --- /dev/null +++ b/projects/roctracer/test/app/codeobj_test.cpp @@ -0,0 +1,89 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include +#include +#include + +#include "inc/roctracer.h" +#include "inc/roctracer_hsa.h" +#include + +#define PUBLIC_API __attribute__((visibility("default"))) +#define CONSTRUCTOR_API __attribute__((constructor)) +#define DESTRUCTOR_API __attribute__((destructor)) + +// Check returned HSA API status +void check_status(roctracer_status_t status) { + if (status != ROCTRACER_STATUS_SUCCESS) { + const char* error_string = roctracer_error_string(); + fprintf(stderr, "ERROR: %s\n", error_string); + abort(); + } +} + +// codeobj callback +void codeobj_callback(uint32_t domain, uint32_t cid, const void* data, void* arg) { + const hsa_evt_data_t* evt_data = reinterpret_cast(data); + const uint32_t uri_length = evt_data->codeobj.uri_length; + const char* uri = evt_data->codeobj.uri; + printf("codeobj_callback domain(%u) cid(%u): load_delta(0x%lx) load_size(0x%lx) uri_length(%u) uri(\"%s\")\n", + domain, + cid, + evt_data->codeobj.load_delta, + evt_data->codeobj.load_size, + uri_length, + uri); + fflush(stdout); +} + +void initialize() { + roctracer_status_t status = roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL); + check_status(status); +} + +void cleanup() { + roctracer_status_t status = roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT); + check_status(status); +} + +// Tool constructor +extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) { + // Enable HSA events intercepting + settings->hsa_intercepting = 1; + // Initialize profiling + initialize(); +} + +// Tool destructor +extern "C" PUBLIC_API void OnUnloadTool() { + // Final resources cleanup + cleanup(); +} + +extern "C" CONSTRUCTOR_API void constructor() { + printf("constructor\n"); fflush(stdout); +} + +extern "C" DESTRUCTOR_API void destructor() { + OnUnloadTool(); +} diff --git a/projects/roctracer/test/app/hsaco_test.cpp b/projects/roctracer/test/app/hsaco_test.cpp new file mode 100644 index 0000000000..0f2e42adc5 --- /dev/null +++ b/projects/roctracer/test/app/hsaco_test.cpp @@ -0,0 +1,134 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include +#include +#include +#include +#include + +#define PUBLIC_API __attribute__((visibility("default"))) +#define CONSTRUCTOR_API __attribute__((constructor)) +#define DESTRUCTOR_API __attribute__((destructor)) + +#define HSA_RT(call) \ + do { \ + const hsa_status_t status = call; \ + if (status != HSA_STATUS_SUCCESS) { \ + printf("error \"%s\"\n", #call); fflush(stdout); \ + abort(); \ + } \ + } while(0) + +// HSA API intercepting primitives +decltype(hsa_executable_freeze)* hsa_executable_freeze_fn; +hsa_ven_amd_loader_1_01_pfn_t loader_api_table{}; + +hsa_status_t code_object_callback( + hsa_executable_t executable, + hsa_loaded_code_object_t loaded_code_object, + void* arg) +{ + printf("code_object_callback\n"); fflush(stdout); + + uint64_t load_size = 0; + uint64_t load_delta = 0; + uint32_t uri_len = 0; + char* uri_str = NULL; + + HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, + &load_size)); + HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, + &load_delta)); + HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, + &uri_len)); + + uri_str = (char*)calloc(uri_len + 1, sizeof(char)); + if (!uri_str) { + perror("calloc"); + abort(); + } + + HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( + loaded_code_object, + HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, + uri_str)); + + printf("load_size(0x%lx)\n", load_size); fflush(stdout); + printf("load_delta(0x%lx)\n", load_delta); fflush(stdout); + printf("uri_len(%u)\n", uri_len); fflush(stdout); + printf("uri_str(\"%s\")\n", uri_str); fflush(stdout); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t hsa_executable_freeze_interceptor( + hsa_executable_t executable, + const char *options) +{ + HSA_RT(loader_api_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( + executable, + code_object_callback, + NULL)); + HSA_RT(hsa_executable_freeze_fn( + executable, + options)); + return HSA_STATUS_SUCCESS; +} + +// HSA-runtime tool on-load method +extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, + uint64_t runtime_version, + uint64_t failed_tool_count, + const char* const* failed_tool_names) +{ + printf("OnLoad: begin\n"); fflush(stdout); + // intercepting hsa_executable_freeze API + hsa_executable_freeze_fn = table->core_->hsa_executable_freeze_fn; + table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; + // Fetching AMD Loader HSA extension API + HSA_RT(hsa_system_get_major_extension_table( + HSA_EXTENSION_AMD_LOADER, + 1, + sizeof(hsa_ven_amd_loader_1_01_pfn_t), + &loader_api_table)); + printf("OnLoad: end\n"); fflush(stdout); + return true; +} + +extern "C" PUBLIC_API void OnUnload() { + printf("OnUnload\n"); fflush(stdout); +} + +extern "C" CONSTRUCTOR_API void constructor() { + printf("constructor\n"); fflush(stdout); +} + +extern "C" DESTRUCTOR_API void destructor() { + printf("destructor\n"); fflush(stdout); +} diff --git a/projects/roctracer/test/golden_traces/MatrixTranspose_hip_flush_trace.txt b/projects/roctracer/test/golden_traces/MatrixTranspose_hip_flush_trace.txt index 27ef8e95f8..6f0c4d17aa 100644 --- a/projects/roctracer/test/golden_traces/MatrixTranspose_hip_flush_trace.txt +++ b/projects/roctracer/test/golden_traces/MatrixTranspose_hip_flush_trace.txt @@ -1,25 +1,58 @@ -+ ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose -ROCTracer (pid=1991): +ROCTracer (pid=14696): ROCTracer: trace control flush rate(100000us) -3802701299772587 +129855595266140 HIP-trace() -Device name Device 687f +Device name Device 738c ## Iteration (99) ################# -3802701304199730:3802701304207180 1991:1991 hipGetDeviceProperties(props=, device=0) -3802701305255618:3802701305368889 1991:1991 hipMalloc(ptr=0x7fce16e0dec3, size=4194304) -3802701305370969:3802701305429809 1991:1991 hipMalloc(ptr=0x7fffc1295178, size=4194304) +129855603476896:129855603483734 14696:14696 hipGetDeviceProperties(props={}, device=0) :1 +129855604686134:129855605152950 14696:14696 hipMalloc(ptr=0x7fd65ce00000, size=4194304) :2 +129855605160451:129855605528247 14696:14696 hipMalloc(ptr=0x7fd65c800000, size=4194304) :3 PASSED! ## Iteration (98) ################# -3802701580515709:3802701582582904 0:0 CopyHostToDevice:4:1991 -3802701583225872:3802701584425191 0:0 KernelExecution:8:1991 -3802701583217109:3802701586447303 0:0 CopyDeviceToHost:10:1991 -3802701594795564:3802701596533727 0:0 CopyHostToDevice:11:1991 -3802701596646592:3802701597848875 0:0 KernelExecution:15:1991 -3802701596604988:3802701599522360 0:0 CopyDeviceToHost:17:1991 PASSED! ## Iteration (97) ################# PASSED! ## Iteration (96) ################# +129855955913848:129855957428192 0:0 CopyHostToDevice:4:14696 +129855958763342:129855959991823 0:0 KernelExecution:8:14696 +129855958734601:129855961705377 0:0 CopyDeviceToHost:10:14696 +129855971471522:129855972254607 0:0 CopyHostToDevice:11:14696 +129855972381516:129855973633356 0:0 KernelExecution:15:14696 +129855972673800:129855974135421 0:0 CopyDeviceToHost:17:14696 +129855980290261:129855981019714 0:0 CopyHostToDevice:18:14696 +129855981112002:129855982336482 0:0 KernelExecution:22:14696 +129855981076333:129855982783351 0:0 CopyDeviceToHost:24:14696 +129855988849671:129855989612220 0:0 CopyHostToDevice:25:14696 +129855989696159:129855990920319 0:0 KernelExecution:29:14696 +129855989668256:129855991384209 0:0 CopyDeviceToHost:31:14696 +129855605540988:129855957443403 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :4 +129855957456260:129855957456261 14696:14696 MARK(name(before HIP LaunchKernel)) +129855957507034:129855957514510 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :6 +129855957521000:129855957523014 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :7 +129855957529950:129855958671150 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :8 +129855958701410:129855958701411 14696:14696 MARK(name(after HIP LaunchKernel)) +129855958708321:129855961719221 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :10 +129855971408776:129855972257972 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :11 +129855972261515:129855972261516 14696:14696 MARK(name(before HIP LaunchKernel)) +129855972266736:129855972268234 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :13 +129855972271629:129855972272780 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :14 +129855972276181:129855972282118 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :15 +129855972663504:129855972663505 14696:14696 MARK(name(after HIP LaunchKernel)) +129855972666015:129855974143463 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :17 +129855980222888:129855981023250 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :18 +129855981025473:129855981025474 14696:14696 MARK(name(before HIP LaunchKernel)) +129855981028834:129855981029831 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :20 +129855981032043:129855981032913 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :21 +129855981035237:129855981038997 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :22 +129855981041265:129855981041266 14696:14696 MARK(name(after HIP LaunchKernel)) +129855981043695:129855982796928 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :24 +129855988764565:129855989615901 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :25 +129855989618073:129855989618074 14696:14696 MARK(name(before HIP LaunchKernel)) +129855989621096:129855989622129 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :27 +129855989624243:129855989625087 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :28 +129855989627271:129855989630934 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :29 +129855989632959:129855989632960 14696:14696 MARK(name(after HIP LaunchKernel)) +129855989635351:129855991396402 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :31 PASSED! ## Iteration (95) ################# PASSED! @@ -32,30 +65,6 @@ PASSED! ## Iteration (91) ################# PASSED! ## Iteration (90) ################# -3802701606826614:3802701608688328 0:0 CopyHostToDevice:18:1991 -3802701608781496:3802701609988668 0:0 KernelExecution:22:1991 -3802701608758548:3802701611510159 0:0 CopyDeviceToHost:24:1991 -3802701618702082:3802701620571865 0:0 CopyHostToDevice:25:1991 -3802701620675087:3802701621878110 0:0 KernelExecution:29:1991 -3802701620650876:3802701623502597 0:0 CopyDeviceToHost:31:1991 -3802701630690881:3802701632557164 0:0 CopyHostToDevice:32:1991 -3802701632661061:3802701633864973 0:0 KernelExecution:36:1991 -3802701632637885:3802701635182424 0:0 CopyDeviceToHost:38:1991 -3802701642392578:3802701644307152 0:0 CopyHostToDevice:39:1991 -3802701644410516:3802701645608650 0:0 KernelExecution:43:1991 -3802701644387082:3802701647064112 0:0 CopyDeviceToHost:45:1991 -3802701654288485:3802701656163049 0:0 CopyHostToDevice:46:1991 -3802701656267334:3802701657467098 0:0 KernelExecution:50:1991 -3802701656244070:3802701658916870 0:0 CopyDeviceToHost:52:1991 -3802701666450396:3802701668378780 0:0 CopyHostToDevice:53:1991 -3802701668482438:3802701669683832 0:0 KernelExecution:57:1991 -3802701668458481:3802701671148361 0:0 CopyDeviceToHost:59:1991 -3802701678631556:3802701680505490 0:0 CopyHostToDevice:60:1991 -3802701680609945:3802701681806894 0:0 KernelExecution:64:1991 -3802701680586811:3802701683591443 0:0 CopyDeviceToHost:66:1991 -3802701691032768:3802701692918102 0:0 CopyHostToDevice:67:1991 -3802701693021896:3802701694223438 0:0 KernelExecution:71:1991 -3802701692999202:3802701695886464 0:0 CopyDeviceToHost:73:1991 PASSED! ## Iteration (89) ################# PASSED! @@ -68,36 +77,132 @@ PASSED! ## Iteration (85) ################# PASSED! ## Iteration (84) ################# +129855997366746:129855998130772 0:0 CopyHostToDevice:32:14696 +129855998225065:129855999449385 0:0 KernelExecution:36:14696 +129855998197249:129855999925825 0:0 CopyDeviceToHost:38:14696 +129856005895171:129856006661973 0:0 CopyHostToDevice:39:14696 +129856006745770:129856007968491 0:0 KernelExecution:43:14696 +129856006717709:129856008455141 0:0 CopyDeviceToHost:45:14696 +129856014425283:129856015187951 0:0 CopyHostToDevice:46:14696 +129856015270363:129856016493884 0:0 KernelExecution:50:14696 +129856015242633:129856016989490 0:0 CopyDeviceToHost:52:14696 +129856022971470:129856023730704 0:0 CopyHostToDevice:53:14696 +129856023813883:129856025033244 0:0 KernelExecution:57:14696 +129856023785712:129856025544334 0:0 CopyDeviceToHost:59:14696 +129856031596064:129856032498907 0:0 CopyHostToDevice:60:14696 +129856032586758:129856033809639 0:0 KernelExecution:64:14696 +129856032558443:129856034354036 0:0 CopyDeviceToHost:66:14696 +129856040416553:129856041127473 0:0 CopyHostToDevice:67:14696 +129856041212287:129856042435488 0:0 KernelExecution:71:14696 +129856041184491:129856042941958 0:0 CopyDeviceToHost:73:14696 +129856049061163:129856049826011 0:0 CopyHostToDevice:74:14696 +129856049910719:129856051134400 0:0 KernelExecution:78:14696 +129856049882831:129856051651620 0:0 CopyDeviceToHost:80:14696 +129856057864499:129856058629610 0:0 CopyHostToDevice:81:14696 +129856058712855:129856059935896 0:0 KernelExecution:85:14696 +129856058684894:129856060452569 0:0 CopyDeviceToHost:87:14696 +129856066769721:129856067537899 0:0 CopyHostToDevice:88:14696 +129856067621801:129856068845321 0:0 KernelExecution:92:14696 +129856067594217:129856069423348 0:0 CopyDeviceToHost:94:14696 +129856075784739:129856076568384 0:0 CopyHostToDevice:95:14696 +129856076658166:129856077880567 0:0 KernelExecution:99:14696 +129856076630540:129856078394130 0:0 CopyDeviceToHost:101:14696 +129856084835135:129856085603333 0:0 CopyHostToDevice:102:14696 +129856085689351:129856086911912 0:0 KernelExecution:106:14696 +129856085661614:129856087438495 0:0 CopyDeviceToHost:108:14696 +129856093911070:129856094682948 0:0 CopyHostToDevice:109:14696 +129856094767987:129856095991348 0:0 KernelExecution:113:14696 +129856094739044:129856096520182 0:0 CopyDeviceToHost:115:14696 +129855997303698:129855998134058 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :32 +129855998136242:129855998136243 14696:14696 MARK(name(before HIP LaunchKernel)) +129855998138933:129855998139817 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :34 +129855998141918:129855998142773 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :35 +129855998144935:129855998149221 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :36 +129855998151431:129855998151432 14696:14696 MARK(name(after HIP LaunchKernel)) +129855998153828:129855999937506 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :38 +129856005829520:129856006665192 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :39 +129856006667396:129856006667397 14696:14696 MARK(name(before HIP LaunchKernel)) +129856006670307:129856006671160 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :41 +129856006673376:129856006674209 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :42 +129856006676323:129856006679651 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :43 +129856006681635:129856006681636 14696:14696 MARK(name(after HIP LaunchKernel)) +129856006683967:129856008469471 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :45 +129856014360174:129856015191285 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :46 +129856015193489:129856015193490 14696:14696 MARK(name(before HIP LaunchKernel)) +129856015196342:129856015197217 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :48 +129856015199400:129856015200221 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :49 +129856015202314:129856015205930 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :50 +129856015208058:129856015208059 14696:14696 MARK(name(after HIP LaunchKernel)) +129856015210764:129856017001555 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :52 +129856022908053:129856023733985 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :53 +129856023736320:129856023736321 14696:14696 MARK(name(before HIP LaunchKernel)) +129856023739178:129856023740063 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :55 +129856023742240:129856023743090 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :56 +129856023745309:129856023748845 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :57 +129856023750891:129856023750892 14696:14696 MARK(name(after HIP LaunchKernel)) +129856023753396:129856025556257 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :59 +129856031530409:129856032503170 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :60 +129856032505392:129856032505393 14696:14696 MARK(name(before HIP LaunchKernel)) +129856032508345:129856032509226 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :62 +129856032511486:129856032512316 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :63 +129856032514599:129856032518036 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :64 +129856032520150:129856032520151 14696:14696 MARK(name(after HIP LaunchKernel)) +129856032522410:129856034373111 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :66 +129856040397979:129856041130687 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :67 +129856041132973:129856041132974 14696:14696 MARK(name(before HIP LaunchKernel)) +129856041136399:129856041137389 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :69 +129856041139653:129856041140500 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :70 +129856041142893:129856041146663 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :71 +129856041148645:129856041148646 14696:14696 MARK(name(after HIP LaunchKernel)) +129856041151128:129856042953843 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :73 +129856048994841:129856049829566 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :74 +129856049831724:129856049831725 14696:14696 MARK(name(before HIP LaunchKernel)) +129856049834527:129856049835413 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :76 +129856049837759:129856049838585 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :77 +129856049840796:129856049844487 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :78 +129856049846529:129856049846530 14696:14696 MARK(name(after HIP LaunchKernel)) +129856049848934:129856051663797 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :80 +129856057798518:129856058633464 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :81 +129856058635650:129856058635651 14696:14696 MARK(name(before HIP LaunchKernel)) +129856058638530:129856058639560 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :83 +129856058641994:129856058642826 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :84 +129856058645125:129856058648721 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :85 +129856058650749:129856058650750 14696:14696 MARK(name(after HIP LaunchKernel)) +129856058653478:129856060466863 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :87 +129856066704603:129856067541502 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :88 +129856067543802:129856067543803 14696:14696 MARK(name(before HIP LaunchKernel)) +129856067546791:129856067547681 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :90 +129856067550027:129856067550854 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :91 +129856067553125:129856067556952 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :92 +129856067559149:129856067559150 14696:14696 MARK(name(after HIP LaunchKernel)) +129856067561903:129856069442958 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :94 +129856075719215:129856076572398 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :95 +129856076574828:129856076574829 14696:14696 MARK(name(before HIP LaunchKernel)) +129856076578071:129856076578997 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :97 +129856076581286:129856076582119 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :98 +129856076584498:129856076588395 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :99 +129856076590554:129856076590555 14696:14696 MARK(name(after HIP LaunchKernel)) +129856076592857:129856078406672 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :101 +129856084768530:129856085607081 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :102 +129856085609437:129856085609438 14696:14696 MARK(name(before HIP LaunchKernel)) +129856085612528:129856085613498 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :104 +129856085615751:129856085616602 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :105 +129856085618831:129856085623039 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :106 +129856085625178:129856085625179 14696:14696 MARK(name(after HIP LaunchKernel)) +129856085627731:129856087451206 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :108 +129856093846767:129856094686797 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :109 +129856094689153:129856094689154 14696:14696 MARK(name(before HIP LaunchKernel)) +129856094692497:129856094693485 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :111 +129856094695727:129856094696598 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :112 +129856094698884:129856094702856 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :113 +129856094705178:129856094705179 14696:14696 MARK(name(after HIP LaunchKernel)) +129856094707931:129856096534639 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :115 PASSED! ## Iteration (83) ################# PASSED! ## Iteration (82) ################# PASSED! ## Iteration (81) ################# -3802701703288299:3802701705170783 0:0 CopyHostToDevice:74:1991 -3802701705274243:3802701706486156 0:0 KernelExecution:78:1991 -3802701705250604:3802701707936074 0:0 CopyDeviceToHost:80:1991 -3802701715184407:3802701716946440 0:0 CopyHostToDevice:81:1991 -3802701717062173:3802701718258234 0:0 KernelExecution:85:1991 -3802701717027281:3802701719895352 0:0 CopyDeviceToHost:87:1991 -3802701727144976:3802701729139460 0:0 CopyHostToDevice:88:1991 -3802701729244175:3802701730445125 0:0 KernelExecution:92:1991 -3802701729220511:3802701732165583 0:0 CopyDeviceToHost:94:1991 -3802701739387037:3802701741142680 0:0 CopyHostToDevice:95:1991 -3802701741249310:3802701742453815 0:0 KernelExecution:99:1991 -3802701741225710:3802701744149042 0:0 CopyDeviceToHost:101:1991 -3802701751388465:3802701753137668 0:0 CopyHostToDevice:102:1991 -3802701753243075:3802701754440321 0:0 KernelExecution:106:1991 -3802701753219589:3802701756153951 0:0 CopyDeviceToHost:108:1991 -3802701763443335:3802701765498080 0:0 CopyHostToDevice:109:1991 -3802701765603802:3802701766820456 0:0 KernelExecution:113:1991 -3802701765580171:3802701768590463 0:0 CopyDeviceToHost:115:1991 -3802701775866137:3802701777758951 0:0 CopyHostToDevice:116:1991 -3802701777862528:3802701779073255 0:0 KernelExecution:120:1991 -3802701777839322:3802701780544442 0:0 CopyDeviceToHost:122:1991 -3802701787979987:3802701790138553 0:0 CopyHostToDevice:123:1991 -3802701790243940:3802701791446371 0:0 KernelExecution:127:1991 -3802701790220103:3802701792896973 0:0 CopyDeviceToHost:129:1991 PASSED! ## Iteration (80) ################# PASSED! @@ -114,35 +219,118 @@ PASSED! ## Iteration (74) ################# PASSED! ## Iteration (73) ################# -3802701800291738:3802701802179392 0:0 CopyHostToDevice:130:1991 -3802701802285163:3802701803481223 0:0 KernelExecution:134:1991 -3802701802261733:3802701804931343 0:0 CopyDeviceToHost:136:1991 -3802701812337128:3802701814252581 0:0 CopyHostToDevice:137:1991 -3802701814356366:3802701815565464 0:0 KernelExecution:141:1991 -3802701814332902:3802701817015292 0:0 CopyDeviceToHost:143:1991 -3802701824392847:3802701826310401 0:0 CopyHostToDevice:144:1991 -3802701826415256:3802701827613539 0:0 KernelExecution:148:1991 -3802701826391761:3802701829071431 0:0 CopyDeviceToHost:150:1991 -3802701836291435:3802701838179779 0:0 CopyHostToDevice:151:1991 -3802701838283081:3802701839480623 0:0 KernelExecution:155:1991 -3802701838259290:3802701840931690 0:0 CopyDeviceToHost:157:1991 -3802701848294054:3802701850186618 0:0 CopyHostToDevice:158:1991 -3802701850293201:3802701851487632 0:0 KernelExecution:162:1991 -3802701850269869:3802701852937908 0:0 CopyDeviceToHost:164:1991 -3802701860182332:3802701862143417 0:0 CopyHostToDevice:165:1991 -3802701862248805:3802701863444865 0:0 KernelExecution:169:1991 -3802701862224967:3802701865141909 0:0 CopyDeviceToHost:171:1991 -3802701872353003:3802701874265587 0:0 CopyHostToDevice:172:1991 -3802701874371291:3802701875572092 0:0 KernelExecution:176:1991 -3802701874348307:3802701877019147 0:0 CopyDeviceToHost:178:1991 -3802701884267750:3802701886153054 0:0 CopyHostToDevice:179:1991 -3802701886259179:3802701887463536 0:0 KernelExecution:183:1991 -3802701886235615:3802701888914085 0:0 CopyDeviceToHost:185:1991 -3802701896155929:3802701898142244 0:0 CopyHostToDevice:186:1991 -3802701898246687:3802701899454155 0:0 KernelExecution:190:1991 -3802701898223504:3802701901145246 0:0 CopyDeviceToHost:192:1991 +129856103067958:129856103841032 0:0 CopyHostToDevice:116:14696 +129856103927769:129856105150970 0:0 KernelExecution:120:14696 +129856103899316:129856105721054 0:0 CopyDeviceToHost:122:14696 +129856112245852:129856113015798 0:0 CopyHostToDevice:123:14696 +129856113100485:129856114323526 0:0 KernelExecution:127:14696 +129856113072690:129856114900649 0:0 CopyDeviceToHost:129:14696 +129856121600998:129856122374148 0:0 CopyHostToDevice:130:14696 +129856122460856:129856123685017 0:0 KernelExecution:134:14696 +129856122432406:129856124221503 0:0 CopyDeviceToHost:136:14696 +129856130996154:129856131718339 0:0 CopyHostToDevice:137:14696 +129856131803770:129856133026171 0:0 KernelExecution:141:14696 +129856131775718:129856133613724 0:0 CopyDeviceToHost:143:14696 +129856140505813:129856141285491 0:0 CopyHostToDevice:144:14696 +129856141371337:129856142594218 0:0 KernelExecution:148:14696 +129856141343575:129856143188801 0:0 CopyDeviceToHost:150:14696 +129856150234971:129856151016053 0:0 CopyHostToDevice:151:14696 +129856151102892:129856152327053 0:0 KernelExecution:155:14696 +129856151074919:129856152872907 0:0 CopyDeviceToHost:157:14696 +129856159481376:129856160253347 0:0 CopyHostToDevice:158:14696 +129856160343525:129856161566086 0:0 KernelExecution:162:14696 +129856160315355:129856162137295 0:0 CopyDeviceToHost:164:14696 +129856168059715:129856168791250 0:0 CopyHostToDevice:165:14696 +129856168876828:129856170099709 0:0 KernelExecution:169:14696 +129856168849139:129856170629902 0:0 CopyDeviceToHost:171:14696 +129856176005269:129856176724156 0:0 CopyHostToDevice:172:14696 +129856176811979:129856178033100 0:0 KernelExecution:176:14696 +129856176783784:129856178564862 0:0 CopyDeviceToHost:178:14696 +129856183804454:129856184516916 0:0 CopyHostToDevice:179:14696 +129856184609470:129856185832511 0:0 KernelExecution:183:14696 +129856184581802:129856186368858 0:0 CopyDeviceToHost:185:14696 +129856191541921:129856192254454 0:0 CopyHostToDevice:186:14696 +129856192345329:129856193569809 0:0 KernelExecution:190:14696 +129856192317767:129856194105080 0:0 CopyDeviceToHost:192:14696 +129856103003811:129856103844379 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :116 +129856103846787:129856103846788 14696:14696 MARK(name(before HIP LaunchKernel)) +129856103849922:129856103850838 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :118 +129856103853240:129856103854136 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :119 +129856103856444:129856103860149 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :120 +129856103862386:129856103862387 14696:14696 MARK(name(after HIP LaunchKernel)) +129856103864691:129856105741098 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :122 +129856112200226:129856113019342 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :123 +129856113021598:129856113021599 14696:14696 MARK(name(before HIP LaunchKernel)) +129856113024595:129856113025504 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :125 +129856113027902:129856113028756 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :126 +129856113031010:129856113034968 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :127 +129856113037098:129856113037099 14696:14696 MARK(name(after HIP LaunchKernel)) +129856113039452:129856114918382 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :129 +129856121536590:129856122377686 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :130 +129856122380177:129856122380178 14696:14696 MARK(name(before HIP LaunchKernel)) +129856122383242:129856122384157 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :132 +129856122386562:129856122387438 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :133 +129856122389743:129856122393887 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :134 +129856122395917:129856122395918 14696:14696 MARK(name(after HIP LaunchKernel)) +129856122398705:129856124236553 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :136 +129856130930250:129856131721919 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :137 +129856131724534:129856131724535 14696:14696 MARK(name(before HIP LaunchKernel)) +129856131727544:129856131728453 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :139 +129856131730840:129856131731718 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :140 +129856131734248:129856131738338 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :141 +129856131740508:129856131740509 14696:14696 MARK(name(after HIP LaunchKernel)) +129856131742956:129856133633762 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :143 +129856140484642:129856141289559 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :144 +129856141292040:129856141292041 14696:14696 MARK(name(before HIP LaunchKernel)) +129856141295360:129856141296366 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :146 +129856141298705:129856141299584 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :147 +129856141301885:129856141305904 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :148 +129856141308287:129856141308288 14696:14696 MARK(name(after HIP LaunchKernel)) +129856141310745:129856143207185 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :150 +129856150167842:129856151019519 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :151 +129856151021903:129856151021904 14696:14696 MARK(name(before HIP LaunchKernel)) +129856151025430:129856151026339 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :153 +129856151028846:129856151029731 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :154 +129856151032070:129856151036399 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :155 +129856151038525:129856151038526 14696:14696 MARK(name(after HIP LaunchKernel)) +129856151041204:129856152887054 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :157 +129856159416500:129856160257922 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :158 +129856160260251:129856160260252 14696:14696 MARK(name(before HIP LaunchKernel)) +129856160263327:129856160264253 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :160 +129856160266588:129856160267551 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :161 +129856160269815:129856160273583 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :162 +129856160275639:129856160275640 14696:14696 MARK(name(after HIP LaunchKernel)) +129856160277873:129856162154856 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :164 +129856167989129:129856168794954 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :165 +129856168796817:129856168796818 14696:14696 MARK(name(before HIP LaunchKernel)) +129856168799680:129856168800356 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :167 +129856168802336:129856168803043 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :168 +129856168804923:129856168808196 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :169 +129856168810026:129856168810027 14696:14696 MARK(name(after HIP LaunchKernel)) +129856168811889:129856170642148 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :171 +129856175935119:129856176727698 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :172 +129856176729573:129856176729574 14696:14696 MARK(name(before HIP LaunchKernel)) +129856176732312:129856176733001 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :174 +129856176734764:129856176735517 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :175 +129856176737306:129856176740961 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :176 +129856176742551:129856176742552 14696:14696 MARK(name(after HIP LaunchKernel)) +129856176744384:129856178576608 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :178 +129856183733862:129856184521359 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :179 PASSED! ## Iteration (72) ################# +129856184523202:129856184523203 14696:14696 MARK(name(before HIP LaunchKernel)) +129856184526239:129856184526918 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :181 +129856184528695:129856184529339 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :182 +129856184531203:129856184534819 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :183 +129856184536444:129856184536445 14696:14696 MARK(name(after HIP LaunchKernel)) +129856184538159:129856186381152 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :185 +129856191471466:129856192258965 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :186 +129856192260887:129856192260888 14696:14696 MARK(name(before HIP LaunchKernel)) +129856192264565:129856192265231 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :188 +129856192266936:129856192267582 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :189 +129856192269493:129856192272647 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :190 +129856192274238:129856192274239 14696:14696 MARK(name(after HIP LaunchKernel)) +129856192276014:129856194117333 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :192 PASSED! ## Iteration (71) ################# PASSED! @@ -157,30 +345,6 @@ PASSED! ## Iteration (66) ################# PASSED! ## Iteration (65) ################# -3802701908363640:3802701910282004 0:0 CopyHostToDevice:193:1991 -3802701910388686:3802701911593636 0:0 KernelExecution:197:1991 -3802701910364944:3802701913041924 0:0 CopyDeviceToHost:199:1991 -3802701920274197:3802701922171761 0:0 CopyHostToDevice:200:1991 -3802701922278125:3802701923475222 0:0 KernelExecution:204:1991 -3802701922254592:3802701924925132 0:0 CopyDeviceToHost:206:1991 -3802701932168496:3802701934142771 0:0 CopyHostToDevice:207:1991 -3802701934246976:3802701935438295 0:0 KernelExecution:211:1991 -3802701934223551:3802701937141613 0:0 CopyDeviceToHost:213:1991 -3802701944352056:3802701946257570 0:0 CopyHostToDevice:214:1991 -3802701946362997:3802701947574317 0:0 KernelExecution:218:1991 -3802701946339571:3802701949023790 0:0 CopyDeviceToHost:220:1991 -3802701956400665:3802701958316110 0:0 CopyHostToDevice:221:1991 -3802701958422590:3802701959641615 0:0 KernelExecution:225:1991 -3802701958399130:3802701961106280 0:0 CopyDeviceToHost:227:1991 -3802701968320724:3802701970208178 0:0 CopyHostToDevice:228:1991 -3802701970318670:3802701971521693 0:0 KernelExecution:232:1991 -3802701970295529:3802701972971609 0:0 CopyDeviceToHost:234:1991 -3802701980199792:3802701982142436 0:0 CopyHostToDevice:235:1991 -3802701982245928:3802701983440062 0:0 KernelExecution:239:1991 -3802701982222487:3802701985143188 0:0 CopyDeviceToHost:241:1991 -3802701992355642:3802701994267646 0:0 CopyHostToDevice:242:1991 -3802701994371730:3802701995578753 0:0 KernelExecution:246:1991 -3802701994348667:3802701997026937 0:0 CopyDeviceToHost:248:1991 PASSED! ## Iteration (64) ################# PASSED! @@ -193,39 +357,152 @@ PASSED! ## Iteration (60) ################# PASSED! ## Iteration (59) ################# +129856199280943:129856199989681 0:0 CopyHostToDevice:193:14696 +129856200075190:129856201299831 0:0 KernelExecution:197:14696 +129856200047538:129856201850341 0:0 CopyDeviceToHost:199:14696 +129856206897412:129856207614253 0:0 CopyHostToDevice:200:14696 +129856207705498:129856208928859 0:0 KernelExecution:204:14696 +129856207676917:129856209473592 0:0 CopyDeviceToHost:206:14696 +129856214432984:129856215196409 0:0 CopyHostToDevice:207:14696 +129856215281304:129856216504825 0:0 KernelExecution:211:14696 +129856215253529:129856217050195 0:0 CopyDeviceToHost:213:14696 +129856221931666:129856222699124 0:0 CopyHostToDevice:214:14696 +129856222785050:129856224007611 0:0 KernelExecution:218:14696 +129856222756874:129856224558196 0:0 CopyDeviceToHost:220:14696 +129856229435728:129856230202586 0:0 CopyHostToDevice:221:14696 +129856230289822:129856231510942 0:0 KernelExecution:225:14696 +129856230262176:129856232049379 0:0 CopyDeviceToHost:227:14696 +129856236838217:129856237549415 0:0 CopyHostToDevice:228:14696 +129856237635376:129856238857136 0:0 KernelExecution:232:14696 +129856237607782:129856239407224 0:0 CopyDeviceToHost:234:14696 +129856244299394:129856245007567 0:0 CopyHostToDevice:235:14696 +129856245099279:129856246322159 0:0 KernelExecution:239:14696 +129856245071193:129856246864706 0:0 CopyDeviceToHost:241:14696 +129856251723187:129856252431603 0:0 CopyHostToDevice:242:14696 +129856252521404:129856253744124 0:0 KernelExecution:246:14696 +129856252493576:129856254289474 0:0 CopyDeviceToHost:248:14696 +129856259171693:129856259879626 0:0 CopyHostToDevice:249:14696 +129856259964936:129856261188937 0:0 KernelExecution:253:14696 +129856259937195:129856261731637 0:0 CopyDeviceToHost:255:14696 +129856266605795:129856267371070 0:0 CopyHostToDevice:256:14696 +129856267455912:129856268680233 0:0 KernelExecution:260:14696 +129856267428297:129856269227260 0:0 CopyDeviceToHost:262:14696 +129856274075448:129856274840296 0:0 CopyHostToDevice:263:14696 +129856274927804:129856276150525 0:0 KernelExecution:267:14696 +129856274899679:129856276695018 0:0 CopyDeviceToHost:269:14696 +129856281565009:129856282326831 0:0 CopyHostToDevice:270:14696 +129856282411157:129856283637077 0:0 KernelExecution:274:14696 +129856282383503:129856284175523 0:0 CopyDeviceToHost:276:14696 +129856288995752:129856289705630 0:0 CopyHostToDevice:277:14696 +129856289793308:129856291014269 0:0 KernelExecution:281:14696 +129856289765547:129856291559219 0:0 CopyDeviceToHost:283:14696 +129856296360197:129856297069117 0:0 CopyHostToDevice:284:14696 +129856297157310:129856298378111 0:0 KernelExecution:288:14696 +129856297129589:129856298914568 0:0 CopyDeviceToHost:290:14696 +129856199220209:129856199993256 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :193 +129856199995165:129856199995166 14696:14696 MARK(name(before HIP LaunchKernel)) +129856199998331:129856199999016 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :195 +129856200000971:129856200001630 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :196 +129856200003348:129856200006409 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :197 +129856200007997:129856200007998 14696:14696 MARK(name(after HIP LaunchKernel)) +129856200009781:129856201864796 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :199 +129856206828954:129856207617612 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :200 +129856207619342:129856207619343 14696:14696 MARK(name(before HIP LaunchKernel)) +129856207633427:129856207634203 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :202 +129856207635929:129856207636565 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :203 +129856207638289:129856207641619 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :204 +129856207643379:129856207643380 14696:14696 MARK(name(after HIP LaunchKernel)) +129856207645338:129856209486625 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :206 +129856214367871:129856215199634 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :207 +129856215201421:129856215201422 14696:14696 MARK(name(before HIP LaunchKernel)) +129856215205034:129856215205701 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :209 +129856215207421:129856215208068 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :210 +129856215209926:129856215213001 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :211 +129856215214576:129856215214577 14696:14696 MARK(name(after HIP LaunchKernel)) +129856215216591:129856217062762 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :213 +129856221865656:129856222702390 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :214 +129856222704143:129856222704144 14696:14696 MARK(name(before HIP LaunchKernel)) +129856222707593:129856222708263 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :216 +129856222709907:129856222710533 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :217 +129856222712408:129856222715305 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :218 +129856222716820:129856222716821 14696:14696 MARK(name(after HIP LaunchKernel)) +129856222718703:129856224572291 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :220 +129856229369321:129856230206171 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :221 +129856230207933:129856230207934 14696:14696 MARK(name(before HIP LaunchKernel)) +129856230211408:129856230212070 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :223 +129856230213729:129856230214356 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :224 +129856230216306:129856230219552 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :225 +129856230221084:129856230221085 14696:14696 MARK(name(after HIP LaunchKernel)) +129856230222856:129856232061167 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :227 +129856236820359:129856237552651 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :228 +129856237554349:129856237554350 14696:14696 MARK(name(before HIP LaunchKernel)) +129856237557958:129856237558615 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :230 +129856237560382:129856237561016 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :231 +129856237562876:129856237566063 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :232 +129856237567608:129856237567609 14696:14696 MARK(name(after HIP LaunchKernel)) +129856237569296:129856239419101 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :234 +129856244174381:129856245010977 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :235 +129856245012718:129856245012719 14696:14696 MARK(name(before HIP LaunchKernel)) +129856245025693:129856245026451 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :237 +129856245028210:129856245028855 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :238 +129856245030730:129856245034177 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :239 +129856245035805:129856245035806 14696:14696 MARK(name(after HIP LaunchKernel)) +129856245038122:129856246876538 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :241 +129856251653109:129856252435896 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :242 +129856252437833:129856252437834 14696:14696 MARK(name(before HIP LaunchKernel)) +129856252441362:129856252442017 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :244 +129856252443660:129856252444296 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :245 +129856252446165:129856252449155 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :246 +129856252450809:129856252450810 14696:14696 MARK(name(after HIP LaunchKernel)) +129856252452579:129856254303055 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :248 +129856259101952:129856259882749 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :249 +129856259884515:129856259884516 14696:14696 MARK(name(before HIP LaunchKernel)) +129856259886742:129856259887392 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :251 +129856259889040:129856259889671 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :252 +129856259891415:129856259894919 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :253 +129856259896631:129856259896632 14696:14696 MARK(name(after HIP LaunchKernel)) +129856259898324:129856261743974 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :255 +129856266541050:129856267374498 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :256 +129856267376266:129856267376267 14696:14696 MARK(name(before HIP LaunchKernel)) +129856267379647:129856267380320 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :258 +129856267381929:129856267382540 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :259 +129856267384409:129856267387474 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :260 +129856267389033:129856267389034 14696:14696 MARK(name(after HIP LaunchKernel)) +129856267390764:129856269239563 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :262 +129856274008890:129856274843415 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :263 +129856274845095:129856274845096 14696:14696 MARK(name(before HIP LaunchKernel)) +129856274847806:129856274848470 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :265 +129856274850117:129856274850733 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :266 +129856274852427:129856274855749 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :267 +129856274857358:129856274857359 14696:14696 MARK(name(after HIP LaunchKernel)) +129856274859228:129856276707873 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :269 +129856281498759:129856282330118 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :270 +129856282332044:129856282332045 14696:14696 MARK(name(before HIP LaunchKernel)) +129856282335358:129856282336015 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :272 +129856282338029:129856282338668 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :273 +129856282340644:129856282343485 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :274 +129856282345028:129856282345029 14696:14696 MARK(name(after HIP LaunchKernel)) +129856282347024:129856284203838 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :276 +129856288978096:129856289708673 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :277 +129856289710414:129856289710415 14696:14696 MARK(name(before HIP LaunchKernel)) +129856289714250:129856289714924 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :279 +129856289716689:129856289717305 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :280 +129856289719150:129856289722057 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :281 +129856289723677:129856289723678 14696:14696 MARK(name(after HIP LaunchKernel)) +129856289725380:129856291571314 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :283 +129856296341271:129856297072486 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :284 +129856297074313:129856297074314 14696:14696 MARK(name(before HIP LaunchKernel)) +129856297077733:129856297078380 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :286 +129856297080109:129856297080733 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :287 +129856297082729:129856297085646 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :288 +129856297087184:129856297087185 14696:14696 MARK(name(after HIP LaunchKernel)) +129856297089004:129856298926004 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :290 PASSED! ## Iteration (58) ################# PASSED! ## Iteration (57) ################# PASSED! ## Iteration (56) ################# -3802702004315971:3802702006430907 0:0 CopyHostToDevice:249:1991 -3802702006513343:3802702007718885 0:0 KernelExecution:253:1991 -3802702006490217:3802702009041896 0:0 CopyDeviceToHost:255:1991 -3802702018262184:3802702019943876 0:0 CopyHostToDevice:256:1991 -3802702020050568:3802702021249295 0:0 KernelExecution:260:1991 -3802702020026907:3802702022584386 0:0 CopyDeviceToHost:262:1991 -3802702029541468:3802702031219270 0:0 CopyHostToDevice:263:1991 -3802702031312763:3802702032510305 0:0 KernelExecution:267:1991 -3802702031289161:3802702033843490 0:0 CopyDeviceToHost:269:1991 -3802702040805082:3802702042480244 0:0 CopyHostToDevice:270:1991 -3802702042572785:3802702043776105 0:0 KernelExecution:274:1991 -3802702042549004:3802702045110673 0:0 CopyDeviceToHost:276:1991 -3802702052065204:3802702053741167 0:0 CopyHostToDevice:277:1991 -3802702053835958:3802702055052463 0:0 KernelExecution:281:1991 -3802702053813487:3802702056374447 0:0 CopyDeviceToHost:283:1991 -3802702063333568:3802702065014061 0:0 CopyHostToDevice:284:1991 -3802702065111999:3802702066319615 0:0 KernelExecution:288:1991 -3802702065088771:3802702067654340 0:0 CopyDeviceToHost:290:1991 -3802702074618962:3802702076284625 0:0 CopyHostToDevice:291:1991 -3802702076384443:3802702077569835 0:0 KernelExecution:295:1991 -3802702076360685:3802702078904404 0:0 CopyDeviceToHost:297:1991 -3802702085881125:3802702087555758 0:0 CopyHostToDevice:298:1991 -3802702087649675:3802702088847958 0:0 KernelExecution:302:1991 -3802702087626608:3802702090183277 0:0 CopyDeviceToHost:304:1991 -3802702097151929:3802702098830722 0:0 CopyHostToDevice:305:1991 -3802702098924116:3802702100140473 0:0 KernelExecution:309:1991 -3802702098901192:3802702101472621 0:0 CopyDeviceToHost:311:1991 PASSED! ## Iteration (55) ################# PASSED! @@ -244,35 +521,140 @@ PASSED! ## Iteration (48) ################# PASSED! ## Iteration (47) ################# -3802702108615424:3802702110296796 0:0 CopyHostToDevice:312:1991 -3802702110392443:3802702111600207 0:0 KernelExecution:316:1991 -3802702110368957:3802702112934696 0:0 CopyDeviceToHost:318:1991 -3802702119898217:3802702121579670 0:0 CopyHostToDevice:319:1991 -3802702121673899:3802702122873960 0:0 KernelExecution:323:1991 -3802702121650880:3802702124193909 0:0 CopyDeviceToHost:325:1991 -3802702131156331:3802702132834494 0:0 CopyHostToDevice:326:1991 -3802702132927702:3802702134121984 0:0 KernelExecution:330:1991 -3802702132904324:3802702135456513 0:0 CopyDeviceToHost:332:1991 -3802702142434925:3802702144099207 0:0 CopyHostToDevice:333:1991 -3802702144200141:3802702145401090 0:0 KernelExecution:337:1991 -3802702144175248:3802702146735777 0:0 CopyDeviceToHost:339:1991 -3802702153706898:3802702155385711 0:0 CopyHostToDevice:340:1991 -3802702155488005:3802702156685843 0:0 KernelExecution:344:1991 -3802702155464581:3802702158018890 0:0 CopyDeviceToHost:346:1991 -3802702164987312:3802702166668385 0:0 CopyHostToDevice:347:1991 -3802702166762069:3802702167965537 0:0 KernelExecution:351:1991 -3802702166739105:3802702169298644 0:0 CopyDeviceToHost:353:1991 -3802702176260016:3802702177933188 0:0 CopyHostToDevice:354:1991 -3802702178026430:3802702179223971 0:0 KernelExecution:358:1991 -3802702178002518:3802702180540757 0:0 CopyDeviceToHost:360:1991 -3802702187490789:3802702189167931 0:0 CopyHostToDevice:361:1991 -3802702189262737:3802702190474501 0:0 KernelExecution:365:1991 -3802702189239082:3802702191808141 0:0 CopyDeviceToHost:367:1991 -3802702198761922:3802702200425845 0:0 CopyHostToDevice:368:1991 PASSED! ## Iteration (46) ################# PASSED! ## Iteration (45) ################# +129856303845436:129856304622018 0:0 CopyHostToDevice:291:14696 +129856304714456:129856305941176 0:0 KernelExecution:295:14696 +129856304686879:129856306490313 0:0 CopyDeviceToHost:297:14696 +129856311333818:129856312045157 0:0 CopyHostToDevice:298:14696 +129856312128568:129856313351929 0:0 KernelExecution:302:14696 +129856312100713:129856313892452 0:0 CopyDeviceToHost:304:14696 +129856318773490:129856319480599 0:0 CopyHostToDevice:305:14696 +129856319573103:129856320793904 0:0 KernelExecution:309:14696 +129856319544959:129856321343459 0:0 CopyDeviceToHost:311:14696 +129856326211019:129856326977511 0:0 CopyHostToDevice:312:14696 +129856327061875:129856328282996 0:0 KernelExecution:316:14696 +129856327034134:129856328825473 0:0 CopyDeviceToHost:318:14696 +129856333673698:129856334437330 0:0 CopyHostToDevice:319:14696 +129856334523567:129856335745168 0:0 KernelExecution:323:14696 +129856334495713:129856336293262 0:0 CopyDeviceToHost:325:14696 +129856341101442:129856341984561 0:0 CopyHostToDevice:326:14696 +129856342071670:129856343294870 0:0 KernelExecution:330:14696 +129856342043988:129856343840850 0:0 CopyDeviceToHost:332:14696 +129856348646308:129856349354803 0:0 CopyHostToDevice:333:14696 +129856349441279:129856350662399 0:0 KernelExecution:337:14696 +129856349413003:129856351203503 0:0 CopyDeviceToHost:339:14696 +129856356094471:129856356820623 0:0 CopyHostToDevice:340:14696 +129856356907355:129856358130235 0:0 KernelExecution:344:14696 +129856356879789:129856358671945 0:0 CopyDeviceToHost:346:14696 +129856363528023:129856364288036 0:0 CopyHostToDevice:347:14696 +129856364405580:129856365626380 0:0 KernelExecution:351:14696 +129856364377906:129856366172703 0:0 CopyDeviceToHost:353:14696 +129856371087592:129856371798847 0:0 CopyHostToDevice:354:14696 +129856371883929:129856373108889 0:0 KernelExecution:358:14696 +129856371855593:129856373655534 0:0 CopyDeviceToHost:360:14696 +129856378493711:129856379257336 0:0 CopyHostToDevice:361:14696 +129856379342581:129856380565301 0:0 KernelExecution:365:14696 +129856379314699:129856381113012 0:0 CopyDeviceToHost:367:14696 +129856385977586:129856386744228 0:0 CopyHostToDevice:368:14696 +129856386831442:129856388055123 0:0 KernelExecution:372:14696 +129856386803378:129856388598263 0:0 CopyDeviceToHost:374:14696 +129856393484361:129856394251866 0:0 CopyHostToDevice:375:14696 +129856394339138:129856395561058 0:0 KernelExecution:379:14696 +129856394311639:129856396103600 0:0 CopyDeviceToHost:381:14696 +129856303774990:129856304626161 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :291 +129856304627884:129856304627885 14696:14696 MARK(name(before HIP LaunchKernel)) +129856304631072:129856304631723 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :293 +129856304633373:129856304634007 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :294 +129856304635811:129856304639104 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :295 +129856304640848:129856304640849 14696:14696 MARK(name(after HIP LaunchKernel)) +129856304642651:129856306501959 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :297 +129856311264292:129856312048766 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :298 +129856312050539:129856312050540 14696:14696 MARK(name(before HIP LaunchKernel)) +129856312053498:129856312054174 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :300 +129856312055946:129856312056653 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :301 +129856312058397:129856312061589 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :302 +129856312063201:129856312063202 14696:14696 MARK(name(after HIP LaunchKernel)) +129856312065053:129856313904746 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :304 +129856318704110:129856319483869 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :305 +129856319485543:129856319485544 14696:14696 MARK(name(before HIP LaunchKernel)) +129856319499258:129856319500048 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :307 +129856319501759:129856319502401 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :308 +129856319504307:129856319507787 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :309 +129856319509535:129856319509536 14696:14696 MARK(name(after HIP LaunchKernel)) +129856319511552:129856321356021 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :311 +129856326144210:129856326980680 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :312 +129856326982483:129856326982484 14696:14696 MARK(name(before HIP LaunchKernel)) +129856326986163:129856326986815 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :314 +129856326988581:129856326989210 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :315 +129856326991095:129856326994082 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :316 +129856326995650:129856326995651 14696:14696 MARK(name(after HIP LaunchKernel)) +129856326997461:129856328838450 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :318 +129856333608209:129856334440902 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :319 +129856334442697:129856334442698 14696:14696 MARK(name(before HIP LaunchKernel)) +129856334446427:129856334447095 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :321 +129856334448793:129856334449426 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :322 +129856334451308:129856334454120 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :323 +129856334455718:129856334455719 14696:14696 MARK(name(after HIP LaunchKernel)) +129856334457508:129856336307654 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :325 +129856341084552:129856341987761 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :326 +129856341989501:129856341989502 14696:14696 MARK(name(before HIP LaunchKernel)) +129856341992961:129856341993616 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :328 +129856341995311:129856341995915 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :329 +129856341997784:129856342000844 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :330 +129856342002457:129856342002458 14696:14696 MARK(name(after HIP LaunchKernel)) +129856342004209:129856343852827 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :332 +129856348628207:129856349358297 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :333 +129856349360014:129856349360015 14696:14696 MARK(name(before HIP LaunchKernel)) +129856349363641:129856349364301 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :335 +129856349365955:129856349366590 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :336 +129856349368410:129856349371392 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :337 +129856349373001:129856349373002 14696:14696 MARK(name(after HIP LaunchKernel)) +129856349374736:129856351215163 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :339 +129856356026231:129856356823939 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :340 +129856356825939:129856356825940 14696:14696 MARK(name(before HIP LaunchKernel)) +129856356829316:129856356829967 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :342 +129856356831607:129856356832235 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :343 +129856356834103:129856356837300 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :344 +129856356838880:129856356838881 14696:14696 MARK(name(after HIP LaunchKernel)) +129856356840997:129856358683474 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :346 +129856363457621:129856364292098 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :347 +129856364293909:129856364293910 14696:14696 MARK(name(before HIP LaunchKernel)) +129856364296242:129856364296921 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :349 +129856364298665:129856364299325 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :350 +129856364301137:129856364304805 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :351 +129856364306614:129856364306615 14696:14696 MARK(name(after HIP LaunchKernel)) +129856364308432:129856366185192 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :353 +129856371019019:129856371802348 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :354 +129856371804072:129856371804073 14696:14696 MARK(name(before HIP LaunchKernel)) +129856371807407:129856371808089 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :356 +129856371809769:129856371810408 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :357 +129856371812409:129856371815399 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :358 +129856371816938:129856371816939 14696:14696 MARK(name(after HIP LaunchKernel)) +129856371818730:129856373668223 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :360 +129856378427685:129856379260530 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :361 +129856379262413:129856379262414 14696:14696 MARK(name(before HIP LaunchKernel)) +129856379266028:129856379266680 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :363 +129856379268334:129856379268974 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :364 +129856379270951:129856379274011 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :365 +129856379275576:129856379275577 14696:14696 MARK(name(after HIP LaunchKernel)) +129856379277516:129856381125442 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :367 +129856385912709:129856386747747 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :368 +129856386749617:129856386749618 14696:14696 MARK(name(before HIP LaunchKernel)) +129856386753015:129856386753700 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :370 +129856386755603:129856386756230 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :371 +129856386758107:129856386761145 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :372 +129856386762828:129856386762829 14696:14696 MARK(name(after HIP LaunchKernel)) +129856386764527:129856388613300 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :374 +129856393418103:129856394255127 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :375 +129856394257084:129856394257085 14696:14696 MARK(name(before HIP LaunchKernel)) +129856394260727:129856394261393 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :377 +129856394263117:129856394263752 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :378 +129856394266100:129856394269007 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :379 +129856394270594:129856394270595 14696:14696 MARK(name(after HIP LaunchKernel)) +129856394272528:129856396115719 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :381 PASSED! ## Iteration (44) ################# PASSED! @@ -287,33 +669,6 @@ PASSED! ## Iteration (39) ################# PASSED! ## Iteration (38) ################# -3802702200526879:3802702201738792 0:0 KernelExecution:372:1991 -3802702200502735:3802702203073505 0:0 CopyDeviceToHost:374:1991 -3802702210101096:3802702211781099 0:0 CopyHostToDevice:375:1991 -3802702211874278:3802702213070339 0:0 KernelExecution:379:1991 -3802702211851149:3802702214405528 0:0 CopyDeviceToHost:381:1991 -3802702221371170:3802702223046872 0:0 CopyHostToDevice:382:1991 -3802702223141537:3802702224348264 0:0 KernelExecution:386:1991 -3802702223118273:3802702225680452 0:0 CopyDeviceToHost:388:1991 -3802702232644404:3802702234313936 0:0 CopyHostToDevice:389:1991 -3802702234409358:3802702235607788 0:0 KernelExecution:393:1991 -3802702234385867:3802702236943196 0:0 CopyDeviceToHost:395:1991 -3802702243900787:3802702245580279 0:0 CopyHostToDevice:396:1991 -3802702245674663:3802702246878279 0:0 KernelExecution:400:1991 -3802702245651760:3802702248198969 0:0 CopyDeviceToHost:402:1991 -3802702255168930:3802702256847073 0:0 CopyHostToDevice:403:1991 -3802702256941454:3802702258155589 0:0 KernelExecution:407:1991 -3802702256918733:3802702259489683 0:0 CopyDeviceToHost:409:1991 -3802702266456174:3802702268121957 0:0 CopyHostToDevice:410:1991 -3802702268222984:3802702269419637 0:0 KernelExecution:414:1991 -3802702268198287:3802702270718936 0:0 CopyDeviceToHost:416:1991 -3802702277684438:3802702279355020 0:0 CopyHostToDevice:417:1991 -3802702279449065:3802702280656977 0:0 KernelExecution:421:1991 -3802702279425380:3802702281990519 0:0 CopyDeviceToHost:423:1991 -3802702288963001:3802702290626813 0:0 CopyHostToDevice:424:1991 -3802702290725647:3802702291916077 0:0 KernelExecution:428:1991 -3802702290702274:3802702293249973 0:0 CopyDeviceToHost:430:1991 -3802702300213905:3802702301888607 0:0 CopyHostToDevice:431:1991 PASSED! ## Iteration (37) ################# PASSED! @@ -326,38 +681,152 @@ PASSED! ## Iteration (33) ################# PASSED! ## Iteration (32) ################# +129856400949298:129856401690102 0:0 CopyHostToDevice:382:14696 +129856401774737:129856402998097 0:0 KernelExecution:386:14696 +129856401746598:129856403538591 0:0 CopyDeviceToHost:388:14696 +129856408364229:129856409075649 0:0 CopyHostToDevice:389:14696 +129856409181579:129856410405739 0:0 KernelExecution:393:14696 +129856409154049:129856410946890 0:0 CopyDeviceToHost:395:14696 +129856415833858:129856416545026 0:0 CopyHostToDevice:396:14696 +129856416634688:129856417856288 0:0 KernelExecution:400:14696 +129856416607076:129856418397645 0:0 CopyDeviceToHost:402:14696 +129856423255064:129856423962733 0:0 CopyHostToDevice:403:14696 +129856424049344:129856425272224 0:0 KernelExecution:407:14696 +129856424021555:129856425837337 0:0 CopyDeviceToHost:409:14696 +129856430719717:129856431443207 0:0 CopyHostToDevice:410:14696 +129856431530370:129856432753411 0:0 KernelExecution:414:14696 +129856431502760:129856433290891 0:0 CopyDeviceToHost:416:14696 +129856438127461:129856438893077 0:0 CopyHostToDevice:417:14696 +129856438981153:129856440204834 0:0 KernelExecution:421:14696 +129856438953062:129856440755527 0:0 CopyDeviceToHost:423:14696 +129856445658301:129856446425541 0:0 CopyHostToDevice:424:14696 +129856446512512:129856447734433 0:0 KernelExecution:428:14696 +129856446484748:129856448303143 0:0 CopyDeviceToHost:430:14696 +129856453131279:129856453895371 0:0 CopyHostToDevice:431:14696 +129856453982502:129856455205222 0:0 KernelExecution:435:14696 +129856453954390:129856455747092 0:0 CopyDeviceToHost:437:14696 +129856460549446:129856461267384 0:0 CopyHostToDevice:438:14696 +129856461354488:129856462578648 0:0 KernelExecution:442:14696 +129856461327009:129856463119514 0:0 CopyDeviceToHost:444:14696 +129856467954463:129856468665082 0:0 CopyHostToDevice:445:14696 +129856468756966:129856469978566 0:0 KernelExecution:449:14696 +129856468728958:129856470519550 0:0 CopyDeviceToHost:451:14696 +129856475396016:129856476106990 0:0 CopyHostToDevice:452:14696 +129856476191506:129856477415026 0:0 KernelExecution:456:14696 +129856476164143:129856477979522 0:0 CopyDeviceToHost:458:14696 +129856482841902:129856483550322 0:0 CopyHostToDevice:459:14696 +129856483636804:129856484858245 0:0 KernelExecution:463:14696 +129856483608842:129856485404598 0:0 CopyDeviceToHost:465:14696 +129856490264533:129856491036044 0:0 CopyHostToDevice:466:14696 +129856491121979:129856492348219 0:0 KernelExecution:470:14696 +129856491094217:129856492893929 0:0 CopyDeviceToHost:472:14696 +129856497730065:129856498496809 0:0 CopyHostToDevice:473:14696 +129856498583201:129856499806882 0:0 KernelExecution:477:14696 +129856498555486:129856500349740 0:0 CopyDeviceToHost:479:14696 +129856400931528:129856401693841 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :382 +129856401695697:129856401695698 14696:14696 MARK(name(before HIP LaunchKernel)) +129856401698086:129856401698763 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :384 +129856401700644:129856401701356 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :385 +129856401703387:129856401706670 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :386 +129856401708283:129856401708284 14696:14696 MARK(name(after HIP LaunchKernel)) +129856401710202:129856403550731 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :388 +129856408346178:129856409079144 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :389 +129856409080946:129856409080947 14696:14696 MARK(name(before HIP LaunchKernel)) +129856409119575:129856409120361 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :391 +129856409122350:129856409122982 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :392 +129856409124716:129856409127974 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :393 +129856409129722:129856409129723 14696:14696 MARK(name(after HIP LaunchKernel)) +129856409131595:129856410958682 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :395 +129856415764088:129856416549283 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :396 +129856416551147:129856416551148 14696:14696 MARK(name(before HIP LaunchKernel)) +129856416554753:129856416555457 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :398 +129856416557440:129856416558065 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :399 +129856416560077:129856416563543 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :400 +129856416565220:129856416565221 14696:14696 MARK(name(after HIP LaunchKernel)) +129856416567086:129856418410890 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :402 +129856423185992:129856423965984 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :403 +129856423967686:129856423967687 14696:14696 MARK(name(before HIP LaunchKernel)) +129856423971156:129856423971813 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :405 +129856423973453:129856423974058 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :406 +129856423975959:129856423979023 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :407 +129856423980620:129856423980621 14696:14696 MARK(name(after HIP LaunchKernel)) +129856423982481:129856425851437 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :409 +129856430649566:129856431446819 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :410 +129856431448647:129856431448648 14696:14696 MARK(name(before HIP LaunchKernel)) +129856431451980:129856431452627 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :412 +129856431454467:129856431455103 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :413 +129856431457061:129856431460021 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :414 +129856431461633:129856431461634 14696:14696 MARK(name(after HIP LaunchKernel)) +129856431463427:129856433305223 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :416 +129856438060199:129856438896337 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :417 +129856438898056:129856438898057 14696:14696 MARK(name(before HIP LaunchKernel)) +129856438901614:129856438902293 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :419 +129856438903944:129856438904582 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :420 +129856438906471:129856438909460 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :421 +129856438910995:129856438910996 14696:14696 MARK(name(after HIP LaunchKernel)) +129856438913099:129856440770029 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :423 +129856445589904:129856446428787 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :424 +129856446430525:129856446430526 14696:14696 MARK(name(before HIP LaunchKernel)) +129856446434097:129856446434755 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :426 +129856446436446:129856446437074 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :427 +129856446438958:129856446442103 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :428 +129856446443705:129856446443706 14696:14696 MARK(name(after HIP LaunchKernel)) +129856446445611:129856448319675 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :430 +129856453113306:129856453898651 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :431 +129856453900443:129856453900444 14696:14696 MARK(name(before HIP LaunchKernel)) +129856453903924:129856453904588 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :433 +129856453906239:129856453906854 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :434 +129856453908740:129856453911874 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :435 +129856453913486:129856453913487 14696:14696 MARK(name(after HIP LaunchKernel)) +129856453915356:129856455761272 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :437 +129856460531599:129856461270590 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :438 +129856461272368:129856461272369 14696:14696 MARK(name(before HIP LaunchKernel)) +129856461275845:129856461276515 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :440 +129856461278198:129856461278850 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :441 +129856461280791:129856461283899 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :442 +129856461285595:129856461285596 14696:14696 MARK(name(after HIP LaunchKernel)) +129856461287388:129856463133280 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :444 +129856467884995:129856468668564 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :445 +129856468670291:129856468670292 14696:14696 MARK(name(before HIP LaunchKernel)) +129856468673055:129856468673710 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :447 +129856468675408:129856468676048 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :448 +129856468677942:129856468681455 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :449 +129856468683148:129856468683149 14696:14696 MARK(name(after HIP LaunchKernel)) +129856468685101:129856470532724 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :451 +129856475326269:129856476110399 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :452 +129856476112220:129856476112221 14696:14696 MARK(name(before HIP LaunchKernel)) +129856476115691:129856476116355 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :454 +129856476118083:129856476118692 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :455 +129856476120553:129856476123478 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :456 +129856476125144:129856476125145 14696:14696 MARK(name(after HIP LaunchKernel)) +129856476126929:129856477993159 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :458 +129856482771986:129856483553655 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :459 +129856483555435:129856483555436 14696:14696 MARK(name(before HIP LaunchKernel)) +129856483559048:129856483559715 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :461 +129856483561368:129856483561995 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :462 +129856483563875:129856483567045 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :463 +129856483569037:129856483569038 14696:14696 MARK(name(after HIP LaunchKernel)) +129856483570875:129856485418803 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :465 +129856490199703:129856491039451 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :466 +129856491041225:129856491041226 14696:14696 MARK(name(before HIP LaunchKernel)) +129856491044551:129856491045204 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :468 +129856491046844:129856491047481 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :469 +129856491049291:129856491052245 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :470 +129856491053805:129856491053806 14696:14696 MARK(name(after HIP LaunchKernel)) +129856491055528:129856492907612 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :472 +129856497665310:129856498500405 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :473 +129856498502066:129856498502067 14696:14696 MARK(name(before HIP LaunchKernel)) +129856498505506:129856498506141 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :475 +129856498507858:129856498508491 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :476 +129856498510523:129856498513554 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :477 +129856498515137:129856498515138 14696:14696 MARK(name(after HIP LaunchKernel)) +129856498517011:129856500365762 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :479 PASSED! ## Iteration (31) ################# PASSED! ## Iteration (30) ################# PASSED! ## Iteration (29) ################# -3802702301982442:3802702303186799 0:0 KernelExecution:435:1991 -3802702301959178:3802702304503997 0:0 CopyDeviceToHost:437:1991 -3802702311466108:3802702313146390 0:0 CopyHostToDevice:438:1991 -3802702313238825:3802702314439626 0:0 KernelExecution:442:1991 -3802702313215791:3802702315773720 0:0 CopyDeviceToHost:444:1991 -3802702322736361:3802702324399864 0:0 CopyHostToDevice:445:1991 -3802702324503098:3802702325721085 0:0 KernelExecution:449:1991 -3802702324478794:3802702327055594 0:0 CopyDeviceToHost:451:1991 -3802702334030715:3802702335709388 0:0 CopyHostToDevice:452:1991 -3802702335806620:3802702337014532 0:0 KernelExecution:456:1991 -3802702335783968:3802702338348468 0:0 CopyDeviceToHost:458:1991 -3802702345327399:3802702347004601 0:0 CopyHostToDevice:459:1991 -3802702347117082:3802702348318476 0:0 KernelExecution:463:1991 -3802702347074012:3802702349651691 0:0 CopyDeviceToHost:465:1991 -3802702356616483:3802702358289405 0:0 CopyHostToDevice:466:1991 -3802702358382881:3802702359585164 0:0 KernelExecution:470:1991 -3802702358359406:3802702360920335 0:0 CopyDeviceToHost:472:1991 -3802702367904117:3802702369585909 0:0 CopyHostToDevice:473:1991 -3802702369679903:3802702370875371 0:0 KernelExecution:477:1991 -3802702369656630:3802702372196308 0:0 CopyDeviceToHost:479:1991 -3802702379156600:3802702380837792 0:0 CopyHostToDevice:480:1991 -3802702380930326:3802702382134830 0:0 KernelExecution:484:1991 -3802702380907402:3802702383472292 0:0 CopyDeviceToHost:486:1991 -3802702390441713:3802702392106996 0:0 CopyHostToDevice:487:1991 -3802702392207713:3802702393417847 0:0 KernelExecution:491:1991 -3802702392183556:3802702394752325 0:0 CopyDeviceToHost:493:1991 PASSED! ## Iteration (28) ################# PASSED! @@ -376,37 +845,147 @@ PASSED! ## Iteration (21) ################# PASSED! ## Iteration (20) ################# -3802702401709737:3802702403387670 0:0 CopyHostToDevice:494:1991 -3802702403489293:3802702404695872 0:0 KernelExecution:498:1991 -3802702403465280:3802702406030229 0:0 CopyDeviceToHost:500:1991 -3802702412982171:3802702414646163 0:0 CopyHostToDevice:501:1991 -3802702414739104:3802702415951461 0:0 KernelExecution:505:1991 -3802702414715973:3802702417282642 0:0 CopyDeviceToHost:507:1991 -3802702424250984:3802702425925207 0:0 CopyHostToDevice:508:1991 -3802702426022614:3802702427240009 0:0 KernelExecution:512:1991 -3802702425999277:3802702428556726 0:0 CopyDeviceToHost:514:1991 -3802702435521608:3802702437497583 0:0 CopyHostToDevice:515:1991 -3802702437591756:3802702438798483 0:0 KernelExecution:519:1991 -3802702437567843:3802702440117692 0:0 CopyDeviceToHost:521:1991 -3802702447076184:3802702448752496 0:0 CopyHostToDevice:522:1991 -3802702448844326:3802702450040979 0:0 KernelExecution:526:1991 -3802702448821457:3802702451374905 0:0 CopyDeviceToHost:528:1991 -3802702458338087:3802702460115460 0:0 CopyHostToDevice:529:1991 -3802702460219046:3802702461421625 0:0 KernelExecution:533:1991 -3802702460192041:3802702462758090 0:0 CopyDeviceToHost:535:1991 -3802702469730872:3802702471408304 0:0 CopyHostToDevice:536:1991 -3802702471502923:3802702472699724 0:0 KernelExecution:540:1991 -3802702471478905:3802702474035724 0:0 CopyDeviceToHost:542:1991 -3802702481000815:3802702482659947 0:0 CopyHostToDevice:543:1991 -3802702482757759:3802702483952190 0:0 KernelExecution:547:1991 -3802702482734898:3802702485283566 0:0 CopyDeviceToHost:549:1991 -3802702492244298:3802702493917401 0:0 CopyHostToDevice:550:1991 -3802702494011385:3802702495222705 0:0 KernelExecution:554:1991 -3802702493988441:3802702496538570 0:0 CopyDeviceToHost:556:1991 PASSED! ## Iteration (19) ################# PASSED! ## Iteration (18) ################# +129856505198157:129856505972108 0:0 CopyHostToDevice:480:14696 +129856506058878:129856507279678 0:0 KernelExecution:484:14696 +129856506031181:129856507818608 0:0 CopyDeviceToHost:486:14696 +129856512668452:129856513378344 0:0 CopyHostToDevice:487:14696 +129856513463906:129856514683906 0:0 KernelExecution:491:14696 +129856513435880:129856515225665 0:0 CopyDeviceToHost:493:14696 +129856520057898:129856520789533 0:0 CopyHostToDevice:494:14696 +129856520877018:129856522100858 0:0 KernelExecution:498:14696 +129856520849406:129856522643928 0:0 CopyDeviceToHost:500:14696 +129856527495540:129856528214422 0:0 CopyHostToDevice:501:14696 +129856528300948:129856529522228 0:0 KernelExecution:505:14696 +129856528273469:129856530060374 0:0 CopyDeviceToHost:507:14696 +129856534970413:129856535678341 0:0 CopyHostToDevice:508:14696 +129856535767312:129856536986193 0:0 KernelExecution:512:14696 +129856535739484:129856537527830 0:0 CopyDeviceToHost:514:14696 +129856542452848:129856543222239 0:0 CopyHostToDevice:515:14696 +129856543308707:129856544531907 0:0 KernelExecution:519:14696 +129856543281047:129856545069937 0:0 CopyDeviceToHost:521:14696 +129856549924160:129856550693828 0:0 CopyHostToDevice:522:14696 +129856550779510:129856552004150 0:0 KernelExecution:526:14696 +129856550751409:129856552552270 0:0 CopyDeviceToHost:528:14696 +129856557413139:129856558179223 0:0 CopyHostToDevice:529:14696 +129856558266309:129856559487269 0:0 KernelExecution:533:14696 +129856558237736:129856560027323 0:0 CopyDeviceToHost:535:14696 +129856564827841:129856565542599 0:0 CopyHostToDevice:536:14696 +129856565630041:129856566854841 0:0 KernelExecution:540:14696 +129856565602389:129856567397324 0:0 CopyDeviceToHost:542:14696 +129856572247710:129856572954375 0:0 CopyHostToDevice:543:14696 +129856573041963:129856574264203 0:0 KernelExecution:547:14696 +129856573013452:129856574809983 0:0 CopyDeviceToHost:549:14696 +129856579656436:129856580368439 0:0 CopyHostToDevice:550:14696 +129856580456039:129856581680039 0:0 KernelExecution:554:14696 +129856580428344:129856582226693 0:0 CopyDeviceToHost:556:14696 +129856587092681:129856587802199 0:0 CopyHostToDevice:557:14696 +129856587888587:129856589111627 0:0 KernelExecution:561:14696 +129856587861029:129856589654526 0:0 CopyDeviceToHost:563:14696 +129856594498640:129856595270698 0:0 CopyHostToDevice:564:14696 +129856595356053:129856596579733 0:0 KernelExecution:568:14696 +129856595328424:129856597128257 0:0 CopyDeviceToHost:570:14696 +129856601984341:129856602751266 0:0 CopyHostToDevice:571:14696 +129856505180003:129856505975222 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :480 +129856505976980:129856505976981 14696:14696 MARK(name(before HIP LaunchKernel)) +129856505980587:129856505981234 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :482 +129856505982935:129856505983566 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :483 +129856505985434:129856505988514 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :484 +129856505990096:129856505990097 14696:14696 MARK(name(after HIP LaunchKernel)) +129856505991997:129856507832334 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :486 +129856512649603:129856513382084 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :487 +129856513384599:129856513384600 14696:14696 MARK(name(before HIP LaunchKernel)) +129856513388119:129856513389080 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :489 +129856513391435:129856513392275 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :490 +129856513394697:129856513399367 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :491 +129856513401523:129856513401524 14696:14696 MARK(name(after HIP LaunchKernel)) +129856513404257:129856515239416 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :493 +129856519992571:129856520793180 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :494 +129856520794974:129856520794975 14696:14696 MARK(name(before HIP LaunchKernel)) +129856520798420:129856520799070 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :496 +129856520800911:129856520801530 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :497 +129856520803611:129856520806841 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :498 +129856520808737:129856520808738 14696:14696 MARK(name(after HIP LaunchKernel)) +129856520810545:129856522657358 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :500 +129856527425346:129856528218117 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :501 +129856528219874:129856528219875 14696:14696 MARK(name(before HIP LaunchKernel)) +129856528221975:129856528222627 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :503 +129856528224439:129856528225291 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :504 +129856528227108:129856528230172 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :505 +129856528231752:129856528231753 14696:14696 MARK(name(after HIP LaunchKernel)) +129856528233473:129856530074548 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :507 +129856534899214:129856535681957 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :508 +129856535683676:129856535683677 14696:14696 MARK(name(before HIP LaunchKernel)) +129856535686401:129856535687061 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :510 +129856535688790:129856535689423 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :511 +129856535691153:129856535694294 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :512 +129856535695868:129856535695869 14696:14696 MARK(name(after HIP LaunchKernel)) +129856535697671:129856537541753 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :514 +129856542387175:129856543225418 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :515 +129856543227192:129856543227193 14696:14696 MARK(name(before HIP LaunchKernel)) +129856543230911:129856543231570 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :517 +129856543233243:129856543233871 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :518 +129856543235930:129856543238762 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :519 +129856543240359:129856543240360 14696:14696 MARK(name(after HIP LaunchKernel)) +129856543242179:129856545084137 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :521 +129856549857104:129856550696919 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :522 +129856550698874:129856550698875 14696:14696 MARK(name(before HIP LaunchKernel)) +129856550702196:129856550702852 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :524 +129856550704612:129856550705254 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :525 +129856550707079:129856550709869 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :526 +129856550711442:129856550711443 14696:14696 MARK(name(after HIP LaunchKernel)) +129856550713182:129856552568840 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :528 +129856557336788:129856558182426 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :529 +129856558184195:129856558184196 14696:14696 MARK(name(before HIP LaunchKernel)) +129856558187727:129856558188380 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :531 +129856558190122:129856558190752 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :532 +129856558192774:129856558195554 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :533 +129856558197324:129856558197325 14696:14696 MARK(name(after HIP LaunchKernel)) +129856558199234:129856560041419 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :535 +129856564809360:129856565545640 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :536 +129856565547393:129856565547394 14696:14696 MARK(name(before HIP LaunchKernel)) +129856565549636:129856565550299 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :538 +129856565551969:129856565552581 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :539 +129856565554301:129856565557438 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :540 +129856565559047:129856565559048 14696:14696 MARK(name(after HIP LaunchKernel)) +129856565560847:129856567411065 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :542 +129856572215770:129856572957492 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :543 +129856572959234:129856572959235 14696:14696 MARK(name(before HIP LaunchKernel)) +129856572962526:129856572963184 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :545 +129856572964912:129856572965546 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :546 +129856572967421:129856572970453 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :547 +129856572972097:129856572972098 14696:14696 MARK(name(after HIP LaunchKernel)) +129856572974076:129856574823083 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :549 +129856579588261:129856580372449 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :550 +129856580374262:129856580374263 14696:14696 MARK(name(before HIP LaunchKernel)) +129856580376547:129856580377227 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :552 +129856580378975:129856580379619 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :553 +129856580381546:129856580384467 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :554 +129856580386225:129856580386226 14696:14696 MARK(name(after HIP LaunchKernel)) +129856580388205:129856582240020 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :556 +129856587022783:129856587805709 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :557 +129856587807440:129856587807441 14696:14696 MARK(name(before HIP LaunchKernel)) +129856587811171:129856587811825 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :559 +129856587813530:129856587814170 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :560 +129856587816040:129856587819243 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :561 +129856587820912:129856587820913 14696:14696 MARK(name(after HIP LaunchKernel)) +129856587822927:129856589666874 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :563 +129856594433516:129856595273993 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :564 +129856595275800:129856595275801 14696:14696 MARK(name(before HIP LaunchKernel)) +129856595278990:129856595279652 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :566 +129856595281384:129856595282018 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :567 +129856595283991:129856595287449 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :568 +129856595289101:129856595289102 14696:14696 MARK(name(after HIP LaunchKernel)) +129856595291045:129856597140491 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :570 +129856601919460:129856602754655 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :571 +129856602756445:129856602756446 14696:14696 MARK(name(before HIP LaunchKernel)) +129856602769740:129856602770661 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :573 +129856602772396:129856602773016 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :574 +129856602775079:129856602778192 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :575 +129856602779755:129856602779756 14696:14696 MARK(name(after HIP LaunchKernel)) PASSED! ## Iteration (17) ################# PASSED! @@ -419,33 +998,6 @@ PASSED! ## Iteration (13) ################# PASSED! ## Iteration (12) ################# -3802702503514682:3802702505171794 0:0 CopyHostToDevice:557:1991 -3802702505267652:3802702506468157 0:0 KernelExecution:561:1991 -3802702505243905:3802702507802814 0:0 CopyDeviceToHost:563:1991 -3802702514761386:3802702516425008 0:0 CopyHostToDevice:564:1991 -3802702516524080:3802702517731252 0:0 KernelExecution:568:1991 -3802702516500289:3802702519068477 0:0 CopyDeviceToHost:570:1991 -3802702526022439:3802702527700141 0:0 CopyHostToDevice:571:1991 -3802702527794104:3802702528982164 0:0 KernelExecution:575:1991 -3802702527771042:3802702530315231 0:0 CopyDeviceToHost:577:1991 -3802702537274272:3802702538953635 0:0 CopyHostToDevice:578:1991 -3802702539050334:3802702540254987 0:0 KernelExecution:582:1991 -3802702539025425:3802702541587134 0:0 CopyDeviceToHost:584:1991 -3802702548553016:3802702550225609 0:0 CopyHostToDevice:585:1991 -3802702550319337:3802702551518805 0:0 KernelExecution:589:1991 -3802702550295569:3802702552853758 0:0 CopyDeviceToHost:591:1991 -3802702559816479:3802702561495482 0:0 CopyHostToDevice:592:1991 -3802702561589855:3802702562788137 0:0 KernelExecution:596:1991 -3802702561565542:3802702564108601 0:0 CopyDeviceToHost:598:1991 -3802702571069693:3802702572746995 0:0 CopyHostToDevice:599:1991 -3802702572840650:3802702574041747 0:0 KernelExecution:603:1991 -3802702572817856:3802702575375565 0:0 CopyDeviceToHost:605:1991 -3802702582343137:3802702584028249 0:0 CopyHostToDevice:606:1991 -3802702584131111:3802702585314874 0:0 KernelExecution:610:1991 -3802702584098390:3802702586648988 0:0 CopyDeviceToHost:612:1991 -3802702593620890:3802702595300582 0:0 CopyHostToDevice:613:1991 -3802702595394737:3802702596603391 0:0 KernelExecution:617:1991 -3802702595371233:3802702597936882 0:0 CopyDeviceToHost:619:1991 PASSED! ## Iteration (11) ################# PASSED! @@ -460,37 +1012,143 @@ PASSED! ## Iteration (6) ################# PASSED! ## Iteration (5) ################# +129856602843500:129856604064780 0:0 KernelExecution:575:14696 +129856602815760:129856604621212 0:0 CopyDeviceToHost:577:14696 +129856609545997:129856610317997 0:0 CopyHostToDevice:578:14696 +129856610406851:129856611631491 0:0 KernelExecution:582:14696 +129856610379025:129856612168754 0:0 CopyDeviceToHost:584:14696 +129856616987475:129856617701731 0:0 CopyHostToDevice:585:14696 +129856617790176:129856619014496 0:0 KernelExecution:589:14696 +129856617761809:129856619559063 0:0 CopyDeviceToHost:591:14696 +129856624349579:129856625289209 0:0 CopyHostToDevice:592:14696 +129856625377836:129856626603916 0:0 KernelExecution:596:14696 +129856625350001:129856627147692 0:0 CopyDeviceToHost:598:14696 +129856632033149:129856632742303 0:0 CopyHostToDevice:599:14696 +129856632836527:129856634057647 0:0 KernelExecution:603:14696 +129856632808948:129856634598487 0:0 CopyDeviceToHost:605:14696 +129856639443412:129856640151030 0:0 CopyHostToDevice:606:14696 +129856640260250:129856641484890 0:0 KernelExecution:610:14696 +129856640232509:129856642041965 0:0 CopyDeviceToHost:612:14696 +129856646912100:129856647619752 0:0 CopyHostToDevice:613:14696 +129856647705914:129856648930874 0:0 KernelExecution:617:14696 +129856647678197:129856649476287 0:0 CopyDeviceToHost:619:14696 +129856654338593:129856655101879 0:0 CopyHostToDevice:620:14696 +129856655189659:129856656412699 0:0 KernelExecution:624:14696 +129856655161891:129856656960409 0:0 CopyDeviceToHost:626:14696 +129856661822483:129856662586330 0:0 CopyHostToDevice:627:14696 +129856662679432:129856663900712 0:0 KernelExecution:631:14696 +129856662650940:129856664447428 0:0 CopyDeviceToHost:633:14696 +129856669274444:129856670036595 0:0 CopyHostToDevice:634:14696 +129856670129015:129856671350615 0:0 KernelExecution:638:14696 +129856670101388:129856671895354 0:0 CopyDeviceToHost:640:14696 +129856676687339:129856677401038 0:0 CopyHostToDevice:641:14696 +129856677491350:129856678712950 0:0 KernelExecution:645:14696 +129856677463387:129856679258027 0:0 CopyDeviceToHost:647:14696 +129856684088485:129856684823542 0:0 CopyHostToDevice:648:14696 +129856684910895:129856686132975 0:0 KernelExecution:652:14696 +129856684882539:129856686675228 0:0 CopyDeviceToHost:654:14696 +129856691574066:129856692284982 0:0 CopyHostToDevice:655:14696 +129856692371897:129856693594617 0:0 KernelExecution:659:14696 +129856692344278:129856694142257 0:0 CopyDeviceToHost:661:14696 +129856699000899:129856699713058 0:0 CopyHostToDevice:662:14696 +129856699797526:129856701023446 0:0 KernelExecution:666:14696 +129856699769937:129856701569372 0:0 CopyDeviceToHost:668:14696 +129856602781709:129856604636152 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :577 +129856609479851:129856610321075 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :578 +129856610323078:129856610323079 14696:14696 MARK(name(before HIP LaunchKernel)) +129856610326500:129856610327162 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :580 +129856610328857:129856610329498 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :581 +129856610331492:129856610334664 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :582 +129856610336290:129856610336291 14696:14696 MARK(name(after HIP LaunchKernel)) +129856610338048:129856612222255 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :584 +129856616969217:129856617705105 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :585 +129856617706989:129856617706990 14696:14696 MARK(name(before HIP LaunchKernel)) +129856617710485:129856617711142 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :587 +129856617712846:129856617713491 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :588 +129856617715518:129856617718644 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :589 +129856617720274:129856617720275 14696:14696 MARK(name(after HIP LaunchKernel)) +129856617722118:129856619570993 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :591 +129856624331436:129856625292310 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :592 +129856625294207:129856625294208 14696:14696 MARK(name(before HIP LaunchKernel)) +129856625297113:129856625297761 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :594 +129856625299459:129856625300093 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :595 +129856625301835:129856625305409 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :596 +129856625307116:129856625307117 14696:14696 MARK(name(after HIP LaunchKernel)) +129856625309051:129856627159676 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :598 +129856631962417:129856632745795 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :599 +129856632747622:129856632747623 14696:14696 MARK(name(before HIP LaunchKernel)) +129856632761013:129856632761762 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :601 +129856632763565:129856632764219 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :602 +129856632766094:129856632769110 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :603 +129856632770707:129856632770708 14696:14696 MARK(name(after HIP LaunchKernel)) +129856632772662:129856634610068 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :605 +129856639375744:129856640154106 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :606 +129856640155933:129856640155934 14696:14696 MARK(name(before HIP LaunchKernel)) +129856640159565:129856640160216 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :608 +129856640161841:129856640162476 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :609 +129856640164410:129856640167293 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :610 +129856640168886:129856640168887 14696:14696 MARK(name(after HIP LaunchKernel)) +129856640170703:129856642054780 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :612 +129856646841774:129856647623131 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :613 +129856647624849:129856647624850 14696:14696 MARK(name(before HIP LaunchKernel)) +129856647628076:129856647628742 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :615 +129856647630426:129856647631050 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :616 +129856647632957:129856647636281 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :617 +129856647637872:129856647637873 14696:14696 MARK(name(after HIP LaunchKernel)) +129856647639599:129856649488719 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :619 +129856654273909:129856655105030 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :620 +129856655106878:129856655106879 14696:14696 MARK(name(before HIP LaunchKernel)) +129856655109847:129856655110497 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :622 +129856655112292:129856655112914 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :623 +129856655114757:129856655118162 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :624 +129856655119835:129856655119836 14696:14696 MARK(name(after HIP LaunchKernel)) +129856655121792:129856656973292 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :626 PASSED! ## Iteration (4) ################# +129856661755424:129856662589447 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :627 +129856662591236:129856662591237 14696:14696 MARK(name(before HIP LaunchKernel)) +129856662604066:129856662604831 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :629 +129856662606611:129856662607261 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :630 +129856662608995:129856662611988 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :631 +129856662613644:129856662613645 14696:14696 MARK(name(after HIP LaunchKernel)) +129856662615584:129856664462467 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :633 +129856669256336:129856670039683 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :634 +129856670041634:129856670041635 14696:14696 MARK(name(before HIP LaunchKernel)) +129856670054499:129856670055254 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :636 +129856670056982:129856670057615 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :637 +129856670059351:129856670062513 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :638 +129856670064113:129856670064114 14696:14696 MARK(name(after HIP LaunchKernel)) +129856670066200:129856671906923 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :640 +129856676668791:129856677404223 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :641 +129856677406068:129856677406069 14696:14696 MARK(name(before HIP LaunchKernel)) +129856677408812:129856677409484 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :643 +129856677411095:129856677411722 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :644 +129856677413461:129856677416941 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :645 +129856677418503:129856677418504 14696:14696 MARK(name(after HIP LaunchKernel)) +129856677420242:129856679269939 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :647 +129856684019418:129856684826552 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :648 +129856684828363:129856684828364 14696:14696 MARK(name(before HIP LaunchKernel)) +129856684832034:129856684832695 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :650 +129856684834368:129856684834970 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :651 +129856684836877:129856684839963 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :652 +129856684841560:129856684841561 14696:14696 MARK(name(after HIP LaunchKernel)) +129856684843320:129856686688518 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :654 +129856691504696:129856692288950 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :655 +129856692290798:129856692290799 14696:14696 MARK(name(before HIP LaunchKernel)) +129856692292859:129856692293513 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :657 +129856692295227:129856692295860 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :658 +129856692297819:129856692300821 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :659 +129856692302355:129856692302356 14696:14696 MARK(name(after HIP LaunchKernel)) +129856692304530:129856694153679 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :661 +129856698928289:129856699716162 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :662 +129856699717890:129856699717891 14696:14696 MARK(name(before HIP LaunchKernel)) +129856699720061:129856699720715 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :664 +129856699722330:129856699722941 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :665 +129856699724836:129856699728198 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :666 +129856699729953:129856699729954 14696:14696 MARK(name(after HIP LaunchKernel)) +129856699731887:129856701581422 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :668 PASSED! ## Iteration (3) ################# -3802702605023015:3802702606699537 0:0 CopyHostToDevice:620:1991 -3802702606793386:3802702607994484 0:0 KernelExecution:624:1991 -3802702606770418:3802702609331847 0:0 CopyDeviceToHost:626:1991 -3802702616295619:3802702617971351 0:0 CopyHostToDevice:627:1991 -3802702618064582:3802702619276198 0:0 KernelExecution:631:1991 -3802702618041252:3802702620593170 0:0 CopyDeviceToHost:633:1991 -3802702627572022:3802702629249514 0:0 CopyHostToDevice:634:1991 -3802702629343204:3802702630550228 0:0 KernelExecution:638:1991 -3802702629319715:3802702631886524 0:0 CopyDeviceToHost:640:1991 -3802702638854896:3802702640514568 0:0 CopyHostToDevice:641:1991 -3802702640601153:3802702641794250 0:0 KernelExecution:645:1991 -3802702640583338:3802702643131137 0:0 CopyDeviceToHost:647:1991 -3802702650106259:3802702651784942 0:0 CopyHostToDevice:648:1991 -3802702651876671:3802702653079250 0:0 KernelExecution:652:1991 -3802702651853582:3802702654414351 0:0 CopyDeviceToHost:654:1991 -3802702661383522:3802702663061155 0:0 CopyHostToDevice:655:1991 -3802702663154356:3802702664347453 0:0 KernelExecution:659:1991 -3802702663130645:3802702665680984 0:0 CopyDeviceToHost:661:1991 -3802702672630496:3802702674303238 0:0 CopyHostToDevice:662:1991 -3802702674398093:3802702675599190 0:0 KernelExecution:666:1991 -3802702674374489:3802702676932868 0:0 CopyDeviceToHost:668:1991 -3802702683898880:3802702685606503 0:0 CopyHostToDevice:669:1991 -3802702685701165:3802702686898410 0:0 KernelExecution:673:1991 -3802702685678193:3802702688219002 0:0 CopyDeviceToHost:675:1991 -3802702695162453:3802702696838515 0:0 CopyHostToDevice:676:1991 -3802702696932444:3802702698137097 0:0 KernelExecution:680:1991 -3802702696909796:3802702699473165 0:0 CopyDeviceToHost:682:1991 PASSED! ## Iteration (2) ################# PASSED! @@ -498,12 +1156,55 @@ PASSED! PASSED! ## Iteration (0) ################# PASSED! -3802702706580728:3802702708245350 0:0 CopyHostToDevice:683:1991 -3802702708346791:3802702709549370 0:0 KernelExecution:687:1991 -3802702708322181:3802702710885410 0:0 CopyDeviceToHost:689:1991 -3802702717849822:3802702719525044 0:0 CopyHostToDevice:690:1991 -3802702719618857:3802702720813139 0:0 KernelExecution:694:1991 -3802702719594825:3802702722149644 0:0 CopyDeviceToHost:696:1991 -3802702729111215:3802702730788167 0:0 CopyHostToDevice:697:1991 -3802702730881622:3802702732076497 0:0 KernelExecution:701:1991 -3802702730858498:3802702733412517 0:0 CopyDeviceToHost:703:1991 +129856706468741:129856707235310 0:0 CopyHostToDevice:669:14696 +129856707327230:129856708548510 0:0 KernelExecution:673:14696 +129856707299810:129856709098218 0:0 CopyDeviceToHost:675:14696 +129856713958124:129856714730788 0:0 CopyHostToDevice:676:14696 +129856714818472:129856716040872 0:0 KernelExecution:680:14696 +129856714790211:129856716592662 0:0 CopyDeviceToHost:682:14696 +129856721429109:129856722193080 0:0 CopyHostToDevice:683:14696 +129856722282194:129856723505714 0:0 KernelExecution:687:14696 +129856722254384:129856724056420 0:0 CopyDeviceToHost:689:14696 +129856728891611:129856729607012 0:0 CopyHostToDevice:690:14696 +129856729693911:129856730917431 0:0 KernelExecution:694:14696 +129856729665766:129856731460761 0:0 CopyDeviceToHost:696:14696 +129856736249266:129856736963101 0:0 CopyHostToDevice:697:14696 +129856737053267:129856738276147 0:0 KernelExecution:701:14696 +129856737025461:129856738822547 0:0 CopyDeviceToHost:703:14696 +129856706409352:129856707238410 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :669 +129856707240341:129856707240342 14696:14696 MARK(name(before HIP LaunchKernel)) +129856707253495:129856707254390 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :671 +129856707256214:129856707256878 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :672 +129856707258659:129856707261885 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :673 +129856707263518:129856707263519 14696:14696 MARK(name(after HIP LaunchKernel)) +129856707265698:129856709110388 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :675 +129856713891418:129856714734007 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :676 +129856714735794:129856714735795 14696:14696 MARK(name(before HIP LaunchKernel)) +129856714739058:129856714739715 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :678 +129856714741339:129856714741972 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :679 +129856714743986:129856714747316 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :680 +129856714748993:129856714748994 14696:14696 MARK(name(after HIP LaunchKernel)) +129856714750976:129856716607126 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :682 +129856721364192:129856722196489 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :683 +129856722198322:129856722198323 14696:14696 MARK(name(before HIP LaunchKernel)) +129856722202102:129856722202759 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :685 +129856722204452:129856722205080 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :686 +129856722207098:129856722210100 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :687 +129856722211652:129856722211653 14696:14696 MARK(name(after HIP LaunchKernel)) +129856722213452:129856724068250 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :689 +129856728873958:129856729610520 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :690 +129856729612474:129856729612475 14696:14696 MARK(name(before HIP LaunchKernel)) +129856729615953:129856729616618 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :692 +129856729618275:129856729618880 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :693 +129856729620844:129856729623983 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :694 +129856729625525:129856729625526 14696:14696 MARK(name(after HIP LaunchKernel)) +129856729627363:129856731472859 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :696 +129856736212718:129856736966611 14696:14696 hipMemcpy(dst=0x7fd65ce00000, src=0x7fd7781ff010, sizeBytes=4194304, kind=1) :697 +129856736968384:129856736968385 14696:14696 MARK(name(before HIP LaunchKernel)) +129856736971498:129856736972186 14696:14696 __hipPushCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :699 +129856736973934:129856736974581 14696:14696 __hipPopCallConfiguration(gridDim={}, blockDim={}, sharedMem=0, stream=0) :700 +129856736976433:129856736979849 14696:14696 hipLaunchKernel(function_address=0x4010c0, numBlocks={}, dimBlocks={}, args=0x7ffe6d9cea08, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :701 +129856736981559:129856736981560 14696:14696 MARK(name(after HIP LaunchKernel)) +129856736983603:129856738834349 14696:14696 hipMemcpy(dst=0x7fd65d707010, src=0x7fd65c800000, sizeBytes=4194304, kind=2) :703 +129856743571751:129856743607276 14696:14696 hipFree(ptr=0x7fd65ce00000) :704 +129856743609591:129856743621235 14696:14696 hipFree(ptr=0x7fd65c800000) :705 diff --git a/projects/roctracer/test/golden_traces/tests_trace_cmp_levels.txt b/projects/roctracer/test/golden_traces/tests_trace_cmp_levels.txt index 5e6dbaa7a4..5311d81333 100644 --- a/projects/roctracer/test/golden_traces/tests_trace_cmp_levels.txt +++ b/projects/roctracer/test/golden_traces/tests_trace_cmp_levels.txt @@ -10,3 +10,5 @@ MatrixTranspose_hip_flush_trace --check-order .* MatrixTranspose_kfd_trace --check-events .* ctrl_hsa_trace --check-event .* ctrl_hsa_input_trace --check-event .* +hsa_co_trace --check-none +code_obj_trace --check-none diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index c593106135..577bdd30d7 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -151,6 +151,13 @@ echo "