diff --git a/CMakeLists.txt b/CMakeLists.txt index 682fda8947..04136e0081 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,12 +90,25 @@ install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hip.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hcc.h DESTINATION include ) +install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_ext.h DESTINATION include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/prof_protocol.h DESTINATION include/ext ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext ) install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCTRACER_NAME} ) install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so ) install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION tool ) +## rocTX +set ( ROCTX_TARGET "roctx64" ) +set ( ROCTX_LIBRARY "lib${ROCTX_TARGET}" ) + +add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link ) + +install ( TARGETS "roctx64" LIBRARY DESTINATION lib ) +install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) +install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) + ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-dev" ) diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index d4b2567021..ff195882ea 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -31,10 +31,17 @@ typedef enum { ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_ROCTX = 4, // ROCTX domain + ACTIVITY_DOMAIN_EXT_API = 4, // External ID domain + ACTIVITY_DOMAIN_ROCTX = 5, // ROCTX domain ACTIVITY_DOMAIN_NUMBER } activity_domain_t; +// Extension API opcodes +typedef enum { + ACTIVITY_EXT_OP_MARK = 0, + ACTIVITY_EXT_OP_EXTERN_ID = 1 +} activity_ext_op_t; + // API calback type typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg); typedef uint32_t activity_kind_t; @@ -67,6 +74,9 @@ struct activity_record_t { uint32_t process_id; // device id uint32_t thread_id; // thread id }; + struct { + activity_correlation_id_t external_id; // external correlatino id + }; }; size_t bytes; // data size bytes }; diff --git a/inc/roctracer.h b/inc/roctracer.h index 7ac5a23bb2..5f469616d2 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -210,10 +210,7 @@ roctracer_status_t roctracer_disable_activity(); roctracer_status_t roctracer_flush_activity( roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one -// Mark API -void roctracer_mark(const char* str); - -// Load/Un;oad methods +// Load/Unload methods // Set properties roctracer_status_t roctracer_set_properties( roctracer_domain_t domain, // tracing domain diff --git a/inc/roctracer_ext.h b/inc/roctracer_ext.h new file mode 100644 index 0000000000..6a1edb9af3 --- /dev/null +++ b/inc/roctracer_ext.h @@ -0,0 +1,61 @@ +/* +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +//////////////////////////////////////////////////////////////////////////////// +// +// ROC Tracer Extension API +// +// The API provides functionality for application annotation with event and +// external ranges correlation +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef INC_ROCTRACER_EXT_H_ +#define INC_ROCTRACER_EXT_H_ + +#include "roctracer.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +//////////////////////////////////////////////////////////////////////////////// +// Mark API +void roctracer_mark(const char* str); + +//////////////////////////////////////////////////////////////////////////////// +// External correlation id API + +// Notifies that the calling thread is entering an external API region. +// Push an external correlation id for the calling thread. +roctracer_status_t roctracer_activity_push_external_correlation_id(activity_correlation_id_t id); + +// Notifies that the calling thread is leaving an external API region. +// Pop an external correlation id for the calling thread. +// 'lastId' returns the last external correlation +roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id = NULL); + +#ifdef __cplusplus +} // extern "C" block +#endif // __cplusplus + +#endif // INC_ROCTRACER_EXT_H_ diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 92a2bc640d..7c38dcc3b3 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -29,6 +29,7 @@ THE SOFTWARE. #include #include +#include #include #include #include @@ -390,6 +391,8 @@ typedef std::mutex correlation_id_mutex_t; correlation_id_map_t* correlation_id_map = NULL; correlation_id_mutex_t correlation_id_mutex; +static thread_local std::stack external_id_stack; + static inline void CorrelationIdRegistr(const activity_correlation_id_t& correlation_id) { std::lock_guard lck(correlation_id_mutex); if (correlation_id_map == NULL) correlation_id_map = new correlation_id_map_t; @@ -432,6 +435,16 @@ roctracer_record_t* HIP_SyncActivityCallback( record->end_ns = timer.timestamp_ns(); record->process_id = syscall(__NR_getpid); record->thread_id = syscall(__NR_gettid); + + if (external_id_stack.empty() == false) { + roctracer_record_t ext_record{}; + ext_record.domain = ACTIVITY_DOMAIN_EXT_API; + ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID; + ext_record.correlation_id = record->correlation_id; + ext_record.external_id = external_id_stack.top(); + pool->Write(ext_record); + } + pool->Write(*record); // Clearing correlatin ID correlation_id_tls = 0; @@ -581,7 +594,7 @@ std::atomic util::Logger::instance_{}; MemoryPool* memory_pool = NULL; typedef std::recursive_mutex memory_pool_mutex_t; memory_pool_mutex_t memory_pool_mutex; -} +} // namespace roctracer LOADER_INSTANTIATE(); @@ -652,6 +665,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) { case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; + case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); @@ -959,10 +973,37 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity(roctracer_pool_t* pool) { API_METHOD_SUFFIX } +// Notifies that the calling thread is entering an external API region. +// Push an external correlation id for the calling thread. +PUBLIC_API roctracer_status_t roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) { + API_METHOD_PREFIX + roctracer::external_id_stack.push(id); + API_METHOD_SUFFIX +} + +// Notifies that the calling thread is leaving an external API region. +// Pop an external correlation id for the calling thread. +// 'lastId' returns the last external correlation +PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) { + API_METHOD_PREFIX + if (last_id != NULL) *last_id = 0; + + if (roctracer::external_id_stack.empty() != true) { + if (last_id != NULL) *last_id = roctracer::external_id_stack.top(); + roctracer::external_id_stack.pop(); + } else { +#if 0 + EXC_RAISING(ROCTRACER_STATUS_ERROR, "not matching external range pop"); +#endif + return ROCTRACER_STATUS_ERROR; + } + API_METHOD_SUFFIX +} + // Mark API PUBLIC_API void roctracer_mark(const char* str) { if (mark_api_callback_ptr) { - mark_api_callback_ptr(ACTIVITY_DOMAIN_NUMBER, 0, str, NULL); + mark_api_callback_ptr(ACTIVITY_DOMAIN_NUMBER, ACTIVITY_EXT_OP_MARK, str, NULL); roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id } } diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index dffb0eb984..154c2ade3d 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -22,6 +22,9 @@ THE SOFTWARE. #include +// roctracer extension API +#include + // hip header file #include @@ -94,17 +97,31 @@ int main() { hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + // correlation reagion32 + roctracer_activity_push_external_correlation_id(31); + // correlation reagion32 + roctracer_activity_push_external_correlation_id(32); + // Memory transfer from host to device hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); + // correlation reagion33 + roctracer_activity_push_external_correlation_id(33); + // Lauching kernel from host hipLaunchKernel(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); + // correlation reagion end + roctracer_activity_pop_external_correlation_id(NULL); + // Memory transfer from device to host hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + // correlation reagion end + roctracer_activity_pop_external_correlation_id(); + // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -126,6 +143,11 @@ int main() { hipFree(gpuMatrix); hipFree(gpuTransposeMatrix); + // correlation reagion end + roctracer_activity_pop_external_correlation_id(); + // correlation reagion end + roctracer_activity_pop_external_correlation_id(); + // free the resources on host side free(Matrix); free(TransposeMatrix); @@ -231,11 +253,15 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->device_id, record->queue_id ); + if (record->op == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); + } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { + fprintf(stdout, " external_id(%lu)", + record->external_id + ); } else { fprintf(stderr, "Bad domain %d\n", record->domain); abort(); } - if (record->op == hc::HSA_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); fprintf(stdout, "\n"); fflush(stdout); ROCTRACER_CALL(roctracer_next_record(record, &record)); diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index af48e6ebd9..f0ce7af6d9 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -240,7 +240,7 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { const timestamp_t end_timestamp = entry->end; std::ostringstream oss; \ - const char* str = (domain < ACTIVITY_DOMAIN_NUMBER) ? roctracer_op_string(domain, cid, 0) : strdup("MARK"); + const char* str = (domain == ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK"); oss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" << entry->tid << " " << str;