From de13d2ac5d68fce0341d5b6514d4051380d1e07c Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Thu, 25 Apr 2024 20:09:11 -0500 Subject: [PATCH] Public C++ header files and samples updates (#819) * Public C++ header files (source/include/rocprofiler-sdk/cxx) * Update samples/api_buffered_tracing - scratch memory and page migration - README * Update samples/api_buffered_tracing - page migration component in sample * Update tests/page-migration/validate.py - fix checks for page migration operation names * Update tests/page-migration/validate.py - fix get_allocated_pages * Update scratch memory and page migration validations * Fix include/rocprofiler-sdk/cxx installation * Rework include/rocprofiler-sdk/cxx - Improve name_info to support const char*, string_view, string * Update samples/api_{buffered,callback}_tracing * External correlation ID request sample - includes correlation ID retirement demo * Update samples/api_buffered_tracing/README.md * Update lib/rocprofiler-sdk/hsa/queue.cpp - generate correlation ID for kernel launch if one doesn't exist * Remove priority check from tool libraries (samples/tests) - if(priority > 0) return nullptr check in rocprofiler_configure has proliferated beyond its intended use * Apply suggestions from code review --- samples/CMakeLists.txt | 1 + samples/advanced_thread_trace/client.cpp | 7 +- samples/api_buffered_tracing/README.md | 18 + samples/api_buffered_tracing/client.cpp | 209 ++++-- samples/api_buffered_tracing/main.cpp | 238 ++++++- samples/api_callback_tracing/client.cpp | 19 +- samples/code_object_isa_decode/client.cpp | 7 +- samples/code_object_tracing/client.cpp | 7 +- samples/common/defines.hpp | 25 + samples/common/name_info.hpp | 141 +--- .../counter_collection/callback_client.cpp | 10 +- samples/counter_collection/client.cpp | 10 +- .../print_functional_counters.cpp | 10 +- .../CMakeLists.txt | 59 ++ .../external_correlation_id_request/README.md | 24 + .../client.cpp | 632 ++++++++++++++++++ .../client.hpp | 49 ++ .../external_correlation_id_request/main.cpp | 412 ++++++++++++ samples/intercept_table/client.cpp | 7 +- source/docs/tool_library_overview.md | 3 - source/include/rocprofiler-sdk/CMakeLists.txt | 1 + .../rocprofiler-sdk/cxx/CMakeLists.txt | 13 + .../cxx/details/CMakeLists.txt | 11 + .../rocprofiler-sdk/cxx/details/mpl.hpp | 84 +++ .../rocprofiler-sdk/cxx/details/name_info.hpp | 180 +++++ .../include/rocprofiler-sdk/cxx/name_info.hpp | 116 ++++ .../rocprofiler-sdk/cxx/serialization.hpp | 58 ++ source/include/rocprofiler-sdk/registration.h | 3 +- source/lib/rocprofiler-sdk/hsa/queue.cpp | 57 +- .../page_migration/defines.hpp | 2 +- tests/c-tool/CMakeLists.txt | 21 +- tests/common/name_info.hpp | 27 + tests/page-migration/validate.py | 58 +- tests/pytest-packages/pytest_utils/dotdict.py | 8 +- tests/scratch-memory-tracing/validate.py | 33 +- tests/tools/c-tool.c | 6 +- tests/tools/json-tool.cpp | 178 +---- 37 files changed, 2232 insertions(+), 512 deletions(-) create mode 100644 samples/api_buffered_tracing/README.md create mode 100644 samples/external_correlation_id_request/CMakeLists.txt create mode 100644 samples/external_correlation_id_request/README.md create mode 100644 samples/external_correlation_id_request/client.cpp create mode 100644 samples/external_correlation_id_request/client.hpp create mode 100644 samples/external_correlation_id_request/main.cpp create mode 100644 source/include/rocprofiler-sdk/cxx/CMakeLists.txt create mode 100644 source/include/rocprofiler-sdk/cxx/details/CMakeLists.txt create mode 100644 source/include/rocprofiler-sdk/cxx/details/mpl.hpp create mode 100644 source/include/rocprofiler-sdk/cxx/details/name_info.hpp create mode 100644 source/include/rocprofiler-sdk/cxx/name_info.hpp create mode 100644 source/include/rocprofiler-sdk/cxx/serialization.hpp create mode 100644 tests/common/name_info.hpp diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index fbd6032b66..5539961fa4 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -29,4 +29,5 @@ add_subdirectory(api_buffered_tracing) add_subdirectory(code_object_tracing) add_subdirectory(counter_collection) add_subdirectory(intercept_table) +add_subdirectory(external_correlation_id_request) # add_subdirectory(code_object_isa_decode) add_subdirectory(advanced_thread_trace) diff --git a/samples/advanced_thread_trace/client.cpp b/samples/advanced_thread_trace/client.cpp index 05773cd309..cc43c06165 100644 --- a/samples/advanced_thread_trace/client.cpp +++ b/samples/advanced_thread_trace/client.cpp @@ -543,9 +543,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "Adv_Thread_Trace_Sample"; @@ -559,8 +556,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/api_buffered_tracing/README.md b/samples/api_buffered_tracing/README.md new file mode 100644 index 0000000000..7ae5e3ec62 --- /dev/null +++ b/samples/api_buffered_tracing/README.md @@ -0,0 +1,18 @@ +# API Buffer Tracing Sample + +## Services + +- Code object callback tracing for mapping kernel IDs to kernel names +- HSA API (Core, AMD Ext) +- HIP API (Runtime) +- Kernel dispatch +- Memory copy +- Page Migration +- Scratch Memory + +## Properties + +- Buffer size of 4096 bytes which is automatically flushed once >= 87.5% of buffer is filled (3584 bytes) +- Creation of dedicated thread for buffer callback delivery +- Push external correlation IDs once per thread (value is thread ID) +- Receives notifications for internal thread creation diff --git a/samples/api_buffered_tracing/client.cpp b/samples/api_buffered_tracing/client.cpp index b7303b5885..36de4ecfd7 100644 --- a/samples/api_buffered_tracing/client.cpp +++ b/samples/api_buffered_tracing/client.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -148,21 +149,29 @@ tool_tracing_callback(rocprofiler_context_id_t context, { auto* header = headers[i]; - if(header == nullptr) + auto kind_name = std::string{}; + if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING) { - throw std::runtime_error{ - "rocprofiler provided a null pointer to header. this should never happen"}; + const char* _name = nullptr; + auto _kind = static_cast(header->kind); + ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(_kind, &_name, nullptr), + "query buffer tracing kind name"); + if(_name) + { + static size_t len = 15; + + kind_name = std::string{_name}; + len = std::max(len, kind_name.length()); + kind_name.resize(len, ' '); + kind_name += " :: "; + } } - else if(header->hash != - rocprofiler_record_header_compute_hash(header->category, header->kind)) - { - throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"}; - } - else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && - (header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API || - header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API || - header->kind == ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API || - header->kind == ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API)) + + if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + (header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API || + header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API || + header->kind == ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API || + header->kind == ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API)) { auto* record = static_cast(header->payload); @@ -173,7 +182,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, << ", extern_cid=" << record->correlation_id.external.value << ", kind=" << record->kind << ", operation=" << record->operation << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp - << ", name=" << client_name_info.operation_names[record->kind][record->operation]; + << ", name=" << client_name_info.at(record->kind, record->operation); if(record->start_timestamp > record->end_timestamp) { @@ -186,7 +195,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, } static_cast(user_data)->emplace_back( - source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); } else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API) @@ -200,7 +209,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, << ", extern_cid=" << record->correlation_id.external.value << ", kind=" << record->kind << ", operation=" << record->operation << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp - << ", name=" << client_name_info.operation_names[record->kind][record->operation]; + << ", name=" << client_name_info[record->kind][record->operation]; if(record->start_timestamp > record->end_timestamp) { @@ -213,7 +222,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, } static_cast(user_data)->emplace_back( - source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); } else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) @@ -223,15 +232,16 @@ tool_tracing_callback(rocprofiler_context_id_t context, auto info = std::stringstream{}; - info << "agent_id=" << record->dispatch_info.agent_id.handle + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle + << ", cid=" << record->correlation_id.internal + << ", extern_cid=" << record->correlation_id.external.value + << ", kind=" << record->kind << ", operation=" << record->operation + << ", agent_id=" << record->dispatch_info.agent_id.handle << ", queue_id=" << record->dispatch_info.queue_id.handle << ", kernel_id=" << record->dispatch_info.kernel_id << ", kernel=" << client_kernels.at(record->dispatch_info.kernel_id).kernel_name - << ", context=" << context.handle << ", buffer_id=" << buffer_id.handle - << ", cid=" << record->correlation_id.internal - << ", extern_cid=" << record->correlation_id.external.value - << ", kind=" << record->kind << ", start=" << record->start_timestamp - << ", stop=" << record->end_timestamp + << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp << ", private_segment_size=" << record->dispatch_info.private_segment_size << ", group_segment_size=" << record->dispatch_info.group_segment_size << ", workgroup_size=(" << record->dispatch_info.workgroup_size.x << "," @@ -244,7 +254,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, throw std::runtime_error("kernel dispatch: start > end"); static_cast(user_data)->emplace_back( - source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); } else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) @@ -254,20 +264,111 @@ tool_tracing_callback(rocprofiler_context_id_t context, auto info = std::stringstream{}; - info << "src_agent_id=" << record->src_agent_id.handle - << ", dst_agent_id=" << record->dst_agent_id.handle - << ", direction=" << record->operation << ", context=" << context.handle + info << "tid=" << record->thread_id << ", context=" << context.handle << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal << ", extern_cid=" << record->correlation_id.external.value - << ", kind=" << record->kind << ", start=" << record->start_timestamp - << ", stop=" << record->end_timestamp; + << ", kind=" << record->kind << ", operation=" << record->operation + << ", src_agent_id=" << record->src_agent_id.handle + << ", dst_agent_id=" << record->dst_agent_id.handle + << ", direction=" << record->operation << ", start=" << record->start_timestamp + << ", stop=" << record->end_timestamp + << ", name=" << client_name_info.at(record->kind, record->operation); if(record->start_timestamp > record->end_timestamp) throw std::runtime_error("memory copy: start > end"); static_cast(user_data)->emplace_back( - source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION) + { + auto* record = + static_cast(header->payload); + + auto info = std::stringstream{}; + + info << "kind=" << record->kind << ", operation=" << record->operation + << ", pid=" << record->pid << ", start=" << record->start_timestamp + << ", stop=" << record->end_timestamp + << ", name=" << client_name_info.at(record->kind, record->operation); + + switch(record->operation) + { + case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE: + { + info << ", page_fault=(" << record->page_fault.read_fault << ", " + << record->page_fault.migrated << ", " << record->page_fault.node_id + << ", " << std::hex << "0x" << record->page_fault.address << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT: + { + info << ", page_migrate=(" << std::hex << "0x" + << record->page_migrate.start_addr << ", 0x" + << record->page_migrate.end_addr << ", " << std::dec + << record->page_migrate.from_node << ", " << record->page_migrate.to_node + << ", " << record->page_migrate.prefetch_node << ", " + << record->page_migrate.preferred_node << ", " + << record->page_migrate.trigger << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND: + { + info << ", queue_suspend=(" << record->queue_suspend.rescheduled << ", " + << record->queue_suspend.node_id << ", " << record->queue_suspend.trigger + << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU: + { + info << ", unmap_from_gpu=(" << record->unmap_from_gpu.node_id << std::hex + << ", 0x" << record->unmap_from_gpu.start_addr << ", 0x" + << record->unmap_from_gpu.end_addr << ", " << std::dec + << record->unmap_from_gpu.trigger << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_NONE: + case ROCPROFILER_PAGE_MIGRATION_LAST: + { + throw std::runtime_error{"unexpected page migration value"}; + break; + } + } + + if(record->start_timestamp > record->end_timestamp) + throw std::runtime_error("page migration: start > end"); + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY) + { + auto* record = + static_cast(header->payload); + + auto info = std::stringstream{}; + + auto _elapsed = + std::chrono::duration_cast>( + std::chrono::nanoseconds{record->end_timestamp - record->start_timestamp}) + .count(); + + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle + << ", cid=" << record->correlation_id.internal + << ", extern_cid=" << record->correlation_id.external.value + << ", kind=" << record->kind << ", operation=" << record->operation + << ", agent_id=" << record->agent_id.handle + << ", queue_id=" << record->queue_id.handle << ", thread_id=" << record->thread_id + << ", elapsed=" << std::setprecision(3) << std::fixed << _elapsed + << " usec, flags=" << record->flags + << ", name=" << client_name_info.at(record->kind, record->operation); + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); } else { @@ -312,25 +413,25 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) client_name_info = common::get_buffer_tracing_names(); - for(const auto& itr : client_name_info.operation_names) + for(const auto& itr : client_name_info) { auto name_idx = std::stringstream{}; - name_idx << " [" << std::setw(3) << static_cast(itr.first) << "]"; + name_idx << " [" << std::setw(3) << itr.value << "]"; call_stack_v->emplace_back( source_location{"rocprofiler_buffer_tracing_kind_names " + name_idx.str(), __FILE__, __LINE__, - client_name_info.kind_names.at(itr.first)}); + std::string{itr.name}}); - for(const auto& ditr : itr.second) + for(auto [didx, ditr] : itr.items()) { auto operation_idx = std::stringstream{}; - operation_idx << " [" << std::setw(3) << static_cast(ditr.first) << "]"; + operation_idx << " [" << std::setw(3) << didx << "]"; call_stack_v->emplace_back(source_location{ "rocprofiler_buffer_tracing_kind_operation_names" + operation_idx.str(), __FILE__, __LINE__, - std::string{"- "} + std::string{ditr.second}}); + std::string{"- "} + std::string{*ditr}}); } } @@ -338,28 +439,32 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation"); + auto code_object_ops = std::vector{ + ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER}; + ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(client_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, - nullptr, - 0, + code_object_ops.data(), + code_object_ops.size(), tool_code_object_callback, nullptr), "code object tracing service configure"); + constexpr auto buffer_size_bytes = 4096; + constexpr auto buffer_watermark_bytes = buffer_size_bytes - (buffer_size_bytes / 8); + ROCPROFILER_CALL(rocprofiler_create_buffer(client_ctx, - 4096, - 2048, + buffer_size_bytes, + buffer_watermark_bytes, ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_callback, tool_data, &client_buffer), "buffer creation"); - for(auto itr : {ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, - ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API, - ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API, - ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API}) + for(auto itr : + {ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API}) { ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( client_ctx, itr, nullptr, 0, client_buffer), @@ -381,6 +486,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) client_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, client_buffer), "buffer tracing service for memory copy configure"); + // May have incompatible kernel so only emit a warning here + ROCPROFILER_WARN(rocprofiler_configure_buffer_tracing_service( + client_ctx, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, nullptr, 0, client_buffer)); + + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service( + client_ctx, ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, nullptr, 0, client_buffer), + "buffer tracing service for page migration configure"); + auto client_thread = rocprofiler_callback_thread_t{}; ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread), "creating callback thread"); @@ -469,9 +583,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "ExampleTool"; @@ -485,8 +596,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/api_buffered_tracing/main.cpp b/samples/api_buffered_tracing/main.cpp index c87ccff4d7..de8b42cede 100644 --- a/samples/api_buffered_tracing/main.cpp +++ b/samples/api_buffered_tracing/main.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #define HIP_API_CALL(CALL) \ @@ -51,7 +52,7 @@ namespace { using auto_lock_t = std::unique_lock; auto print_lock = std::mutex{}; -size_t nthreads = 2; +size_t nthread_per_device = 2; size_t nitr = 500; size_t nsync = 10; constexpr unsigned shared_mem_tile_dim = 32; @@ -64,10 +65,19 @@ verify(int* in, int* out, int M, int N); } // namespace __global__ void -transpose_a(const int* in, int* out, int M, int N); +transpose(const int* in, int* out, int M, int N); void -run(int rank, int tid, hipStream_t stream, int argc, char** argv); +run(int rank, int tid, int devid, int argc, char** argv); + +void +run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv); + +void +run_migrate(int rank, int tid, hipStream_t stream, int, char** argv); + +void +run_scratch(int rank, int tid, hipStream_t stream, int argc, char** argv); int main(int argc, char** argv) @@ -76,6 +86,8 @@ main(int argc, char** argv) client::start(); // starts context before any API tables are available client::identify(1); + auto* exe_name = ::basename(argv[0]); + int rank = 0; for(int i = 1; i < argc; ++i) { @@ -83,47 +95,38 @@ main(int argc, char** argv) if(_arg == "?" || _arg == "-h" || _arg == "--help") { fprintf(stderr, - "usage: transpose [NUM_THREADS (%zu)] [NUM_ITERATION (%zu)] " + "usage: %s [NUM_THREADS_PER_DEVICE (%zu)] [NUM_ITERATION (%zu)] " "[SYNC_EVERY_N_ITERATIONS (%zu)]\n", - nthreads, + exe_name, + nthread_per_device, nitr, nsync); exit(EXIT_SUCCESS); } } - if(argc > 1) nthreads = atoll(argv[1]); + if(argc > 1) nthread_per_device = atoll(argv[1]); if(argc > 2) nitr = atoll(argv[2]); if(argc > 3) nsync = atoll(argv[3]); - printf("[transpose] Number of threads: %zu\n", nthreads); - printf("[transpose] Number of iterations: %zu\n", nitr); - printf("[transpose] Syncing every %zu iterations\n", nsync); - - // this is a temporary workaround in omnitrace when HIP + MPI is enabled int ndevice = 0; - int devid = rank; HIP_API_CALL(hipGetDeviceCount(&ndevice)); - printf("[transpose] Number of devices found: %i\n", ndevice); - if(ndevice > 0) + + auto nthreads = (ndevice * nthread_per_device); + + printf("[%s] Number of devices found: %i\n", exe_name, ndevice); + printf("[%s] Number of threads (per device): %zu\n", exe_name, nthread_per_device); + printf("[%s] Number of threads (total): %zu\n", exe_name, nthreads); + printf("[%s] Number of iterations: %zu\n", exe_name, nitr); + printf("[%s] Syncing every %zu iterations\n", exe_name, nsync); + { - devid = rank % ndevice; - HIP_API_CALL(hipSetDevice(devid)); - printf("[transpose] Rank %i assigned to device %i\n", rank, devid); - } - if(rank == devid && rank < ndevice) - { - std::vector _threads{}; - std::vector _streams(nthreads); + auto _threads = std::vector{}; for(size_t i = 0; i < nthreads; ++i) - HIP_API_CALL(hipStreamCreate(&_streams.at(i))); - for(size_t i = 1; i < nthreads; ++i) - _threads.emplace_back(run, rank, i, _streams.at(i), argc, argv); - run(rank, 0, _streams.at(0), argc, argv); + _threads.emplace_back(run, rank, i, i % ndevice, argc, argv); for(auto& itr : _threads) itr.join(); - for(size_t i = 0; i < nthreads; ++i) - HIP_API_CALL(hipStreamDestroy(_streams.at(i))); } + HIP_API_CALL(hipDeviceSynchronize()); HIP_API_CALL(hipDeviceReset()); @@ -134,7 +137,7 @@ main(int argc, char** argv) } __global__ void -transpose_a(const int* in, int* out, int M, int N) +transpose(const int* in, int* out, int M, int N) { __shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim]; @@ -145,17 +148,91 @@ transpose_a(const int* in, int* out, int M, int N) out[idx] = tile[threadIdx.x][threadIdx.y]; } +template +__global__ void +test_page_migrate(Tp* data, Tp val) +{ + int idx = (blockIdx.x * blockDim.x) + threadIdx.x; + data[idx] += val; +} + +__global__ void +test_kern_large(uint64_t* output) +{ + uint64_t result = 0; + int test[4000]; + memset(test, 5, 4000); + for(int& i : test) + { + i = i + 7; + *output += i; + result += i; + } + *output ^= result; + *output ^= result; +} + +__global__ void +test_kern_medium(uint64_t* output) +{ + uint64_t result = 0; + int test[175]; + memset(test, 5, 175); + for(int& i : test) + { + i = i + 7; + *output += i; + result += i; + } + *output ^= result; + *output ^= result; +} + +__global__ void +test_kern_small(uint64_t* output) +{ + uint64_t result = 0; + int test[2]; + for(int& i : test) + { + i = i + 7; + *output += i; + result += i; + } + *output ^= result; + *output ^= result; +} + void -run(int rank, int tid, hipStream_t stream, int argc, char** argv) +run(int rank, int tid, int devid, int argc, char** argv) { client::identify(tid + 1); + + auto* stream = hipStream_t{}; + HIP_API_CALL(hipSetDevice(devid)); + HIP_API_CALL(hipStreamCreate(&stream)); + + run_migrate(rank, tid, stream, argc, argv); + run_scratch(rank, tid, stream, argc, argv); + run_transpose(rank, tid, stream, argc, argv); + + HIP_API_CALL(hipStreamSynchronize(stream)); + HIP_API_CALL(hipStreamDestroy(stream)); +} + +void +run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv) +{ + auto* exe_name = ::basename(argv[0]); + unsigned int M = 4960 * 2; unsigned int N = 4960 * 2; if(argc > 2) nitr = atoll(argv[2]); if(argc > 3) nsync = atoll(argv[3]); auto_lock_t _lk{print_lock}; - std::cout << "[transpose][" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl; + std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid << "] M: " << M + << " N: " << N << std::endl; _lk.unlock(); std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)}; @@ -180,10 +257,11 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) HIP_API_CALL(hipStreamSynchronize(stream)); dim3 grid(M / 32, N / 32, 1); - dim3 block(32, 32, 1); // transpose_a + dim3 block(32, 32, 1); // transpose print_lock.lock(); - printf("[transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n", + printf("[%s][transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n", + exe_name, rank, tid, grid.x, @@ -197,7 +275,7 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) auto t1 = std::chrono::high_resolution_clock::now(); for(size_t i = 0; i < nitr; ++i) { - transpose_a<<>>(in, out, M, N); + transpose<<>>(in, out, M, N); check_hip_error(); if(i % nsync == (nsync - 1)) HIP_API_CALL(hipStreamSynchronize(stream)); } @@ -208,9 +286,9 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) float GB = (float) size * nitr * 2 / (1 << 30); print_lock.lock(); - std::cout << "[transpose][" << rank << "][" << tid << "] Runtime of transpose is " << time - << " sec\n"; - std::cout << "[transpose][" << rank << "][" << tid + std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid + << "] Runtime of transpose is " << time << " sec\n"; + std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid << "] The average performance of transpose is " << GB / time << " GBytes/sec" << std::endl; print_lock.unlock(); @@ -227,6 +305,92 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) delete[] out_matrix; } +void +run_scratch(int rank, int tid, hipStream_t stream, int, char** argv) +{ + auto t1 = std::chrono::high_resolution_clock::now(); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + const auto* exe_name = ::basename(argv[0]); + + uint64_t* data_ptr = nullptr; + HIP_API_CALL(hipHostMalloc(&data_ptr, sizeof(uint64_t), 0)); + *data_ptr = 0; + + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr); + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + test_kern_large<<<1100, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_large<<<1100, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + auto t2 = std::chrono::high_resolution_clock::now(); + double time = std::chrono::duration_cast>(t2 - t1).count(); + + print_lock.lock(); + std::cout << "[" << exe_name << "][scratch][" << rank << "][" << tid + << "] Runtime of scratch is " << time << " sec\n"; + print_lock.unlock(); +} + +void +run_migrate(int rank, int tid, hipStream_t stream, int, char** argv) +{ + using data_type = uint64_t; + constexpr data_type init_v = 1; + constexpr data_type incr_v = 1; + + auto t1 = std::chrono::high_resolution_clock::now(); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + const auto* exe_name = ::basename(argv[0]); + auto page_data = std::vector(1024, 0); + + HIP_API_CALL(hipHostRegister( + page_data.data(), page_data.size() * sizeof(data_type), hipHostRegisterDefault)); + + for(auto& itr : page_data) + itr = init_v; + + test_page_migrate<<<1, 1024, 0, stream>>>(page_data.data(), incr_v); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + for(auto& itr : page_data) + { + auto diff = (itr - incr_v); + if(diff != init_v) + { + auto msg = std::stringstream{}; + msg << "invalid diff: " << diff << ". expected: " << init_v; + throw std::runtime_error{msg.str()}; + } + } + + HIP_API_CALL(hipHostUnregister(page_data.data())); + + auto t2 = std::chrono::high_resolution_clock::now(); + double time = std::chrono::duration_cast>(t2 - t1).count(); + + print_lock.lock(); + std::cout << "[" << exe_name << "][migrate][" << rank << "][" << tid + << "] Runtime of migrate is " << time << " sec\n"; + print_lock.unlock(); +} + namespace { void diff --git a/samples/api_callback_tracing/client.cpp b/samples/api_callback_tracing/client.cpp index 6b22ab4d1f..87247f4f6c 100644 --- a/samples/api_callback_tracing/client.cpp +++ b/samples/api_callback_tracing/client.cpp @@ -191,25 +191,25 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) callback_name_info name_info = common::get_callback_id_names(); - for(const auto& itr : name_info.operation_names) + for(const auto& itr : name_info) { auto name_idx = std::stringstream{}; - name_idx << " [" << std::setw(3) << static_cast(itr.first) << "]"; + name_idx << " [" << std::setw(3) << itr.value << "]"; call_stack_v->emplace_back( source_location{"rocprofiler_callback_tracing_kind_names " + name_idx.str(), __FILE__, __LINE__, - name_info.kind_names.at(itr.first)}); + std::string{itr.name}}); - for(const auto& ditr : itr.second) + for(auto [didx, ditr] : itr.items()) { auto operation_idx = std::stringstream{}; - operation_idx << " [" << std::setw(3) << static_cast(ditr.first) << "]"; + operation_idx << " [" << std::setw(3) << didx << "]"; call_stack_v->emplace_back(source_location{ "rocprofiler_callback_tracing_kind_operation_names" + operation_idx.str(), __FILE__, __LINE__, - std::string{"- "} + std::string{ditr.second}}); + std::string{"- "} + std::string{*ditr}}); } } @@ -322,9 +322,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "ExampleTool"; @@ -338,8 +335,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/code_object_isa_decode/client.cpp b/samples/code_object_isa_decode/client.cpp index 2faabf5318..2f2c491df6 100644 --- a/samples/code_object_isa_decode/client.cpp +++ b/samples/code_object_isa_decode/client.cpp @@ -217,9 +217,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "ExampleTool"; @@ -233,8 +230,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/code_object_tracing/client.cpp b/samples/code_object_tracing/client.cpp index ecb9fe7630..66bc648ec6 100644 --- a/samples/code_object_tracing/client.cpp +++ b/samples/code_object_tracing/client.cpp @@ -352,9 +352,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "ExampleTool"; @@ -368,8 +365,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/common/defines.hpp b/samples/common/defines.hpp index cb647e8238..279a637453 100644 --- a/samples/common/defines.hpp +++ b/samples/common/defines.hpp @@ -22,6 +22,31 @@ #pragma once +#define ROCPROFILER_WARN(result) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::cerr << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \ + << " returned error code " << CHECKSTATUS << ": " << status_msg \ + << ". This is just a warning!" << std::endl; \ + } \ + } + +#define ROCPROFILER_CHECK(result) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::stringstream errmsg{}; \ + errmsg << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \ + << " failed with error code " << CHECKSTATUS << " :: " << status_msg; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } + #define ROCPROFILER_CALL(result, msg) \ { \ rocprofiler_status_t CHECKSTATUS = result; \ diff --git a/samples/common/name_info.hpp b/samples/common/name_info.hpp index 850ed79999..1964bd2624 100644 --- a/samples/common/name_info.hpp +++ b/samples/common/name_info.hpp @@ -24,6 +24,7 @@ #include #include +#include #include "defines.hpp" @@ -37,146 +38,18 @@ namespace common { -using buffer_kind_names_t = std::map; -using buffer_kind_operation_names_t = - std::map>; -using callback_kind_names_t = std::map; -using callback_kind_operation_names_t = - std::map>; +using callback_name_info = rocprofiler::sdk::callback_name_info; +using buffer_name_info = rocprofiler::sdk::buffer_name_info; -struct buffer_name_info -{ - buffer_kind_names_t kind_names = {}; - buffer_kind_operation_names_t operation_names = {}; -}; - -struct callback_name_info -{ - callback_kind_names_t kind_names = {}; - callback_kind_operation_names_t operation_names = {}; -}; - -inline buffer_name_info +inline auto get_buffer_tracing_names() { - static const auto supported_kinds = std::unordered_set{ - ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, - ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API, - ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API, - ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API, - ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API, - ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, - ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API, - ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API, - ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API, - ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, - }; - - auto cb_name_info = buffer_name_info{}; - // - // callback for each kind operation - // - static auto tracing_kind_operation_cb = - [](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) { - auto* name_info_v = static_cast(data_v); - - if(supported_kinds.count(kindv) > 0) - { - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name( - kindv, operation, &name, nullptr), - "query buffer tracing kind operation name"); - if(name) name_info_v->operation_names[kindv][operation] = name; - } - return 0; - }; - - // - // callback for each buffer kind (i.e. domain) - // - static auto tracing_kind_cb = [](rocprofiler_buffer_tracing_kind_t kind, void* data) { - // store the buffer kind name - auto* name_info_v = static_cast(data); - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr), - "query buffer tracing kind operation name"); - if(name) name_info_v->kind_names[kind] = name; - - if(supported_kinds.count(kind) > 0) - { - ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kind_operations( - kind, tracing_kind_operation_cb, static_cast(data)), - "iterating buffer tracing kind operations"); - } - return 0; - }; - - ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb, - static_cast(&cb_name_info)), - "iterating buffer tracing kinds"); - - return cb_name_info; + return rocprofiler::sdk::get_buffer_tracing_names(); } -inline callback_name_info +inline auto get_callback_id_names() { - static auto supported = std::unordered_set{ - ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, - ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, - ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API, - ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API, - ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API, - ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, - ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, - ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, - ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API, - ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, - }; - - auto cb_name_info = callback_name_info{}; - // - // callback for each kind operation - // - static auto tracing_kind_operation_cb = - [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { - auto* name_info_v = static_cast(data_v); - - if(supported.count(kindv) > 0) - { - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( - kindv, operation, &name, nullptr), - "query callback tracing kind operation name"); - if(name) name_info_v->operation_names[kindv][operation] = name; - } - return 0; - }; - - // - // callback for each callback kind (i.e. domain) - // - static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) { - // store the callback kind name - auto* name_info_v = static_cast(data); - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr), - "query callback tracing kind operation name"); - if(name) name_info_v->kind_names[kind] = name; - - if(supported.count(kind) > 0) - { - ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations( - kind, tracing_kind_operation_cb, static_cast(data)), - "iterating callback tracing kind operations"); - } - return 0; - }; - - ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb, - static_cast(&cb_name_info)), - "iterating callback tracing kinds"); - - return cb_name_info; + return rocprofiler::sdk::get_callback_tracing_names(); } } // namespace common diff --git a/samples/counter_collection/callback_client.cpp b/samples/counter_collection/callback_client.cpp index 93d72158b6..3e6a8ef24f 100644 --- a/samples/counter_collection/callback_client.cpp +++ b/samples/counter_collection/callback_client.cpp @@ -207,9 +207,9 @@ tool_fini(void* user_data) } // namespace extern "C" rocprofiler_tool_configure_result_t* -rocprofiler_configure(uint32_t version, - const char* runtime_version, - uint32_t, +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, rocprofiler_client_id_t* id) { // set the client name @@ -222,8 +222,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/counter_collection/client.cpp b/samples/counter_collection/client.cpp index 3b12162b1b..9b115135da 100644 --- a/samples/counter_collection/client.cpp +++ b/samples/counter_collection/client.cpp @@ -286,9 +286,9 @@ tool_fini(void* user_data) } // namespace extern "C" rocprofiler_tool_configure_result_t* -rocprofiler_configure(uint32_t version, - const char* runtime_version, - uint32_t, +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, rocprofiler_client_id_t* id) { // set the client name @@ -301,8 +301,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/counter_collection/print_functional_counters.cpp b/samples/counter_collection/print_functional_counters.cpp index 971955b0a9..4c3da4efe7 100644 --- a/samples/counter_collection/print_functional_counters.cpp +++ b/samples/counter_collection/print_functional_counters.cpp @@ -415,9 +415,9 @@ tool_fini(void*) } // namespace extern "C" rocprofiler_tool_configure_result_t* -rocprofiler_configure(uint32_t version, - const char* runtime_version, - uint32_t, +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, rocprofiler_client_id_t* id) { // set the client name @@ -430,8 +430,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/samples/external_correlation_id_request/CMakeLists.txt b/samples/external_correlation_id_request/CMakeLists.txt new file mode 100644 index 0000000000..ed03511fce --- /dev/null +++ b/samples/external_correlation_id_request/CMakeLists.txt @@ -0,0 +1,59 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-sdk-samples-external-correlation-id-request LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +find_package(rocprofiler-sdk REQUIRED) + +add_library(external-correlation-id-request-client SHARED) +target_sources(external-correlation-id-request-client PRIVATE client.cpp client.hpp) +target_link_libraries( + external-correlation-id-request-client + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags + rocprofiler::samples-common-library) + +set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) +find_package(Threads REQUIRED) + +add_executable(external-correlation-id-request) +target_sources(external-correlation-id-request PRIVATE main.cpp) +target_link_libraries( + external-correlation-id-request + PRIVATE external-correlation-id-request-client Threads::Threads + rocprofiler::samples-build-flags) + +rocprofiler_samples_get_preload_env(PRELOAD_ENV external-correlation-id-request-client) +rocprofiler_samples_get_ld_library_path_env(LIBRARY_PATH_ENV) + +set(external-correlation-id-request-env ${PRELOAD_ENV} ${LIBRARY_PATH_ENV}) + +add_test(NAME external-correlation-id-request + COMMAND $) + +set_tests_properties( + external-correlation-id-request + PROPERTIES TIMEOUT 45 LABELS "samples" ENVIRONMENT + "${external-correlation-id-request-env}" FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/samples/external_correlation_id_request/README.md b/samples/external_correlation_id_request/README.md new file mode 100644 index 0000000000..3ba705f90b --- /dev/null +++ b/samples/external_correlation_id_request/README.md @@ -0,0 +1,24 @@ +# External Correlation ID Request Sample + +## Services + +- Code object callback tracing for mapping kernel IDs to kernel names +- HIP Runtime API: + - hipLaunchKernel + - hipMemcpyAsync + - hipMemsetAsync + - hipMalloc +- Kernel dispatch +- Memory Copy +- External correlation ID request: + - Kernel dispatch + - Memory copy +- Correlation ID retirement + +## Properties + +- Subscribes to an external correlation ID request for all kernel dispatches and async memory copies +- Generates an external correlation ID containing all the arguments passed to the request callback +- Demonstrates that all external correlation IDs which are requested are passed back to tool in buffer callbacks +- Demonstrates that all internal correlation IDs which are provided as an input argument to request are retired +- Buffer size of 4096 bytes which is automatically flushed once >= 87.5% of buffer is filled (3584 bytes) diff --git a/samples/external_correlation_id_request/client.cpp b/samples/external_correlation_id_request/client.cpp new file mode 100644 index 0000000000..5044efcf8e --- /dev/null +++ b/samples/external_correlation_id_request/client.cpp @@ -0,0 +1,632 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/api_buffered_tracing/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "client.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/call_stack.hpp" +#include "common/defines.hpp" +#include "common/filesystem.hpp" +#include "common/name_info.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace +{ +struct external_corr_id_data; + +using common::buffer_name_info; +using common::call_stack_t; +using common::source_location; + +using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; +using kernel_symbol_map_t = std::unordered_map; +using external_corr_id_set_t = std::unordered_set; +using retired_corr_id_set_t = std::unordered_set; + +rocprofiler_client_id_t* client_id = nullptr; +rocprofiler_client_finalize_t client_fini_func = nullptr; +rocprofiler_context_id_t client_ctx = {}; +rocprofiler_buffer_id_t client_buffer = {}; +buffer_name_info* client_name_info = new buffer_name_info{}; +kernel_symbol_map_t* client_kernels = new kernel_symbol_map_t{}; +auto client_mutex = std::shared_mutex{}; +auto client_external_corr_ids = external_corr_id_set_t{}; +auto client_retired_corr_ids = retired_corr_id_set_t{}; + +void +print_call_stack(const call_stack_t& _call_stack) +{ + common::print_call_stack("external_correlation_id_request.log", _call_stack); +} + +void +tool_code_object_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* callback_data) +{ + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_LOAD) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + // flush the buffer to ensure that any lookups for the client kernel names for the code + // object are completed + auto flush_status = rocprofiler_flush_buffer(client_buffer); + if(flush_status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) + ROCPROFILER_CHECK(flush_status); + } + } + else if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) + { + auto* data = static_cast(record.payload); + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + client_kernels->emplace(data->kernel_id, *data); + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + auto flush_status = rocprofiler_flush_buffer(client_buffer); + if(flush_status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) + ROCPROFILER_CHECK(flush_status); + + client_kernels->erase(data->kernel_id); + } + } + + (void) user_data; + (void) callback_data; +} + +struct external_corr_id_data +{ + using request_kind_t = rocprofiler_external_correlation_id_request_kind_t; + static constexpr auto request_none = ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE; + + rocprofiler_thread_id_t thread_id = 0; + rocprofiler_context_id_t context_id = {.handle = 0}; + request_kind_t kind = request_none; + rocprofiler_tracing_operation_t operation = 0; + uint64_t internal_corr_id = 0; + void* user_data = nullptr; + uint64_t seen_count = 0; + + bool valid() const; + + friend std::ostream& operator<<(std::ostream& os, external_corr_id_data data) + { + if(!data.valid()) return os; + auto ss = std::stringstream{}; + ss << "seen=" << data.seen_count << ", thr_id=" << data.thread_id + << ", context_id=" << data.context_id.handle << ", kind=" << data.kind + << ", operation=" << data.operation << ", corr_id=" << data.internal_corr_id + << ", user_data=" << data.user_data; + return (os << ss.str()); + } +}; + +bool +operator==(external_corr_id_data lhs, external_corr_id_data rhs) +{ + return std::tie(lhs.thread_id, + lhs.context_id.handle, + lhs.kind, + lhs.operation, + lhs.internal_corr_id, + lhs.user_data) == std::tie(rhs.thread_id, + rhs.context_id.handle, + rhs.kind, + rhs.operation, + rhs.internal_corr_id, + rhs.user_data); +} + +bool +operator!=(external_corr_id_data lhs, external_corr_id_data rhs) +{ + return !(lhs == rhs); +} + +bool +external_corr_id_data::valid() const +{ + static constexpr auto invalid_v = external_corr_id_data{}; + return (*this != invalid_v); +} + +int +set_external_correlation_id(rocprofiler_thread_id_t thr_id, + rocprofiler_context_id_t ctx_id, + rocprofiler_external_correlation_id_request_kind_t kind, + rocprofiler_tracing_operation_t op, + uint64_t internal_corr_id, + rocprofiler_user_data_t* external_corr_id, + void* user_data) +{ + auto* _data = + new external_corr_id_data{thr_id, ctx_id, kind, op, internal_corr_id, user_data, 0}; + + { + static auto _mtx = std::mutex{}; + auto _lk = std::unique_lock{_mtx}; + client_external_corr_ids.emplace(_data); + } + + external_corr_id->ptr = _data; + + return 0; +} + +void +tool_tracing_callback(rocprofiler_context_id_t context, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* user_data, + uint64_t /*drop_count*/) +{ + static const auto ensure_internal_correlation_id_retirement_ordering = [](uint64_t _corr_id) { + auto _lk = std::shared_lock{client_mutex}; + // this correlation ID should not have reported as retired yet so + // we are demoing the expectation here + if(client_retired_corr_ids.count(_corr_id) > 0) + { + auto msg = std::stringstream{}; + msg << "internal correlation id " << _corr_id << " was retired prematurely"; + throw std::runtime_error{msg.str()}; + } + }; + + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + + auto kind_name = std::string{}; + if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING) + { + const char* _name = nullptr; + auto _kind = static_cast(header->kind); + ROCPROFILER_CHECK(rocprofiler_query_buffer_tracing_kind_name(_kind, &_name, nullptr)); + if(_name) + { + static size_t len = 15; + + kind_name = std::string{_name}; + len = std::max(len, kind_name.length()); + kind_name.resize(len, ' '); + kind_name += " :: "; + } + } + + if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API) + { + auto* record = + static_cast(header->payload); + + // this should always be empty + auto _extern_corr_id = external_corr_id_data{}; + + // demonstrate reliability of correlation ID retirement ordering + ensure_internal_correlation_id_retirement_ordering(record->correlation_id.internal); + + auto info = std::stringstream{}; + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle + << ", corr_id=" << record->correlation_id.internal << ", kind=" << record->kind + << ", operation=" << record->operation + << ", name=" << (*client_name_info)[record->kind][record->operation] + << ", extern_corr_id={" << _extern_corr_id << "}"; + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) + { + auto* record = + static_cast(header->payload); + + // demonstrate reliability of correlation ID retirement ordering + ensure_internal_correlation_id_retirement_ordering(record->correlation_id.internal); + + auto _extern_corr_id = external_corr_id_data{}; + if(record->correlation_id.external.ptr) + { + auto* _extcid = + static_cast(record->correlation_id.external.ptr); + _extcid->seen_count++; + _extern_corr_id = *_extcid; + // demonstrate reliability of correlation ID retirement ordering + ensure_internal_correlation_id_retirement_ordering(_extcid->internal_corr_id); + } + + auto info = std::stringstream{}; + + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle + << ", corr_id=" << record->correlation_id.internal << ", kind=" << record->kind + << ", operation=" << record->operation + << ", agent_id=" << record->dispatch_info.agent_id.handle + << ", queue_id=" << record->dispatch_info.queue_id.handle + << ", dispatch_id=" << record->dispatch_info.dispatch_id + << ", kernel_id=" << record->dispatch_info.kernel_id + << ", kernel=" << client_kernels->at(record->dispatch_info.kernel_id).kernel_name + << ", extern_corr_id={" << _extern_corr_id << "}"; + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) + { + auto* record = + static_cast(header->payload); + + // demonstrate reliability of correlation ID retirement ordering + ensure_internal_correlation_id_retirement_ordering(record->correlation_id.internal); + + auto _extern_corr_id = external_corr_id_data{}; + if(record->correlation_id.external.ptr) + { + auto* _extcid = + static_cast(record->correlation_id.external.ptr); + _extcid->seen_count++; + _extern_corr_id = *_extcid; + // demonstrate reliability of correlation ID retirement ordering + ensure_internal_correlation_id_retirement_ordering(_extcid->internal_corr_id); + } + + auto info = std::stringstream{}; + + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle + << ", corr_id=" << record->correlation_id.internal << ", kind=" << record->kind + << ", operation=" << record->operation + << ", src_agent_id=" << record->src_agent_id.handle + << ", dst_agent_id=" << record->dst_agent_id.handle + << ", direction=" << record->operation << ", start=" << record->start_timestamp + << ", stop=" << record->end_timestamp + << ", name=" << client_name_info->at(record->kind, record->operation) + << ", extern_corr_id={" << _extern_corr_id << "}"; + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT) + { + auto* record = + static_cast( + header->payload); + + { + auto _lk = std::unique_lock{client_mutex}; + client_retired_corr_ids.emplace(record->internal_correlation_id); + } + + auto _extern_corr_id = external_corr_id_data{}; + auto info = std::stringstream{}; + + info << "context=" << context.handle << ", buffer_id=" << buffer_id.handle + << ", corr_id=" << record->internal_correlation_id << ", kind=" << record->kind + << ", timestamp=" << record->timestamp + << ", name=" << client_name_info->at(record->kind) << ", extern_corr_id={" + << _extern_corr_id << "}"; + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); + } + else + { + auto _msg = std::stringstream{}; + _msg << "unexpected rocprofiler_record_header_t category + kind: (" << header->category + << " + " << header->kind << ")"; + throw std::runtime_error{_msg.str()}; + } + } +} + +template +auto +make_array(Arg arg, Args&&... args) +{ + constexpr auto N = 1 + sizeof...(Args); + return std::array{std::forward(arg), std::forward(args)...}; +} + +int +tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) +{ + assert(tool_data != nullptr); + + auto* call_stack_v = static_cast(tool_data); + + call_stack_v->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""}); + + *client_name_info = common::get_buffer_tracing_names(); + client_fini_func = fini_func; + + ROCPROFILER_CHECK(rocprofiler_create_context(&client_ctx)); + + auto code_object_ops = std::vector{ + ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER}; + + ROCPROFILER_CHECK( + rocprofiler_configure_callback_tracing_service(client_ctx, + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, + code_object_ops.data(), + code_object_ops.size(), + tool_code_object_callback, + nullptr)); + + constexpr auto buffer_size_bytes = 4096; + constexpr auto buffer_watermark_bytes = buffer_size_bytes - (buffer_size_bytes / 8); + + ROCPROFILER_CHECK(rocprofiler_create_buffer(client_ctx, + buffer_size_bytes, + buffer_watermark_bytes, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + tool_tracing_callback, + tool_data, + &client_buffer)); + + auto external_corr_id_request_kinds = + make_array(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY); + + ROCPROFILER_CHECK(rocprofiler_configure_external_correlation_id_request_service( + client_ctx, + external_corr_id_request_kinds.data(), + external_corr_id_request_kinds.size(), + set_external_correlation_id, + nullptr)); + + auto hip_runtime_ops = std::vector{}; + const auto desired_hip_runtime_ops = std::unordered_set{ + "hipLaunchKernel", "hipMemcpyAsync", "hipMemsetAsync", "hipMalloc"}; + for(auto [idx, itr] : (*client_name_info)[ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API].items()) + { + if(desired_hip_runtime_ops.count(*itr) > 0) hip_runtime_ops.emplace_back(idx); + } + + if(desired_hip_runtime_ops.size() != hip_runtime_ops.size()) + throw std::runtime_error{"missing hip operations"}; + + ROCPROFILER_CHECK( + rocprofiler_configure_buffer_tracing_service(client_ctx, + ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API, + hip_runtime_ops.data(), + hip_runtime_ops.size(), + client_buffer)); + + ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service( + client_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, client_buffer)); + + ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service( + client_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, client_buffer)); + + ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service( + client_ctx, + ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT, + nullptr, + 0, + client_buffer)); + + int valid_ctx = 0; + ROCPROFILER_CHECK(rocprofiler_context_is_valid(client_ctx, &valid_ctx)); + if(valid_ctx == 0) + { + // notify rocprofiler that initialization failed + // and all the contexts, buffers, etc. created + // should be ignored + return -1; + } + + ROCPROFILER_CHECK(rocprofiler_start_context(client_ctx)); + + // no errors + return 0; +} + +void +tool_fini(void* tool_data) +{ + assert(tool_data != nullptr); + client_fini_func = nullptr; + client_id = nullptr; + + std::cout << "finalizing...\n" << std::flush; + rocprofiler_stop_context(client_ctx); + ROCPROFILER_CHECK(rocprofiler_flush_buffer(client_buffer)); + + auto* _call_stack = static_cast(tool_data); + _call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""}); + + print_call_stack(*_call_stack); + + size_t unretired = 0; + size_t unseen = 0; + for(auto* itr : client_external_corr_ids) + { + if(itr->seen_count != 1) + { + std::cerr << "external correlation ID seen " << itr->seen_count << " times: {" << *itr + << "}\n" + << std::flush; + ++unseen; + } + if(client_retired_corr_ids.count(itr->internal_corr_id) != 1) + { + std::cerr << "internal correlation ID passed to external correlation ID request was " + "not retired: {" + << itr->internal_corr_id << "}\n" + << std::flush; + ++unretired; + } + + delete itr; + } + + std::cerr << "external correlation IDs not seen : " << unseen << "\n" << std::flush; + std::cerr << "internal correlation IDs not retired: " << unretired << "\n" << std::flush; + + if(unseen > 0) throw std::runtime_error{"unseen external correlation id data"}; + if(unretired > 0) throw std::runtime_error{"unretired internal correlation id values"}; + + delete _call_stack; +} +} // namespace + +void +setup() +{ + if(int status = 0; + rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) + { + ROCPROFILER_CHECK(rocprofiler_force_configure(&rocprofiler_configure)); + } +} + +void +shutdown() +{ + if(client_id) + { + ROCPROFILER_CHECK(rocprofiler_flush_buffer(client_buffer)); + client_fini_func(*client_id); + } +} + +void +start() +{ + ROCPROFILER_CHECK(rocprofiler_start_context(client_ctx)); +} + +void +identify(uint64_t val) +{ + auto _tid = rocprofiler_thread_id_t{}; + rocprofiler_get_thread_id(&_tid); + rocprofiler_user_data_t user_data = {}; + user_data.value = val; + rocprofiler_push_external_correlation_id(client_ctx, _tid, user_data); +} + +void +stop() +{ + ROCPROFILER_CHECK(rocprofiler_stop_context(client_ctx)); +} +} // namespace client + +extern "C" rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* id) +{ + // set the client name + id->name = "ExampleTool"; + + // store client info + client::client_id = id; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = version % 100; + + // generate info string + auto info = std::stringstream{}; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; + + std::clog << info.str() << std::endl; + + auto* client_tool_data = new std::vector{}; + + client_tool_data->emplace_back( + client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); + + std::atexit([]() { + std::cout << "atexit handler...\n" << std::flush; + if(client::client_fini_func && client::client_id) + client::client_fini_func(*client::client_id); + }); + + // create configure data + static auto cfg = + rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t), + &client::tool_init, + &client::tool_fini, + static_cast(client_tool_data)}; + + // return pointer to configure data + return &cfg; +} diff --git a/samples/external_correlation_id_request/client.hpp b/samples/external_correlation_id_request/client.hpp new file mode 100644 index 0000000000..9ff8cda2d8 --- /dev/null +++ b/samples/external_correlation_id_request/client.hpp @@ -0,0 +1,49 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#ifdef buffered_api_tracing_client_EXPORTS +# define CLIENT_API __attribute__((visibility("default"))) +#else +# define CLIENT_API +#endif + +#include + +namespace client +{ +void +setup() CLIENT_API; + +void +shutdown() CLIENT_API; + +void +start() CLIENT_API; + +void +stop() CLIENT_API; + +void +identify(uint64_t corr_id) CLIENT_API; +} // namespace client diff --git a/samples/external_correlation_id_request/main.cpp b/samples/external_correlation_id_request/main.cpp new file mode 100644 index 0000000000..d78efed384 --- /dev/null +++ b/samples/external_correlation_id_request/main.cpp @@ -0,0 +1,412 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "hip/hip_runtime.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#define HIP_API_CALL(CALL) \ + { \ + hipError_t error_ = (CALL); \ + if(error_ != hipSuccess) \ + { \ + auto _hip_api_print_lk = auto_lock_t{print_lock}; \ + fprintf(stderr, \ + "%s:%d :: HIP error : %s\n", \ + __FILE__, \ + __LINE__, \ + hipGetErrorString(error_)); \ + throw std::runtime_error("hip_api_call"); \ + } \ + } + +namespace +{ +using auto_lock_t = std::unique_lock; +auto print_lock = std::mutex{}; +size_t nthread_per_device = 2; +size_t nitr = 500; +size_t nsync = 10; +constexpr unsigned shared_mem_tile_dim = 32; + +void +check_hip_error(void); + +void +verify(int* in, int* out, int M, int N); +} // namespace + +__global__ void +transpose(const int* in, int* out, int M, int N); + +void +run(int rank, int tid, int devid, int argc, char** argv); + +void +run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv); + +void +run_migrate(int rank, int tid, hipStream_t stream, int, char** argv); + +void +run_scratch(int rank, int tid, hipStream_t stream, int argc, char** argv); + +int +main(int argc, char** argv) +{ + auto* exe_name = ::basename(argv[0]); + + int rank = 0; + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string{argv[i]}; + if(_arg == "?" || _arg == "-h" || _arg == "--help") + { + fprintf(stderr, + "usage: %s [NUM_THREADS_PER_DEVICE (%zu)] [NUM_ITERATION (%zu)] " + "[SYNC_EVERY_N_ITERATIONS (%zu)]\n", + exe_name, + nthread_per_device, + nitr, + nsync); + exit(EXIT_SUCCESS); + } + } + if(argc > 1) nthread_per_device = atoll(argv[1]); + if(argc > 2) nitr = atoll(argv[2]); + if(argc > 3) nsync = atoll(argv[3]); + + int ndevice = 0; + HIP_API_CALL(hipGetDeviceCount(&ndevice)); + + auto nthreads = (ndevice * nthread_per_device); + + printf("[%s] Number of devices found: %i\n", exe_name, ndevice); + printf("[%s] Number of threads (per device): %zu\n", exe_name, nthread_per_device); + printf("[%s] Number of threads (total): %zu\n", exe_name, nthreads); + printf("[%s] Number of iterations: %zu\n", exe_name, nitr); + printf("[%s] Syncing every %zu iterations\n", exe_name, nsync); + + { + auto _threads = std::vector{}; + for(size_t i = 0; i < nthreads; ++i) + _threads.emplace_back(run, rank, i, i % ndevice, argc, argv); + for(auto& itr : _threads) + itr.join(); + } + + HIP_API_CALL(hipDeviceSynchronize()); + HIP_API_CALL(hipDeviceReset()); + + return 0; +} + +__global__ void +transpose(const int* in, int* out, int M, int N) +{ + __shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim]; + + int idx = (blockIdx.y * blockDim.y + threadIdx.y) * M + blockIdx.x * blockDim.x + threadIdx.x; + tile[threadIdx.y][threadIdx.x] = in[idx]; + __syncthreads(); + idx = (blockIdx.x * blockDim.x + threadIdx.y) * N + blockIdx.y * blockDim.y + threadIdx.x; + out[idx] = tile[threadIdx.x][threadIdx.y]; +} + +template +__global__ void +test_page_migrate(Tp* data, Tp val) +{ + int idx = (blockIdx.x * blockDim.x) + threadIdx.x; + data[idx] += val; +} + +__global__ void +test_kern_large(uint64_t* output) +{ + uint64_t result = 0; + int test[4000]; + memset(test, 5, 4000); + for(int& i : test) + { + i = i + 7; + *output += i; + result += i; + } + *output ^= result; + *output ^= result; +} + +__global__ void +test_kern_medium(uint64_t* output) +{ + uint64_t result = 0; + int test[175]; + memset(test, 5, 175); + for(int& i : test) + { + i = i + 7; + *output += i; + result += i; + } + *output ^= result; + *output ^= result; +} + +__global__ void +test_kern_small(uint64_t* output) +{ + uint64_t result = 0; + int test[2]; + for(int& i : test) + { + i = i + 7; + *output += i; + result += i; + } + *output ^= result; + *output ^= result; +} + +void +run(int rank, int tid, int devid, int argc, char** argv) +{ + auto* stream = hipStream_t{}; + HIP_API_CALL(hipSetDevice(devid)); + HIP_API_CALL(hipStreamCreate(&stream)); + + run_migrate(rank, tid, stream, argc, argv); + run_scratch(rank, tid, stream, argc, argv); + run_transpose(rank, tid, stream, argc, argv); + + HIP_API_CALL(hipStreamSynchronize(stream)); + HIP_API_CALL(hipStreamDestroy(stream)); +} + +void +run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv) +{ + auto* exe_name = ::basename(argv[0]); + + unsigned int M = 4960 * 2; + unsigned int N = 4960 * 2; + if(argc > 2) nitr = atoll(argv[2]); + if(argc > 3) nsync = atoll(argv[3]); + + auto_lock_t _lk{print_lock}; + std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid << "] M: " << M + << " N: " << N << std::endl; + _lk.unlock(); + + std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)}; + std::uniform_int_distribution _dist{0, 1000}; + + size_t size = sizeof(int) * M * N; + int* inp_matrix = new int[size]; + int* out_matrix = new int[size]; + for(size_t i = 0; i < M * N; i++) + { + inp_matrix[i] = _dist(_engine); + out_matrix[i] = 0; + } + int* in = nullptr; + int* out = nullptr; + + HIP_API_CALL(hipMalloc(&in, size)); + HIP_API_CALL(hipMalloc(&out, size)); + HIP_API_CALL(hipMemsetAsync(in, 0, size, stream)); + HIP_API_CALL(hipMemsetAsync(out, 0, size, stream)); + HIP_API_CALL(hipMemcpyAsync(in, inp_matrix, size, hipMemcpyHostToDevice, stream)); + HIP_API_CALL(hipStreamSynchronize(stream)); + + dim3 grid(M / 32, N / 32, 1); + dim3 block(32, 32, 1); // transpose + + print_lock.lock(); + printf("[%s][transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n", + exe_name, + rank, + tid, + grid.x, + grid.y, + grid.z, + block.x, + block.y, + block.z); + print_lock.unlock(); + + auto t1 = std::chrono::high_resolution_clock::now(); + for(size_t i = 0; i < nitr; ++i) + { + transpose<<>>(in, out, M, N); + check_hip_error(); + if(i % nsync == (nsync - 1)) HIP_API_CALL(hipStreamSynchronize(stream)); + } + auto t2 = std::chrono::high_resolution_clock::now(); + HIP_API_CALL(hipStreamSynchronize(stream)); + HIP_API_CALL(hipMemcpyAsync(out_matrix, out, size, hipMemcpyDeviceToHost, stream)); + double time = std::chrono::duration_cast>(t2 - t1).count(); + float GB = (float) size * nitr * 2 / (1 << 30); + + print_lock.lock(); + std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid + << "] Runtime of transpose is " << time << " sec\n"; + std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid + << "] The average performance of transpose is " << GB / time << " GBytes/sec" + << std::endl; + print_lock.unlock(); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + // cpu_transpose(matrix, out_matrix, M, N); + verify(inp_matrix, out_matrix, M, N); + + HIP_API_CALL(hipFree(in)); + HIP_API_CALL(hipFree(out)); + + delete[] inp_matrix; + delete[] out_matrix; +} + +void +run_scratch(int rank, int tid, hipStream_t stream, int, char** argv) +{ + auto t1 = std::chrono::high_resolution_clock::now(); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + const auto* exe_name = ::basename(argv[0]); + + uint64_t* data_ptr = nullptr; + HIP_API_CALL(hipHostMalloc(&data_ptr, sizeof(uint64_t), 0)); + *data_ptr = 0; + + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr); + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + test_kern_large<<<1100, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_small<<<1000, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + test_kern_large<<<1100, 1, 0, stream>>>(data_ptr); + HIP_API_CALL(hipStreamSynchronize(stream)); + + auto t2 = std::chrono::high_resolution_clock::now(); + double time = std::chrono::duration_cast>(t2 - t1).count(); + + print_lock.lock(); + std::cout << "[" << exe_name << "][scratch][" << rank << "][" << tid + << "] Runtime of scratch is " << time << " sec\n"; + print_lock.unlock(); +} + +void +run_migrate(int rank, int tid, hipStream_t stream, int, char** argv) +{ + using data_type = uint64_t; + constexpr data_type init_v = 1; + constexpr data_type incr_v = 1; + + auto t1 = std::chrono::high_resolution_clock::now(); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + const auto* exe_name = ::basename(argv[0]); + auto page_data = std::vector(1024, 0); + + HIP_API_CALL(hipHostRegister( + page_data.data(), page_data.size() * sizeof(data_type), hipHostRegisterDefault)); + + for(auto& itr : page_data) + itr = init_v; + + test_page_migrate<<<1, 1024, 0, stream>>>(page_data.data(), incr_v); + + HIP_API_CALL(hipStreamSynchronize(stream)); + + for(auto& itr : page_data) + { + auto diff = (itr - incr_v); + if(diff != init_v) + { + auto msg = std::stringstream{}; + msg << "invalid diff: " << diff << ". expected: " << init_v; + throw std::runtime_error{msg.str()}; + } + } + + HIP_API_CALL(hipHostUnregister(page_data.data())); + + auto t2 = std::chrono::high_resolution_clock::now(); + double time = std::chrono::duration_cast>(t2 - t1).count(); + + print_lock.lock(); + std::cout << "[" << exe_name << "][migrate][" << rank << "][" << tid + << "] Runtime of migrate is " << time << " sec\n"; + print_lock.unlock(); +} + +namespace +{ +void +check_hip_error(void) +{ + hipError_t err = hipGetLastError(); + if(err != hipSuccess) + { + auto_lock_t _lk{print_lock}; + std::cerr << "Error: " << hipGetErrorString(err) << std::endl; + throw std::runtime_error("hip_api_call"); + } +} + +void +verify(int* in, int* out, int M, int N) +{ + for(int i = 0; i < 10; i++) + { + int row = rand() % M; + int col = rand() % N; + if(in[row * N + col] != out[col * M + row]) + { + auto_lock_t _lk{print_lock}; + std::cout << "mismatch: " << row << ", " << col << " : " << in[row * N + col] << " | " + << out[col * M + row] << "\n"; + } + } +} +} // namespace diff --git a/samples/intercept_table/client.cpp b/samples/intercept_table/client.cpp index ec3a696fbd..7f955ad861 100644 --- a/samples/intercept_table/client.cpp +++ b/samples/intercept_table/client.cpp @@ -266,9 +266,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "ExampleTool"; @@ -282,8 +279,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl; diff --git a/source/docs/tool_library_overview.md b/source/docs/tool_library_overview.md index 864031a1f5..1625b07119 100644 --- a/source/docs/tool_library_overview.md +++ b/source/docs/tool_library_overview.md @@ -229,9 +229,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* client_id) { - // If not first tool to register, indicate that the tool doesn't want to do anything - if(priority > 0) return nullptr; - // (optional) Provide a name for this tool to rocprofiler client_id->name = "ExampleTool"; diff --git a/source/include/rocprofiler-sdk/CMakeLists.txt b/source/include/rocprofiler-sdk/CMakeLists.txt index fcf3c53bc4..f040f9de02 100644 --- a/source/include/rocprofiler-sdk/CMakeLists.txt +++ b/source/include/rocprofiler-sdk/CMakeLists.txt @@ -41,3 +41,4 @@ install( add_subdirectory(hip) add_subdirectory(hsa) add_subdirectory(marker) +add_subdirectory(cxx) diff --git a/source/include/rocprofiler-sdk/cxx/CMakeLists.txt b/source/include/rocprofiler-sdk/cxx/CMakeLists.txt new file mode 100644 index 0000000000..9aae92d4a2 --- /dev/null +++ b/source/include/rocprofiler-sdk/cxx/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# +# Installation of public C++ headers +# +# +set(ROCPROFILER_CXX_HEADER_FILES name_info.hpp serialization.hpp) + +install( + FILES ${ROCPROFILER_CXX_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/cxx + COMPONENT development) + +add_subdirectory(details) diff --git a/source/include/rocprofiler-sdk/cxx/details/CMakeLists.txt b/source/include/rocprofiler-sdk/cxx/details/CMakeLists.txt new file mode 100644 index 0000000000..bdb3729883 --- /dev/null +++ b/source/include/rocprofiler-sdk/cxx/details/CMakeLists.txt @@ -0,0 +1,11 @@ +# +# +# Installation of public C++ headers (implementations) +# +# +set(ROCPROFILER_CXX_DETAILS_HEADER_FILES mpl.hpp name_info.hpp) + +install( + FILES ${ROCPROFILER_CXX_DETAILS_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/cxx/details + COMPONENT development) diff --git a/source/include/rocprofiler-sdk/cxx/details/mpl.hpp b/source/include/rocprofiler-sdk/cxx/details/mpl.hpp new file mode 100644 index 0000000000..56bd4ee884 --- /dev/null +++ b/source/include/rocprofiler-sdk/cxx/details/mpl.hpp @@ -0,0 +1,84 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#pragma once + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace sdk +{ +namespace mpl +{ +template +struct string_support +{ + using type = Tp; + using return_type = void; + + static constexpr auto value = false; + static constexpr void default_value() {} +}; + +template <> +struct string_support +{ + using type = const char*; + using return_type = type; + + static constexpr auto value = true; + static constexpr type default_value() { return nullptr; } + + type operator()(const char* val) const { return val; } +}; + +template <> +struct string_support +{ + using type = std::string_view; + using return_type = type; + + static constexpr auto value = true; + static constexpr type default_value() { return type{}; } + + type operator()(const char* val) const { return type{val}; } +}; + +template <> +struct string_support +{ + using type = std::string; + using return_type = type; + + static constexpr auto value = true; + static type default_value() { return type{}; } + + type operator()(const char* val) const { return type{val}; } +}; +} // namespace mpl +} // namespace sdk +} // namespace rocprofiler diff --git a/source/include/rocprofiler-sdk/cxx/details/name_info.hpp b/source/include/rocprofiler-sdk/cxx/details/name_info.hpp new file mode 100644 index 0000000000..29883c1351 --- /dev/null +++ b/source/include/rocprofiler-sdk/cxx/details/name_info.hpp @@ -0,0 +1,180 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#pragma once + +#if !defined(ROCPROFILER_SDK_CXX_NAME_INFO_HPP_) +# include +#endif + +#include +#include +#include + +#include +#include +#include + +namespace rocprofiler +{ +namespace sdk +{ +namespace utility +{ +template +typename name_info_impl::item_array_t +name_info_impl::items() const +{ + auto ret = item_array_t{}; + ret.reserve(operations.size()); + rocprofiler_tracing_operation_t _idx = 0; + for(const auto& itr : operations) + ret.emplace_back(_idx++, &itr); + return ret; +} + +template +inline void +name_info::emplace(EnumT idx, const char* name) +{ + impl.resize(idx + 1, value_type{}); + impl.at(idx).value = idx; + impl.at(idx).name = support_type{}(name); +} + +template +inline void +name_info::emplace(EnumT idx, + rocprofiler_tracing_operation_t opidx, + const char* name) +{ + impl.resize(idx + 1, value_type{}); + impl.at(idx).operations.resize(opidx + 1, support_type::default_value()); + impl.at(idx).operations.at(opidx) = support_type{}(name); +} + +template +typename name_info::return_type +name_info::at(EnumT idx) const +{ + return impl.at(idx).name; +} + +template +typename name_info::return_type +name_info::at(EnumT idx, rocprofiler_tracing_operation_t opidx) const +{ + return impl.at(idx).operations.at(opidx); +} + +template +typename name_info::item_array_t +name_info::items() const +{ + auto ret = item_array_t{}; + ret.reserve(impl.size()); + for(const auto& itr : impl) + ret.emplace_back(&itr); + return ret; +} +} // namespace utility + +constexpr auto success_v = ROCPROFILER_STATUS_SUCCESS; + +template +inline callback_name_info_t +get_callback_tracing_names() +{ + auto cb_name_info = callback_name_info_t{}; + // + // callback for each kind operation + // + static auto tracing_kind_operation_cb = + [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { + auto* name_info_v = static_cast*>(data_v); + + const char* name = nullptr; + auto status = rocprofiler_query_callback_tracing_kind_operation_name( + kindv, operation, &name, nullptr); + if(status == success_v && name) name_info_v->emplace(kindv, operation, name); + return 0; + }; + + // + // callback for each buffer kind (i.e. domain) + // + static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) { + // store the buffer kind name + auto* name_info_v = static_cast*>(data); + const char* name = nullptr; + auto status = rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr); + if(status == success_v && name) name_info_v->emplace(kind, name); + + rocprofiler_iterate_callback_tracing_kind_operations(kind, tracing_kind_operation_cb, data); + return 0; + }; + + rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb, &cb_name_info); + + return cb_name_info; +} + +template +inline buffer_name_info_t +get_buffer_tracing_names() +{ + auto cb_name_info = buffer_name_info_t{}; + // + // callback for each kind operation + // + static auto tracing_kind_operation_cb = + [](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) { + auto* name_info_v = static_cast*>(data_v); + + const char* name = nullptr; + auto status = rocprofiler_query_buffer_tracing_kind_operation_name( + kindv, operation, &name, nullptr); + if(status == success_v && name) name_info_v->emplace(kindv, operation, name); + return 0; + }; + + // + // callback for each buffer kind (i.e. domain) + // + static auto tracing_kind_cb = [](rocprofiler_buffer_tracing_kind_t kind, void* data) { + // store the buffer kind name + auto* name_info_v = static_cast*>(data); + const char* name = nullptr; + auto status = rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr); + if(status == success_v && name) name_info_v->emplace(kind, name); + + rocprofiler_iterate_buffer_tracing_kind_operations(kind, tracing_kind_operation_cb, data); + return 0; + }; + + rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb, &cb_name_info); + + return cb_name_info; +} +} // namespace sdk +} // namespace rocprofiler diff --git a/source/include/rocprofiler-sdk/cxx/name_info.hpp b/source/include/rocprofiler-sdk/cxx/name_info.hpp new file mode 100644 index 0000000000..c29eacde90 --- /dev/null +++ b/source/include/rocprofiler-sdk/cxx/name_info.hpp @@ -0,0 +1,116 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace sdk +{ +namespace utility +{ +template +struct name_info_impl +{ + using support_type = mpl::string_support; + using enum_type = EnumT; + using value_type = ValueT; + using return_type = typename support_type::return_type; + using item_type = std::pair; + using item_array_t = std::vector; + + static_assert(support_type::value, + "value_type must be supported by rocprofiler::sdk::mpl::string_support"); + + return_type operator()() const { return name; } + return_type operator()(size_t idx) const { return operations.at(idx); } + return_type operator[](size_t idx) const { return operations.at(idx); } + + item_array_t items() const; + + EnumT value = static_cast(0); + value_type name = {}; + std::vector operations = {}; +}; + +template +struct name_info +{ + using value_type = name_info_impl; + using enum_type = EnumT; + using support_type = typename value_type::support_type; + using return_type = typename value_type::return_type; + using item_type = const value_type*; + using item_array_t = std::vector; + + void emplace(EnumT idx, const char* name); + void emplace(EnumT idx, rocprofiler_tracing_operation_t opidx, const char* name); + + return_type at(EnumT idx) const; + return_type at(EnumT idx, rocprofiler_tracing_operation_t opidx) const; + + item_array_t items() const; + + decltype(auto) size() const { return impl.size(); } + decltype(auto) begin() { return impl.begin(); } + decltype(auto) begin() const { return impl.begin(); } + decltype(auto) end() { return impl.end(); } + decltype(auto) end() const { return impl.end(); } + + value_type& operator[](size_t idx) { return impl.at(idx); } + const value_type& operator[](size_t idx) const { return impl.at(idx); } + +private: + std::vector impl = {}; +}; +} // namespace utility + +template +using callback_name_info_t = utility::name_info; + +template +using buffer_name_info_t = utility::name_info; + +using callback_name_info = callback_name_info_t; +using buffer_name_info = buffer_name_info_t; + +template +callback_name_info_t +get_callback_tracing_names(); + +template +buffer_name_info_t +get_buffer_tracing_names(); +} // namespace sdk +} // namespace rocprofiler + +#define ROCPROFILER_SDK_CXX_NAME_INFO_HPP_ 1 +#include diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp new file mode 100644 index 0000000000..217938abb5 --- /dev/null +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -0,0 +1,58 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#pragma once + +#include + +#include + +#include +#include +#include + +namespace cereal +{ +template +void +save(ArchiveT& ar, const rocprofiler::sdk::utility::name_info& data) +{ + ar.makeArray(); + for(const auto& itr : data) + ar(cereal::make_nvp("entry", itr)); +} + +template +void +save(ArchiveT& ar, const rocprofiler::sdk::utility::name_info_impl& data) +{ + auto _name = std::string{data.name}; + auto _ops = std::vector{}; + _ops.reserve(data.operations.size()); + + ar(cereal::make_nvp("kind", _name)); + for(auto itr : data.operations) + _ops.emplace_back(itr); + ar(cereal::make_nvp("operations", _ops)); +} +} // namespace cereal diff --git a/source/include/rocprofiler-sdk/registration.h b/source/include/rocprofiler-sdk/registration.h index 8d720b01d6..4855f2175c 100644 --- a/source/include/rocprofiler-sdk/registration.h +++ b/source/include/rocprofiler-sdk/registration.h @@ -204,7 +204,8 @@ rocprofiler_is_finalized(int* status) ROCPROFILER_API ROCPROFILER_NONNULL(1); * uint32_t patch = version % 100; * * // print info - * printf("Configuring rocprofiler (v%u.%u.%u) [%s]\n", major, minor, patch, runtime_version); + * printf("Configuring %s with rocprofiler-sdk (v%u.%u.%u) [%s]\n", + * client_id->name, major, minor, patch, runtime_version); * * // create configure data * static auto cfg = rocprofiler_tool_configure_result_t{ &my_init_func, diff --git a/source/lib/rocprofiler-sdk/hsa/queue.cpp b/source/lib/rocprofiler-sdk/hsa/queue.cpp index 5c4774e291..e77e9e6c18 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #include "lib/rocprofiler-sdk/hsa/queue.hpp" +#include "lib/common/scope_destructor.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" @@ -156,8 +157,8 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data) LOG_IF(FATAL, _corr_id->get_ref_count() == 0) << "reference counter for correlation id " << _corr_id->internal << " from thread " << _corr_id->thread_idx << " has no reference count"; - _corr_id->sub_ref_count(); _corr_id->sub_kern_count(); + _corr_id->sub_ref_count(); } queue_info_session.queue.async_complete(); @@ -239,18 +240,6 @@ WriteInterceptor(const void* packets, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, tracing_data_v); - auto* corr_id = context::get_latest_correlation_id(); - auto thr_id = (corr_id) ? corr_id->thread_idx : common::get_tid(); - auto user_data = rocprofiler_user_data_t{.value = 0}; - auto internal_corr_id = (corr_id) ? corr_id->internal : 0; - - tracing::populate_external_correlation_ids( - tracing_data_v.external_correlation_ids, - thr_id, - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, - ROCPROFILER_KERNEL_DISPATCH_ENQUEUE, - internal_corr_id); - const auto* packets_arr = static_cast(packets); auto transformed_packets = std::vector{}; @@ -267,6 +256,41 @@ WriteInterceptor(const void* packets, continue; } + auto* corr_id = context::get_latest_correlation_id(); + context::correlation_id* _corr_id_pop = nullptr; + + if(!corr_id) + { + constexpr auto ref_count = 1; + corr_id = context::correlation_tracing_service::construct(ref_count); + _corr_id_pop = corr_id; + } + + // increase the reference count to denote that this correlation id is being used in a kernel + corr_id->add_ref_count(); + corr_id->add_kern_count(); + + auto thr_id = (corr_id) ? corr_id->thread_idx : common::get_tid(); + auto user_data = rocprofiler_user_data_t{.value = 0}; + auto internal_corr_id = (corr_id) ? corr_id->internal : 0; + + // if we constructed a correlation id, this decrements the reference count after the + // underlying function returns + auto _corr_id_dtor = common::scope_destructor{[_corr_id_pop]() { + if(_corr_id_pop) + { + context::pop_latest_correlation_id(_corr_id_pop); + _corr_id_pop->sub_ref_count(); + } + }}; + + tracing::populate_external_correlation_ids( + tracing_data_v.external_correlation_ids, + thr_id, + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, + ROCPROFILER_KERNEL_DISPATCH_ENQUEUE, + internal_corr_id); + queue.async_started(); // Copy kernel pkt, copy is to allow for signal to be modified rocprofiler_packet kernel_pkt = packets_arr[i]; @@ -274,13 +298,6 @@ WriteInterceptor(const void* packets, queue.create_signal(HSA_AMD_SIGNAL_AMD_GPU_ONLY, &kernel_pkt.ext_amd_aql_pm4.completion_signal); - // increase the reference count to denote that this correlation id is being used in a kernel - if(corr_id) - { - corr_id->add_ref_count(); - corr_id->add_kern_count(); - } - // computes the "size" based on the offset of reserved_padding field constexpr auto kernel_dispatch_info_rt_size = common::compute_runtime_sizeof(); diff --git a/source/lib/rocprofiler-sdk/page_migration/defines.hpp b/source/lib/rocprofiler-sdk/page_migration/defines.hpp index 0468c3804b..49b6cdec0a 100644 --- a/source/lib/rocprofiler-sdk/page_migration/defines.hpp +++ b/source/lib/rocprofiler-sdk/page_migration/defines.hpp @@ -69,7 +69,7 @@ struct page_migration_info \ { \ static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \ - static constexpr auto name = #TYPE; \ + static constexpr auto name = "PAGE_MIGRATION_" #TYPE; \ static constexpr size_t uvm_bitmask = \ bitmask(std::index_sequence()); \ static constexpr size_t kfd_bitmask = \ diff --git a/tests/c-tool/CMakeLists.txt b/tests/c-tool/CMakeLists.txt index 27a67f6f32..120b39ff25 100644 --- a/tests/c-tool/CMakeLists.txt +++ b/tests/c-tool/CMakeLists.txt @@ -26,13 +26,14 @@ set(c-tool-env set_tests_properties( test-c-tool-execute - PROPERTIES TIMEOUT - 45 - LABELS - "integration-tests" - ENVIRONMENT - "${c-tool-env}" - PASS_REGULAR_EXPRESSION - "Test C tool is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)" - FAIL_REGULAR_EXPRESSION - "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + PROPERTIES + TIMEOUT + 45 + LABELS + "integration-tests" + ENVIRONMENT + "${c-tool-env}" + PASS_REGULAR_EXPRESSION + "Test C tool \\(priority=0\\) is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/tests/common/name_info.hpp b/tests/common/name_info.hpp new file mode 100644 index 0000000000..5e3bd4a5e3 --- /dev/null +++ b/tests/common/name_info.hpp @@ -0,0 +1,27 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#pragma once + +#include +#include diff --git a/tests/page-migration/validate.py b/tests/page-migration/validate.py index 131e9c8435..9486917f97 100644 --- a/tests/page-migration/validate.py +++ b/tests/page-migration/validate.py @@ -21,17 +21,17 @@ def to_dict(key_values): return a -def op_name(op_name, record): - found_op = False - op_key = None +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["names"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx - for kind_node in record["names"]["kind_names"]: - if kind_node["value"] == op_name: - op_key = kind_node["key"] - - for op_node in record["names"]["operation_names"]: - if op_node["key"] == op_key: - return op_key, to_dict(op_node["value"]) + return None def dict_from_value_key(d): @@ -259,18 +259,17 @@ def test_retired_correlation_ids(input_data): def get_allocated_pages(callback_records): # Get how many pages we allocated - hip_api_traces = callback_records["hip_api_traces"] - _, op_dict = op_name("HIP_RUNTIME_API", callback_records) - op_key = [k for k, v in op_dict.items() if v == "hipHostRegister"][0] + op_idx = get_operation(callback_records, "HIP_RUNTIME_API", "hipHostRegister") + rt_idx, rt_data = get_operation(callback_records, "HIP_RUNTIME_API") + + assert op_idx is not None, f"{rt_idx}:\n{rt_data}" host_register_record = [] - for r in hip_api_traces: - if ( - r["operation"] == op_key - and "sizeBytes" in r["args"] - and "hostPtr" in r["args"] - ): - host_register_record.append(r) + for itr in callback_records["hip_api_traces"]: + if itr["kind"] == rt_idx and itr["operation"] == op_idx and itr["phase"] == 2: + assert "sizeBytes" in itr["args"].keys(), f"{itr}" + assert "hostPtr" in itr["args"].keys(), f"{itr}" + host_register_record.append(itr) assert len(host_register_record) == 1 alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10) @@ -285,11 +284,11 @@ def test_page_migration_data(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] buffer_records = sdk_data["buffer_records"] callback_records = sdk_data["callback_records"] - page_migtation_buffers = buffer_records["page_migration"] + page_migration_buffers = buffer_records["page_migration"] - bf_op_id, bf_op_names = op_name("PAGE_MIGRATION", buffer_records) - assert bf_op_names[0] == "NONE" - assert "PAGE_MIGRATE" in str(bf_op_names) + _, bf_op_names = get_operation(buffer_records, "PAGE_MIGRATION") + assert bf_op_names[0] == "PAGE_MIGRATION_NONE" + assert "PAGE_MIGRATION_PAGE_MIGRATE" in bf_op_names assert len(bf_op_names) == 5 node_ids = set(x["gpu_id"] for x in sdk_data["agents"]) @@ -299,15 +298,14 @@ def test_page_migration_data(input_data): assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test # PID must be same - assert len(set(r["pid"] for r in page_migtation_buffers)) == 1 + assert len(set(r["pid"] for r in page_migration_buffers)) == 1 - for r in page_migtation_buffers: + for r in page_migration_buffers: op = r["operation"] assert r["size"] == 136 - assert r["kind"] == bf_op_id - assert op != 0 and bf_op_names[op] != "NONE" - assert bf_op_names[op].lower() in r + assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE" + assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys() if "page_migrate" in r: assert r["page_migrate"]["from_node"] in node_ids @@ -328,7 +326,7 @@ def test_page_migration_data(input_data): assert 0 < r["start_timestamp"] < r["end_timestamp"] # Check for events with our page - for r in page_migtation_buffers: + for r in page_migration_buffers: if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr: assert end_addr == r["page_migrate"]["end_addr"] diff --git a/tests/pytest-packages/pytest_utils/dotdict.py b/tests/pytest-packages/pytest_utils/dotdict.py index cd8b59c3a4..e00c9565ec 100644 --- a/tests/pytest-packages/pytest_utils/dotdict.py +++ b/tests/pytest-packages/pytest_utils/dotdict.py @@ -35,6 +35,8 @@ class dotdict(dict): for k, v in self.items(): if isinstance(v, dict): self.__setitem__(k, dotdict(v)) - # print(k) - elif isinstance(v, list): - self.__setitem__(k, [dotdict(i) for i in v]) + elif isinstance(v, (list, tuple)): + self.__setitem__( + k, + [dotdict(i) if isinstance(i, (list, tuple, dict)) else i for i in v], + ) diff --git a/tests/scratch-memory-tracing/validate.py b/tests/scratch-memory-tracing/validate.py index ca94c270f3..449bf896e9 100755 --- a/tests/scratch-memory-tracing/validate.py +++ b/tests/scratch-memory-tracing/validate.py @@ -152,16 +152,15 @@ def test_external_correlation_ids(input_data): assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" -def op_name(op_name, record): - op_key = None - - for kind_node in record["names"]["kind_names"]: - if kind_node["value"] == op_name: - op_key = kind_node["key"] - - for op_node in record["names"]["operation_names"]: - if op_node["key"] == op_key: - return op_node +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["names"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx return None @@ -179,16 +178,14 @@ def test_scratch_memory_tracking(input_data): assert len(scratch_callback_data) == 2 * len(scratch_buffer_data) - cb_op_names = op_name("SCRATCH_MEMORY", callback_records)["value"] - bf_op_names = op_name("SCRATCH_MEMORY", buffer_records)["value"] + _, cb_op_names = get_operation(callback_records, "SCRATCH_MEMORY") + _, bf_op_names = get_operation(buffer_records, "SCRATCH_MEMORY") assert len(cb_op_names) == 4 assert len(bf_op_names) == 4 # op name -> enum value - scratch_cb_op_map = {node["value"]: node["key"] for node in cb_op_names} - scratch_bf_op_map = {node["value"]: node["key"] for node in bf_op_names} - assert scratch_cb_op_map == scratch_bf_op_map + assert cb_op_names == bf_op_names scratch_reported_agent_ids = set() detected_agents_ids = set( @@ -253,10 +250,8 @@ def test_scratch_memory_tracking(input_data): ), f"this:\n{this_node}\n\nnext:\n{next_node}" # alloc has more data vs free and async reclaim - scratch_alloc_node = ( - this_node["operation"] == scratch_cb_op_map["SCRATCH_MEMORY_ALLOC"] - ) - if scratch_alloc_node: + scratch_alloc_node = cb_op_names[this_node["operation"]] + if scratch_alloc_node == "SCRATCH_MEMORY_ALLOC": assert ( pl(this_node)["queue_id"]["handle"] == pl(next_node)["queue_id"]["handle"] diff --git a/tests/tools/c-tool.c b/tests/tools/c-tool.c index dbe4cf2570..09e4f7256f 100644 --- a/tests/tools/c-tool.c +++ b/tests/tools/c-tool.c @@ -40,9 +40,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return NULL; - // set the client name id->name = "Test C tool"; @@ -52,8 +49,9 @@ rocprofiler_configure(uint32_t version, uint32_t patch = version % 100; // generate info string - printf("%s is using rocprofiler-sdk v%i.%i.%i (%s)\n", + printf("%s (priority=%u) is using rocprofiler-sdk v%i.%i.%i (%s)\n", id->name, + priority, major, minor, patch, diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index bf7e2239c2..fa99bef12b 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -34,6 +34,7 @@ #include "common/defines.hpp" #include "common/filesystem.hpp" #include "common/hash.hpp" +#include "common/name_info.hpp" #include "common/perfetto.hpp" #include "common/serialization.hpp" @@ -197,146 +198,14 @@ make_array(Tp&& arg, Args&&... args) return std::array{std::forward(arg), std::forward(args)...}; } -using call_stack_t = std::vector; -using buffer_kind_names_t = std::map; -using buffer_kind_operation_names_t = - std::map>; - -using callback_kind_names_t = std::map; -using callback_kind_operation_names_t = - std::map>; +using call_stack_t = std::vector; using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; using kernel_symbol_map_t = std::unordered_map; -struct callback_name_info -{ - callback_kind_names_t kind_names = {}; - callback_kind_operation_names_t operation_names = {}; - - template - void save(ArchiveT& ar) const - { - ar(cereal::make_nvp("kind_names", kind_names)); - ar(cereal::make_nvp("operation_names", operation_names)); - } -}; - -struct buffer_name_info -{ - buffer_kind_names_t kind_names = {}; - buffer_kind_operation_names_t operation_names = {}; - - template - void save(ArchiveT& ar) const - { - ar(cereal::make_nvp("kind_names", kind_names)); - ar(cereal::make_nvp("operation_names", operation_names)); - } -}; - rocprofiler_client_id_t* client_id = nullptr; rocprofiler_client_finalize_t client_fini_func = nullptr; -callback_name_info -get_callback_tracing_names() -{ - auto cb_name_info = callback_name_info{}; - // - // callback for each kind operation - // - static auto tracing_kind_operation_cb = - [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { - auto* name_info_v = static_cast(data_v); - - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( - kindv, operation, &name, nullptr), - "query buffer tracing kind operation name"); - if(name) name_info_v->operation_names[kindv][operation] = name; - return 0; - }; - - // - // callback for each buffer kind (i.e. domain) - // - static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) { - // store the buffer kind name - auto* name_info_v = static_cast(data); - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr), - "query buffer tracing kind operation name"); - if(name) name_info_v->kind_names[kind] = name; - - rocprofiler_iterate_callback_tracing_kind_operations( - kind, tracing_kind_operation_cb, static_cast(data)); - return 0; - }; - - ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb, - static_cast(&cb_name_info)), - "iterating buffer tracing kinds"); - - return cb_name_info; -} - -buffer_name_info -get_buffer_tracing_names() -{ - static const auto supported = std::unordered_set{ - ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, - ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API, - ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API, - ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API, - ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API, - ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, - ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API, - ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API, - ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API, - ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, - ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, - ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, - }; - - auto cb_name_info = buffer_name_info{}; - // - // callback for each kind operation - // - static auto tracing_kind_operation_cb = - [](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) { - auto* name_info_v = static_cast(data_v); - - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name( - kindv, operation, &name, nullptr), - "query buffer tracing kind operation name"); - if(name) name_info_v->operation_names[kindv][operation] = name; - return 0; - }; - - // - // callback for each buffer kind (i.e. domain) - // - static auto tracing_kind_cb = [](rocprofiler_buffer_tracing_kind_t kind, void* data) { - // store the buffer kind name - auto* name_info_v = static_cast(data); - const char* name = nullptr; - ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr), - "query buffer tracing kind operation name"); - if(name) name_info_v->kind_names[kind] = name; - - rocprofiler_iterate_buffer_tracing_kind_operations( - kind, tracing_kind_operation_cb, static_cast(data)); - return 0; - }; - - ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb, - static_cast(&cb_name_info)), - "iterating buffer tracing kinds"); - - return cb_name_info; -} - using callback_payload_t = std::variantargs; TRACE_EVENT_BEGIN(rocprofiler::trait::name::value, - ::perfetto::StaticString(name.c_str()), + ::perfetto::StaticString(name.data()), track, itr.start_timestamp, ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), @@ -1830,7 +1699,7 @@ write_perfetto() for(auto itr : hip_api_bf_records) { - auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation); + auto name = buffer_names.at(itr.kind, itr.operation); auto& track = thread_tracks.at(itr.thread_id); auto _args = callback_arg_array_t{}; @@ -1842,7 +1711,7 @@ write_perfetto() if(ritr != hip_api_cb_records.end()) _args = ritr->args; TRACE_EVENT_BEGIN(rocprofiler::trait::name::value, - ::perfetto::StaticString(name.c_str()), + ::perfetto::StaticString(name.data()), track, itr.start_timestamp, ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), @@ -1869,11 +1738,11 @@ write_perfetto() for(auto itr : memory_copy_bf_records) { - auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation); + auto name = buffer_names.at(itr.kind, itr.operation); auto& track = agent_tracks.at(itr.dst_agent_id.handle); TRACE_EVENT_BEGIN(rocprofiler::trait::name::value, - ::perfetto::StaticString(name.c_str()), + ::perfetto::StaticString(name.data()), track, itr.start_timestamp, ::perfetto::Flow::ProcessScoped(itr.correlation_id.internal), @@ -1886,7 +1755,9 @@ write_perfetto() "src_agent", agents_map.at(itr.src_agent_id).logical_node_id, "dst_agent", - agents_map.at(itr.dst_agent_id).logical_node_id); + agents_map.at(itr.dst_agent_id).logical_node_id, + "copy_bytes", + itr.bytes); TRACE_EVENT_END(rocprofiler::trait::name::value, track, itr.end_timestamp, @@ -2070,9 +1941,6 @@ rocprofiler_configure(uint32_t version, uint32_t priority, rocprofiler_client_id_t* id) { - // only activate if main tool - if(priority > 0) return nullptr; - // set the client name id->name = "rocprofiler-sdk-json-tool"; @@ -2086,8 +1954,8 @@ rocprofiler_configure(uint32_t version, // generate info string auto info = std::stringstream{}; - info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch - << " (" << runtime_version << ")"; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; std::clog << info.str() << std::endl;