Public C++ header files and samples updates (#819)
* Public C++ header files (source/include/rocprofiler-sdk/cxx)
* Update samples/api_buffered_tracing
- scratch memory and page migration
- README
* Update samples/api_buffered_tracing
- page migration component in sample
* Update tests/page-migration/validate.py
- fix checks for page migration operation names
* Update tests/page-migration/validate.py
- fix get_allocated_pages
* Update scratch memory and page migration validations
* Fix include/rocprofiler-sdk/cxx installation
* Rework include/rocprofiler-sdk/cxx
- Improve name_info to support const char*, string_view, string
* Update samples/api_{buffered,callback}_tracing
* External correlation ID request sample
- includes correlation ID retirement demo
* Update samples/api_buffered_tracing/README.md
* Update lib/rocprofiler-sdk/hsa/queue.cpp
- generate correlation ID for kernel launch if one doesn't exist
* Remove priority check from tool libraries (samples/tests)
- if(priority > 0) return nullptr check in rocprofiler_configure has proliferated beyond its intended use
* Apply suggestions from code review
[ROCm/rocprofiler-sdk commit: de13d2ac5d]
Esse commit está contido em:
@@ -29,4 +29,5 @@ add_subdirectory(api_buffered_tracing)
|
||||
add_subdirectory(code_object_tracing)
|
||||
add_subdirectory(counter_collection)
|
||||
add_subdirectory(intercept_table)
|
||||
add_subdirectory(external_correlation_id_request)
|
||||
# add_subdirectory(code_object_isa_decode) add_subdirectory(advanced_thread_trace)
|
||||
|
||||
@@ -543,9 +543,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "Adv_Thread_Trace_Sample";
|
||||
|
||||
@@ -559,8 +556,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# API Buffer Tracing Sample
|
||||
|
||||
## Services
|
||||
|
||||
- Code object callback tracing for mapping kernel IDs to kernel names
|
||||
- HSA API (Core, AMD Ext)
|
||||
- HIP API (Runtime)
|
||||
- Kernel dispatch
|
||||
- Memory copy
|
||||
- Page Migration
|
||||
- Scratch Memory
|
||||
|
||||
## Properties
|
||||
|
||||
- Buffer size of 4096 bytes which is automatically flushed once >= 87.5% of buffer is filled (3584 bytes)
|
||||
- Creation of dedicated thread for buffer callback delivery
|
||||
- Push external correlation IDs once per thread (value is thread ID)
|
||||
- Receives notifications for internal thread creation
|
||||
@@ -50,6 +50,7 @@
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
@@ -148,21 +149,29 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
{
|
||||
auto* header = headers[i];
|
||||
|
||||
if(header == nullptr)
|
||||
auto kind_name = std::string{};
|
||||
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING)
|
||||
{
|
||||
throw std::runtime_error{
|
||||
"rocprofiler provided a null pointer to header. this should never happen"};
|
||||
const char* _name = nullptr;
|
||||
auto _kind = static_cast<rocprofiler_buffer_tracing_kind_t>(header->kind);
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(_kind, &_name, nullptr),
|
||||
"query buffer tracing kind name");
|
||||
if(_name)
|
||||
{
|
||||
static size_t len = 15;
|
||||
|
||||
kind_name = std::string{_name};
|
||||
len = std::max(len, kind_name.length());
|
||||
kind_name.resize(len, ' ');
|
||||
kind_name += " :: ";
|
||||
}
|
||||
}
|
||||
else if(header->hash !=
|
||||
rocprofiler_record_header_compute_hash(header->category, header->kind))
|
||||
{
|
||||
throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"};
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API))
|
||||
|
||||
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API))
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_hsa_api_record_t*>(header->payload);
|
||||
@@ -173,7 +182,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
<< ", extern_cid=" << record->correlation_id.external.value
|
||||
<< ", kind=" << record->kind << ", operation=" << record->operation
|
||||
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp
|
||||
<< ", name=" << client_name_info.operation_names[record->kind][record->operation];
|
||||
<< ", name=" << client_name_info.at(record->kind, record->operation);
|
||||
|
||||
if(record->start_timestamp > record->end_timestamp)
|
||||
{
|
||||
@@ -186,7 +195,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
}
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API)
|
||||
@@ -200,7 +209,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
<< ", extern_cid=" << record->correlation_id.external.value
|
||||
<< ", kind=" << record->kind << ", operation=" << record->operation
|
||||
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp
|
||||
<< ", name=" << client_name_info.operation_names[record->kind][record->operation];
|
||||
<< ", name=" << client_name_info[record->kind][record->operation];
|
||||
|
||||
if(record->start_timestamp > record->end_timestamp)
|
||||
{
|
||||
@@ -213,7 +222,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
}
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)
|
||||
@@ -223,15 +232,16 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
|
||||
auto info = std::stringstream{};
|
||||
|
||||
info << "agent_id=" << record->dispatch_info.agent_id.handle
|
||||
info << "tid=" << record->thread_id << ", context=" << context.handle
|
||||
<< ", buffer_id=" << buffer_id.handle
|
||||
<< ", cid=" << record->correlation_id.internal
|
||||
<< ", extern_cid=" << record->correlation_id.external.value
|
||||
<< ", kind=" << record->kind << ", operation=" << record->operation
|
||||
<< ", agent_id=" << record->dispatch_info.agent_id.handle
|
||||
<< ", queue_id=" << record->dispatch_info.queue_id.handle
|
||||
<< ", kernel_id=" << record->dispatch_info.kernel_id
|
||||
<< ", kernel=" << client_kernels.at(record->dispatch_info.kernel_id).kernel_name
|
||||
<< ", context=" << context.handle << ", buffer_id=" << buffer_id.handle
|
||||
<< ", cid=" << record->correlation_id.internal
|
||||
<< ", extern_cid=" << record->correlation_id.external.value
|
||||
<< ", kind=" << record->kind << ", start=" << record->start_timestamp
|
||||
<< ", stop=" << record->end_timestamp
|
||||
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp
|
||||
<< ", private_segment_size=" << record->dispatch_info.private_segment_size
|
||||
<< ", group_segment_size=" << record->dispatch_info.group_segment_size
|
||||
<< ", workgroup_size=(" << record->dispatch_info.workgroup_size.x << ","
|
||||
@@ -244,7 +254,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
throw std::runtime_error("kernel dispatch: start > end");
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
|
||||
@@ -254,20 +264,111 @@ tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
|
||||
auto info = std::stringstream{};
|
||||
|
||||
info << "src_agent_id=" << record->src_agent_id.handle
|
||||
<< ", dst_agent_id=" << record->dst_agent_id.handle
|
||||
<< ", direction=" << record->operation << ", context=" << context.handle
|
||||
info << "tid=" << record->thread_id << ", context=" << context.handle
|
||||
<< ", buffer_id=" << buffer_id.handle
|
||||
<< ", cid=" << record->correlation_id.internal
|
||||
<< ", extern_cid=" << record->correlation_id.external.value
|
||||
<< ", kind=" << record->kind << ", start=" << record->start_timestamp
|
||||
<< ", stop=" << record->end_timestamp;
|
||||
<< ", kind=" << record->kind << ", operation=" << record->operation
|
||||
<< ", src_agent_id=" << record->src_agent_id.handle
|
||||
<< ", dst_agent_id=" << record->dst_agent_id.handle
|
||||
<< ", direction=" << record->operation << ", start=" << record->start_timestamp
|
||||
<< ", stop=" << record->end_timestamp
|
||||
<< ", name=" << client_name_info.at(record->kind, record->operation);
|
||||
|
||||
if(record->start_timestamp > record->end_timestamp)
|
||||
throw std::runtime_error("memory copy: start > end");
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_page_migration_record_t*>(header->payload);
|
||||
|
||||
auto info = std::stringstream{};
|
||||
|
||||
info << "kind=" << record->kind << ", operation=" << record->operation
|
||||
<< ", pid=" << record->pid << ", start=" << record->start_timestamp
|
||||
<< ", stop=" << record->end_timestamp
|
||||
<< ", name=" << client_name_info.at(record->kind, record->operation);
|
||||
|
||||
switch(record->operation)
|
||||
{
|
||||
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE:
|
||||
{
|
||||
info << ", page_fault=(" << record->page_fault.read_fault << ", "
|
||||
<< record->page_fault.migrated << ", " << record->page_fault.node_id
|
||||
<< ", " << std::hex << "0x" << record->page_fault.address << ")";
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT:
|
||||
{
|
||||
info << ", page_migrate=(" << std::hex << "0x"
|
||||
<< record->page_migrate.start_addr << ", 0x"
|
||||
<< record->page_migrate.end_addr << ", " << std::dec
|
||||
<< record->page_migrate.from_node << ", " << record->page_migrate.to_node
|
||||
<< ", " << record->page_migrate.prefetch_node << ", "
|
||||
<< record->page_migrate.preferred_node << ", "
|
||||
<< record->page_migrate.trigger << ")";
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND:
|
||||
{
|
||||
info << ", queue_suspend=(" << record->queue_suspend.rescheduled << ", "
|
||||
<< record->queue_suspend.node_id << ", " << record->queue_suspend.trigger
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU:
|
||||
{
|
||||
info << ", unmap_from_gpu=(" << record->unmap_from_gpu.node_id << std::hex
|
||||
<< ", 0x" << record->unmap_from_gpu.start_addr << ", 0x"
|
||||
<< record->unmap_from_gpu.end_addr << ", " << std::dec
|
||||
<< record->unmap_from_gpu.trigger << ")";
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_NONE:
|
||||
case ROCPROFILER_PAGE_MIGRATION_LAST:
|
||||
{
|
||||
throw std::runtime_error{"unexpected page migration value"};
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(record->start_timestamp > record->end_timestamp)
|
||||
throw std::runtime_error("page migration: start > end");
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_scratch_memory_record_t*>(header->payload);
|
||||
|
||||
auto info = std::stringstream{};
|
||||
|
||||
auto _elapsed =
|
||||
std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(
|
||||
std::chrono::nanoseconds{record->end_timestamp - record->start_timestamp})
|
||||
.count();
|
||||
|
||||
info << "tid=" << record->thread_id << ", context=" << context.handle
|
||||
<< ", buffer_id=" << buffer_id.handle
|
||||
<< ", cid=" << record->correlation_id.internal
|
||||
<< ", extern_cid=" << record->correlation_id.external.value
|
||||
<< ", kind=" << record->kind << ", operation=" << record->operation
|
||||
<< ", agent_id=" << record->agent_id.handle
|
||||
<< ", queue_id=" << record->queue_id.handle << ", thread_id=" << record->thread_id
|
||||
<< ", elapsed=" << std::setprecision(3) << std::fixed << _elapsed
|
||||
<< " usec, flags=" << record->flags
|
||||
<< ", name=" << client_name_info.at(record->kind, record->operation);
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -312,25 +413,25 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
|
||||
client_name_info = common::get_buffer_tracing_names();
|
||||
|
||||
for(const auto& itr : client_name_info.operation_names)
|
||||
for(const auto& itr : client_name_info)
|
||||
{
|
||||
auto name_idx = std::stringstream{};
|
||||
name_idx << " [" << std::setw(3) << static_cast<int32_t>(itr.first) << "]";
|
||||
name_idx << " [" << std::setw(3) << itr.value << "]";
|
||||
call_stack_v->emplace_back(
|
||||
source_location{"rocprofiler_buffer_tracing_kind_names " + name_idx.str(),
|
||||
__FILE__,
|
||||
__LINE__,
|
||||
client_name_info.kind_names.at(itr.first)});
|
||||
std::string{itr.name}});
|
||||
|
||||
for(const auto& ditr : itr.second)
|
||||
for(auto [didx, ditr] : itr.items())
|
||||
{
|
||||
auto operation_idx = std::stringstream{};
|
||||
operation_idx << " [" << std::setw(3) << static_cast<int32_t>(ditr.first) << "]";
|
||||
operation_idx << " [" << std::setw(3) << didx << "]";
|
||||
call_stack_v->emplace_back(source_location{
|
||||
"rocprofiler_buffer_tracing_kind_operation_names" + operation_idx.str(),
|
||||
__FILE__,
|
||||
__LINE__,
|
||||
std::string{"- "} + std::string{ditr.second}});
|
||||
std::string{"- "} + std::string{*ditr}});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,28 +439,32 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation");
|
||||
|
||||
auto code_object_ops = std::vector<rocprofiler_tracing_operation_t>{
|
||||
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER};
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(client_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
|
||||
nullptr,
|
||||
0,
|
||||
code_object_ops.data(),
|
||||
code_object_ops.size(),
|
||||
tool_code_object_callback,
|
||||
nullptr),
|
||||
"code object tracing service configure");
|
||||
|
||||
constexpr auto buffer_size_bytes = 4096;
|
||||
constexpr auto buffer_watermark_bytes = buffer_size_bytes - (buffer_size_bytes / 8);
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(client_ctx,
|
||||
4096,
|
||||
2048,
|
||||
buffer_size_bytes,
|
||||
buffer_watermark_bytes,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
tool_tracing_callback,
|
||||
tool_data,
|
||||
&client_buffer),
|
||||
"buffer creation");
|
||||
|
||||
for(auto itr : {ROCPROFILER_BUFFER_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API})
|
||||
for(auto itr :
|
||||
{ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API})
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
|
||||
client_ctx, itr, nullptr, 0, client_buffer),
|
||||
@@ -381,6 +486,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
client_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, client_buffer),
|
||||
"buffer tracing service for memory copy configure");
|
||||
|
||||
// May have incompatible kernel so only emit a warning here
|
||||
ROCPROFILER_WARN(rocprofiler_configure_buffer_tracing_service(
|
||||
client_ctx, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, nullptr, 0, client_buffer));
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffer_tracing_service(
|
||||
client_ctx, ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, nullptr, 0, client_buffer),
|
||||
"buffer tracing service for page migration configure");
|
||||
|
||||
auto client_thread = rocprofiler_callback_thread_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
|
||||
"creating callback thread");
|
||||
@@ -469,9 +583,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "ExampleTool";
|
||||
|
||||
@@ -485,8 +596,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#define HIP_API_CALL(CALL) \
|
||||
@@ -51,7 +52,7 @@ namespace
|
||||
{
|
||||
using auto_lock_t = std::unique_lock<std::mutex>;
|
||||
auto print_lock = std::mutex{};
|
||||
size_t nthreads = 2;
|
||||
size_t nthread_per_device = 2;
|
||||
size_t nitr = 500;
|
||||
size_t nsync = 10;
|
||||
constexpr unsigned shared_mem_tile_dim = 32;
|
||||
@@ -64,10 +65,19 @@ verify(int* in, int* out, int M, int N);
|
||||
} // namespace
|
||||
|
||||
__global__ void
|
||||
transpose_a(const int* in, int* out, int M, int N);
|
||||
transpose(const int* in, int* out, int M, int N);
|
||||
|
||||
void
|
||||
run(int rank, int tid, hipStream_t stream, int argc, char** argv);
|
||||
run(int rank, int tid, int devid, int argc, char** argv);
|
||||
|
||||
void
|
||||
run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv);
|
||||
|
||||
void
|
||||
run_migrate(int rank, int tid, hipStream_t stream, int, char** argv);
|
||||
|
||||
void
|
||||
run_scratch(int rank, int tid, hipStream_t stream, int argc, char** argv);
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
@@ -76,6 +86,8 @@ main(int argc, char** argv)
|
||||
client::start(); // starts context before any API tables are available
|
||||
client::identify(1);
|
||||
|
||||
auto* exe_name = ::basename(argv[0]);
|
||||
|
||||
int rank = 0;
|
||||
for(int i = 1; i < argc; ++i)
|
||||
{
|
||||
@@ -83,47 +95,38 @@ main(int argc, char** argv)
|
||||
if(_arg == "?" || _arg == "-h" || _arg == "--help")
|
||||
{
|
||||
fprintf(stderr,
|
||||
"usage: transpose [NUM_THREADS (%zu)] [NUM_ITERATION (%zu)] "
|
||||
"usage: %s [NUM_THREADS_PER_DEVICE (%zu)] [NUM_ITERATION (%zu)] "
|
||||
"[SYNC_EVERY_N_ITERATIONS (%zu)]\n",
|
||||
nthreads,
|
||||
exe_name,
|
||||
nthread_per_device,
|
||||
nitr,
|
||||
nsync);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
if(argc > 1) nthreads = atoll(argv[1]);
|
||||
if(argc > 1) nthread_per_device = atoll(argv[1]);
|
||||
if(argc > 2) nitr = atoll(argv[2]);
|
||||
if(argc > 3) nsync = atoll(argv[3]);
|
||||
|
||||
printf("[transpose] Number of threads: %zu\n", nthreads);
|
||||
printf("[transpose] Number of iterations: %zu\n", nitr);
|
||||
printf("[transpose] Syncing every %zu iterations\n", nsync);
|
||||
|
||||
// this is a temporary workaround in omnitrace when HIP + MPI is enabled
|
||||
int ndevice = 0;
|
||||
int devid = rank;
|
||||
HIP_API_CALL(hipGetDeviceCount(&ndevice));
|
||||
printf("[transpose] Number of devices found: %i\n", ndevice);
|
||||
if(ndevice > 0)
|
||||
|
||||
auto nthreads = (ndevice * nthread_per_device);
|
||||
|
||||
printf("[%s] Number of devices found: %i\n", exe_name, ndevice);
|
||||
printf("[%s] Number of threads (per device): %zu\n", exe_name, nthread_per_device);
|
||||
printf("[%s] Number of threads (total): %zu\n", exe_name, nthreads);
|
||||
printf("[%s] Number of iterations: %zu\n", exe_name, nitr);
|
||||
printf("[%s] Syncing every %zu iterations\n", exe_name, nsync);
|
||||
|
||||
{
|
||||
devid = rank % ndevice;
|
||||
HIP_API_CALL(hipSetDevice(devid));
|
||||
printf("[transpose] Rank %i assigned to device %i\n", rank, devid);
|
||||
}
|
||||
if(rank == devid && rank < ndevice)
|
||||
{
|
||||
std::vector<std::thread> _threads{};
|
||||
std::vector<hipStream_t> _streams(nthreads);
|
||||
auto _threads = std::vector<std::thread>{};
|
||||
for(size_t i = 0; i < nthreads; ++i)
|
||||
HIP_API_CALL(hipStreamCreate(&_streams.at(i)));
|
||||
for(size_t i = 1; i < nthreads; ++i)
|
||||
_threads.emplace_back(run, rank, i, _streams.at(i), argc, argv);
|
||||
run(rank, 0, _streams.at(0), argc, argv);
|
||||
_threads.emplace_back(run, rank, i, i % ndevice, argc, argv);
|
||||
for(auto& itr : _threads)
|
||||
itr.join();
|
||||
for(size_t i = 0; i < nthreads; ++i)
|
||||
HIP_API_CALL(hipStreamDestroy(_streams.at(i)));
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
HIP_API_CALL(hipDeviceReset());
|
||||
|
||||
@@ -134,7 +137,7 @@ main(int argc, char** argv)
|
||||
}
|
||||
|
||||
__global__ void
|
||||
transpose_a(const int* in, int* out, int M, int N)
|
||||
transpose(const int* in, int* out, int M, int N)
|
||||
{
|
||||
__shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim];
|
||||
|
||||
@@ -145,17 +148,91 @@ transpose_a(const int* in, int* out, int M, int N)
|
||||
out[idx] = tile[threadIdx.x][threadIdx.y];
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
__global__ void
|
||||
test_page_migrate(Tp* data, Tp val)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
data[idx] += val;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
test_kern_large(uint64_t* output)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int test[4000];
|
||||
memset(test, 5, 4000);
|
||||
for(int& i : test)
|
||||
{
|
||||
i = i + 7;
|
||||
*output += i;
|
||||
result += i;
|
||||
}
|
||||
*output ^= result;
|
||||
*output ^= result;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
test_kern_medium(uint64_t* output)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int test[175];
|
||||
memset(test, 5, 175);
|
||||
for(int& i : test)
|
||||
{
|
||||
i = i + 7;
|
||||
*output += i;
|
||||
result += i;
|
||||
}
|
||||
*output ^= result;
|
||||
*output ^= result;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
test_kern_small(uint64_t* output)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int test[2];
|
||||
for(int& i : test)
|
||||
{
|
||||
i = i + 7;
|
||||
*output += i;
|
||||
result += i;
|
||||
}
|
||||
*output ^= result;
|
||||
*output ^= result;
|
||||
}
|
||||
|
||||
void
|
||||
run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
run(int rank, int tid, int devid, int argc, char** argv)
|
||||
{
|
||||
client::identify(tid + 1);
|
||||
|
||||
auto* stream = hipStream_t{};
|
||||
HIP_API_CALL(hipSetDevice(devid));
|
||||
HIP_API_CALL(hipStreamCreate(&stream));
|
||||
|
||||
run_migrate(rank, tid, stream, argc, argv);
|
||||
run_scratch(rank, tid, stream, argc, argv);
|
||||
run_transpose(rank, tid, stream, argc, argv);
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
HIP_API_CALL(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void
|
||||
run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
{
|
||||
auto* exe_name = ::basename(argv[0]);
|
||||
|
||||
unsigned int M = 4960 * 2;
|
||||
unsigned int N = 4960 * 2;
|
||||
if(argc > 2) nitr = atoll(argv[2]);
|
||||
if(argc > 3) nsync = atoll(argv[3]);
|
||||
|
||||
auto_lock_t _lk{print_lock};
|
||||
std::cout << "[transpose][" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl;
|
||||
std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid << "] M: " << M
|
||||
<< " N: " << N << std::endl;
|
||||
_lk.unlock();
|
||||
|
||||
std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)};
|
||||
@@ -180,10 +257,11 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
dim3 grid(M / 32, N / 32, 1);
|
||||
dim3 block(32, 32, 1); // transpose_a
|
||||
dim3 block(32, 32, 1); // transpose
|
||||
|
||||
print_lock.lock();
|
||||
printf("[transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n",
|
||||
printf("[%s][transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n",
|
||||
exe_name,
|
||||
rank,
|
||||
tid,
|
||||
grid.x,
|
||||
@@ -197,7 +275,7 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
for(size_t i = 0; i < nitr; ++i)
|
||||
{
|
||||
transpose_a<<<grid, block, 0, stream>>>(in, out, M, N);
|
||||
transpose<<<grid, block, 0, stream>>>(in, out, M, N);
|
||||
check_hip_error();
|
||||
if(i % nsync == (nsync - 1)) HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
}
|
||||
@@ -208,9 +286,9 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
float GB = (float) size * nitr * 2 / (1 << 30);
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[transpose][" << rank << "][" << tid << "] Runtime of transpose is " << time
|
||||
<< " sec\n";
|
||||
std::cout << "[transpose][" << rank << "][" << tid
|
||||
std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid
|
||||
<< "] Runtime of transpose is " << time << " sec\n";
|
||||
std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid
|
||||
<< "] The average performance of transpose is " << GB / time << " GBytes/sec"
|
||||
<< std::endl;
|
||||
print_lock.unlock();
|
||||
@@ -227,6 +305,92 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
delete[] out_matrix;
|
||||
}
|
||||
|
||||
void
|
||||
run_scratch(int rank, int tid, hipStream_t stream, int, char** argv)
|
||||
{
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
const auto* exe_name = ::basename(argv[0]);
|
||||
|
||||
uint64_t* data_ptr = nullptr;
|
||||
HIP_API_CALL(hipHostMalloc(&data_ptr, sizeof(uint64_t), 0));
|
||||
*data_ptr = 0;
|
||||
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
test_kern_large<<<1100, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_large<<<1100, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
double time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[" << exe_name << "][scratch][" << rank << "][" << tid
|
||||
<< "] Runtime of scratch is " << time << " sec\n";
|
||||
print_lock.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
run_migrate(int rank, int tid, hipStream_t stream, int, char** argv)
|
||||
{
|
||||
using data_type = uint64_t;
|
||||
constexpr data_type init_v = 1;
|
||||
constexpr data_type incr_v = 1;
|
||||
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
const auto* exe_name = ::basename(argv[0]);
|
||||
auto page_data = std::vector<data_type>(1024, 0);
|
||||
|
||||
HIP_API_CALL(hipHostRegister(
|
||||
page_data.data(), page_data.size() * sizeof(data_type), hipHostRegisterDefault));
|
||||
|
||||
for(auto& itr : page_data)
|
||||
itr = init_v;
|
||||
|
||||
test_page_migrate<<<1, 1024, 0, stream>>>(page_data.data(), incr_v);
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
for(auto& itr : page_data)
|
||||
{
|
||||
auto diff = (itr - incr_v);
|
||||
if(diff != init_v)
|
||||
{
|
||||
auto msg = std::stringstream{};
|
||||
msg << "invalid diff: " << diff << ". expected: " << init_v;
|
||||
throw std::runtime_error{msg.str()};
|
||||
}
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipHostUnregister(page_data.data()));
|
||||
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
double time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[" << exe_name << "][migrate][" << rank << "][" << tid
|
||||
<< "] Runtime of migrate is " << time << " sec\n";
|
||||
print_lock.unlock();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void
|
||||
|
||||
@@ -191,25 +191,25 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
|
||||
callback_name_info name_info = common::get_callback_id_names();
|
||||
|
||||
for(const auto& itr : name_info.operation_names)
|
||||
for(const auto& itr : name_info)
|
||||
{
|
||||
auto name_idx = std::stringstream{};
|
||||
name_idx << " [" << std::setw(3) << static_cast<int32_t>(itr.first) << "]";
|
||||
name_idx << " [" << std::setw(3) << itr.value << "]";
|
||||
call_stack_v->emplace_back(
|
||||
source_location{"rocprofiler_callback_tracing_kind_names " + name_idx.str(),
|
||||
__FILE__,
|
||||
__LINE__,
|
||||
name_info.kind_names.at(itr.first)});
|
||||
std::string{itr.name}});
|
||||
|
||||
for(const auto& ditr : itr.second)
|
||||
for(auto [didx, ditr] : itr.items())
|
||||
{
|
||||
auto operation_idx = std::stringstream{};
|
||||
operation_idx << " [" << std::setw(3) << static_cast<int32_t>(ditr.first) << "]";
|
||||
operation_idx << " [" << std::setw(3) << didx << "]";
|
||||
call_stack_v->emplace_back(source_location{
|
||||
"rocprofiler_callback_tracing_kind_operation_names" + operation_idx.str(),
|
||||
__FILE__,
|
||||
__LINE__,
|
||||
std::string{"- "} + std::string{ditr.second}});
|
||||
std::string{"- "} + std::string{*ditr}});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -322,9 +322,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "ExampleTool";
|
||||
|
||||
@@ -338,8 +335,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -217,9 +217,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "ExampleTool";
|
||||
|
||||
@@ -233,8 +230,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -352,9 +352,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "ExampleTool";
|
||||
|
||||
@@ -368,8 +365,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -22,6 +22,31 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#define ROCPROFILER_WARN(result) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
|
||||
std::cerr << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \
|
||||
<< " returned error code " << CHECKSTATUS << ": " << status_msg \
|
||||
<< ". This is just a warning!" << std::endl; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ROCPROFILER_CHECK(result) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
|
||||
std::stringstream errmsg{}; \
|
||||
errmsg << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \
|
||||
<< " failed with error code " << CHECKSTATUS << " :: " << status_msg; \
|
||||
throw std::runtime_error(errmsg.str()); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ROCPROFILER_CALL(result, msg) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
|
||||
#include "defines.hpp"
|
||||
|
||||
@@ -37,146 +38,18 @@
|
||||
|
||||
namespace common
|
||||
{
|
||||
using buffer_kind_names_t = std::map<rocprofiler_buffer_tracing_kind_t, const char*>;
|
||||
using buffer_kind_operation_names_t =
|
||||
std::map<rocprofiler_buffer_tracing_kind_t, std::map<uint32_t, const char*>>;
|
||||
using callback_kind_names_t = std::map<rocprofiler_callback_tracing_kind_t, const char*>;
|
||||
using callback_kind_operation_names_t =
|
||||
std::map<rocprofiler_callback_tracing_kind_t, std::map<uint32_t, const char*>>;
|
||||
using callback_name_info = rocprofiler::sdk::callback_name_info;
|
||||
using buffer_name_info = rocprofiler::sdk::buffer_name_info;
|
||||
|
||||
struct buffer_name_info
|
||||
{
|
||||
buffer_kind_names_t kind_names = {};
|
||||
buffer_kind_operation_names_t operation_names = {};
|
||||
};
|
||||
|
||||
struct callback_name_info
|
||||
{
|
||||
callback_kind_names_t kind_names = {};
|
||||
callback_kind_operation_names_t operation_names = {};
|
||||
};
|
||||
|
||||
inline buffer_name_info
|
||||
inline auto
|
||||
get_buffer_tracing_names()
|
||||
{
|
||||
static const auto supported_kinds = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
};
|
||||
|
||||
auto cb_name_info = buffer_name_info{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
//
|
||||
static auto tracing_kind_operation_cb =
|
||||
[](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<buffer_name_info*>(data_v);
|
||||
|
||||
if(supported_kinds.count(kindv) > 0)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
//
|
||||
// callback for each buffer kind (i.e. domain)
|
||||
//
|
||||
static auto tracing_kind_cb = [](rocprofiler_buffer_tracing_kind_t kind, void* data) {
|
||||
// store the buffer kind name
|
||||
auto* name_info_v = static_cast<buffer_name_info*>(data);
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
if(supported_kinds.count(kind) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
|
||||
"iterating buffer tracing kind operations");
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb,
|
||||
static_cast<void*>(&cb_name_info)),
|
||||
"iterating buffer tracing kinds");
|
||||
|
||||
return cb_name_info;
|
||||
return rocprofiler::sdk::get_buffer_tracing_names();
|
||||
}
|
||||
|
||||
inline callback_name_info
|
||||
inline auto
|
||||
get_callback_id_names()
|
||||
{
|
||||
static auto supported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
|
||||
};
|
||||
|
||||
auto cb_name_info = callback_name_info{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
//
|
||||
static auto tracing_kind_operation_cb =
|
||||
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<callback_name_info*>(data_v);
|
||||
|
||||
if(supported.count(kindv) > 0)
|
||||
{
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query callback tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
//
|
||||
// callback for each callback kind (i.e. domain)
|
||||
//
|
||||
static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) {
|
||||
// store the callback kind name
|
||||
auto* name_info_v = static_cast<callback_name_info*>(data);
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr),
|
||||
"query callback tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
if(supported.count(kind) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data)),
|
||||
"iterating callback tracing kind operations");
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb,
|
||||
static_cast<void*>(&cb_name_info)),
|
||||
"iterating callback tracing kinds");
|
||||
|
||||
return cb_name_info;
|
||||
return rocprofiler::sdk::get_callback_tracing_names();
|
||||
}
|
||||
} // namespace common
|
||||
|
||||
@@ -207,9 +207,9 @@ tool_fini(void* user_data)
|
||||
} // namespace
|
||||
|
||||
extern "C" rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t,
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// set the client name
|
||||
@@ -222,8 +222,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -286,9 +286,9 @@ tool_fini(void* user_data)
|
||||
} // namespace
|
||||
|
||||
extern "C" rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t,
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// set the client name
|
||||
@@ -301,8 +301,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -415,9 +415,9 @@ tool_fini(void*)
|
||||
} // namespace
|
||||
|
||||
extern "C" rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t,
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// set the client name
|
||||
@@ -430,8 +430,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
mark_as_advanced(amdclangpp_EXECUTABLE)
|
||||
|
||||
if(amdclangpp_EXECUTABLE)
|
||||
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(rocprofiler-sdk-samples-external-correlation-id-request LANGUAGES CXX HIP)
|
||||
|
||||
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
|
||||
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
|
||||
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
add_library(external-correlation-id-request-client SHARED)
|
||||
target_sources(external-correlation-id-request-client PRIVATE client.cpp client.hpp)
|
||||
target_link_libraries(
|
||||
external-correlation-id-request-client
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-build-flags
|
||||
rocprofiler::samples-common-library)
|
||||
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP)
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
add_executable(external-correlation-id-request)
|
||||
target_sources(external-correlation-id-request PRIVATE main.cpp)
|
||||
target_link_libraries(
|
||||
external-correlation-id-request
|
||||
PRIVATE external-correlation-id-request-client Threads::Threads
|
||||
rocprofiler::samples-build-flags)
|
||||
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV external-correlation-id-request-client)
|
||||
rocprofiler_samples_get_ld_library_path_env(LIBRARY_PATH_ENV)
|
||||
|
||||
set(external-correlation-id-request-env ${PRELOAD_ENV} ${LIBRARY_PATH_ENV})
|
||||
|
||||
add_test(NAME external-correlation-id-request
|
||||
COMMAND $<TARGET_FILE:external-correlation-id-request>)
|
||||
|
||||
set_tests_properties(
|
||||
external-correlation-id-request
|
||||
PROPERTIES TIMEOUT 45 LABELS "samples" ENVIRONMENT
|
||||
"${external-correlation-id-request-env}" FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
@@ -0,0 +1,24 @@
|
||||
# External Correlation ID Request Sample
|
||||
|
||||
## Services
|
||||
|
||||
- Code object callback tracing for mapping kernel IDs to kernel names
|
||||
- HIP Runtime API:
|
||||
- hipLaunchKernel
|
||||
- hipMemcpyAsync
|
||||
- hipMemsetAsync
|
||||
- hipMalloc
|
||||
- Kernel dispatch
|
||||
- Memory Copy
|
||||
- External correlation ID request:
|
||||
- Kernel dispatch
|
||||
- Memory copy
|
||||
- Correlation ID retirement
|
||||
|
||||
## Properties
|
||||
|
||||
- Subscribes to an external correlation ID request for all kernel dispatches and async memory copies
|
||||
- Generates an external correlation ID containing all the arguments passed to the request callback
|
||||
- Demonstrates that all external correlation IDs which are requested are passed back to tool in buffer callbacks
|
||||
- Demonstrates that all internal correlation IDs which are provided as an input argument to request are retired
|
||||
- Buffer size of 4096 bytes which is automatically flushed once >= 87.5% of buffer is filled (3584 bytes)
|
||||
@@ -0,0 +1,632 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// undefine NDEBUG so asserts are implemented
|
||||
#ifdef NDEBUG
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @file samples/api_buffered_tracing/client.cpp
|
||||
*
|
||||
* @brief Example rocprofiler client (tool)
|
||||
*/
|
||||
|
||||
#include "client.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/context.h>
|
||||
#include <rocprofiler-sdk/external_correlation.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hip/runtime_api_id.h>
|
||||
#include <rocprofiler-sdk/internal_threading.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "common/call_stack.hpp"
|
||||
#include "common/defines.hpp"
|
||||
#include "common/filesystem.hpp"
|
||||
#include "common/name_info.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace client
|
||||
{
|
||||
namespace
|
||||
{
|
||||
struct external_corr_id_data;
|
||||
|
||||
using common::buffer_name_info;
|
||||
using common::call_stack_t;
|
||||
using common::source_location;
|
||||
|
||||
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
using kernel_symbol_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
|
||||
using external_corr_id_set_t = std::unordered_set<external_corr_id_data*>;
|
||||
using retired_corr_id_set_t = std::unordered_set<uint64_t>;
|
||||
|
||||
rocprofiler_client_id_t* client_id = nullptr;
|
||||
rocprofiler_client_finalize_t client_fini_func = nullptr;
|
||||
rocprofiler_context_id_t client_ctx = {};
|
||||
rocprofiler_buffer_id_t client_buffer = {};
|
||||
buffer_name_info* client_name_info = new buffer_name_info{};
|
||||
kernel_symbol_map_t* client_kernels = new kernel_symbol_map_t{};
|
||||
auto client_mutex = std::shared_mutex{};
|
||||
auto client_external_corr_ids = external_corr_id_set_t{};
|
||||
auto client_retired_corr_ids = retired_corr_id_set_t{};
|
||||
|
||||
void
|
||||
print_call_stack(const call_stack_t& _call_stack)
|
||||
{
|
||||
common::print_call_stack("external_correlation_id_request.log", _call_stack);
|
||||
}
|
||||
|
||||
void
|
||||
tool_code_object_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
void* callback_data)
|
||||
{
|
||||
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
||||
record.operation == ROCPROFILER_CODE_OBJECT_LOAD)
|
||||
{
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD)
|
||||
{
|
||||
// flush the buffer to ensure that any lookups for the client kernel names for the code
|
||||
// object are completed
|
||||
auto flush_status = rocprofiler_flush_buffer(client_buffer);
|
||||
if(flush_status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
|
||||
ROCPROFILER_CHECK(flush_status);
|
||||
}
|
||||
}
|
||||
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
||||
record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
|
||||
{
|
||||
auto* data = static_cast<kernel_symbol_data_t*>(record.payload);
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD)
|
||||
{
|
||||
client_kernels->emplace(data->kernel_id, *data);
|
||||
}
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD)
|
||||
{
|
||||
auto flush_status = rocprofiler_flush_buffer(client_buffer);
|
||||
if(flush_status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
|
||||
ROCPROFILER_CHECK(flush_status);
|
||||
|
||||
client_kernels->erase(data->kernel_id);
|
||||
}
|
||||
}
|
||||
|
||||
(void) user_data;
|
||||
(void) callback_data;
|
||||
}
|
||||
|
||||
struct external_corr_id_data
|
||||
{
|
||||
using request_kind_t = rocprofiler_external_correlation_id_request_kind_t;
|
||||
static constexpr auto request_none = ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE;
|
||||
|
||||
rocprofiler_thread_id_t thread_id = 0;
|
||||
rocprofiler_context_id_t context_id = {.handle = 0};
|
||||
request_kind_t kind = request_none;
|
||||
rocprofiler_tracing_operation_t operation = 0;
|
||||
uint64_t internal_corr_id = 0;
|
||||
void* user_data = nullptr;
|
||||
uint64_t seen_count = 0;
|
||||
|
||||
bool valid() const;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, external_corr_id_data data)
|
||||
{
|
||||
if(!data.valid()) return os;
|
||||
auto ss = std::stringstream{};
|
||||
ss << "seen=" << data.seen_count << ", thr_id=" << data.thread_id
|
||||
<< ", context_id=" << data.context_id.handle << ", kind=" << data.kind
|
||||
<< ", operation=" << data.operation << ", corr_id=" << data.internal_corr_id
|
||||
<< ", user_data=" << data.user_data;
|
||||
return (os << ss.str());
|
||||
}
|
||||
};
|
||||
|
||||
bool
|
||||
operator==(external_corr_id_data lhs, external_corr_id_data rhs)
|
||||
{
|
||||
return std::tie(lhs.thread_id,
|
||||
lhs.context_id.handle,
|
||||
lhs.kind,
|
||||
lhs.operation,
|
||||
lhs.internal_corr_id,
|
||||
lhs.user_data) == std::tie(rhs.thread_id,
|
||||
rhs.context_id.handle,
|
||||
rhs.kind,
|
||||
rhs.operation,
|
||||
rhs.internal_corr_id,
|
||||
rhs.user_data);
|
||||
}
|
||||
|
||||
bool
|
||||
operator!=(external_corr_id_data lhs, external_corr_id_data rhs)
|
||||
{
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
bool
|
||||
external_corr_id_data::valid() const
|
||||
{
|
||||
static constexpr auto invalid_v = external_corr_id_data{};
|
||||
return (*this != invalid_v);
|
||||
}
|
||||
|
||||
int
|
||||
set_external_correlation_id(rocprofiler_thread_id_t thr_id,
|
||||
rocprofiler_context_id_t ctx_id,
|
||||
rocprofiler_external_correlation_id_request_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op,
|
||||
uint64_t internal_corr_id,
|
||||
rocprofiler_user_data_t* external_corr_id,
|
||||
void* user_data)
|
||||
{
|
||||
auto* _data =
|
||||
new external_corr_id_data{thr_id, ctx_id, kind, op, internal_corr_id, user_data, 0};
|
||||
|
||||
{
|
||||
static auto _mtx = std::mutex{};
|
||||
auto _lk = std::unique_lock{_mtx};
|
||||
client_external_corr_ids.emplace(_data);
|
||||
}
|
||||
|
||||
external_corr_id->ptr = _data;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_tracing_callback(rocprofiler_context_id_t context,
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_record_header_t** headers,
|
||||
size_t num_headers,
|
||||
void* user_data,
|
||||
uint64_t /*drop_count*/)
|
||||
{
|
||||
static const auto ensure_internal_correlation_id_retirement_ordering = [](uint64_t _corr_id) {
|
||||
auto _lk = std::shared_lock<std::shared_mutex>{client_mutex};
|
||||
// this correlation ID should not have reported as retired yet so
|
||||
// we are demoing the expectation here
|
||||
if(client_retired_corr_ids.count(_corr_id) > 0)
|
||||
{
|
||||
auto msg = std::stringstream{};
|
||||
msg << "internal correlation id " << _corr_id << " was retired prematurely";
|
||||
throw std::runtime_error{msg.str()};
|
||||
}
|
||||
};
|
||||
|
||||
for(size_t i = 0; i < num_headers; ++i)
|
||||
{
|
||||
auto* header = headers[i];
|
||||
|
||||
auto kind_name = std::string{};
|
||||
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING)
|
||||
{
|
||||
const char* _name = nullptr;
|
||||
auto _kind = static_cast<rocprofiler_buffer_tracing_kind_t>(header->kind);
|
||||
ROCPROFILER_CHECK(rocprofiler_query_buffer_tracing_kind_name(_kind, &_name, nullptr));
|
||||
if(_name)
|
||||
{
|
||||
static size_t len = 15;
|
||||
|
||||
kind_name = std::string{_name};
|
||||
len = std::max(len, kind_name.length());
|
||||
kind_name.resize(len, ' ');
|
||||
kind_name += " :: ";
|
||||
}
|
||||
}
|
||||
|
||||
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_hip_api_record_t*>(header->payload);
|
||||
|
||||
// this should always be empty
|
||||
auto _extern_corr_id = external_corr_id_data{};
|
||||
|
||||
// demonstrate reliability of correlation ID retirement ordering
|
||||
ensure_internal_correlation_id_retirement_ordering(record->correlation_id.internal);
|
||||
|
||||
auto info = std::stringstream{};
|
||||
info << "tid=" << record->thread_id << ", context=" << context.handle
|
||||
<< ", buffer_id=" << buffer_id.handle
|
||||
<< ", corr_id=" << record->correlation_id.internal << ", kind=" << record->kind
|
||||
<< ", operation=" << record->operation
|
||||
<< ", name=" << (*client_name_info)[record->kind][record->operation]
|
||||
<< ", extern_corr_id={" << _extern_corr_id << "}";
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>(header->payload);
|
||||
|
||||
// demonstrate reliability of correlation ID retirement ordering
|
||||
ensure_internal_correlation_id_retirement_ordering(record->correlation_id.internal);
|
||||
|
||||
auto _extern_corr_id = external_corr_id_data{};
|
||||
if(record->correlation_id.external.ptr)
|
||||
{
|
||||
auto* _extcid =
|
||||
static_cast<external_corr_id_data*>(record->correlation_id.external.ptr);
|
||||
_extcid->seen_count++;
|
||||
_extern_corr_id = *_extcid;
|
||||
// demonstrate reliability of correlation ID retirement ordering
|
||||
ensure_internal_correlation_id_retirement_ordering(_extcid->internal_corr_id);
|
||||
}
|
||||
|
||||
auto info = std::stringstream{};
|
||||
|
||||
info << "tid=" << record->thread_id << ", context=" << context.handle
|
||||
<< ", buffer_id=" << buffer_id.handle
|
||||
<< ", corr_id=" << record->correlation_id.internal << ", kind=" << record->kind
|
||||
<< ", operation=" << record->operation
|
||||
<< ", agent_id=" << record->dispatch_info.agent_id.handle
|
||||
<< ", queue_id=" << record->dispatch_info.queue_id.handle
|
||||
<< ", dispatch_id=" << record->dispatch_info.dispatch_id
|
||||
<< ", kernel_id=" << record->dispatch_info.kernel_id
|
||||
<< ", kernel=" << client_kernels->at(record->dispatch_info.kernel_id).kernel_name
|
||||
<< ", extern_corr_id={" << _extern_corr_id << "}";
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_memory_copy_record_t*>(header->payload);
|
||||
|
||||
// demonstrate reliability of correlation ID retirement ordering
|
||||
ensure_internal_correlation_id_retirement_ordering(record->correlation_id.internal);
|
||||
|
||||
auto _extern_corr_id = external_corr_id_data{};
|
||||
if(record->correlation_id.external.ptr)
|
||||
{
|
||||
auto* _extcid =
|
||||
static_cast<external_corr_id_data*>(record->correlation_id.external.ptr);
|
||||
_extcid->seen_count++;
|
||||
_extern_corr_id = *_extcid;
|
||||
// demonstrate reliability of correlation ID retirement ordering
|
||||
ensure_internal_correlation_id_retirement_ordering(_extcid->internal_corr_id);
|
||||
}
|
||||
|
||||
auto info = std::stringstream{};
|
||||
|
||||
info << "tid=" << record->thread_id << ", context=" << context.handle
|
||||
<< ", buffer_id=" << buffer_id.handle
|
||||
<< ", corr_id=" << record->correlation_id.internal << ", kind=" << record->kind
|
||||
<< ", operation=" << record->operation
|
||||
<< ", src_agent_id=" << record->src_agent_id.handle
|
||||
<< ", dst_agent_id=" << record->dst_agent_id.handle
|
||||
<< ", direction=" << record->operation << ", start=" << record->start_timestamp
|
||||
<< ", stop=" << record->end_timestamp
|
||||
<< ", name=" << client_name_info->at(record->kind, record->operation)
|
||||
<< ", extern_corr_id={" << _extern_corr_id << "}";
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_correlation_id_retirement_record_t*>(
|
||||
header->payload);
|
||||
|
||||
{
|
||||
auto _lk = std::unique_lock<std::shared_mutex>{client_mutex};
|
||||
client_retired_corr_ids.emplace(record->internal_correlation_id);
|
||||
}
|
||||
|
||||
auto _extern_corr_id = external_corr_id_data{};
|
||||
auto info = std::stringstream{};
|
||||
|
||||
info << "context=" << context.handle << ", buffer_id=" << buffer_id.handle
|
||||
<< ", corr_id=" << record->internal_correlation_id << ", kind=" << record->kind
|
||||
<< ", timestamp=" << record->timestamp
|
||||
<< ", name=" << client_name_info->at(record->kind) << ", extern_corr_id={"
|
||||
<< _extern_corr_id << "}";
|
||||
|
||||
static_cast<call_stack_t*>(user_data)->emplace_back(
|
||||
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _msg = std::stringstream{};
|
||||
_msg << "unexpected rocprofiler_record_header_t category + kind: (" << header->category
|
||||
<< " + " << header->kind << ")";
|
||||
throw std::runtime_error{_msg.str()};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Arg, typename... Args>
|
||||
auto
|
||||
make_array(Arg arg, Args&&... args)
|
||||
{
|
||||
constexpr auto N = 1 + sizeof...(Args);
|
||||
return std::array<Arg, N>{std::forward<Arg>(arg), std::forward<Args>(args)...};
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
{
|
||||
assert(tool_data != nullptr);
|
||||
|
||||
auto* call_stack_v = static_cast<call_stack_t*>(tool_data);
|
||||
|
||||
call_stack_v->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
|
||||
|
||||
*client_name_info = common::get_buffer_tracing_names();
|
||||
client_fini_func = fini_func;
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_create_context(&client_ctx));
|
||||
|
||||
auto code_object_ops = std::vector<rocprofiler_tracing_operation_t>{
|
||||
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER};
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_configure_callback_tracing_service(client_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
|
||||
code_object_ops.data(),
|
||||
code_object_ops.size(),
|
||||
tool_code_object_callback,
|
||||
nullptr));
|
||||
|
||||
constexpr auto buffer_size_bytes = 4096;
|
||||
constexpr auto buffer_watermark_bytes = buffer_size_bytes - (buffer_size_bytes / 8);
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_create_buffer(client_ctx,
|
||||
buffer_size_bytes,
|
||||
buffer_watermark_bytes,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
tool_tracing_callback,
|
||||
tool_data,
|
||||
&client_buffer));
|
||||
|
||||
auto external_corr_id_request_kinds =
|
||||
make_array(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY);
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_configure_external_correlation_id_request_service(
|
||||
client_ctx,
|
||||
external_corr_id_request_kinds.data(),
|
||||
external_corr_id_request_kinds.size(),
|
||||
set_external_correlation_id,
|
||||
nullptr));
|
||||
|
||||
auto hip_runtime_ops = std::vector<rocprofiler_tracing_operation_t>{};
|
||||
const auto desired_hip_runtime_ops = std::unordered_set<std::string_view>{
|
||||
"hipLaunchKernel", "hipMemcpyAsync", "hipMemsetAsync", "hipMalloc"};
|
||||
for(auto [idx, itr] : (*client_name_info)[ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API].items())
|
||||
{
|
||||
if(desired_hip_runtime_ops.count(*itr) > 0) hip_runtime_ops.emplace_back(idx);
|
||||
}
|
||||
|
||||
if(desired_hip_runtime_ops.size() != hip_runtime_ops.size())
|
||||
throw std::runtime_error{"missing hip operations"};
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_configure_buffer_tracing_service(client_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,
|
||||
hip_runtime_ops.data(),
|
||||
hip_runtime_ops.size(),
|
||||
client_buffer));
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service(
|
||||
client_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, client_buffer));
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service(
|
||||
client_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, client_buffer));
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service(
|
||||
client_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
|
||||
nullptr,
|
||||
0,
|
||||
client_buffer));
|
||||
|
||||
int valid_ctx = 0;
|
||||
ROCPROFILER_CHECK(rocprofiler_context_is_valid(client_ctx, &valid_ctx));
|
||||
if(valid_ctx == 0)
|
||||
{
|
||||
// notify rocprofiler that initialization failed
|
||||
// and all the contexts, buffers, etc. created
|
||||
// should be ignored
|
||||
return -1;
|
||||
}
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_start_context(client_ctx));
|
||||
|
||||
// no errors
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_fini(void* tool_data)
|
||||
{
|
||||
assert(tool_data != nullptr);
|
||||
client_fini_func = nullptr;
|
||||
client_id = nullptr;
|
||||
|
||||
std::cout << "finalizing...\n" << std::flush;
|
||||
rocprofiler_stop_context(client_ctx);
|
||||
ROCPROFILER_CHECK(rocprofiler_flush_buffer(client_buffer));
|
||||
|
||||
auto* _call_stack = static_cast<call_stack_t*>(tool_data);
|
||||
_call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
|
||||
|
||||
print_call_stack(*_call_stack);
|
||||
|
||||
size_t unretired = 0;
|
||||
size_t unseen = 0;
|
||||
for(auto* itr : client_external_corr_ids)
|
||||
{
|
||||
if(itr->seen_count != 1)
|
||||
{
|
||||
std::cerr << "external correlation ID seen " << itr->seen_count << " times: {" << *itr
|
||||
<< "}\n"
|
||||
<< std::flush;
|
||||
++unseen;
|
||||
}
|
||||
if(client_retired_corr_ids.count(itr->internal_corr_id) != 1)
|
||||
{
|
||||
std::cerr << "internal correlation ID passed to external correlation ID request was "
|
||||
"not retired: {"
|
||||
<< itr->internal_corr_id << "}\n"
|
||||
<< std::flush;
|
||||
++unretired;
|
||||
}
|
||||
|
||||
delete itr;
|
||||
}
|
||||
|
||||
std::cerr << "external correlation IDs not seen : " << unseen << "\n" << std::flush;
|
||||
std::cerr << "internal correlation IDs not retired: " << unretired << "\n" << std::flush;
|
||||
|
||||
if(unseen > 0) throw std::runtime_error{"unseen external correlation id data"};
|
||||
if(unretired > 0) throw std::runtime_error{"unretired internal correlation id values"};
|
||||
|
||||
delete _call_stack;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
setup()
|
||||
{
|
||||
if(int status = 0;
|
||||
rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0)
|
||||
{
|
||||
ROCPROFILER_CHECK(rocprofiler_force_configure(&rocprofiler_configure));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
shutdown()
|
||||
{
|
||||
if(client_id)
|
||||
{
|
||||
ROCPROFILER_CHECK(rocprofiler_flush_buffer(client_buffer));
|
||||
client_fini_func(*client_id);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
start()
|
||||
{
|
||||
ROCPROFILER_CHECK(rocprofiler_start_context(client_ctx));
|
||||
}
|
||||
|
||||
void
|
||||
identify(uint64_t val)
|
||||
{
|
||||
auto _tid = rocprofiler_thread_id_t{};
|
||||
rocprofiler_get_thread_id(&_tid);
|
||||
rocprofiler_user_data_t user_data = {};
|
||||
user_data.value = val;
|
||||
rocprofiler_push_external_correlation_id(client_ctx, _tid, user_data);
|
||||
}
|
||||
|
||||
void
|
||||
stop()
|
||||
{
|
||||
ROCPROFILER_CHECK(rocprofiler_stop_context(client_ctx));
|
||||
}
|
||||
} // namespace client
|
||||
|
||||
extern "C" rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t version,
|
||||
const char* runtime_version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// set the client name
|
||||
id->name = "ExampleTool";
|
||||
|
||||
// store client info
|
||||
client::client_id = id;
|
||||
|
||||
// compute major/minor/patch version info
|
||||
uint32_t major = version / 10000;
|
||||
uint32_t minor = (version % 10000) / 100;
|
||||
uint32_t patch = version % 100;
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
auto* client_tool_data = new std::vector<client::source_location>{};
|
||||
|
||||
client_tool_data->emplace_back(
|
||||
client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});
|
||||
|
||||
std::atexit([]() {
|
||||
std::cout << "atexit handler...\n" << std::flush;
|
||||
if(client::client_fini_func && client::client_id)
|
||||
client::client_fini_func(*client::client_id);
|
||||
});
|
||||
|
||||
// create configure data
|
||||
static auto cfg =
|
||||
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
|
||||
&client::tool_init,
|
||||
&client::tool_fini,
|
||||
static_cast<void*>(client_tool_data)};
|
||||
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef buffered_api_tracing_client_EXPORTS
|
||||
# define CLIENT_API __attribute__((visibility("default")))
|
||||
#else
|
||||
# define CLIENT_API
|
||||
#endif
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace client
|
||||
{
|
||||
void
|
||||
setup() CLIENT_API;
|
||||
|
||||
void
|
||||
shutdown() CLIENT_API;
|
||||
|
||||
void
|
||||
start() CLIENT_API;
|
||||
|
||||
void
|
||||
stop() CLIENT_API;
|
||||
|
||||
void
|
||||
identify(uint64_t corr_id) CLIENT_API;
|
||||
} // namespace client
|
||||
@@ -0,0 +1,412 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#define HIP_API_CALL(CALL) \
|
||||
{ \
|
||||
hipError_t error_ = (CALL); \
|
||||
if(error_ != hipSuccess) \
|
||||
{ \
|
||||
auto _hip_api_print_lk = auto_lock_t{print_lock}; \
|
||||
fprintf(stderr, \
|
||||
"%s:%d :: HIP error : %s\n", \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
hipGetErrorString(error_)); \
|
||||
throw std::runtime_error("hip_api_call"); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
using auto_lock_t = std::unique_lock<std::mutex>;
|
||||
auto print_lock = std::mutex{};
|
||||
size_t nthread_per_device = 2;
|
||||
size_t nitr = 500;
|
||||
size_t nsync = 10;
|
||||
constexpr unsigned shared_mem_tile_dim = 32;
|
||||
|
||||
void
|
||||
check_hip_error(void);
|
||||
|
||||
void
|
||||
verify(int* in, int* out, int M, int N);
|
||||
} // namespace
|
||||
|
||||
__global__ void
|
||||
transpose(const int* in, int* out, int M, int N);
|
||||
|
||||
void
|
||||
run(int rank, int tid, int devid, int argc, char** argv);
|
||||
|
||||
void
|
||||
run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv);
|
||||
|
||||
void
|
||||
run_migrate(int rank, int tid, hipStream_t stream, int, char** argv);
|
||||
|
||||
void
|
||||
run_scratch(int rank, int tid, hipStream_t stream, int argc, char** argv);
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
auto* exe_name = ::basename(argv[0]);
|
||||
|
||||
int rank = 0;
|
||||
for(int i = 1; i < argc; ++i)
|
||||
{
|
||||
auto _arg = std::string{argv[i]};
|
||||
if(_arg == "?" || _arg == "-h" || _arg == "--help")
|
||||
{
|
||||
fprintf(stderr,
|
||||
"usage: %s [NUM_THREADS_PER_DEVICE (%zu)] [NUM_ITERATION (%zu)] "
|
||||
"[SYNC_EVERY_N_ITERATIONS (%zu)]\n",
|
||||
exe_name,
|
||||
nthread_per_device,
|
||||
nitr,
|
||||
nsync);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
if(argc > 1) nthread_per_device = atoll(argv[1]);
|
||||
if(argc > 2) nitr = atoll(argv[2]);
|
||||
if(argc > 3) nsync = atoll(argv[3]);
|
||||
|
||||
int ndevice = 0;
|
||||
HIP_API_CALL(hipGetDeviceCount(&ndevice));
|
||||
|
||||
auto nthreads = (ndevice * nthread_per_device);
|
||||
|
||||
printf("[%s] Number of devices found: %i\n", exe_name, ndevice);
|
||||
printf("[%s] Number of threads (per device): %zu\n", exe_name, nthread_per_device);
|
||||
printf("[%s] Number of threads (total): %zu\n", exe_name, nthreads);
|
||||
printf("[%s] Number of iterations: %zu\n", exe_name, nitr);
|
||||
printf("[%s] Syncing every %zu iterations\n", exe_name, nsync);
|
||||
|
||||
{
|
||||
auto _threads = std::vector<std::thread>{};
|
||||
for(size_t i = 0; i < nthreads; ++i)
|
||||
_threads.emplace_back(run, rank, i, i % ndevice, argc, argv);
|
||||
for(auto& itr : _threads)
|
||||
itr.join();
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
HIP_API_CALL(hipDeviceReset());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
transpose(const int* in, int* out, int M, int N)
|
||||
{
|
||||
__shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim];
|
||||
|
||||
int idx = (blockIdx.y * blockDim.y + threadIdx.y) * M + blockIdx.x * blockDim.x + threadIdx.x;
|
||||
tile[threadIdx.y][threadIdx.x] = in[idx];
|
||||
__syncthreads();
|
||||
idx = (blockIdx.x * blockDim.x + threadIdx.y) * N + blockIdx.y * blockDim.y + threadIdx.x;
|
||||
out[idx] = tile[threadIdx.x][threadIdx.y];
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
__global__ void
|
||||
test_page_migrate(Tp* data, Tp val)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
data[idx] += val;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
test_kern_large(uint64_t* output)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int test[4000];
|
||||
memset(test, 5, 4000);
|
||||
for(int& i : test)
|
||||
{
|
||||
i = i + 7;
|
||||
*output += i;
|
||||
result += i;
|
||||
}
|
||||
*output ^= result;
|
||||
*output ^= result;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
test_kern_medium(uint64_t* output)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int test[175];
|
||||
memset(test, 5, 175);
|
||||
for(int& i : test)
|
||||
{
|
||||
i = i + 7;
|
||||
*output += i;
|
||||
result += i;
|
||||
}
|
||||
*output ^= result;
|
||||
*output ^= result;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
test_kern_small(uint64_t* output)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
int test[2];
|
||||
for(int& i : test)
|
||||
{
|
||||
i = i + 7;
|
||||
*output += i;
|
||||
result += i;
|
||||
}
|
||||
*output ^= result;
|
||||
*output ^= result;
|
||||
}
|
||||
|
||||
void
|
||||
run(int rank, int tid, int devid, int argc, char** argv)
|
||||
{
|
||||
auto* stream = hipStream_t{};
|
||||
HIP_API_CALL(hipSetDevice(devid));
|
||||
HIP_API_CALL(hipStreamCreate(&stream));
|
||||
|
||||
run_migrate(rank, tid, stream, argc, argv);
|
||||
run_scratch(rank, tid, stream, argc, argv);
|
||||
run_transpose(rank, tid, stream, argc, argv);
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
HIP_API_CALL(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void
|
||||
run_transpose(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
{
|
||||
auto* exe_name = ::basename(argv[0]);
|
||||
|
||||
unsigned int M = 4960 * 2;
|
||||
unsigned int N = 4960 * 2;
|
||||
if(argc > 2) nitr = atoll(argv[2]);
|
||||
if(argc > 3) nsync = atoll(argv[3]);
|
||||
|
||||
auto_lock_t _lk{print_lock};
|
||||
std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid << "] M: " << M
|
||||
<< " N: " << N << std::endl;
|
||||
_lk.unlock();
|
||||
|
||||
std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)};
|
||||
std::uniform_int_distribution<int> _dist{0, 1000};
|
||||
|
||||
size_t size = sizeof(int) * M * N;
|
||||
int* inp_matrix = new int[size];
|
||||
int* out_matrix = new int[size];
|
||||
for(size_t i = 0; i < M * N; i++)
|
||||
{
|
||||
inp_matrix[i] = _dist(_engine);
|
||||
out_matrix[i] = 0;
|
||||
}
|
||||
int* in = nullptr;
|
||||
int* out = nullptr;
|
||||
|
||||
HIP_API_CALL(hipMalloc(&in, size));
|
||||
HIP_API_CALL(hipMalloc(&out, size));
|
||||
HIP_API_CALL(hipMemsetAsync(in, 0, size, stream));
|
||||
HIP_API_CALL(hipMemsetAsync(out, 0, size, stream));
|
||||
HIP_API_CALL(hipMemcpyAsync(in, inp_matrix, size, hipMemcpyHostToDevice, stream));
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
dim3 grid(M / 32, N / 32, 1);
|
||||
dim3 block(32, 32, 1); // transpose
|
||||
|
||||
print_lock.lock();
|
||||
printf("[%s][transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n",
|
||||
exe_name,
|
||||
rank,
|
||||
tid,
|
||||
grid.x,
|
||||
grid.y,
|
||||
grid.z,
|
||||
block.x,
|
||||
block.y,
|
||||
block.z);
|
||||
print_lock.unlock();
|
||||
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
for(size_t i = 0; i < nitr; ++i)
|
||||
{
|
||||
transpose<<<grid, block, 0, stream>>>(in, out, M, N);
|
||||
check_hip_error();
|
||||
if(i % nsync == (nsync - 1)) HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
}
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
HIP_API_CALL(hipMemcpyAsync(out_matrix, out, size, hipMemcpyDeviceToHost, stream));
|
||||
double time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
|
||||
float GB = (float) size * nitr * 2 / (1 << 30);
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid
|
||||
<< "] Runtime of transpose is " << time << " sec\n";
|
||||
std::cout << "[" << exe_name << "][transpose][" << rank << "][" << tid
|
||||
<< "] The average performance of transpose is " << GB / time << " GBytes/sec"
|
||||
<< std::endl;
|
||||
print_lock.unlock();
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
// cpu_transpose(matrix, out_matrix, M, N);
|
||||
verify(inp_matrix, out_matrix, M, N);
|
||||
|
||||
HIP_API_CALL(hipFree(in));
|
||||
HIP_API_CALL(hipFree(out));
|
||||
|
||||
delete[] inp_matrix;
|
||||
delete[] out_matrix;
|
||||
}
|
||||
|
||||
void
|
||||
run_scratch(int rank, int tid, hipStream_t stream, int, char** argv)
|
||||
{
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
const auto* exe_name = ::basename(argv[0]);
|
||||
|
||||
uint64_t* data_ptr = nullptr;
|
||||
HIP_API_CALL(hipHostMalloc(&data_ptr, sizeof(uint64_t), 0));
|
||||
*data_ptr = 0;
|
||||
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
test_kern_large<<<1100, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_medium<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_small<<<1000, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
test_kern_large<<<1100, 1, 0, stream>>>(data_ptr);
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
double time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[" << exe_name << "][scratch][" << rank << "][" << tid
|
||||
<< "] Runtime of scratch is " << time << " sec\n";
|
||||
print_lock.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
run_migrate(int rank, int tid, hipStream_t stream, int, char** argv)
|
||||
{
|
||||
using data_type = uint64_t;
|
||||
constexpr data_type init_v = 1;
|
||||
constexpr data_type incr_v = 1;
|
||||
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
const auto* exe_name = ::basename(argv[0]);
|
||||
auto page_data = std::vector<data_type>(1024, 0);
|
||||
|
||||
HIP_API_CALL(hipHostRegister(
|
||||
page_data.data(), page_data.size() * sizeof(data_type), hipHostRegisterDefault));
|
||||
|
||||
for(auto& itr : page_data)
|
||||
itr = init_v;
|
||||
|
||||
test_page_migrate<<<1, 1024, 0, stream>>>(page_data.data(), incr_v);
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
|
||||
for(auto& itr : page_data)
|
||||
{
|
||||
auto diff = (itr - incr_v);
|
||||
if(diff != init_v)
|
||||
{
|
||||
auto msg = std::stringstream{};
|
||||
msg << "invalid diff: " << diff << ". expected: " << init_v;
|
||||
throw std::runtime_error{msg.str()};
|
||||
}
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipHostUnregister(page_data.data()));
|
||||
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
double time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[" << exe_name << "][migrate][" << rank << "][" << tid
|
||||
<< "] Runtime of migrate is " << time << " sec\n";
|
||||
print_lock.unlock();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void
|
||||
check_hip_error(void)
|
||||
{
|
||||
hipError_t err = hipGetLastError();
|
||||
if(err != hipSuccess)
|
||||
{
|
||||
auto_lock_t _lk{print_lock};
|
||||
std::cerr << "Error: " << hipGetErrorString(err) << std::endl;
|
||||
throw std::runtime_error("hip_api_call");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
verify(int* in, int* out, int M, int N)
|
||||
{
|
||||
for(int i = 0; i < 10; i++)
|
||||
{
|
||||
int row = rand() % M;
|
||||
int col = rand() % N;
|
||||
if(in[row * N + col] != out[col * M + row])
|
||||
{
|
||||
auto_lock_t _lk{print_lock};
|
||||
std::cout << "mismatch: " << row << ", " << col << " : " << in[row * N + col] << " | "
|
||||
<< out[col * M + row] << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
@@ -266,9 +266,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "ExampleTool";
|
||||
|
||||
@@ -282,8 +279,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
@@ -229,9 +229,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* client_id)
|
||||
{
|
||||
// If not first tool to register, indicate that the tool doesn't want to do anything
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// (optional) Provide a name for this tool to rocprofiler
|
||||
client_id->name = "ExampleTool";
|
||||
|
||||
|
||||
@@ -41,3 +41,4 @@ install(
|
||||
add_subdirectory(hip)
|
||||
add_subdirectory(hsa)
|
||||
add_subdirectory(marker)
|
||||
add_subdirectory(cxx)
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
#
|
||||
#
|
||||
# Installation of public C++ headers
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_CXX_HEADER_FILES name_info.hpp serialization.hpp)
|
||||
|
||||
install(
|
||||
FILES ${ROCPROFILER_CXX_HEADER_FILES}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/cxx
|
||||
COMPONENT development)
|
||||
|
||||
add_subdirectory(details)
|
||||
@@ -0,0 +1,11 @@
|
||||
#
|
||||
#
|
||||
# Installation of public C++ headers (implementations)
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_CXX_DETAILS_HEADER_FILES mpl.hpp name_info.hpp)
|
||||
|
||||
install(
|
||||
FILES ${ROCPROFILER_CXX_DETAILS_HEADER_FILES}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/cxx/details
|
||||
COMPONENT development)
|
||||
@@ -0,0 +1,84 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace sdk
|
||||
{
|
||||
namespace mpl
|
||||
{
|
||||
template <typename Tp>
|
||||
struct string_support
|
||||
{
|
||||
using type = Tp;
|
||||
using return_type = void;
|
||||
|
||||
static constexpr auto value = false;
|
||||
static constexpr void default_value() {}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct string_support<const char*>
|
||||
{
|
||||
using type = const char*;
|
||||
using return_type = type;
|
||||
|
||||
static constexpr auto value = true;
|
||||
static constexpr type default_value() { return nullptr; }
|
||||
|
||||
type operator()(const char* val) const { return val; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct string_support<std::string_view>
|
||||
{
|
||||
using type = std::string_view;
|
||||
using return_type = type;
|
||||
|
||||
static constexpr auto value = true;
|
||||
static constexpr type default_value() { return type{}; }
|
||||
|
||||
type operator()(const char* val) const { return type{val}; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct string_support<std::string>
|
||||
{
|
||||
using type = std::string;
|
||||
using return_type = type;
|
||||
|
||||
static constexpr auto value = true;
|
||||
static type default_value() { return type{}; }
|
||||
|
||||
type operator()(const char* val) const { return type{val}; }
|
||||
};
|
||||
} // namespace mpl
|
||||
} // namespace sdk
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,180 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(ROCPROFILER_SDK_CXX_NAME_INFO_HPP_)
|
||||
# include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#endif
|
||||
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace sdk
|
||||
{
|
||||
namespace utility
|
||||
{
|
||||
template <typename EnumT, typename ValueT>
|
||||
typename name_info_impl<EnumT, ValueT>::item_array_t
|
||||
name_info_impl<EnumT, ValueT>::items() const
|
||||
{
|
||||
auto ret = item_array_t{};
|
||||
ret.reserve(operations.size());
|
||||
rocprofiler_tracing_operation_t _idx = 0;
|
||||
for(const auto& itr : operations)
|
||||
ret.emplace_back(_idx++, &itr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename EnumT, typename ValueT>
|
||||
inline void
|
||||
name_info<EnumT, ValueT>::emplace(EnumT idx, const char* name)
|
||||
{
|
||||
impl.resize(idx + 1, value_type{});
|
||||
impl.at(idx).value = idx;
|
||||
impl.at(idx).name = support_type{}(name);
|
||||
}
|
||||
|
||||
template <typename EnumT, typename ValueT>
|
||||
inline void
|
||||
name_info<EnumT, ValueT>::emplace(EnumT idx,
|
||||
rocprofiler_tracing_operation_t opidx,
|
||||
const char* name)
|
||||
{
|
||||
impl.resize(idx + 1, value_type{});
|
||||
impl.at(idx).operations.resize(opidx + 1, support_type::default_value());
|
||||
impl.at(idx).operations.at(opidx) = support_type{}(name);
|
||||
}
|
||||
|
||||
template <typename EnumT, typename ValueT>
|
||||
typename name_info<EnumT, ValueT>::return_type
|
||||
name_info<EnumT, ValueT>::at(EnumT idx) const
|
||||
{
|
||||
return impl.at(idx).name;
|
||||
}
|
||||
|
||||
template <typename EnumT, typename ValueT>
|
||||
typename name_info<EnumT, ValueT>::return_type
|
||||
name_info<EnumT, ValueT>::at(EnumT idx, rocprofiler_tracing_operation_t opidx) const
|
||||
{
|
||||
return impl.at(idx).operations.at(opidx);
|
||||
}
|
||||
|
||||
template <typename EnumT, typename ValueT>
|
||||
typename name_info<EnumT, ValueT>::item_array_t
|
||||
name_info<EnumT, ValueT>::items() const
|
||||
{
|
||||
auto ret = item_array_t{};
|
||||
ret.reserve(impl.size());
|
||||
for(const auto& itr : impl)
|
||||
ret.emplace_back(&itr);
|
||||
return ret;
|
||||
}
|
||||
} // namespace utility
|
||||
|
||||
constexpr auto success_v = ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
template <typename Tp>
|
||||
inline callback_name_info_t<Tp>
|
||||
get_callback_tracing_names()
|
||||
{
|
||||
auto cb_name_info = callback_name_info_t<Tp>{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
//
|
||||
static auto tracing_kind_operation_cb =
|
||||
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<callback_name_info_t<Tp>*>(data_v);
|
||||
|
||||
const char* name = nullptr;
|
||||
auto status = rocprofiler_query_callback_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr);
|
||||
if(status == success_v && name) name_info_v->emplace(kindv, operation, name);
|
||||
return 0;
|
||||
};
|
||||
|
||||
//
|
||||
// callback for each buffer kind (i.e. domain)
|
||||
//
|
||||
static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) {
|
||||
// store the buffer kind name
|
||||
auto* name_info_v = static_cast<callback_name_info_t<Tp>*>(data);
|
||||
const char* name = nullptr;
|
||||
auto status = rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr);
|
||||
if(status == success_v && name) name_info_v->emplace(kind, name);
|
||||
|
||||
rocprofiler_iterate_callback_tracing_kind_operations(kind, tracing_kind_operation_cb, data);
|
||||
return 0;
|
||||
};
|
||||
|
||||
rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb, &cb_name_info);
|
||||
|
||||
return cb_name_info;
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
inline buffer_name_info_t<Tp>
|
||||
get_buffer_tracing_names()
|
||||
{
|
||||
auto cb_name_info = buffer_name_info_t<Tp>{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
//
|
||||
static auto tracing_kind_operation_cb =
|
||||
[](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<buffer_name_info_t<Tp>*>(data_v);
|
||||
|
||||
const char* name = nullptr;
|
||||
auto status = rocprofiler_query_buffer_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr);
|
||||
if(status == success_v && name) name_info_v->emplace(kindv, operation, name);
|
||||
return 0;
|
||||
};
|
||||
|
||||
//
|
||||
// callback for each buffer kind (i.e. domain)
|
||||
//
|
||||
static auto tracing_kind_cb = [](rocprofiler_buffer_tracing_kind_t kind, void* data) {
|
||||
// store the buffer kind name
|
||||
auto* name_info_v = static_cast<buffer_name_info_t<Tp>*>(data);
|
||||
const char* name = nullptr;
|
||||
auto status = rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr);
|
||||
if(status == success_v && name) name_info_v->emplace(kind, name);
|
||||
|
||||
rocprofiler_iterate_buffer_tracing_kind_operations(kind, tracing_kind_operation_cb, data);
|
||||
return 0;
|
||||
};
|
||||
|
||||
rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb, &cb_name_info);
|
||||
|
||||
return cb_name_info;
|
||||
}
|
||||
} // namespace sdk
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,116 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/cxx/details/mpl.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace sdk
|
||||
{
|
||||
namespace utility
|
||||
{
|
||||
template <typename EnumT, typename ValueT = std::string_view>
|
||||
struct name_info_impl
|
||||
{
|
||||
using support_type = mpl::string_support<ValueT>;
|
||||
using enum_type = EnumT;
|
||||
using value_type = ValueT;
|
||||
using return_type = typename support_type::return_type;
|
||||
using item_type = std::pair<rocprofiler_tracing_operation_t, const value_type*>;
|
||||
using item_array_t = std::vector<item_type>;
|
||||
|
||||
static_assert(support_type::value,
|
||||
"value_type must be supported by rocprofiler::sdk::mpl::string_support");
|
||||
|
||||
return_type operator()() const { return name; }
|
||||
return_type operator()(size_t idx) const { return operations.at(idx); }
|
||||
return_type operator[](size_t idx) const { return operations.at(idx); }
|
||||
|
||||
item_array_t items() const;
|
||||
|
||||
EnumT value = static_cast<EnumT>(0);
|
||||
value_type name = {};
|
||||
std::vector<value_type> operations = {};
|
||||
};
|
||||
|
||||
template <typename EnumT, typename ValueT = std::string_view>
|
||||
struct name_info
|
||||
{
|
||||
using value_type = name_info_impl<EnumT, ValueT>;
|
||||
using enum_type = EnumT;
|
||||
using support_type = typename value_type::support_type;
|
||||
using return_type = typename value_type::return_type;
|
||||
using item_type = const value_type*;
|
||||
using item_array_t = std::vector<item_type>;
|
||||
|
||||
void emplace(EnumT idx, const char* name);
|
||||
void emplace(EnumT idx, rocprofiler_tracing_operation_t opidx, const char* name);
|
||||
|
||||
return_type at(EnumT idx) const;
|
||||
return_type at(EnumT idx, rocprofiler_tracing_operation_t opidx) const;
|
||||
|
||||
item_array_t items() const;
|
||||
|
||||
decltype(auto) size() const { return impl.size(); }
|
||||
decltype(auto) begin() { return impl.begin(); }
|
||||
decltype(auto) begin() const { return impl.begin(); }
|
||||
decltype(auto) end() { return impl.end(); }
|
||||
decltype(auto) end() const { return impl.end(); }
|
||||
|
||||
value_type& operator[](size_t idx) { return impl.at(idx); }
|
||||
const value_type& operator[](size_t idx) const { return impl.at(idx); }
|
||||
|
||||
private:
|
||||
std::vector<value_type> impl = {};
|
||||
};
|
||||
} // namespace utility
|
||||
|
||||
template <typename Tp = std::string_view>
|
||||
using callback_name_info_t = utility::name_info<rocprofiler_callback_tracing_kind_t, Tp>;
|
||||
|
||||
template <typename Tp = std::string_view>
|
||||
using buffer_name_info_t = utility::name_info<rocprofiler_buffer_tracing_kind_t, Tp>;
|
||||
|
||||
using callback_name_info = callback_name_info_t<std::string_view>;
|
||||
using buffer_name_info = buffer_name_info_t<std::string_view>;
|
||||
|
||||
template <typename Tp = std::string_view>
|
||||
callback_name_info_t<Tp>
|
||||
get_callback_tracing_names();
|
||||
|
||||
template <typename Tp = std::string_view>
|
||||
buffer_name_info_t<Tp>
|
||||
get_buffer_tracing_names();
|
||||
} // namespace sdk
|
||||
} // namespace rocprofiler
|
||||
|
||||
#define ROCPROFILER_SDK_CXX_NAME_INFO_HPP_ 1
|
||||
#include <rocprofiler-sdk/cxx/details/name_info.hpp>
|
||||
@@ -0,0 +1,58 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
|
||||
#include <cereal/cereal.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace cereal
|
||||
{
|
||||
template <typename ArchiveT, typename EnumT, typename ValueT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler::sdk::utility::name_info<EnumT, ValueT>& data)
|
||||
{
|
||||
ar.makeArray();
|
||||
for(const auto& itr : data)
|
||||
ar(cereal::make_nvp("entry", itr));
|
||||
}
|
||||
|
||||
template <typename ArchiveT, typename EnumT, typename ValueT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler::sdk::utility::name_info_impl<EnumT, ValueT>& data)
|
||||
{
|
||||
auto _name = std::string{data.name};
|
||||
auto _ops = std::vector<std::string>{};
|
||||
_ops.reserve(data.operations.size());
|
||||
|
||||
ar(cereal::make_nvp("kind", _name));
|
||||
for(auto itr : data.operations)
|
||||
_ops.emplace_back(itr);
|
||||
ar(cereal::make_nvp("operations", _ops));
|
||||
}
|
||||
} // namespace cereal
|
||||
@@ -204,7 +204,8 @@ rocprofiler_is_finalized(int* status) ROCPROFILER_API ROCPROFILER_NONNULL(1);
|
||||
* uint32_t patch = version % 100;
|
||||
*
|
||||
* // print info
|
||||
* printf("Configuring rocprofiler (v%u.%u.%u) [%s]\n", major, minor, patch, runtime_version);
|
||||
* printf("Configuring %s with rocprofiler-sdk (v%u.%u.%u) [%s]\n",
|
||||
* client_id->name, major, minor, patch, runtime_version);
|
||||
*
|
||||
* // create configure data
|
||||
* static auto cfg = rocprofiler_tool_configure_result_t{ &my_init_func,
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "lib/rocprofiler-sdk/hsa/queue.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
@@ -156,8 +157,8 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
|
||||
LOG_IF(FATAL, _corr_id->get_ref_count() == 0)
|
||||
<< "reference counter for correlation id " << _corr_id->internal << " from thread "
|
||||
<< _corr_id->thread_idx << " has no reference count";
|
||||
_corr_id->sub_ref_count();
|
||||
_corr_id->sub_kern_count();
|
||||
_corr_id->sub_ref_count();
|
||||
}
|
||||
|
||||
queue_info_session.queue.async_complete();
|
||||
@@ -239,18 +240,6 @@ WriteInterceptor(const void* packets,
|
||||
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
|
||||
tracing_data_v);
|
||||
|
||||
auto* corr_id = context::get_latest_correlation_id();
|
||||
auto thr_id = (corr_id) ? corr_id->thread_idx : common::get_tid();
|
||||
auto user_data = rocprofiler_user_data_t{.value = 0};
|
||||
auto internal_corr_id = (corr_id) ? corr_id->internal : 0;
|
||||
|
||||
tracing::populate_external_correlation_ids(
|
||||
tracing_data_v.external_correlation_ids,
|
||||
thr_id,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
||||
ROCPROFILER_KERNEL_DISPATCH_ENQUEUE,
|
||||
internal_corr_id);
|
||||
|
||||
const auto* packets_arr = static_cast<const rocprofiler_packet*>(packets);
|
||||
auto transformed_packets = std::vector<rocprofiler_packet>{};
|
||||
|
||||
@@ -267,6 +256,41 @@ WriteInterceptor(const void* packets,
|
||||
continue;
|
||||
}
|
||||
|
||||
auto* corr_id = context::get_latest_correlation_id();
|
||||
context::correlation_id* _corr_id_pop = nullptr;
|
||||
|
||||
if(!corr_id)
|
||||
{
|
||||
constexpr auto ref_count = 1;
|
||||
corr_id = context::correlation_tracing_service::construct(ref_count);
|
||||
_corr_id_pop = corr_id;
|
||||
}
|
||||
|
||||
// increase the reference count to denote that this correlation id is being used in a kernel
|
||||
corr_id->add_ref_count();
|
||||
corr_id->add_kern_count();
|
||||
|
||||
auto thr_id = (corr_id) ? corr_id->thread_idx : common::get_tid();
|
||||
auto user_data = rocprofiler_user_data_t{.value = 0};
|
||||
auto internal_corr_id = (corr_id) ? corr_id->internal : 0;
|
||||
|
||||
// if we constructed a correlation id, this decrements the reference count after the
|
||||
// underlying function returns
|
||||
auto _corr_id_dtor = common::scope_destructor{[_corr_id_pop]() {
|
||||
if(_corr_id_pop)
|
||||
{
|
||||
context::pop_latest_correlation_id(_corr_id_pop);
|
||||
_corr_id_pop->sub_ref_count();
|
||||
}
|
||||
}};
|
||||
|
||||
tracing::populate_external_correlation_ids(
|
||||
tracing_data_v.external_correlation_ids,
|
||||
thr_id,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
||||
ROCPROFILER_KERNEL_DISPATCH_ENQUEUE,
|
||||
internal_corr_id);
|
||||
|
||||
queue.async_started();
|
||||
// Copy kernel pkt, copy is to allow for signal to be modified
|
||||
rocprofiler_packet kernel_pkt = packets_arr[i];
|
||||
@@ -274,13 +298,6 @@ WriteInterceptor(const void* packets,
|
||||
queue.create_signal(HSA_AMD_SIGNAL_AMD_GPU_ONLY,
|
||||
&kernel_pkt.ext_amd_aql_pm4.completion_signal);
|
||||
|
||||
// increase the reference count to denote that this correlation id is being used in a kernel
|
||||
if(corr_id)
|
||||
{
|
||||
corr_id->add_ref_count();
|
||||
corr_id->add_kern_count();
|
||||
}
|
||||
|
||||
// computes the "size" based on the offset of reserved_padding field
|
||||
constexpr auto kernel_dispatch_info_rt_size =
|
||||
common::compute_runtime_sizeof<rocprofiler_kernel_dispatch_info_t>();
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
struct page_migration_info<ROCPROFILER_PAGE_MIGRATION_##TYPE> \
|
||||
{ \
|
||||
static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \
|
||||
static constexpr auto name = #TYPE; \
|
||||
static constexpr auto name = "PAGE_MIGRATION_" #TYPE; \
|
||||
static constexpr size_t uvm_bitmask = \
|
||||
bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
|
||||
static constexpr size_t kfd_bitmask = \
|
||||
|
||||
@@ -26,13 +26,14 @@ set(c-tool-env
|
||||
|
||||
set_tests_properties(
|
||||
test-c-tool-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"${c-tool-env}"
|
||||
PASS_REGULAR_EXPRESSION
|
||||
"Test C tool is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"${c-tool-env}"
|
||||
PASS_REGULAR_EXPRESSION
|
||||
"Test C tool \\(priority=0\\) is using rocprofiler-sdk v([0-9]+\\.[0-9]+\\.[0-9]+)"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
@@ -21,17 +21,17 @@ def to_dict(key_values):
|
||||
return a
|
||||
|
||||
|
||||
def op_name(op_name, record):
|
||||
found_op = False
|
||||
op_key = None
|
||||
def get_operation(record, kind_name, op_name=None):
|
||||
for idx, itr in enumerate(record["names"]):
|
||||
if kind_name == itr["kind"]:
|
||||
if op_name is None:
|
||||
return idx, itr["operations"]
|
||||
else:
|
||||
for oidx, oname in enumerate(itr["operations"]):
|
||||
if op_name == oname:
|
||||
return oidx
|
||||
|
||||
for kind_node in record["names"]["kind_names"]:
|
||||
if kind_node["value"] == op_name:
|
||||
op_key = kind_node["key"]
|
||||
|
||||
for op_node in record["names"]["operation_names"]:
|
||||
if op_node["key"] == op_key:
|
||||
return op_key, to_dict(op_node["value"])
|
||||
return None
|
||||
|
||||
|
||||
def dict_from_value_key(d):
|
||||
@@ -259,18 +259,17 @@ def test_retired_correlation_ids(input_data):
|
||||
|
||||
def get_allocated_pages(callback_records):
|
||||
# Get how many pages we allocated
|
||||
hip_api_traces = callback_records["hip_api_traces"]
|
||||
_, op_dict = op_name("HIP_RUNTIME_API", callback_records)
|
||||
op_key = [k for k, v in op_dict.items() if v == "hipHostRegister"][0]
|
||||
op_idx = get_operation(callback_records, "HIP_RUNTIME_API", "hipHostRegister")
|
||||
rt_idx, rt_data = get_operation(callback_records, "HIP_RUNTIME_API")
|
||||
|
||||
assert op_idx is not None, f"{rt_idx}:\n{rt_data}"
|
||||
|
||||
host_register_record = []
|
||||
for r in hip_api_traces:
|
||||
if (
|
||||
r["operation"] == op_key
|
||||
and "sizeBytes" in r["args"]
|
||||
and "hostPtr" in r["args"]
|
||||
):
|
||||
host_register_record.append(r)
|
||||
for itr in callback_records["hip_api_traces"]:
|
||||
if itr["kind"] == rt_idx and itr["operation"] == op_idx and itr["phase"] == 2:
|
||||
assert "sizeBytes" in itr["args"].keys(), f"{itr}"
|
||||
assert "hostPtr" in itr["args"].keys(), f"{itr}"
|
||||
host_register_record.append(itr)
|
||||
|
||||
assert len(host_register_record) == 1
|
||||
alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10)
|
||||
@@ -285,11 +284,11 @@ def test_page_migration_data(input_data):
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
buffer_records = sdk_data["buffer_records"]
|
||||
callback_records = sdk_data["callback_records"]
|
||||
page_migtation_buffers = buffer_records["page_migration"]
|
||||
page_migration_buffers = buffer_records["page_migration"]
|
||||
|
||||
bf_op_id, bf_op_names = op_name("PAGE_MIGRATION", buffer_records)
|
||||
assert bf_op_names[0] == "NONE"
|
||||
assert "PAGE_MIGRATE" in str(bf_op_names)
|
||||
_, bf_op_names = get_operation(buffer_records, "PAGE_MIGRATION")
|
||||
assert bf_op_names[0] == "PAGE_MIGRATION_NONE"
|
||||
assert "PAGE_MIGRATION_PAGE_MIGRATE" in bf_op_names
|
||||
assert len(bf_op_names) == 5
|
||||
|
||||
node_ids = set(x["gpu_id"] for x in sdk_data["agents"])
|
||||
@@ -299,15 +298,14 @@ def test_page_migration_data(input_data):
|
||||
assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test
|
||||
|
||||
# PID must be same
|
||||
assert len(set(r["pid"] for r in page_migtation_buffers)) == 1
|
||||
assert len(set(r["pid"] for r in page_migration_buffers)) == 1
|
||||
|
||||
for r in page_migtation_buffers:
|
||||
for r in page_migration_buffers:
|
||||
op = r["operation"]
|
||||
|
||||
assert r["size"] == 136
|
||||
assert r["kind"] == bf_op_id
|
||||
assert op != 0 and bf_op_names[op] != "NONE"
|
||||
assert bf_op_names[op].lower() in r
|
||||
assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE"
|
||||
assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys()
|
||||
|
||||
if "page_migrate" in r:
|
||||
assert r["page_migrate"]["from_node"] in node_ids
|
||||
@@ -328,7 +326,7 @@ def test_page_migration_data(input_data):
|
||||
assert 0 < r["start_timestamp"] < r["end_timestamp"]
|
||||
|
||||
# Check for events with our page
|
||||
for r in page_migtation_buffers:
|
||||
for r in page_migration_buffers:
|
||||
|
||||
if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr:
|
||||
assert end_addr == r["page_migrate"]["end_addr"]
|
||||
|
||||
@@ -35,6 +35,8 @@ class dotdict(dict):
|
||||
for k, v in self.items():
|
||||
if isinstance(v, dict):
|
||||
self.__setitem__(k, dotdict(v))
|
||||
# print(k)
|
||||
elif isinstance(v, list):
|
||||
self.__setitem__(k, [dotdict(i) for i in v])
|
||||
elif isinstance(v, (list, tuple)):
|
||||
self.__setitem__(
|
||||
k,
|
||||
[dotdict(i) if isinstance(i, (list, tuple, dict)) else i for i in v],
|
||||
)
|
||||
|
||||
@@ -152,16 +152,15 @@ def test_external_correlation_ids(input_data):
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}"
|
||||
|
||||
|
||||
def op_name(op_name, record):
|
||||
op_key = None
|
||||
|
||||
for kind_node in record["names"]["kind_names"]:
|
||||
if kind_node["value"] == op_name:
|
||||
op_key = kind_node["key"]
|
||||
|
||||
for op_node in record["names"]["operation_names"]:
|
||||
if op_node["key"] == op_key:
|
||||
return op_node
|
||||
def get_operation(record, kind_name, op_name=None):
|
||||
for idx, itr in enumerate(record["names"]):
|
||||
if kind_name == itr["kind"]:
|
||||
if op_name is None:
|
||||
return idx, itr["operations"]
|
||||
else:
|
||||
for oidx, oname in enumerate(itr["operations"]):
|
||||
if op_name == oname:
|
||||
return oidx
|
||||
|
||||
return None
|
||||
|
||||
@@ -179,16 +178,14 @@ def test_scratch_memory_tracking(input_data):
|
||||
|
||||
assert len(scratch_callback_data) == 2 * len(scratch_buffer_data)
|
||||
|
||||
cb_op_names = op_name("SCRATCH_MEMORY", callback_records)["value"]
|
||||
bf_op_names = op_name("SCRATCH_MEMORY", buffer_records)["value"]
|
||||
_, cb_op_names = get_operation(callback_records, "SCRATCH_MEMORY")
|
||||
_, bf_op_names = get_operation(buffer_records, "SCRATCH_MEMORY")
|
||||
|
||||
assert len(cb_op_names) == 4
|
||||
assert len(bf_op_names) == 4
|
||||
|
||||
# op name -> enum value
|
||||
scratch_cb_op_map = {node["value"]: node["key"] for node in cb_op_names}
|
||||
scratch_bf_op_map = {node["value"]: node["key"] for node in bf_op_names}
|
||||
assert scratch_cb_op_map == scratch_bf_op_map
|
||||
assert cb_op_names == bf_op_names
|
||||
|
||||
scratch_reported_agent_ids = set()
|
||||
detected_agents_ids = set(
|
||||
@@ -253,10 +250,8 @@ def test_scratch_memory_tracking(input_data):
|
||||
), f"this:\n{this_node}\n\nnext:\n{next_node}"
|
||||
|
||||
# alloc has more data vs free and async reclaim
|
||||
scratch_alloc_node = (
|
||||
this_node["operation"] == scratch_cb_op_map["SCRATCH_MEMORY_ALLOC"]
|
||||
)
|
||||
if scratch_alloc_node:
|
||||
scratch_alloc_node = cb_op_names[this_node["operation"]]
|
||||
if scratch_alloc_node == "SCRATCH_MEMORY_ALLOC":
|
||||
assert (
|
||||
pl(this_node)["queue_id"]["handle"]
|
||||
== pl(next_node)["queue_id"]["handle"]
|
||||
|
||||
@@ -40,9 +40,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return NULL;
|
||||
|
||||
// set the client name
|
||||
id->name = "Test C tool";
|
||||
|
||||
@@ -52,8 +49,9 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t patch = version % 100;
|
||||
|
||||
// generate info string
|
||||
printf("%s is using rocprofiler-sdk v%i.%i.%i (%s)\n",
|
||||
printf("%s (priority=%u) is using rocprofiler-sdk v%i.%i.%i (%s)\n",
|
||||
id->name,
|
||||
priority,
|
||||
major,
|
||||
minor,
|
||||
patch,
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "common/defines.hpp"
|
||||
#include "common/filesystem.hpp"
|
||||
#include "common/hash.hpp"
|
||||
#include "common/name_info.hpp"
|
||||
#include "common/perfetto.hpp"
|
||||
#include "common/serialization.hpp"
|
||||
|
||||
@@ -197,146 +198,14 @@ make_array(Tp&& arg, Args&&... args)
|
||||
return std::array<Tp, N>{std::forward<Tp>(arg), std::forward<Args>(args)...};
|
||||
}
|
||||
|
||||
using call_stack_t = std::vector<source_location>;
|
||||
using buffer_kind_names_t = std::map<rocprofiler_buffer_tracing_kind_t, std::string>;
|
||||
using buffer_kind_operation_names_t =
|
||||
std::map<rocprofiler_buffer_tracing_kind_t, std::map<uint32_t, std::string>>;
|
||||
|
||||
using callback_kind_names_t = std::map<rocprofiler_callback_tracing_kind_t, std::string>;
|
||||
using callback_kind_operation_names_t =
|
||||
std::map<rocprofiler_callback_tracing_kind_t, std::map<uint32_t, std::string>>;
|
||||
using call_stack_t = std::vector<source_location>;
|
||||
|
||||
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
using kernel_symbol_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
|
||||
|
||||
struct callback_name_info
|
||||
{
|
||||
callback_kind_names_t kind_names = {};
|
||||
callback_kind_operation_names_t operation_names = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("kind_names", kind_names));
|
||||
ar(cereal::make_nvp("operation_names", operation_names));
|
||||
}
|
||||
};
|
||||
|
||||
struct buffer_name_info
|
||||
{
|
||||
buffer_kind_names_t kind_names = {};
|
||||
buffer_kind_operation_names_t operation_names = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("kind_names", kind_names));
|
||||
ar(cereal::make_nvp("operation_names", operation_names));
|
||||
}
|
||||
};
|
||||
|
||||
rocprofiler_client_id_t* client_id = nullptr;
|
||||
rocprofiler_client_finalize_t client_fini_func = nullptr;
|
||||
|
||||
callback_name_info
|
||||
get_callback_tracing_names()
|
||||
{
|
||||
auto cb_name_info = callback_name_info{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
//
|
||||
static auto tracing_kind_operation_cb =
|
||||
[](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<callback_name_info*>(data_v);
|
||||
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
return 0;
|
||||
};
|
||||
|
||||
//
|
||||
// callback for each buffer kind (i.e. domain)
|
||||
//
|
||||
static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) {
|
||||
// store the buffer kind name
|
||||
auto* name_info_v = static_cast<callback_name_info*>(data);
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
rocprofiler_iterate_callback_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data));
|
||||
return 0;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb,
|
||||
static_cast<void*>(&cb_name_info)),
|
||||
"iterating buffer tracing kinds");
|
||||
|
||||
return cb_name_info;
|
||||
}
|
||||
|
||||
buffer_name_info
|
||||
get_buffer_tracing_names()
|
||||
{
|
||||
static const auto supported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
|
||||
};
|
||||
|
||||
auto cb_name_info = buffer_name_info{};
|
||||
//
|
||||
// callback for each kind operation
|
||||
//
|
||||
static auto tracing_kind_operation_cb =
|
||||
[](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) {
|
||||
auto* name_info_v = static_cast<buffer_name_info*>(data_v);
|
||||
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name(
|
||||
kindv, operation, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->operation_names[kindv][operation] = name;
|
||||
return 0;
|
||||
};
|
||||
|
||||
//
|
||||
// callback for each buffer kind (i.e. domain)
|
||||
//
|
||||
static auto tracing_kind_cb = [](rocprofiler_buffer_tracing_kind_t kind, void* data) {
|
||||
// store the buffer kind name
|
||||
auto* name_info_v = static_cast<buffer_name_info*>(data);
|
||||
const char* name = nullptr;
|
||||
ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr),
|
||||
"query buffer tracing kind operation name");
|
||||
if(name) name_info_v->kind_names[kind] = name;
|
||||
|
||||
rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
kind, tracing_kind_operation_cb, static_cast<void*>(data));
|
||||
return 0;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb,
|
||||
static_cast<void*>(&cb_name_info)),
|
||||
"iterating buffer tracing kinds");
|
||||
|
||||
return cb_name_info;
|
||||
}
|
||||
|
||||
using callback_payload_t =
|
||||
std::variant<rocprofiler_callback_tracing_code_object_load_data_t,
|
||||
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t,
|
||||
@@ -1569,12 +1438,12 @@ write_json(call_stack_t* _call_stack)
|
||||
{
|
||||
using JSONOutputArchive = cereal::MinimalJSONOutputArchive;
|
||||
|
||||
constexpr auto json_prec = 32;
|
||||
constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space;
|
||||
auto json_opts = JSONOutputArchive::Options{json_prec, json_indent, 1};
|
||||
auto json_ar = JSONOutputArchive{*ofs, json_opts};
|
||||
auto buffer_name_info = get_buffer_tracing_names();
|
||||
auto callback_name_info = get_callback_tracing_names();
|
||||
constexpr auto json_prec = 32;
|
||||
constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space;
|
||||
auto json_opts = JSONOutputArchive::Options{json_prec, json_indent, 1};
|
||||
auto json_ar = JSONOutputArchive{*ofs, json_opts};
|
||||
auto buffer_names = rocprofiler::sdk::get_buffer_tracing_names();
|
||||
auto callbk_names = rocprofiler::sdk::get_callback_tracing_names();
|
||||
auto validate_page_migration =
|
||||
(page_migration_status != ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL);
|
||||
|
||||
@@ -1598,7 +1467,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar.startNode();
|
||||
try
|
||||
{
|
||||
json_ar(cereal::make_nvp("names", callback_name_info));
|
||||
json_ar(cereal::make_nvp("names", callbk_names));
|
||||
json_ar(cereal::make_nvp("code_objects", code_object_records));
|
||||
json_ar(cereal::make_nvp("kernel_symbols", kernel_symbol_records));
|
||||
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_cb_records));
|
||||
@@ -1619,7 +1488,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar.startNode();
|
||||
try
|
||||
{
|
||||
json_ar(cereal::make_nvp("names", buffer_name_info));
|
||||
json_ar(cereal::make_nvp("names", buffer_names));
|
||||
json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_bf_records));
|
||||
json_ar(cereal::make_nvp("memory_copies", memory_copy_bf_records));
|
||||
json_ar(cereal::make_nvp("scratch_memory_traces", scratch_memory_records));
|
||||
@@ -1786,12 +1655,12 @@ write_perfetto()
|
||||
}
|
||||
|
||||
{
|
||||
auto buffer_name_info = get_buffer_tracing_names();
|
||||
auto callbk_name_info = get_callback_tracing_names();
|
||||
auto buffer_names = rocprofiler::sdk::get_buffer_tracing_names();
|
||||
auto callbk_name_info = rocprofiler::sdk::get_callback_tracing_names();
|
||||
|
||||
for(auto itr : hsa_api_bf_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
auto _args = callback_arg_array_t{};
|
||||
@@ -1803,7 +1672,7 @@ write_perfetto()
|
||||
if(ritr != hsa_api_cb_records.end()) _args = ritr->args;
|
||||
|
||||
TRACE_EVENT_BEGIN(rocprofiler::trait::name<rocprofiler::category::hsa_api>::value,
|
||||
::perfetto::StaticString(name.c_str()),
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
@@ -1830,7 +1699,7 @@ write_perfetto()
|
||||
|
||||
for(auto itr : hip_api_bf_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
auto _args = callback_arg_array_t{};
|
||||
@@ -1842,7 +1711,7 @@ write_perfetto()
|
||||
if(ritr != hip_api_cb_records.end()) _args = ritr->args;
|
||||
|
||||
TRACE_EVENT_BEGIN(rocprofiler::trait::name<rocprofiler::category::hip_api>::value,
|
||||
::perfetto::StaticString(name.c_str()),
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
@@ -1869,11 +1738,11 @@ write_perfetto()
|
||||
|
||||
for(auto itr : memory_copy_bf_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
auto& track = agent_tracks.at(itr.dst_agent_id.handle);
|
||||
|
||||
TRACE_EVENT_BEGIN(rocprofiler::trait::name<rocprofiler::category::memory_copy>::value,
|
||||
::perfetto::StaticString(name.c_str()),
|
||||
::perfetto::StaticString(name.data()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
@@ -1886,7 +1755,9 @@ write_perfetto()
|
||||
"src_agent",
|
||||
agents_map.at(itr.src_agent_id).logical_node_id,
|
||||
"dst_agent",
|
||||
agents_map.at(itr.dst_agent_id).logical_node_id);
|
||||
agents_map.at(itr.dst_agent_id).logical_node_id,
|
||||
"copy_bytes",
|
||||
itr.bytes);
|
||||
TRACE_EVENT_END(rocprofiler::trait::name<rocprofiler::category::memory_copy>::value,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
@@ -2070,9 +1941,6 @@ rocprofiler_configure(uint32_t version,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "rocprofiler-sdk-json-tool";
|
||||
|
||||
@@ -2086,8 +1954,8 @@ rocprofiler_configure(uint32_t version,
|
||||
|
||||
// generate info string
|
||||
auto info = std::stringstream{};
|
||||
info << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
|
||||
<< " (" << runtime_version << ")";
|
||||
info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "."
|
||||
<< minor << "." << patch << " (" << runtime_version << ")";
|
||||
|
||||
std::clog << info.str() << std::endl;
|
||||
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário