diff --git a/projects/rocprofiler/CHANGELOG.md b/projects/rocprofiler/CHANGELOG.md index ad7407ab4f..cb2cb81b10 100644 --- a/projects/rocprofiler/CHANGELOG.md +++ b/projects/rocprofiler/CHANGELOG.md @@ -212,6 +212,7 @@ The resulting `a.out` will depend on ### Added - 'end_time' need to be disabled in roctx_trace.txt - Every API trace in V2 reported synchrounusly will have two records, one for Enter phase and for Exit phase +- support for hsa_amd_memory_async_copy_on_engine API function trace ### Fixed - rocprof in ROcm/5.4.0 gpu selector broken. - rocprof in ROCm/5.4.1 fails to generate kernel info. diff --git a/projects/rocprofiler/plugin/file/file.cpp b/projects/rocprofiler/plugin/file/file.cpp index 711d779163..35e56f6133 100644 --- a/projects/rocprofiler/plugin/file/file.cpp +++ b/projects/rocprofiler/plugin/file/file.cpp @@ -291,7 +291,7 @@ class file_plugin_t { if (kernel_name.size() > 1) *output_file << " Kernel_Name(" << kernel_name.c_str() << "),"; if (tracer_record.phase == ROCPROFILER_PHASE_NONE) { *output_file << " Begin(" << tracer_record.timestamps.begin.value - << "), End(" << tracer_record.timestamps.end.value << ")"; + << "), End(" << tracer_record.timestamps.end.value << "),"; } else { if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX){ rocprofiler_timestamp_t timestamp; diff --git a/projects/rocprofiler/src/core/hsa/hsa_support.cpp b/projects/rocprofiler/src/core/hsa/hsa_support.cpp index c3c22abf7b..481f658bd1 100644 --- a/projects/rocprofiler/src/core/hsa/hsa_support.cpp +++ b/projects/rocprofiler/src/core/hsa/hsa_support.cpp @@ -52,7 +52,6 @@ namespace { hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_agent_t agent, hsa_executable_symbol_t symbol, void* args) { - hsa_symbol_kind_t type; rocmtools::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type); @@ -63,7 +62,7 @@ hsa_status_t 
hsa_executable_iteration_callback(hsa_executable_t executable, hsa_ // TODO(aelwazir): to be removed if the HSA fixed the issue of corrupted // names overflowing the length given if (name_length > 1) { - if(!(*static_cast<bool*>(args))) { + if (!(*static_cast<bool*>(args))) { char name[name_length + 1]; uint64_t kernel_object; rocmtools::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn( @@ -92,7 +91,8 @@ bool IsEnabled(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_i return report && report(domain, operation_id, nullptr) == 0; } -void ReportActivity(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id, void* data) { +void ReportActivity(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id, + void* data) { if (auto report = report_activity.load(std::memory_order_relaxed)) report(domain, operation_id, data); } @@ -486,12 +486,14 @@ hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) { return rocmtools::hsa_support::GetCoreApiTable().hsa_executable_destroy_fn(executable); } -bool profiling_async_copy_enable = false; +std::atomic<bool> profiling_async_copy_enable{false}; hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) { hsa_status_t status = rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(enable); - if (status == HSA_STATUS_SUCCESS) profiling_async_copy_enable = enable; + if (status == HSA_STATUS_SUCCESS) { + profiling_async_copy_enable.exchange(enable, std::memory_order_release); + } return status; } @@ -515,7 +517,7 @@ hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const vo // FIXME: what happens if the state changes before returning? 
[[maybe_unused]] hsa_status_t status = rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( - profiling_async_copy_enable | is_enabled); + profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { @@ -547,7 +549,7 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst, // FIXME: what happens if the state changes before returning? [[maybe_unused]] hsa_status_t status = rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn( - profiling_async_copy_enable | is_enabled); + profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); if (!is_enabled) { @@ -569,6 +571,36 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst, return status; } +hsa_status_t MemoryASyncCopyOnEngineIntercept( + void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size, + uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal, + hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) { + bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY); + + // FIXME: what happens if the state changes before returning? 
+ [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn( + profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled); + assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed"); + + if (!is_enabled) { + return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn( + dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal, + engine_id, force_copy_on_sdma); + } + + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = MemoryASyncCopyHandler; + entry->correlation_id = CorrelationId(); + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + + status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn( + dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal, engine_id, + force_copy_on_sdma); + if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); + + return status; +} + } // namespace rocprofiler_timestamp_t timestamp_ns() { @@ -748,21 +780,21 @@ void Initialize(HsaApiTable* table) { rocmtools::queue::InitializePools(cpu_agent); break; case HSA_DEVICE_TYPE_GPU: - // XXX FIXME: When multiple ranks are used, each rank's first - // logical device always has GPU ID 0, regardless of which - // physical device is selected with CUDA_VISIBLE_DEVICES. - // Because of this, when merging traces from multiple ranks, - // GPU IDs from different processes may overlap. - // - // The long term solution is to use KFD's gpu_id, which is - // stable across APIs and processes, but it isn't currently - // exposed by ROCr. We could use the agent's - // HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even - // that would be an improvement--it's what legacy roctracer - // is currently doing as well as the roctracer compatibility - // code earlier in this file. 
- agent_info.setIndex(gpu_agent_count++); - break; + // XXX FIXME: When multiple ranks are used, each rank's first + // logical device always has GPU ID 0, regardless of which + // physical device is selected with CUDA_VISIBLE_DEVICES. + // Because of this, when merging traces from multiple ranks, + // GPU IDs from different processes may overlap. + // + // The long term solution is to use KFD's gpu_id, which is + // stable across APIs and processes, but it isn't currently + // exposed by ROCr. We could use the agent's + // HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even + // that would be an improvement--it's what legacy roctracer + // is currently doing as well as the roctracer compatibility + // code earlier in this file. + agent_info.setIndex(gpu_agent_count++); + break; default: agent_info.setIndex(other_agent_count++); break; @@ -787,6 +819,8 @@ void Initialize(HsaApiTable* table) { roctracer::hsa_support::MemoryASyncCopyRectIntercept; table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn = roctracer::hsa_support::ProfilingAsyncCopyEnableIntercept; + table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn = + roctracer::hsa_support::MemoryASyncCopyOnEngineIntercept; // Install the HSA_EVT intercept table->core_->hsa_memory_allocate_fn = roctracer::hsa_support::MemoryAllocateIntercept; @@ -875,11 +909,11 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback const rocprofiler_counter_info_t counter_info = rocprofiler_counter_info_t{strdup(name.c_str()), - strdup(descr.c_str()), - expr.empty() ? nullptr : strdup(expr.c_str()), - query.instance_count, - block_name.c_str(), - block_counters}; + strdup(descr.c_str()), + expr.empty() ? 
nullptr : strdup(expr.c_str()), + query.instance_count, + block_name.c_str(), + block_counters}; counters_info_callback(counter_info, gpu_name.c_str(), gpu_counter); } gpu_counter++; @@ -890,7 +924,8 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback // std::string expr_str; // if (expr) expr_str = expr->GetStr().c_str(); // const rocprofiler_counter_info_t counter_info = - // rocprofiler_counter_info_t{start->first.c_str(), "", expr ? expr_str.c_str() : nullptr}; + // rocprofiler_counter_info_t{start->first.c_str(), "", expr ? expr_str.c_str() : + // nullptr}; // counters_info_callback(counter_info, gpu_name.c_str(), gpu_counter); // start++; // } diff --git a/projects/rocprofiler/tests/featuretests/tracer/CMakeLists.txt b/projects/rocprofiler/tests/featuretests/tracer/CMakeLists.txt index b6c4a7582c..a61828e878 100644 --- a/projects/rocprofiler/tests/featuretests/tracer/CMakeLists.txt +++ b/projects/rocprofiler/tests/featuretests/tracer/CMakeLists.txt @@ -29,6 +29,14 @@ set_source_files_properties(apps/hello_world.cpp PROPERTIES HIP_SOURCE_PROPERTY_ hip_add_executable(tracer_hip_helloworld apps/hello_world.cpp) set_target_properties(tracer_hip_helloworld PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tests/featuretests/tracer/apps") +#hsa-mem_async_copy and async_copy_on_engine +set_source_files_properties(apps/copy_on_engine.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) +hip_add_executable(copy_on_engine apps/copy_on_engine.cpp) +set_target_properties(copy_on_engine PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tests/featuretests/tracer/apps") +target_link_options(copy_on_engine PRIVATE "-Wl,--build-id=md5") +install(TARGETS copy_on_engine RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/tracer/apps COMPONENT tests) +target_link_libraries(copy_on_engine hsa-runtime64::hsa-runtime64 Threads::Threads dl stdc++fs) + # Add test cpp file add_executable(runTracerFeatureTests 
tracer_gtest.cpp ${GTEST_MAIN_SRC_FILE} diff --git a/projects/rocprofiler/tests/featuretests/tracer/apps/copy_on_engine.cpp b/projects/rocprofiler/tests/featuretests/tracer/apps/copy_on_engine.cpp new file mode 100644 index 0000000000..ec5d89b578 --- /dev/null +++ b/projects/rocprofiler/tests/featuretests/tracer/apps/copy_on_engine.cpp @@ -0,0 +1,338 @@ +#include +#include + +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" + + +// This program illustrates the usage of the asynchronous copy capability of +// the RocR runtime library. The program will create a system memory buffer and +// a local buffer for each GPU, up to 2 GPUs, if the system has at least 2 +// GPUs. The program will copy data to/from the host from/to the GPU. If 2 +// GPUs are available, the program will also copy data from one to the other. +// Update: Added aditional call async_copy_on_engine + +#define RET_IF_HSA_ERR(err) \ + { \ + if ((err) != HSA_STATUS_SUCCESS) { \ + const char* msg = 0; \ + hsa_status_string(err, &msg); \ + std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << __FILE__ \ + << ". Call returned " << err << std::endl; \ + std::cout << msg << std::endl; \ + return (err); \ + } \ + } + +static const uint32_t kTestFillValue1 = 0xabcdef12; +static const uint32_t kTestFillValue2 = 0xba5eba11; +static const uint32_t kTestFillValue3 = 0xfeed5a1e; +static const uint32_t kTestInitValue = 0xbaadf00d; + +// This structure holds an agent pointer and associated memory pool to be used +// for this test program. +struct async_mem_cpy_agent { + hsa_agent_t dev; + hsa_amd_memory_pool_t pool; + size_t granule; + void* ptr; +}; +struct async_mem_cpy_pool_query { + async_mem_cpy_agent* pool_info; + hsa_agent_t peer_device; +}; +struct callback_args { + struct async_mem_cpy_agent cpu; + struct async_mem_cpy_agent gpu1; + struct async_mem_cpy_agent gpu2; +}; + + +// This function is meant to be a callback to hsa_iterate_agents. 
For each +// input agent the iterator provides as input, this function will check to +// see if the input agent is a CPU agent. If so, it will update the +// async_mem_cpy_agent structure pointed to by the input parameter "data". +// Return values: +// HSA_STATUS_INFO_BREAK -- CPU agent has been found and stored. Iterator +// should stop iterating +// HSA_STATUS_SUCCESS -- CPU agent has not yet been found; iterator +// should keep iterating +// Other -- Some error occurred +static hsa_status_t FindPool(hsa_amd_memory_pool_t in_pool, void* data) { + hsa_amd_segment_t segment; + hsa_status_t err; + if (nullptr == data) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + struct async_mem_cpy_pool_query* args = (struct async_mem_cpy_pool_query*)data; + err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); + RET_IF_HSA_ERR(err); + if (segment != HSA_AMD_SEGMENT_GLOBAL) { + return HSA_STATUS_SUCCESS; + } + bool canAlloc; + err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, + &canAlloc); + RET_IF_HSA_ERR(err); + if (!canAlloc) { + return HSA_STATUS_SUCCESS; + } + if (args->peer_device.handle != 0) { + hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; + err = hsa_amd_agent_memory_pool_get_info(args->peer_device, in_pool, + HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access); + RET_IF_HSA_ERR(err); + if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) { + return HSA_STATUS_SUCCESS; + } + } + err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, + &args->pool_info->granule); + RET_IF_HSA_ERR(err); + args->pool_info->pool = in_pool; + return HSA_STATUS_INFO_BREAK; +} + +// Find the least common multiple of 2 numbers +static uint32_t lcm(uint32_t a, uint32_t b) { + int tmp_a; + int tmp_b; + tmp_a = a; + tmp_b = b; + while (tmp_a != tmp_b) { + if (tmp_a < tmp_b) { + tmp_a = tmp_a + a; + } else { + tmp_b = tmp_b + b; + } + } + return 
tmp_a; +} +static hsa_status_t FindGPUs(hsa_agent_t agent, void* data) { + if (data == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + hsa_device_type_t hsa_device_type; + hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type); + RET_IF_HSA_ERR(err); + if (hsa_device_type != HSA_DEVICE_TYPE_GPU) { + return HSA_STATUS_SUCCESS; + } + struct callback_args* args = (struct callback_args*)data; + struct async_mem_cpy_agent* gpu; + async_mem_cpy_pool_query pool_query = {0, 0}; + if (args->gpu1.dev.handle == 0) { + gpu = &args->gpu1; + } else { + gpu = &args->gpu2; + // Check that gpu1 has peer access into the selected pool. + pool_query.peer_device = args->gpu1.dev; + } + // Make sure GPU device has pool host can access + gpu->dev = agent; + pool_query.pool_info = gpu; + err = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query); + if (err == HSA_STATUS_INFO_BREAK) { + if (gpu == &args->gpu2) { + // We found 2 gpu's + return HSA_STATUS_INFO_BREAK; + } else { + // Keep looking for another gpu + return HSA_STATUS_SUCCESS; + } + } else { + gpu->dev = {0}; + } + RET_IF_HSA_ERR(err); + // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating + return HSA_STATUS_SUCCESS; +} + +// This function is a callback for hsa_amd_agent_iterate_memory_pools() +// and will test whether the provided memory pool is 1) in the GLOBAL +// segment, 2) allows allocation and 3) is accessible by the provided +// agent. The "data" input parameter is assumed to be pointing to a +// struct async_mem_cpy_agent. If the provided pool meets these criteria, +// HSA_STATUS_INFO_BREAK is returned. 
+ +static hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) { + if (data == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + hsa_device_type_t hsa_device_type; + hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type); + RET_IF_HSA_ERR(err); + if (hsa_device_type == HSA_DEVICE_TYPE_CPU) { + struct async_mem_cpy_agent* args = (struct async_mem_cpy_agent*)data; + args->dev = agent; + async_mem_cpy_pool_query pool_query; + pool_query.peer_device.handle = 0; + pool_query.pool_info = args; + err = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query); + if (err == HSA_STATUS_INFO_BREAK) { // we found what we were looking for + return HSA_STATUS_INFO_BREAK; + } else { + args->dev = {0}; + return err; + } + } + // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating + return HSA_STATUS_SUCCESS; +} + +// This is the main test, showing various paths of async. copy. Source and +// destination agents and their respective pools should already be discovered. +// Additionally, buffer from the pools should already be allocated and available +// from the input parameters. +static hsa_status_t AsyncCpyTest(async_mem_cpy_agent* dst, async_mem_cpy_agent* src, + callback_args* args, size_t sz, uint32_t val) { + hsa_status_t err; + hsa_signal_t copy_signal; + // Initialize the system and destination buffers with a value so we can later + // validate it has been overwritten + void* sysPtr = args->cpu.ptr; + err = hsa_amd_memory_fill(sysPtr, kTestInitValue, sz / sizeof(uint32_t)); + RET_IF_HSA_ERR(err); + if (dst->ptr != sysPtr) { + err = hsa_amd_memory_fill(dst->ptr, kTestInitValue, sz / sizeof(uint32_t)); + RET_IF_HSA_ERR(err); + } + // Fill the source buffer with the provided uint32_t value + err = hsa_amd_memory_fill(src->ptr, val, sz / sizeof(uint32_t)); + RET_IF_HSA_ERR(err); + // Make sure the target and destination agents have access to the buffer. 
+ hsa_agent_t ag_list[2] = {dst->dev, src->dev}; + err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst->ptr); + RET_IF_HSA_ERR(err); + // Create a signal that will be used to inform us when the copy is done + err = hsa_signal_create(1, 0, NULL, &copy_signal); + RET_IF_HSA_ERR(err); + // Do the copy... + err = hsa_amd_memory_async_copy(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL, copy_signal); + RET_IF_HSA_ERR(err); + + // call following APIs to make sure we intercept hsa_amd_memory_async_copy_on_engine + uint32_t engine_ids_mask = 0; + err = hsa_amd_memory_copy_engine_status(args->cpu.dev, args->gpu1.dev, &engine_ids_mask); + hsa_amd_sdma_engine_id_t engine_id = HSA_AMD_SDMA_ENGINE_0; + err = hsa_amd_memory_async_copy_on_engine(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL, + copy_signal, engine_id, false); + + // Here we do a blocking wait. Alternatively, we could also use a + // non-blocking wait in a loop, and do other work while waiting. + if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1, + HSA_WAIT_STATE_BLOCKED) != 0) { + printf("Async copy returned error value.\n"); + return HSA_STATUS_ERROR; + } + // Verify the copy was successful; copy from the dst buffer to the sysBuf, + // (if the result is not already in sys. mem.) and check the sysBuf values + if (dst->ptr != sysPtr) { + if (src->ptr != sysPtr) { + // In this case, we need to give the gpu dev that owns dst->ptr access + // to the system memory we are going to copy to. 
+ hsa_agent_t ag_list_ck[2] = {dst->dev, args->cpu.dev}; + err = hsa_amd_agents_allow_access(2, ag_list_ck, NULL, sysPtr); + RET_IF_HSA_ERR(err); + } + // Reset signal to 1 + hsa_signal_store_screlease(copy_signal, 1); + err = hsa_amd_memory_async_copy(sysPtr, args->cpu.dev, dst->ptr, dst->dev, sz, 0, NULL, + copy_signal); + RET_IF_HSA_ERR(err); + if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1, + HSA_WAIT_STATE_BLOCKED) != 0) { + printf("Async copy returned error value.\n"); + return HSA_STATUS_ERROR; + } + } + // Check that the contents of the buffer are what is expected. + for (uint32_t i = 0; i < sz / sizeof(uint32_t); ++i) { + if (reinterpret_cast<uint32_t*>(sysPtr)[i] != val) { + fprintf(stdout, "Expected 0x%x but got 0x%x in buffer at index %d.\n", val, + reinterpret_cast<uint32_t*>(sysPtr)[i], i); + return HSA_STATUS_ERROR; + } + } + return HSA_STATUS_SUCCESS; +} + +int main() { + hsa_status_t err; + struct callback_args args; + bool twoGPUs = false; + err = hsa_init(); + RET_IF_HSA_ERR(err); + // First, find the cpu agent and associated pool + args.cpu = {0, 0, 0}; + err = hsa_iterate_agents(FindCPUDevice, reinterpret_cast<void*>(&args.cpu)); + assert(err == HSA_STATUS_INFO_BREAK); + if (err != HSA_STATUS_INFO_BREAK) { + return -1; + } + // Now, find 1 or 2 (if possible) GPUs and associated pool(s) for our test + args.gpu1 = {0, 0, 0}; + args.gpu2 = {0, 0, 0}; + err = hsa_iterate_agents(FindGPUs, &args); + if (err == HSA_STATUS_INFO_BREAK) { + twoGPUs = true; + } else { + // See if we at least have 1 GPU + if (args.gpu1.dev.handle == 0) { + fprintf(stdout, "GPU with accessible VRAM not found; at least 1 required. Exiting\n"); + return -1; + } + fprintf(stdout, + "Only 1 GPU found with required VRAM. 
" + "Peer-to-Peer copy will be skipped.\n"); + } + // We will use the smallest amount of allocatable memory that works for all + // potential sources and destinations of the copy + size_t sz = lcm(args.cpu.granule, args.gpu1.granule); + // Allocate memory on each source/destination + if (twoGPUs) { + sz = lcm(sz, args.gpu2.granule); + err = hsa_amd_memory_pool_allocate(args.gpu2.pool, sz, 0, + reinterpret_cast<void**>(&args.gpu2.ptr)); + RET_IF_HSA_ERR(err); + } + + err = hsa_amd_memory_pool_allocate(args.cpu.pool, sz, 0, reinterpret_cast<void**>(&args.cpu.ptr)); + RET_IF_HSA_ERR(err); + err = + hsa_amd_memory_pool_allocate(args.gpu1.pool, sz, 0, reinterpret_cast<void**>(&args.gpu1.ptr)); + RET_IF_HSA_ERR(err); + char name[64]; + err = hsa_agent_get_info(args.cpu.dev, HSA_AGENT_INFO_NAME, &name); + fprintf(stdout, "CPU is \"%s\"\n", name); + err = hsa_agent_get_info(args.gpu1.dev, HSA_AGENT_INFO_NAME, &name); + fprintf(stdout, "GPU1 is \"%s\"\n", name); + if (twoGPUs) { + err = hsa_agent_get_info(args.gpu2.dev, HSA_AGENT_INFO_NAME, &name); + fprintf(stdout, "GPU2 is \"%s\"\n", name); + } + fprintf(stdout, "Copying %lu bytes from gpu1 memory to system memory...\n", sz); + err = AsyncCpyTest(&args.cpu, &args.gpu1, &args, sz, kTestFillValue1); + RET_IF_HSA_ERR(err); + fprintf(stdout, "Success!\n"); + fprintf(stdout, "Copying %lu bytes from system memory to gpu1 memory...\n", sz); + err = AsyncCpyTest(&args.gpu1, &args.cpu, &args, sz, kTestFillValue2); + RET_IF_HSA_ERR(err); + fprintf(stdout, "Success!\n"); + + if (twoGPUs) { + fprintf(stdout, "Copying %lu bytes from gpu1 memory to gpu2 memory...\n", sz); + err = AsyncCpyTest(&args.gpu2, &args.gpu1, &args, sz, kTestFillValue3); + RET_IF_HSA_ERR(err); + fprintf(stdout, "Success!\n"); + } + // Clean up + err = hsa_amd_memory_pool_free(args.cpu.ptr); + RET_IF_HSA_ERR(err); + err = hsa_amd_memory_pool_free(args.gpu1.ptr); + RET_IF_HSA_ERR(err); + if (twoGPUs) { + err = hsa_amd_memory_pool_free(args.gpu2.ptr); + RET_IF_HSA_ERR(err); + } +} \ No 
newline at end of file diff --git a/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/async_copy_trace.txt b/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/async_copy_trace.txt new file mode 100644 index 0000000000..67e6f1ebfd --- /dev/null +++ b/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/async_copy_trace.txt @@ -0,0 +1,5 @@ +Record(68), Domain(HSA_OPS_DOMAIN), Begin(357395893062183), End(357395893066774) Correlation_ID(22) +Record(78), Domain(HSA_OPS_DOMAIN), Begin(357395893673708), End(357395893677559) Correlation_ID(24) +Record(103), Domain(HSA_OPS_DOMAIN), Begin(357395893769384), End(357395893773087) Correlation_ID(31) +Record(110), Domain(HSA_OPS_DOMAIN), Begin(357395893777826), End(357395893781529) Correlation_ID(33) +Record(116), Domain(HSA_OPS_DOMAIN), Begin(357395893796340), End(357395893799598) Correlation_ID(36) diff --git a/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hip_helloworld_golden_traces.txt b/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hip_helloworld_golden_traces.txt index 5568e4b13f..4757a6f722 100755 --- a/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hip_helloworld_golden_traces.txt +++ b/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hip_helloworld_golden_traces.txt @@ -21,4 +21,3 @@ Record(24), Domain(HIP_API_DOMAIN), Function(hipMemcpy), End(2995594192228011), Record(26), Domain(HIP_API_DOMAIN), Function(hipFree), Begin(2995594192237078), Correlation_ID(9) Record(27), Domain(HIP_API_DOMAIN), Function(hipFree), End(2995594192256085), Correlation_ID(9) Record(29), Domain(HIP_API_DOMAIN), Function(hipFree), Begin(2995594192259622), Correlation_ID(10) -Record(30), Domain(HIP_API_DOMAIN), Function(hipFree), End(2995594192264101), Correlation_ID(10) diff --git a/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hsa_api_trace.txt 
b/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hsa_api_trace.txt new file mode 100644 index 0000000000..0453c6c511 --- /dev/null +++ b/projects/rocprofiler/tests/featuretests/tracer/apps/goldentraces/hsa_api_trace.txt @@ -0,0 +1,78 @@ +Record(1), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), Begin(357395885625757), Correlation_ID(1) +Record(2), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885636767), Correlation_ID(2) +Record(3), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885640407), Correlation_ID(2) +Record(5), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), Begin(357395885645827), Correlation_ID(3) +Record(6), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885650247), Correlation_ID(4) +Record(7), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885653007), Correlation_ID(4) +Record(9), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885655747), Correlation_ID(5) +Record(10), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885658277), Correlation_ID(5) +Record(12), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885660877), Correlation_ID(6) +Record(13), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885663467), Correlation_ID(6) +Record(15), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), End(357395885666197), Correlation_ID(3) +Record(17), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), End(357395885668847), Correlation_ID(1) +Record(19), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), Begin(357395885671437), Correlation_ID(7) +Record(20), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885674007), Correlation_ID(8) +Record(21), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885676607), Correlation_ID(8) +Record(23), Domain(HSA_API_DOMAIN), 
Function(hsa_agent_get_info), Begin(357395885679327), Correlation_ID(9) +Record(24), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885681837), Correlation_ID(9) +Record(26), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), Begin(357395885684537), Correlation_ID(10) +Record(27), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885687177), Correlation_ID(11) +Record(28), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885690227), Correlation_ID(11) +Record(30), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885692857), Correlation_ID(12) +Record(31), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885695397), Correlation_ID(12) +Record(33), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885697917), Correlation_ID(13) +Record(34), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885700357), Correlation_ID(13) +Record(36), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), End(357395885702907), Correlation_ID(10) +Record(38), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), End(357395885705447), Correlation_ID(7) +Record(40), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), Begin(357395885713627), Correlation_ID(14) +Record(41), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), End(357395885737288), Correlation_ID(14) +Record(43), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), Begin(357395885740878), Correlation_ID(15) +Record(44), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), End(357395885804208), Correlation_ID(15) +Record(46), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885809478), Correlation_ID(16) +Record(47), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885812198), Correlation_ID(16) +Record(49), Domain(HSA_API_DOMAIN), 
Function(hsa_agent_get_info), Begin(357395885819548), Correlation_ID(17) +Record(50), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885822698), Correlation_ID(17) +Record(52), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395885831788), Correlation_ID(18) +Record(53), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395885836338), Correlation_ID(18) +Record(55), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395885839008), Correlation_ID(19) +Record(56), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395892240457), Correlation_ID(19) +Record(58), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), Begin(357395892269217), Correlation_ID(20) +Record(59), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), End(357395892311137), Correlation_ID(20) +Record(61), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), Begin(357395892318827), Correlation_ID(21) +Record(62), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), End(357395892324097), Correlation_ID(21) +Record(64), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395892328127), Correlation_ID(22) +Record(65), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893033290), Correlation_ID(22) +Record(67), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), Begin(357395893039770), Correlation_ID(23) +Record(69), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), End(357395893655353), Correlation_ID(23) +Record(71), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), Begin(357395893661713), Correlation_ID(24) +Record(72), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), End(357395893668973), Correlation_ID(24) +Record(74), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893671633), Correlation_ID(25) +Record(75), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), 
End(357395893673753), Correlation_ID(25) +Record(77), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893687933), Correlation_ID(26) +Record(79), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893714413), Correlation_ID(26) +Record(81), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893716553), Correlation_ID(27) +Record(82), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893728583), Correlation_ID(27) +Record(84), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893730223), Correlation_ID(28) +Record(85), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893742133), Correlation_ID(28) +Record(87), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), Begin(357395893744023), Correlation_ID(29) +Record(88), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), End(357395893747313), Correlation_ID(29) +Record(90), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), Begin(357395893749013), Correlation_ID(30) +Record(91), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), End(357395893751313), Correlation_ID(30) +Record(93), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395893753503), Correlation_ID(31) +Record(94), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893759683), Correlation_ID(31) +Record(96), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), Begin(357395893762933), Correlation_ID(32) +Record(97), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), End(357395893764903), Correlation_ID(32) +Record(99), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), Begin(357395893766573), Correlation_ID(33) +Record(100), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), End(357395893770533), Correlation_ID(33) +Record(102), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893772203), 
Correlation_ID(34) +Record(104), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893775493), Correlation_ID(34) +Record(106), Domain(HSA_API_DOMAIN), Function(hsa_signal_store_screlease), Begin(357395893778003), Correlation_ID(35) +Record(107), Domain(HSA_API_DOMAIN), Function(hsa_signal_store_screlease), End(357395893779583), Correlation_ID(35) +Record(109), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395893781263), Correlation_ID(36) +Record(111), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893786634), Correlation_ID(36) +Record(113), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893795734), Correlation_ID(37) +Record(114), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893797624), Correlation_ID(37) +Record(117), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), Begin(357395893802314), Correlation_ID(38) +Record(118), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), End(357395893829684), Correlation_ID(38) +Record(120), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), Begin(357395893833114), Correlation_ID(39) +Record(121), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), End(357395893835624), Correlation_ID(39) diff --git a/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.cpp b/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.cpp index 0f3b578831..8377ac674c 100644 --- a/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.cpp +++ b/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.cpp @@ -19,6 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include +#include #include #include "tracer_gtest.h" #include "../utils/test_utils.h" @@ -33,8 +35,19 @@ void ApplicationParser::SetApplicationEnv(const char* app_name, const char* trac hsa_tools_lib_path << app_path << "librocprofiler_tool.so"; setenv("LD_PRELOAD", hsa_tools_lib_path.str().c_str(), true); - // set --hip-api option - setenv("ROCPROFILER_HIP_API_TRACE", "1", true); + std::string trace_type{trace_option}; + + if (trace_type.find("hip") != std::string::npos) { + // set --hip-api option + setenv("ROCPROFILER_HIP_API_TRACE", "1", true); + } + + if (trace_type.find("hsa") != std::string::npos) { + // set --hsa-api and --hsa-activity + setenv("ROCPROFILER_HSA_API_TRACE", "1", true); + setenv("ROCPROFILER_HSA_ACTIVITY_TRACE", "1", true); + } + std::stringstream os; os << app_path << "tests/featuretests/tracer/apps/" << app_name; @@ -42,32 +55,45 @@ void ApplicationParser::SetApplicationEnv(const char* app_name, const char* trac } /** - * Parses kernel-info after running profiler against curent application + * Parses kernel-info after running tracer against curent application * and saves them in a vector. 
*/ void ApplicationParser::GetKernelInfoForRunningApplication( std::vector* kernel_info_output) { KernelInfo kinfo; for (std::string line : output_lines) { - if (std::regex_match(line, std::regex("(Record)(.*)"))) { - int spos = line.find("["); - int epos = line.find("]", spos); - std::string sub = line.substr(spos + 1, epos - spos - 1); + // if (std::regex_match(line, std::regex("(Record)(.*)"))) { + // Record id + size_t found = line.find("Record"); + if (found != std::string::npos) { + int spos = found; + int epos = line.find(")", spos); + int length = std::string("Record").length(); + std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); + kinfo.record_id = sub; - kernel_info_output->push_back(kinfo); - - // Kernel-Name - size_t found = line.find("Function"); - if (found != std::string::npos) { - int spos = found; - int epos = line.find(")", spos); - int length = std::string("kernel-name").length(); - std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); - - kinfo.function = sub; - kernel_info_output->push_back(kinfo); - } } + + // Kernel-Name + found = line.find("Function"); + if (found != std::string::npos) { + int spos = found; + int epos = line.find(")", spos); + int length = std::string("Function").length(); + std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); + kinfo.function = sub; + } + + // corealtion-ids + found = line.find("Correlation_ID"); + if (found != std::string::npos) { + int spos = found; + int epos = line.find(")", spos); + int length = std::string("Correlation_ID").length(); + std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); + kinfo.corelation_id = sub; + } + kernel_info_output->push_back(kinfo); } } @@ -86,7 +112,7 @@ void ApplicationParser::GetKernelInfoForGoldenOutput(const char* app_name, std:: } /** - * Runs a given appllication and saves profiler output. + * Runs a given appllication and saves tracer output. 
* These output lines can be letter passed for kernel informations * i.e: kernel_names */ @@ -125,45 +151,70 @@ void ApplicationParser::ParseKernelInfoFields(const std::string& s, std::ifstream golden_file(s); while (!golden_file.eof()) { getline(golden_file, line); - if (std::regex_match(line, std::regex("(Record)(.*)"))) { - int spos = line.find("["); - int epos = line.find("]", spos); - std::string sub = line.substr(spos + 1, epos - spos - 1); + // if (std::regex_match(line, std::regex("(Record)(.*)"))) { + // Record id + size_t found = line.find("Record"); + if (found != std::string::npos) { + int spos = found; + int epos = line.find(")", spos); + int length = std::string("Record").length(); + std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); + kinfo.record_id = sub; - kernel_info_output->push_back(kinfo); - - // Kernel-Name - size_t found = line.find("Function"); - if (found != std::string::npos) { - int spos = found; - int epos = line.find(")", spos); - int length = std::string("kernel-name").length(); - std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); - - kinfo.function = sub; - kernel_info_output->push_back(kinfo); - } + // kernel_info_output->push_back(kinfo); } + + // Kernel-Name + found = line.find("Function"); + if (found != std::string::npos) { + int spos = found; + int epos = line.find(")", spos); + int length = std::string("kernel-name").length(); + std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); + + kinfo.function = sub; + // kernel_info_output->push_back(kinfo); + } + + // corealtion-ids + found = line.find("Correlation_ID"); + if (found != std::string::npos) { + int spos = found; + int epos = line.find(")", spos); + int length = std::string("Correlation_ID").length(); + std::string sub = line.substr(spos + length + 1, epos - spos - length - 1); + + kinfo.corelation_id = sub; + // kernel_info_output->push_back(kinfo); + } + //} + kernel_info_output->push_back(kinfo); } 
golden_file.close(); } +/* + * ################################################### + * ############ HelloWorld HIP Tests ################ + * ################################################### + */ constexpr auto kGoldenOutputHelloworld = "hip_helloworld_golden_traces.txt"; -class HelloWorldTest : public ProfilerTest { +class HelloWorldTest : public Tracertest { protected: std::vector golden_kernel_info; void SetUp() { - ProfilerTest::SetUp("tracer_hip_helloworld", "--hip-api "); + Tracertest::SetUp("tracer_hip_helloworld", "--hip-api "); GetKernelInfoForGoldenOutput("tracer_hip_helloworld", kGoldenOutputHelloworld, &golden_kernel_info); } + void TearDown() { output_lines.clear(); } }; // Test:1 Compares total num of kernel-names in golden output against current -// profiler output +// tracer output TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelInfoMatchWithGoldenOutput) { - // kernel info in current profiler run + // kernel info in current profler run std::vector current_kernel_info; GetKernelInfoForRunningApplication(¤t_kernel_info); @@ -173,9 +224,9 @@ TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelInfoMatchWithGoldenOutp } // Test:2 Compares order of kernel-names in golden output against current -// profiler output -TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenFunctionNamessMatchWithGoldenOutput) { - // kernel info in current profiler run +// tracer output +TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenFunctionNamessMatchWithGoldenOutput) { + // kernel info in current tracer run std::vector current_kernel_info; GetKernelInfoForRunningApplication(¤t_kernel_info); @@ -186,13 +237,66 @@ TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenFunctionNamessMatchWithGold } // Test:3 Compares order of kernel-names in golden output against current -// profiler output -TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenKernelDurationShouldBePositive) { - // kernel info in current profiler run +// tracer output +TEST_F(HelloWorldTest, 
WhenRunningTracerWithAppThenKernelDurationShouldBePositive) { + // kernel info in current tracer run std::vector current_kernel_info; GetKernelInfoForRunningApplication(¤t_kernel_info); ASSERT_TRUE(current_kernel_info.size()); EXPECT_GT(current_kernel_info.size(), 0); +} + + +/* + * ################################################### + * ############ Async COopy HSA Tests ################ + * ################################################### + */ + +class AsyncCopyTest : public Tracertest { + protected: + void SetUp() { Tracertest::SetUp("copy_on_engine", "--hsa-api --hsa-activity"); } + void TearDown() { output_lines.clear(); } +}; + +// Test:1 Compares total num of kernel-names in golden output against current +// tracer output +TEST_F(AsyncCopyTest, WhenRunningTracerWithAppThenAsyncCopyOutputIsgenerated) { + // kernel info in current profler run + std::vector current_kernel_info; + + GetKernelInfoForRunningApplication(¤t_kernel_info); + ASSERT_TRUE(current_kernel_info.size()); +} + +// Test:2 Matches coelation Ids +TEST_F(AsyncCopyTest, WhenRunningTracerWithAppThenAsyncCorelationCountIsCorrect) { + // kernel info in current profler run + std::vector current_kernel_info; + + GetKernelInfoForRunningApplication(¤t_kernel_info); + ASSERT_TRUE(current_kernel_info.size()); + + std::vector> corelation_pair{}; + for (const auto& itr : current_kernel_info) { + if (itr.function.find("async_copy_on_engine") != std::string::npos) { + corelation_pair.push_back({itr.record_id, itr.corelation_id}); + } + } + ASSERT_TRUE(corelation_pair.size()); + + uint32_t corealtion_count = 0; + // check if corelation id appears more than twice + for (size_t i = 0; i < corelation_pair.size(); i++) { + for (const auto& itr : current_kernel_info) { + if ((itr.corelation_id == corelation_pair[i].second) && + (itr.record_id != corelation_pair[i].first)) { + corealtion_count++; + } + } + } + + EXPECT_GT(corealtion_count, corelation_pair.size()); } \ No newline at end of file diff --git 
a/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.h b/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.h index ba45b45e1d..0548d5d6ed 100644 --- a/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.h +++ b/projects/rocprofiler/tests/featuretests/tracer/tracer_gtest.h @@ -35,7 +35,7 @@ THE SOFTWARE. /* --------------------------------------------------------------------------*/ /** - * @Synopsis Implementation of a Parser class for Profiler output + * @Synopsis Implementation of a Parser class for Tracer output * Parses pre-saved golden output for kernel info and saves them in a vector * Executes appliaction(passed as param:app_name) and saves parsed kernel info * in a vector. @@ -60,7 +60,7 @@ class ApplicationParser : public ::testing::Test { std::string function; }; - //!< saves lines of profiler output + //!< saves lines of tracer output std::vector output_lines; public: @@ -72,12 +72,12 @@ class ApplicationParser : public ::testing::Test { void GetKernelInfoForGoldenOutput(const char* app_name, std::string filename, std::vector* kernel_info_output); - //!< Parses kernel-info after running profiler against curent application + //!< Parses kernel-info after running tracer against curent application // and saves them in a vector. void GetKernelInfoForRunningApplication(std::vector* kernel_info_output); private: - //!< Runs a given appllication and saves profiler output. + //!< Runs a given appllication and saves tracer output. 
// These output lines can be letter passed for kernel informations // i.e: kernel_names void ProcessApplication(std::stringstream& ss); @@ -89,12 +89,12 @@ class ApplicationParser : public ::testing::Test { /* --------------------------------------------------------------------------*/ /** - * @Synopsis Implementation of a ProfilerTest + * @Synopsis Implementation of a Tracertest * Subsequent tests can use this to parse different applications */ /* --------------------------------------------------------------------------*/ -class ProfilerTest : public ApplicationParser { +class Tracertest : public ApplicationParser { protected: virtual void SetUp(const char* app_name, const char* trace_option) { ApplicationParser::SetUp(app_name, trace_option);