SWDEV-398161, SWDEV-398764 Fixed --hsa-api for v2
Change-Id: I1902ff463851a3485b48f7b9400f7116a073365a
[ROCm/rocprofiler commit: c3107fc258]
Bu işleme şunda yer alıyor:
işlemeyi yapan:
Ammar Elwazir
ebeveyn
39e398bfd7
işleme
614fd47cd3
@@ -212,6 +212,7 @@ The resulting `a.out` will depend on
|
||||
### Added
|
||||
- 'end_time' need to be disabled in roctx_trace.txt
|
||||
- Every API trace in V2 reported synchronously will have two records, one for the Enter phase and one for the Exit phase
|
||||
- support for hsa_amd_memory_async_copy_on_engine API function trace
|
||||
### Fixed
|
||||
- rocprof in ROCm/5.4.0 gpu selector broken.
|
||||
- rocprof in ROCm/5.4.1 fails to generate kernel info.
|
||||
|
||||
@@ -291,7 +291,7 @@ class file_plugin_t {
|
||||
if (kernel_name.size() > 1) *output_file << " Kernel_Name(" << kernel_name.c_str() << "),";
|
||||
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
|
||||
*output_file << " Begin(" << tracer_record.timestamps.begin.value
|
||||
<< "), End(" << tracer_record.timestamps.end.value << ")";
|
||||
<< "), End(" << tracer_record.timestamps.end.value << "),";
|
||||
} else {
|
||||
if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX){
|
||||
rocprofiler_timestamp_t timestamp;
|
||||
|
||||
@@ -52,7 +52,6 @@ namespace {
|
||||
|
||||
hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_agent_t agent,
|
||||
hsa_executable_symbol_t symbol, void* args) {
|
||||
|
||||
hsa_symbol_kind_t type;
|
||||
rocmtools::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type);
|
||||
@@ -63,7 +62,7 @@ hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_
|
||||
// TODO(aelwazir): to be removed if the HSA fixed the issue of corrupted
|
||||
// names overflowing the length given
|
||||
if (name_length > 1) {
|
||||
if(!(*static_cast<bool*>(args))) {
|
||||
if (!(*static_cast<bool*>(args))) {
|
||||
char name[name_length + 1];
|
||||
uint64_t kernel_object;
|
||||
rocmtools::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn(
|
||||
@@ -92,7 +91,8 @@ bool IsEnabled(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_i
|
||||
return report && report(domain, operation_id, nullptr) == 0;
|
||||
}
|
||||
|
||||
void ReportActivity(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id, void* data) {
|
||||
void ReportActivity(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id,
|
||||
void* data) {
|
||||
if (auto report = report_activity.load(std::memory_order_relaxed))
|
||||
report(domain, operation_id, data);
|
||||
}
|
||||
@@ -486,12 +486,14 @@ hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
|
||||
return rocmtools::hsa_support::GetCoreApiTable().hsa_executable_destroy_fn(executable);
|
||||
}
|
||||
|
||||
bool profiling_async_copy_enable = false;
|
||||
std::atomic<bool> profiling_async_copy_enable{false};
|
||||
|
||||
hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) {
|
||||
hsa_status_t status =
|
||||
rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(enable);
|
||||
if (status == HSA_STATUS_SUCCESS) profiling_async_copy_enable = enable;
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
profiling_async_copy_enable.exchange(enable, std::memory_order_release);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -515,7 +517,7 @@ hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const vo
|
||||
// FIXME: what happens if the state changes before returning?
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(
|
||||
profiling_async_copy_enable | is_enabled);
|
||||
profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
|
||||
if (!is_enabled) {
|
||||
@@ -547,7 +549,7 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
|
||||
// FIXME: what happens if the state changes before returning?
|
||||
[[maybe_unused]] hsa_status_t status =
|
||||
rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(
|
||||
profiling_async_copy_enable | is_enabled);
|
||||
profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
|
||||
if (!is_enabled) {
|
||||
@@ -569,6 +571,36 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
|
||||
return status;
|
||||
}
|
||||
|
||||
// Intercept for hsa_amd_memory_async_copy_on_engine.
//
// When copy tracing (ACTIVITY_DOMAIN_HSA_OPS / HSA_OP_ID_COPY) is not enabled
// the call is forwarded unchanged. Otherwise a Tracker entry is created for
// the caller's completion signal and the copy is submitted with the entry's
// own signal in its place. All other arguments are passed through as-is.
//
// Returns the status of the underlying copy call.
hsa_status_t MemoryASyncCopyOnEngineIntercept(
    void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size,
    uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal,
    hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) {
  const bool tracing_copies = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);

  // Async-copy profiling must be on if either the application asked for it
  // (recorded in profiling_async_copy_enable) or copy tracing is active.
  // FIXME: what happens if the state changes before returning?
  const bool want_profiling =
      profiling_async_copy_enable.load(std::memory_order_relaxed) || tracing_copies;
  [[maybe_unused]] const hsa_status_t enable_status =
      saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(want_profiling);
  assert(enable_status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (!tracing_copies) {
    // Not tracing: plain pass-through using the caller's completion signal.
    return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal,
        engine_id, force_copy_on_sdma);
  }

  // Register a tracker entry for this copy before submitting it.
  auto* entry = new Tracker::entry_t();
  entry->handler = MemoryASyncCopyHandler;
  entry->correlation_id = CorrelationId();
  Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);

  // Submit the copy against the tracker's signal instead of the caller's.
  const hsa_status_t copy_status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal, engine_id,
      force_copy_on_sdma);

  // The copy was never queued, so the entry must be disabled again.
  if (copy_status != HSA_STATUS_SUCCESS) {
    Tracker::Disable(entry);
  }

  return copy_status;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
rocprofiler_timestamp_t timestamp_ns() {
|
||||
@@ -748,21 +780,21 @@ void Initialize(HsaApiTable* table) {
|
||||
rocmtools::queue::InitializePools(cpu_agent);
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_GPU:
|
||||
// XXX FIXME: When multiple ranks are used, each rank's first
|
||||
// logical device always has GPU ID 0, regardless of which
|
||||
// physical device is selected with CUDA_VISIBLE_DEVICES.
|
||||
// Because of this, when merging traces from multiple ranks,
|
||||
// GPU IDs from different processes may overlap.
|
||||
//
|
||||
// The long term solution is to use KFD's gpu_id, which is
|
||||
// stable across APIs and processes, but it isn't currently
|
||||
// exposed by ROCr. We could use the agent's
|
||||
// HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even
|
||||
// that would be an improvement--it's what legacy roctracer
|
||||
// is currently doing as well as the roctracer compatibility
|
||||
// code earlier in this file.
|
||||
agent_info.setIndex(gpu_agent_count++);
|
||||
break;
|
||||
// XXX FIXME: When multiple ranks are used, each rank's first
|
||||
// logical device always has GPU ID 0, regardless of which
|
||||
// physical device is selected with CUDA_VISIBLE_DEVICES.
|
||||
// Because of this, when merging traces from multiple ranks,
|
||||
// GPU IDs from different processes may overlap.
|
||||
//
|
||||
// The long term solution is to use KFD's gpu_id, which is
|
||||
// stable across APIs and processes, but it isn't currently
|
||||
// exposed by ROCr. We could use the agent's
|
||||
// HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even
|
||||
// that would be an improvement--it's what legacy roctracer
|
||||
// is currently doing as well as the roctracer compatibility
|
||||
// code earlier in this file.
|
||||
agent_info.setIndex(gpu_agent_count++);
|
||||
break;
|
||||
default:
|
||||
agent_info.setIndex(other_agent_count++);
|
||||
break;
|
||||
@@ -787,6 +819,8 @@ void Initialize(HsaApiTable* table) {
|
||||
roctracer::hsa_support::MemoryASyncCopyRectIntercept;
|
||||
table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn =
|
||||
roctracer::hsa_support::ProfilingAsyncCopyEnableIntercept;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn =
|
||||
roctracer::hsa_support::MemoryASyncCopyOnEngineIntercept;
|
||||
|
||||
// Install the HSA_EVT intercept
|
||||
table->core_->hsa_memory_allocate_fn = roctracer::hsa_support::MemoryAllocateIntercept;
|
||||
@@ -875,11 +909,11 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback
|
||||
|
||||
const rocprofiler_counter_info_t counter_info =
|
||||
rocprofiler_counter_info_t{strdup(name.c_str()),
|
||||
strdup(descr.c_str()),
|
||||
expr.empty() ? nullptr : strdup(expr.c_str()),
|
||||
query.instance_count,
|
||||
block_name.c_str(),
|
||||
block_counters};
|
||||
strdup(descr.c_str()),
|
||||
expr.empty() ? nullptr : strdup(expr.c_str()),
|
||||
query.instance_count,
|
||||
block_name.c_str(),
|
||||
block_counters};
|
||||
counters_info_callback(counter_info, gpu_name.c_str(), gpu_counter);
|
||||
}
|
||||
gpu_counter++;
|
||||
@@ -890,7 +924,8 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback
|
||||
// std::string expr_str;
|
||||
// if (expr) expr_str = expr->GetStr().c_str();
|
||||
// const rocprofiler_counter_info_t counter_info =
|
||||
// rocprofiler_counter_info_t{start->first.c_str(), "", expr ? expr_str.c_str() : nullptr};
|
||||
// rocprofiler_counter_info_t{start->first.c_str(), "", expr ? expr_str.c_str() :
|
||||
// nullptr};
|
||||
// counters_info_callback(counter_info, gpu_name.c_str(), gpu_counter);
|
||||
// start++;
|
||||
// }
|
||||
|
||||
@@ -29,6 +29,14 @@ set_source_files_properties(apps/hello_world.cpp PROPERTIES HIP_SOURCE_PROPERTY_
|
||||
hip_add_executable(tracer_hip_helloworld apps/hello_world.cpp)
|
||||
set_target_properties(tracer_hip_helloworld PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tests/featuretests/tracer/apps")
|
||||
|
||||
#hsa-mem_async_copy and async_copy_on_engine
|
||||
set_source_files_properties(apps/copy_on_engine.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(copy_on_engine apps/copy_on_engine.cpp)
|
||||
set_target_properties(copy_on_engine PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tests/featuretests/tracer/apps")
|
||||
target_link_options(copy_on_engine PRIVATE "-Wl,--build-id=md5")
|
||||
install(TARGETS copy_on_engine RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/tracer/apps COMPONENT tests)
|
||||
target_link_libraries(copy_on_engine hsa-runtime64::hsa-runtime64 Threads::Threads dl stdc++fs)
|
||||
|
||||
# Add test cpp file
|
||||
add_executable(runTracerFeatureTests tracer_gtest.cpp
|
||||
${GTEST_MAIN_SRC_FILE}
|
||||
|
||||
@@ -0,0 +1,338 @@
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
|
||||
// This program illustrates the usage of the asynchronous copy capability of
|
||||
// the RocR runtime library. The program will create a system memory buffer and
|
||||
// a local buffer for each GPU, up to 2 GPUs, if the system has at least 2
|
||||
// GPUs. The program will copy data to/from the host from/to the GPU. If 2
|
||||
// GPUs are available, the program will also copy data from one to the other.
|
||||
// Update: Added an additional call to hsa_amd_memory_async_copy_on_engine
|
||||
|
||||
// Checks an HSA status value and, if it is not HSA_STATUS_SUCCESS, prints the
// failing line/file, the numeric status and its hsa_status_string() text to
// std::cout, then returns the status from the *enclosing* function.
//
// The argument is evaluated exactly once, so it is safe to pass an expression
// with side effects. The do/while(0) wrapper makes the macro behave as a single
// statement (usable in unbraced if/else); callers must supply the trailing ';'.
#define RET_IF_HSA_ERR(err)                                                              \
  do {                                                                                   \
    const auto ret_if_hsa_err_status_ = (err);                                           \
    if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) {                                  \
      const char* ret_if_hsa_err_msg_ = nullptr;                                         \
      hsa_status_string(ret_if_hsa_err_status_, &ret_if_hsa_err_msg_);                   \
      std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << __FILE__ \
                << ". Call returned " << ret_if_hsa_err_status_ << std::endl;            \
      /* Streaming a null char pointer is undefined; fall back to a fixed text. */       \
      std::cout << (ret_if_hsa_err_msg_ ? ret_if_hsa_err_msg_                            \
                                        : "(no status description available)")          \
                << std::endl;                                                            \
      return ret_if_hsa_err_status_;                                                     \
    }                                                                                    \
  } while (0)
|
||||
|
||||
// 32-bit patterns used by the copy tests in this file. Each AsyncCpyTest run
// in main() fills its source buffer with one of the kTestFillValue* patterns;
// kTestInitValue is what the system and destination buffers are pre-filled
// with so that an overwrite by the copy can be detected.
static constexpr uint32_t kTestFillValue1{0xabcdef12u};
static constexpr uint32_t kTestFillValue2{0xba5eba11u};
static constexpr uint32_t kTestFillValue3{0xfeed5a1eu};
static constexpr uint32_t kTestInitValue{0xbaadf00du};
|
||||
|
||||
// This structure holds an agent pointer and associated memory pool to be used
|
||||
// for this test program.
|
||||
struct async_mem_cpy_agent {
|
||||
hsa_agent_t dev;
|
||||
hsa_amd_memory_pool_t pool;
|
||||
size_t granule;
|
||||
void* ptr;
|
||||
};
|
||||
struct async_mem_cpy_pool_query {
|
||||
async_mem_cpy_agent* pool_info;
|
||||
hsa_agent_t peer_device;
|
||||
};
|
||||
struct callback_args {
|
||||
struct async_mem_cpy_agent cpu;
|
||||
struct async_mem_cpy_agent gpu1;
|
||||
struct async_mem_cpy_agent gpu2;
|
||||
};
|
||||
|
||||
|
||||
// This function is meant to be a callback to hsa_iterate_agents. For each
|
||||
// input agent the iterator provides as input, this function will check to
|
||||
// see if the input agent is a CPU agent. If so, it will update the
|
||||
// async_mem_cpy_agent structure pointed to by the input parameter "data".
|
||||
// Return values:
|
||||
// HSA_STATUS_INFO_BREAK -- CPU agent has been found and stored. Iterator
|
||||
// should stop iterating
|
||||
// HSA_STATUS_SUCCESS -- CPU agent has not yet been found; iterator
|
||||
// should keep iterating
|
||||
// Other -- Some error occurred
|
||||
static hsa_status_t FindPool(hsa_amd_memory_pool_t in_pool, void* data) {
|
||||
hsa_amd_segment_t segment;
|
||||
hsa_status_t err;
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
struct async_mem_cpy_pool_query* args = (struct async_mem_cpy_pool_query*)data;
|
||||
err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (segment != HSA_AMD_SEGMENT_GLOBAL) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
bool canAlloc;
|
||||
err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
|
||||
&canAlloc);
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (!canAlloc) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
if (args->peer_device.handle != 0) {
|
||||
hsa_amd_memory_pool_access_t access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
|
||||
err = hsa_amd_agent_memory_pool_get_info(args->peer_device, in_pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
err = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
||||
&args->pool_info->granule);
|
||||
RET_IF_HSA_ERR(err);
|
||||
args->pool_info->pool = in_pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
// Returns the least common multiple of `a` and `b`.
//
// If either argument is 0 the result is 0. All arithmetic is unsigned; if the
// true least common multiple does not fit in 32 bits the result wraps modulo
// 2^32.
static uint32_t lcm(uint32_t a, uint32_t b) {
  if (a == 0 || b == 0) {
    return 0;
  }

  // Euclid's algorithm: after the loop `divisor` holds gcd(a, b).
  uint32_t divisor = a;
  uint32_t remainder = b;
  while (remainder != 0) {
    const uint32_t next = divisor % remainder;
    divisor = remainder;
    remainder = next;
  }

  // Divide before multiplying to keep the intermediate value as small as possible.
  return (a / divisor) * b;
}
|
||||
// Callback for hsa_iterate_agents() that collects up to two GPU agents into
// the callback_args pointed to by "data". A GPU is kept only if FindPool
// accepts one of its memory pools; for the second GPU the pool must also be
// accessible from the first GPU.
// Return values:
//  HSA_STATUS_INFO_BREAK -- Second GPU stored; iterator should stop
//  HSA_STATUS_SUCCESS -- Keep iterating (agent was not a GPU, was rejected,
//                        or was stored as the first GPU)
//  Other -- Some error occurred
static hsa_status_t FindGPUs(hsa_agent_t agent, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_kind;
  hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_kind);
  RET_IF_HSA_ERR(err);
  if (device_kind != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_SUCCESS;
  }

  auto* found = static_cast<callback_args*>(data);

  // Fill gpu1 first; once it is taken, every further GPU is a gpu2 candidate.
  const bool first_slot_free = (found->gpu1.dev.handle == 0);
  async_mem_cpy_agent* slot = first_slot_free ? &found->gpu1 : &found->gpu2;

  async_mem_cpy_pool_query query = {0, 0};
  if (!first_slot_free) {
    // Check that gpu1 has peer access into the selected pool.
    query.peer_device = found->gpu1.dev;
  }

  // Tentatively claim the slot, then look for a usable pool on this GPU.
  slot->dev = agent;
  query.pool_info = slot;
  err = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &query);

  if (err != HSA_STATUS_INFO_BREAK) {
    // No suitable pool (or an error): release the slot again.
    slot->dev = {0};
    RET_IF_HSA_ERR(err);
    // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
    return HSA_STATUS_SUCCESS;
  }

  // Pool found. Stop once the second GPU is in place, otherwise keep looking.
  return first_slot_free ? HSA_STATUS_SUCCESS : HSA_STATUS_INFO_BREAK;
}
|
||||
|
||||
// This function is a callback for hsa_amd_agent_iterate_memory_pools()
|
||||
// and will test whether the provided memory pool is 1) in the GLOBAL
|
||||
// segment, 2) allows allocation and 3) is accessible by the provided
|
||||
// agent. The "data" input parameter is assumed to be pointing to a
|
||||
// struct async_mem_cpy_agent. If the provided pool meets these criteria,
|
||||
// HSA_STATUS_INFO_BREAK is returned.
|
||||
|
||||
static hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
|
||||
if (data == NULL) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
hsa_device_type_t hsa_device_type;
|
||||
hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type);
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (hsa_device_type == HSA_DEVICE_TYPE_CPU) {
|
||||
struct async_mem_cpy_agent* args = (struct async_mem_cpy_agent*)data;
|
||||
args->dev = agent;
|
||||
async_mem_cpy_pool_query pool_query;
|
||||
pool_query.peer_device.handle = 0;
|
||||
pool_query.pool_info = args;
|
||||
err = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query);
|
||||
if (err == HSA_STATUS_INFO_BREAK) { // we found what we were looking for
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
} else {
|
||||
args->dev = {0};
|
||||
return err;
|
||||
}
|
||||
}
|
||||
// Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// This is the main test, showing various paths of async. copy. Source and
|
||||
// destination agents and their respective pools should already be discovered.
|
||||
// Additionally, buffer from the pools should already be allocated and availble
|
||||
// from the input parameters.
|
||||
static hsa_status_t AsyncCpyTest(async_mem_cpy_agent* dst, async_mem_cpy_agent* src,
|
||||
callback_args* args, size_t sz, uint32_t val) {
|
||||
hsa_status_t err;
|
||||
hsa_signal_t copy_signal;
|
||||
// Initialize the system and destination buffers with a value so we can later
|
||||
// validate it has been overwritten
|
||||
void* sysPtr = args->cpu.ptr;
|
||||
err = hsa_amd_memory_fill(sysPtr, kTestInitValue, sz / sizeof(uint32_t));
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (dst->ptr != sysPtr) {
|
||||
err = hsa_amd_memory_fill(dst->ptr, kTestInitValue, sz / sizeof(uint32_t));
|
||||
RET_IF_HSA_ERR(err);
|
||||
}
|
||||
// Fill the source buffer with the provided uint32_t value
|
||||
err = hsa_amd_memory_fill(src->ptr, val, sz / sizeof(uint32_t));
|
||||
RET_IF_HSA_ERR(err);
|
||||
// Make sure the target and destination agents have access to the buffer.
|
||||
hsa_agent_t ag_list[2] = {dst->dev, src->dev};
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst->ptr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
// Create a signal that will be used to inform us when the copy is done
|
||||
err = hsa_signal_create(1, 0, NULL, ©_signal);
|
||||
RET_IF_HSA_ERR(err);
|
||||
// Do the copy...
|
||||
err = hsa_amd_memory_async_copy(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL, copy_signal);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// call following APIs to make sure we intercept hsa_amd_memory_async_copy_on_engine
|
||||
uint32_t engine_ids_mask = 0;
|
||||
err = hsa_amd_memory_copy_engine_status(args->cpu.dev, args->gpu1.dev, &engine_ids_mask);
|
||||
hsa_amd_sdma_engine_id_t engine_id = HSA_AMD_SDMA_ENGINE_0;
|
||||
err = hsa_amd_memory_async_copy_on_engine(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL,
|
||||
copy_signal, engine_id, false);
|
||||
|
||||
// Here we do a blocking wait. Alternatively, we could also use a
|
||||
// non-blocking wait in a loop, and do other work while waiting.
|
||||
if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1,
|
||||
HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
printf("Async copy returned error value.\n");
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
// Verify the copy was successful; copy from the dst buffer to the sysBuf,
|
||||
// (if the result is not already in sys. mem.) and check the sysBuf values
|
||||
if (dst->ptr != sysPtr) {
|
||||
if (src->ptr != sysPtr) {
|
||||
// In this case, we need to give the gpu dev that owns dst->ptr access
|
||||
// to the system memory we are going to copy to.
|
||||
hsa_agent_t ag_list_ck[2] = {dst->dev, args->cpu.dev};
|
||||
err = hsa_amd_agents_allow_access(2, ag_list_ck, NULL, sysPtr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
}
|
||||
// Reset signal to 1
|
||||
hsa_signal_store_screlease(copy_signal, 1);
|
||||
err = hsa_amd_memory_async_copy(sysPtr, args->cpu.dev, dst->ptr, dst->dev, sz, 0, NULL,
|
||||
copy_signal);
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1,
|
||||
HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
printf("Async copy returned error value.\n");
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
// Check that the contents of the buffer are what is expected.
|
||||
for (uint32_t i = 0; i < sz / sizeof(uint32_t); ++i) {
|
||||
if (reinterpret_cast<uint32_t*>(sysPtr)[i] != val) {
|
||||
fprintf(stdout, "Expected 0x%x but got 0x%x in buffer at index %d.\n", val,
|
||||
reinterpret_cast<uint32_t*>(sysPtr)[i], i);
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
int main() {
|
||||
hsa_status_t err;
|
||||
struct callback_args args;
|
||||
bool twoGPUs = false;
|
||||
err = hsa_init();
|
||||
RET_IF_HSA_ERR(err);
|
||||
// First, find the cpu agent and associated pool
|
||||
args.cpu = {0, 0, 0};
|
||||
err = hsa_iterate_agents(FindCPUDevice, reinterpret_cast<void*>(&args.cpu));
|
||||
assert(err == HSA_STATUS_INFO_BREAK);
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return -1;
|
||||
}
|
||||
// Now, find 1 or 2 (if possible) GPUs and associated pool(s) for our test
|
||||
args.gpu1 = {0, 0, 0};
|
||||
args.gpu2 = {0, 0, 0};
|
||||
err = hsa_iterate_agents(FindGPUs, &args);
|
||||
if (err == HSA_STATUS_INFO_BREAK) {
|
||||
twoGPUs = true;
|
||||
} else {
|
||||
// See if we at least have 1 GPU
|
||||
if (args.gpu1.dev.handle == 0) {
|
||||
fprintf(stdout, "GPU with accessible VRAM not found; at least 1 required. Exiting\n");
|
||||
return -1;
|
||||
}
|
||||
fprintf(stdout,
|
||||
"Only 1 GPU found with required VRAM. "
|
||||
"Peer-to-Peer copy will be skipped.\n");
|
||||
}
|
||||
// We will use the smallest amount of allocatable memory that works for all
|
||||
// potential sources and destinations of the copy
|
||||
size_t sz = lcm(args.cpu.granule, args.gpu1.granule);
|
||||
// Allocate memory on each source/destination
|
||||
if (twoGPUs) {
|
||||
sz = lcm(sz, args.gpu2.granule);
|
||||
err = hsa_amd_memory_pool_allocate(args.gpu2.pool, sz, 0,
|
||||
reinterpret_cast<void**>(&args.gpu2.ptr));
|
||||
RET_IF_HSA_ERR(err);
|
||||
}
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(args.cpu.pool, sz, 0, reinterpret_cast<void**>(&args.cpu.ptr));
|
||||
RET_IF_HSA_ERR(err);
|
||||
err =
|
||||
hsa_amd_memory_pool_allocate(args.gpu1.pool, sz, 0, reinterpret_cast<void**>(&args.gpu1.ptr));
|
||||
RET_IF_HSA_ERR(err);
|
||||
char name[64];
|
||||
err = hsa_agent_get_info(args.cpu.dev, HSA_AGENT_INFO_NAME, &name);
|
||||
fprintf(stdout, "CPU is \"%s\"\n", name);
|
||||
err = hsa_agent_get_info(args.gpu1.dev, HSA_AGENT_INFO_NAME, &name);
|
||||
fprintf(stdout, "GPU1 is \"%s\"\n", name);
|
||||
if (twoGPUs) {
|
||||
err = hsa_agent_get_info(args.gpu2.dev, HSA_AGENT_INFO_NAME, &name);
|
||||
fprintf(stdout, "GPU2 is \"%s\"\n", name);
|
||||
}
|
||||
fprintf(stdout, "Copying %lu bytes from gpu1 memory to system memory...\n", sz);
|
||||
err = AsyncCpyTest(&args.cpu, &args.gpu1, &args, sz, kTestFillValue1);
|
||||
RET_IF_HSA_ERR(err);
|
||||
fprintf(stdout, "Success!\n");
|
||||
fprintf(stdout, "Copying %lu bytes from system memory to gpu1 memory...\n", sz);
|
||||
err = AsyncCpyTest(&args.gpu1, &args.cpu, &args, sz, kTestFillValue2);
|
||||
RET_IF_HSA_ERR(err);
|
||||
fprintf(stdout, "Success!\n");
|
||||
|
||||
if (twoGPUs) {
|
||||
fprintf(stdout, "Copying %lu bytes from gpu1 memory to gpu2 memory...\n", sz);
|
||||
err = AsyncCpyTest(&args.gpu2, &args.gpu1, &args, sz, kTestFillValue3);
|
||||
RET_IF_HSA_ERR(err);
|
||||
fprintf(stdout, "Success!\n");
|
||||
}
|
||||
// Clean up
|
||||
err = hsa_amd_memory_pool_free(args.cpu.ptr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
err = hsa_amd_memory_pool_free(args.gpu1.ptr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
if (twoGPUs) {
|
||||
err = hsa_amd_memory_pool_free(args.gpu2.ptr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
Record(68), Domain(HSA_OPS_DOMAIN), Begin(357395893062183), End(357395893066774) Correlation_ID(22)
|
||||
Record(78), Domain(HSA_OPS_DOMAIN), Begin(357395893673708), End(357395893677559) Correlation_ID(24)
|
||||
Record(103), Domain(HSA_OPS_DOMAIN), Begin(357395893769384), End(357395893773087) Correlation_ID(31)
|
||||
Record(110), Domain(HSA_OPS_DOMAIN), Begin(357395893777826), End(357395893781529) Correlation_ID(33)
|
||||
Record(116), Domain(HSA_OPS_DOMAIN), Begin(357395893796340), End(357395893799598) Correlation_ID(36)
|
||||
@@ -21,4 +21,3 @@ Record(24), Domain(HIP_API_DOMAIN), Function(hipMemcpy), End(2995594192228011),
|
||||
Record(26), Domain(HIP_API_DOMAIN), Function(hipFree), Begin(2995594192237078), Correlation_ID(9)
|
||||
Record(27), Domain(HIP_API_DOMAIN), Function(hipFree), End(2995594192256085), Correlation_ID(9)
|
||||
Record(29), Domain(HIP_API_DOMAIN), Function(hipFree), Begin(2995594192259622), Correlation_ID(10)
|
||||
Record(30), Domain(HIP_API_DOMAIN), Function(hipFree), End(2995594192264101), Correlation_ID(10)
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
Record(1), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), Begin(357395885625757), Correlation_ID(1)
|
||||
Record(2), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885636767), Correlation_ID(2)
|
||||
Record(3), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885640407), Correlation_ID(2)
|
||||
Record(5), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), Begin(357395885645827), Correlation_ID(3)
|
||||
Record(6), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885650247), Correlation_ID(4)
|
||||
Record(7), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885653007), Correlation_ID(4)
|
||||
Record(9), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885655747), Correlation_ID(5)
|
||||
Record(10), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885658277), Correlation_ID(5)
|
||||
Record(12), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885660877), Correlation_ID(6)
|
||||
Record(13), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885663467), Correlation_ID(6)
|
||||
Record(15), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), End(357395885666197), Correlation_ID(3)
|
||||
Record(17), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), End(357395885668847), Correlation_ID(1)
|
||||
Record(19), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), Begin(357395885671437), Correlation_ID(7)
|
||||
Record(20), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885674007), Correlation_ID(8)
|
||||
Record(21), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885676607), Correlation_ID(8)
|
||||
Record(23), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885679327), Correlation_ID(9)
|
||||
Record(24), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885681837), Correlation_ID(9)
|
||||
Record(26), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), Begin(357395885684537), Correlation_ID(10)
|
||||
Record(27), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885687177), Correlation_ID(11)
|
||||
Record(28), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885690227), Correlation_ID(11)
|
||||
Record(30), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885692857), Correlation_ID(12)
|
||||
Record(31), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885695397), Correlation_ID(12)
|
||||
Record(33), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885697917), Correlation_ID(13)
|
||||
Record(34), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885700357), Correlation_ID(13)
|
||||
Record(36), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), End(357395885702907), Correlation_ID(10)
|
||||
Record(38), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), End(357395885705447), Correlation_ID(7)
|
||||
Record(40), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), Begin(357395885713627), Correlation_ID(14)
|
||||
Record(41), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), End(357395885737288), Correlation_ID(14)
|
||||
Record(43), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), Begin(357395885740878), Correlation_ID(15)
|
||||
Record(44), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), End(357395885804208), Correlation_ID(15)
|
||||
Record(46), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885809478), Correlation_ID(16)
|
||||
Record(47), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885812198), Correlation_ID(16)
|
||||
Record(49), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885819548), Correlation_ID(17)
|
||||
Record(50), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885822698), Correlation_ID(17)
|
||||
Record(52), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395885831788), Correlation_ID(18)
|
||||
Record(53), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395885836338), Correlation_ID(18)
|
||||
Record(55), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395885839008), Correlation_ID(19)
|
||||
Record(56), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395892240457), Correlation_ID(19)
|
||||
Record(58), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), Begin(357395892269217), Correlation_ID(20)
|
||||
Record(59), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), End(357395892311137), Correlation_ID(20)
|
||||
Record(61), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), Begin(357395892318827), Correlation_ID(21)
|
||||
Record(62), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), End(357395892324097), Correlation_ID(21)
|
||||
Record(64), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395892328127), Correlation_ID(22)
|
||||
Record(65), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893033290), Correlation_ID(22)
|
||||
Record(67), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), Begin(357395893039770), Correlation_ID(23)
|
||||
Record(69), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), End(357395893655353), Correlation_ID(23)
|
||||
Record(71), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), Begin(357395893661713), Correlation_ID(24)
|
||||
Record(72), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), End(357395893668973), Correlation_ID(24)
|
||||
Record(74), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893671633), Correlation_ID(25)
|
||||
Record(75), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893673753), Correlation_ID(25)
|
||||
Record(77), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893687933), Correlation_ID(26)
|
||||
Record(79), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893714413), Correlation_ID(26)
|
||||
Record(81), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893716553), Correlation_ID(27)
|
||||
Record(82), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893728583), Correlation_ID(27)
|
||||
Record(84), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893730223), Correlation_ID(28)
|
||||
Record(85), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893742133), Correlation_ID(28)
|
||||
Record(87), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), Begin(357395893744023), Correlation_ID(29)
|
||||
Record(88), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), End(357395893747313), Correlation_ID(29)
|
||||
Record(90), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), Begin(357395893749013), Correlation_ID(30)
|
||||
Record(91), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), End(357395893751313), Correlation_ID(30)
|
||||
Record(93), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395893753503), Correlation_ID(31)
|
||||
Record(94), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893759683), Correlation_ID(31)
|
||||
Record(96), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), Begin(357395893762933), Correlation_ID(32)
|
||||
Record(97), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), End(357395893764903), Correlation_ID(32)
|
||||
Record(99), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), Begin(357395893766573), Correlation_ID(33)
|
||||
Record(100), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), End(357395893770533), Correlation_ID(33)
|
||||
Record(102), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893772203), Correlation_ID(34)
|
||||
Record(104), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893775493), Correlation_ID(34)
|
||||
Record(106), Domain(HSA_API_DOMAIN), Function(hsa_signal_store_screlease), Begin(357395893778003), Correlation_ID(35)
|
||||
Record(107), Domain(HSA_API_DOMAIN), Function(hsa_signal_store_screlease), End(357395893779583), Correlation_ID(35)
|
||||
Record(109), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395893781263), Correlation_ID(36)
|
||||
Record(111), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893786634), Correlation_ID(36)
|
||||
Record(113), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893795734), Correlation_ID(37)
|
||||
Record(114), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893797624), Correlation_ID(37)
|
||||
Record(117), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), Begin(357395893802314), Correlation_ID(38)
|
||||
Record(118), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), End(357395893829684), Correlation_ID(38)
|
||||
Record(120), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), Begin(357395893833114), Correlation_ID(39)
|
||||
Record(121), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), End(357395893835624), Correlation_ID(39)
|
||||
@@ -19,6 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
#include "tracer_gtest.h"
|
||||
#include "../utils/test_utils.h"
|
||||
@@ -33,8 +35,19 @@ void ApplicationParser::SetApplicationEnv(const char* app_name, const char* trac
|
||||
hsa_tools_lib_path << app_path << "librocprofiler_tool.so";
|
||||
setenv("LD_PRELOAD", hsa_tools_lib_path.str().c_str(), true);
|
||||
|
||||
// set --hip-api option
|
||||
setenv("ROCPROFILER_HIP_API_TRACE", "1", true);
|
||||
std::string trace_type{trace_option};
|
||||
|
||||
if (trace_type.find("hip") != std::string::npos) {
|
||||
// set --hip-api option
|
||||
setenv("ROCPROFILER_HIP_API_TRACE", "1", true);
|
||||
}
|
||||
|
||||
if (trace_type.find("hsa") != std::string::npos) {
|
||||
// set --hsa-api and --hsa-activity
|
||||
setenv("ROCPROFILER_HSA_API_TRACE", "1", true);
|
||||
setenv("ROCPROFILER_HSA_ACTIVITY_TRACE", "1", true);
|
||||
}
|
||||
|
||||
|
||||
std::stringstream os;
|
||||
os << app_path << "tests/featuretests/tracer/apps/" << app_name;
|
||||
@@ -42,32 +55,45 @@ void ApplicationParser::SetApplicationEnv(const char* app_name, const char* trac
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses kernel-info after running profiler against curent application
|
||||
* Parses kernel-info after running tracer against curent application
|
||||
* and saves them in a vector.
|
||||
*/
|
||||
void ApplicationParser::GetKernelInfoForRunningApplication(
|
||||
std::vector<KernelInfo>* kernel_info_output) {
|
||||
KernelInfo kinfo;
|
||||
for (std::string line : output_lines) {
|
||||
if (std::regex_match(line, std::regex("(Record)(.*)"))) {
|
||||
int spos = line.find("[");
|
||||
int epos = line.find("]", spos);
|
||||
std::string sub = line.substr(spos + 1, epos - spos - 1);
|
||||
// if (std::regex_match(line, std::regex("(Record)(.*)"))) {
|
||||
// Record id
|
||||
size_t found = line.find("Record");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("Record").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
|
||||
kinfo.record_id = sub;
|
||||
kernel_info_output->push_back(kinfo);
|
||||
|
||||
// Kernel-Name
|
||||
size_t found = line.find("Function");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("kernel-name").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
|
||||
kinfo.function = sub;
|
||||
kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel-Name
|
||||
found = line.find("Function");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("Function").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
kinfo.function = sub;
|
||||
}
|
||||
|
||||
// corealtion-ids
|
||||
found = line.find("Correlation_ID");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("Correlation_ID").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
kinfo.corelation_id = sub;
|
||||
}
|
||||
kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +112,7 @@ void ApplicationParser::GetKernelInfoForGoldenOutput(const char* app_name, std::
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a given appllication and saves profiler output.
|
||||
* Runs a given appllication and saves tracer output.
|
||||
* These output lines can be letter passed for kernel informations
|
||||
* i.e: kernel_names
|
||||
*/
|
||||
@@ -125,45 +151,70 @@ void ApplicationParser::ParseKernelInfoFields(const std::string& s,
|
||||
std::ifstream golden_file(s);
|
||||
while (!golden_file.eof()) {
|
||||
getline(golden_file, line);
|
||||
if (std::regex_match(line, std::regex("(Record)(.*)"))) {
|
||||
int spos = line.find("[");
|
||||
int epos = line.find("]", spos);
|
||||
std::string sub = line.substr(spos + 1, epos - spos - 1);
|
||||
// if (std::regex_match(line, std::regex("(Record)(.*)"))) {
|
||||
// Record id
|
||||
size_t found = line.find("Record");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("Record").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
|
||||
kinfo.record_id = sub;
|
||||
kernel_info_output->push_back(kinfo);
|
||||
|
||||
// Kernel-Name
|
||||
size_t found = line.find("Function");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("kernel-name").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
|
||||
kinfo.function = sub;
|
||||
kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
// kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
|
||||
// Kernel-Name
|
||||
found = line.find("Function");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("kernel-name").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
|
||||
kinfo.function = sub;
|
||||
// kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
|
||||
// corealtion-ids
|
||||
found = line.find("Correlation_ID");
|
||||
if (found != std::string::npos) {
|
||||
int spos = found;
|
||||
int epos = line.find(")", spos);
|
||||
int length = std::string("Correlation_ID").length();
|
||||
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
|
||||
|
||||
kinfo.corelation_id = sub;
|
||||
// kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
//}
|
||||
kernel_info_output->push_back(kinfo);
|
||||
}
|
||||
golden_file.close();
|
||||
}
|
||||
/*
|
||||
* ###################################################
|
||||
* ############ HelloWorld HIP Tests ################
|
||||
* ###################################################
|
||||
*/
|
||||
|
||||
constexpr auto kGoldenOutputHelloworld = "hip_helloworld_golden_traces.txt";
|
||||
|
||||
class HelloWorldTest : public ProfilerTest {
|
||||
class HelloWorldTest : public Tracertest {
|
||||
protected:
|
||||
std::vector<KernelInfo> golden_kernel_info;
|
||||
void SetUp() {
|
||||
ProfilerTest::SetUp("tracer_hip_helloworld", "--hip-api ");
|
||||
Tracertest::SetUp("tracer_hip_helloworld", "--hip-api ");
|
||||
GetKernelInfoForGoldenOutput("tracer_hip_helloworld", kGoldenOutputHelloworld,
|
||||
&golden_kernel_info);
|
||||
}
|
||||
void TearDown() { output_lines.clear(); }
|
||||
};
|
||||
|
||||
// Test:1 Compares total num of kernel-names in golden output against current
|
||||
// profiler output
|
||||
// tracer output
|
||||
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelInfoMatchWithGoldenOutput) {
|
||||
// kernel info in current profiler run
|
||||
// kernel info in current profler run
|
||||
std::vector<KernelInfo> current_kernel_info;
|
||||
|
||||
GetKernelInfoForRunningApplication(&current_kernel_info);
|
||||
@@ -173,9 +224,9 @@ TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelInfoMatchWithGoldenOutp
|
||||
}
|
||||
|
||||
// Test:2 Compares order of kernel-names in golden output against current
|
||||
// profiler output
|
||||
TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenFunctionNamessMatchWithGoldenOutput) {
|
||||
// kernel info in current profiler run
|
||||
// tracer output
|
||||
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenFunctionNamessMatchWithGoldenOutput) {
|
||||
// kernel info in current tracer run
|
||||
std::vector<KernelInfo> current_kernel_info;
|
||||
GetKernelInfoForRunningApplication(&current_kernel_info);
|
||||
|
||||
@@ -186,13 +237,66 @@ TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenFunctionNamessMatchWithGold
|
||||
}
|
||||
|
||||
// Test:3 Compares order of kernel-names in golden output against current
|
||||
// profiler output
|
||||
TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenKernelDurationShouldBePositive) {
|
||||
// kernel info in current profiler run
|
||||
// tracer output
|
||||
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelDurationShouldBePositive) {
  // Records parsed from the current tracer run.
  std::vector<KernelInfo> current_kernel_info;

  GetKernelInfoForRunningApplication(&current_kernel_info);

  // Stop here when nothing was parsed; the check below would only repeat the
  // same failure.
  ASSERT_TRUE(current_kernel_info.size());

  // NOTE(review): despite the test name, no duration is inspected here; the
  // test only verifies that at least one record was parsed.
  // The unsigned literal keeps both operands of the comparison unsigned.
  EXPECT_GT(current_kernel_info.size(), 0u);
}
|
||||
|
||||
|
||||
/*
|
||||
* ###################################################
|
||||
* ############ Async Copy HSA Tests #################
|
||||
* ###################################################
|
||||
*/
|
||||
|
||||
class AsyncCopyTest : public Tracertest {
|
||||
protected:
|
||||
void SetUp() { Tracertest::SetUp("copy_on_engine", "--hsa-api --hsa-activity"); }
|
||||
void TearDown() { output_lines.clear(); }
|
||||
};
|
||||
|
||||
// Test:1 Compares total num of kernel-names in golden output against current
|
||||
// tracer output
|
||||
TEST_F(AsyncCopyTest, WhenRunningTracerWithAppThenAsyncCopyOutputIsgenerated) {
|
||||
// kernel info in current profler run
|
||||
std::vector<KernelInfo> current_kernel_info;
|
||||
|
||||
GetKernelInfoForRunningApplication(¤t_kernel_info);
|
||||
ASSERT_TRUE(current_kernel_info.size());
|
||||
}
|
||||
|
||||
// Test:2 Matches coelation Ids
|
||||
TEST_F(AsyncCopyTest, WhenRunningTracerWithAppThenAsyncCorelationCountIsCorrect) {
|
||||
// kernel info in current profler run
|
||||
std::vector<KernelInfo> current_kernel_info;
|
||||
|
||||
GetKernelInfoForRunningApplication(¤t_kernel_info);
|
||||
ASSERT_TRUE(current_kernel_info.size());
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> corelation_pair{};
|
||||
for (const auto& itr : current_kernel_info) {
|
||||
if (itr.function.find("async_copy_on_engine") != std::string::npos) {
|
||||
corelation_pair.push_back({itr.record_id, itr.corelation_id});
|
||||
}
|
||||
}
|
||||
ASSERT_TRUE(corelation_pair.size());
|
||||
|
||||
uint32_t corealtion_count = 0;
|
||||
// check if corelation id appears more than twice
|
||||
for (size_t i = 0; i < corelation_pair.size(); i++) {
|
||||
for (const auto& itr : current_kernel_info) {
|
||||
if ((itr.corelation_id == corelation_pair[i].second) &&
|
||||
(itr.record_id != corelation_pair[i].first)) {
|
||||
corealtion_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_GT(corealtion_count, corelation_pair.size());
|
||||
}
|
||||
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
|
||||
/* --------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @Synopsis Implementation of a Parser class for Profiler output
|
||||
* @Synopsis Implementation of a Parser class for Tracer output
|
||||
* Parses pre-saved golden output for kernel info and saves them in a vector
|
||||
* Executes appliaction(passed as param:app_name) and saves parsed kernel info
|
||||
* in a vector.
|
||||
@@ -60,7 +60,7 @@ class ApplicationParser : public ::testing::Test {
|
||||
std::string function;
|
||||
};
|
||||
|
||||
//!< saves lines of profiler output
|
||||
//!< saves lines of tracer output
|
||||
std::vector<std::string> output_lines;
|
||||
|
||||
public:
|
||||
@@ -72,12 +72,12 @@ class ApplicationParser : public ::testing::Test {
|
||||
void GetKernelInfoForGoldenOutput(const char* app_name, std::string filename,
|
||||
std::vector<KernelInfo>* kernel_info_output);
|
||||
|
||||
//!< Parses kernel-info after running profiler against curent application
|
||||
//!< Parses kernel-info after running tracer against curent application
|
||||
// and saves them in a vector.
|
||||
void GetKernelInfoForRunningApplication(std::vector<KernelInfo>* kernel_info_output);
|
||||
|
||||
private:
|
||||
//!< Runs a given appllication and saves profiler output.
|
||||
//!< Runs a given appllication and saves tracer output.
|
||||
// These output lines can be letter passed for kernel informations
|
||||
// i.e: kernel_names
|
||||
void ProcessApplication(std::stringstream& ss);
|
||||
@@ -89,12 +89,12 @@ class ApplicationParser : public ::testing::Test {
|
||||
|
||||
/* --------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @Synopsis Implementation of a ProfilerTest
|
||||
* @Synopsis Implementation of a Tracertest
|
||||
* Subsequent tests can use this to parse different applications
|
||||
*/
|
||||
/* --------------------------------------------------------------------------*/
|
||||
|
||||
class ProfilerTest : public ApplicationParser {
|
||||
class Tracertest : public ApplicationParser {
|
||||
protected:
|
||||
virtual void SetUp(const char* app_name, const char* trace_option) {
|
||||
ApplicationParser::SetUp(app_name, trace_option);
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle