SWDEV-398161, SWDEV-398764 Fixed --hsa-api for v2

Change-Id: I1902ff463851a3485b48f7b9400f7116a073365a


[ROCm/rocprofiler commit: c3107fc258]
Bu işleme şunda yer alıyor:
gobhardw
2023-05-15 19:01:07 +05:30
işlemeyi yapan: Ammar Elwazir
ebeveyn 39e398bfd7
işleme 614fd47cd3
10 değiştirilmiş dosya ile 652 ekleme ve 84 silme
+1
Dosyayı Görüntüle
@@ -212,6 +212,7 @@ The resulting `a.out` will depend on
### Added
- 'end_time' need to be disabled in roctx_trace.txt
- Every API trace in V2 reported synchronously will have two records, one for the Enter phase and one for the Exit phase
- support for hsa_amd_memory_async_copy_on_engine API function trace
### Fixed
- rocprof in ROCm/5.4.0 gpu selector broken.
- rocprof in ROCm/5.4.1 fails to generate kernel info.
+1 -1
Dosyayı Görüntüle
@@ -291,7 +291,7 @@ class file_plugin_t {
if (kernel_name.size() > 1) *output_file << " Kernel_Name(" << kernel_name.c_str() << "),";
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
*output_file << " Begin(" << tracer_record.timestamps.begin.value
<< "), End(" << tracer_record.timestamps.end.value << ")";
<< "), End(" << tracer_record.timestamps.end.value << "),";
} else {
if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX){
rocprofiler_timestamp_t timestamp;
+63 -28
Dosyayı Görüntüle
@@ -52,7 +52,6 @@ namespace {
hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_agent_t agent,
hsa_executable_symbol_t symbol, void* args) {
hsa_symbol_kind_t type;
rocmtools::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn(
symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type);
@@ -63,7 +62,7 @@ hsa_status_t hsa_executable_iteration_callback(hsa_executable_t executable, hsa_
// TODO(aelwazir): to be removed if the HSA fixed the issue of corrupted
// names overflowing the length given
if (name_length > 1) {
if(!(*static_cast<bool*>(args))) {
if (!(*static_cast<bool*>(args))) {
char name[name_length + 1];
uint64_t kernel_object;
rocmtools::hsa_support::GetCoreApiTable().hsa_executable_symbol_get_info_fn(
@@ -92,7 +91,8 @@ bool IsEnabled(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_i
return report && report(domain, operation_id, nullptr) == 0;
}
void ReportActivity(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id, void* data) {
void ReportActivity(rocprofiler_tracer_activity_domain_t domain, uint32_t operation_id,
void* data) {
if (auto report = report_activity.load(std::memory_order_relaxed))
report(domain, operation_id, data);
}
@@ -486,12 +486,14 @@ hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
return rocmtools::hsa_support::GetCoreApiTable().hsa_executable_destroy_fn(executable);
}
bool profiling_async_copy_enable = false;
std::atomic<bool> profiling_async_copy_enable{false};
hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) {
hsa_status_t status =
rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(enable);
if (status == HSA_STATUS_SUCCESS) profiling_async_copy_enable = enable;
if (status == HSA_STATUS_SUCCESS) {
profiling_async_copy_enable.exchange(enable, std::memory_order_release);
}
return status;
}
@@ -515,7 +517,7 @@ hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const vo
// FIXME: what happens if the state changes before returning?
[[maybe_unused]] hsa_status_t status =
rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(
profiling_async_copy_enable | is_enabled);
profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
if (!is_enabled) {
@@ -547,7 +549,7 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
// FIXME: what happens if the state changes before returning?
[[maybe_unused]] hsa_status_t status =
rocmtools::hsa_support::GetAmdExtTable().hsa_amd_profiling_async_copy_enable_fn(
profiling_async_copy_enable | is_enabled);
profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
if (!is_enabled) {
@@ -569,6 +571,36 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
return status;
}
// Intercept installed in place of hsa_amd_memory_async_copy_on_engine.
//
// When the HSA_OP_ID_COPY activity is not being reported, the call is forwarded
// unchanged. Otherwise a Tracker entry is created for the copy and the copy is
// issued with entry->signal instead of the caller's completion signal.
//
// Parameters mirror the underlying runtime call and are forwarded as-is, except
// for completion_signal on the traced path.
// Returns the status of the underlying copy call.
//
// NOTE(review): the sibling intercepts reach the runtime through
// rocmtools::hsa_support::GetAmdExtTable(), while this one uses
// saved_amd_ext_api -- confirm that both refer to the same saved table.
hsa_status_t MemoryASyncCopyOnEngineIntercept(
void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal,
hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) {
// True when the registered report callback accepts HSA_OP_ID_COPY activities.
bool is_enabled = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);
// FIXME: what happens if the state changes before returning?
// Async-copy profiling is switched on when either the application asked for it
// (profiling_async_copy_enable) or the copy activity is being traced.
[[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
profiling_async_copy_enable.load(std::memory_order_relaxed) || is_enabled);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
// Untraced path: forward the call untouched.
if (!is_enabled) {
return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal,
engine_id, force_copy_on_sdma);
}
// Traced path: the entry carries the handler and correlation id for this copy.
// NOTE(review): ownership of `entry` presumably passes to Tracker (released via
// Tracker::Disable or after the handler runs) -- confirm.
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = MemoryASyncCopyHandler;
entry->correlation_id = CorrelationId();
// Must happen before the copy is issued: the copy below uses entry->signal,
// which is presumably set up by Tracker::Enable -- TODO confirm.
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal, engine_id,
force_copy_on_sdma);
// The copy was never submitted, so tracking for this entry is cancelled.
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
return status;
}
} // namespace
rocprofiler_timestamp_t timestamp_ns() {
@@ -748,21 +780,21 @@ void Initialize(HsaApiTable* table) {
rocmtools::queue::InitializePools(cpu_agent);
break;
case HSA_DEVICE_TYPE_GPU:
// XXX FIXME: When multiple ranks are used, each rank's first
// logical device always has GPU ID 0, regardless of which
// physical device is selected with CUDA_VISIBLE_DEVICES.
// Because of this, when merging traces from multiple ranks,
// GPU IDs from different processes may overlap.
//
// The long term solution is to use KFD's gpu_id, which is
// stable across APIs and processes, but it isn't currently
// exposed by ROCr. We could use the agent's
// HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even
// that would be an improvement--it's what legacy roctracer
// is currently doing as well as the roctracer compatibility
// code earlier in this file.
agent_info.setIndex(gpu_agent_count++);
break;
// XXX FIXME: When multiple ranks are used, each rank's first
// logical device always has GPU ID 0, regardless of which
// physical device is selected with CUDA_VISIBLE_DEVICES.
// Because of this, when merging traces from multiple ranks,
// GPU IDs from different processes may overlap.
//
// The long term solution is to use KFD's gpu_id, which is
// stable across APIs and processes, but it isn't currently
// exposed by ROCr. We could use the agent's
// HSA_AMD_AGENT_INFO_DRIVER_NODE_ID in the meantime, as even
// that would be an improvement--it's what legacy roctracer
// is currently doing as well as the roctracer compatibility
// code earlier in this file.
agent_info.setIndex(gpu_agent_count++);
break;
default:
agent_info.setIndex(other_agent_count++);
break;
@@ -787,6 +819,8 @@ void Initialize(HsaApiTable* table) {
roctracer::hsa_support::MemoryASyncCopyRectIntercept;
table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn =
roctracer::hsa_support::ProfilingAsyncCopyEnableIntercept;
table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn =
roctracer::hsa_support::MemoryASyncCopyOnEngineIntercept;
// Install the HSA_EVT intercept
table->core_->hsa_memory_allocate_fn = roctracer::hsa_support::MemoryAllocateIntercept;
@@ -875,11 +909,11 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback
const rocprofiler_counter_info_t counter_info =
rocprofiler_counter_info_t{strdup(name.c_str()),
strdup(descr.c_str()),
expr.empty() ? nullptr : strdup(expr.c_str()),
query.instance_count,
block_name.c_str(),
block_counters};
strdup(descr.c_str()),
expr.empty() ? nullptr : strdup(expr.c_str()),
query.instance_count,
block_name.c_str(),
block_counters};
counters_info_callback(counter_info, gpu_name.c_str(), gpu_counter);
}
gpu_counter++;
@@ -890,7 +924,8 @@ bool IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback
// std::string expr_str;
// if (expr) expr_str = expr->GetStr().c_str();
// const rocprofiler_counter_info_t counter_info =
// rocprofiler_counter_info_t{start->first.c_str(), "", expr ? expr_str.c_str() : nullptr};
// rocprofiler_counter_info_t{start->first.c_str(), "", expr ? expr_str.c_str() :
// nullptr};
// counters_info_callback(counter_info, gpu_name.c_str(), gpu_counter);
// start++;
// }
+8
Dosyayı Görüntüle
@@ -29,6 +29,14 @@ set_source_files_properties(apps/hello_world.cpp PROPERTIES HIP_SOURCE_PROPERTY_
hip_add_executable(tracer_hip_helloworld apps/hello_world.cpp)
set_target_properties(tracer_hip_helloworld PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tests/featuretests/tracer/apps")
#hsa-mem_async_copy and async_copy_on_engine
set_source_files_properties(apps/copy_on_engine.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(copy_on_engine apps/copy_on_engine.cpp)
set_target_properties(copy_on_engine PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/tests/featuretests/tracer/apps")
target_link_options(copy_on_engine PRIVATE "-Wl,--build-id=md5")
install(TARGETS copy_on_engine RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/tests/featuretests/tracer/apps COMPONENT tests)
target_link_libraries(copy_on_engine hsa-runtime64::hsa-runtime64 Threads::Threads dl stdc++fs)
# Add test cpp file
add_executable(runTracerFeatureTests tracer_gtest.cpp
${GTEST_MAIN_SRC_FILE}
+338
Dosyayı Görüntüle
@@ -0,0 +1,338 @@
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
// This program illustrates the usage of the asynchronous copy capability of
// the RocR runtime library. The program will create a system memory buffer and
// a local buffer for each GPU, up to 2 GPUs, if the system has at least 2
// GPUs. The program will copy data to/from the host from/to the GPU. If 2
// GPUs are available, the program will also copy data from one to the other.
// Update: added an additional call to hsa_amd_memory_async_copy_on_engine.
// Checks an HSA status code. If it is not HSA_STATUS_SUCCESS, prints the call
// site and the status (numeric value plus the runtime's description, when one
// is available) to std::cout and returns the status from the enclosing function.
//
// The argument is evaluated exactly once. The body is wrapped in
// do { } while (0) so the macro behaves like a single statement; it must be
// followed by a semicolon.
#define RET_IF_HSA_ERR(err)                                                              \
  do {                                                                                   \
    const hsa_status_t ret_if_hsa_err_status_ = (err);                                   \
    if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) {                                  \
      /* Stays null if hsa_status_string() itself fails. */                              \
      const char* ret_if_hsa_err_msg_ = nullptr;                                         \
      hsa_status_string(ret_if_hsa_err_status_, &ret_if_hsa_err_msg_);                   \
      std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << __FILE__ \
                << ". Call returned " << ret_if_hsa_err_status_ << std::endl;            \
      std::cout << (ret_if_hsa_err_msg_ != nullptr ? ret_if_hsa_err_msg_                 \
                                                   : "(no status string available)")     \
                << std::endl;                                                            \
      return ret_if_hsa_err_status_;                                                     \
    }                                                                                    \
  } while (0)
// 32-bit patterns written into the source buffer by the copy tests started
// from main(); each copy direction uses a different value.
static const uint32_t kTestFillValue1 = 0xabcdef12;
static const uint32_t kTestFillValue2 = 0xba5eba11;
static const uint32_t kTestFillValue3 = 0xfeed5a1e;
// Pattern written into the system and destination buffers before a copy so
// that the test can tell whether the copy actually overwrote them.
static const uint32_t kTestInitValue = 0xbaadf00d;
// This structure holds an agent pointer and associated memory pool to be used
// for this test program.
struct async_mem_cpy_agent {
// Agent handle; reset to {0} by the discovery callbacks when no usable pool
// is found for the agent.
hsa_agent_t dev;
// Memory pool selected by FindPool().
hsa_amd_memory_pool_t pool;
// Runtime allocation granule of `pool`, as reported by
// HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE.
size_t granule;
// Buffer allocated from `pool` in main().
void* ptr;
};
// Argument block passed to FindPool() through
// hsa_amd_agent_iterate_memory_pools().
struct async_mem_cpy_pool_query {
// Output: receives the selected pool and its allocation granule.
async_mem_cpy_agent* pool_info;
// Optional peer agent. When its handle is non-zero, FindPool() skips pools
// this agent is never allowed to access.
hsa_agent_t peer_device;
};
// Agents and pools discovered for the test: one CPU and up to two GPUs.
// FindGPUs() fills gpu1 first and gpu2 second; main() fills cpu through
// FindCPUDevice().
struct callback_args {
struct async_mem_cpy_agent cpu;
struct async_mem_cpy_agent gpu1;
struct async_mem_cpy_agent gpu2;
};
// Callback for hsa_amd_agent_iterate_memory_pools(). Selects the first memory
// pool that 1) is in the GLOBAL segment, 2) allows runtime allocation and
// 3) is not marked "never allowed" for the peer agent, when a peer agent is
// given. "data" must point to a struct async_mem_cpy_pool_query; on a match
// the pool and its allocation granule are stored in query->pool_info.
// Return values:
//  HSA_STATUS_INFO_BREAK -- A suitable pool has been found and stored.
//                           Iterator should stop iterating
//  HSA_STATUS_SUCCESS -- This pool is not suitable; iterator should keep
//                        iterating
//  Other -- Some error occurred
static hsa_status_t FindPool(hsa_amd_memory_pool_t in_pool, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  auto* query = static_cast<async_mem_cpy_pool_query*>(data);

  // Criterion 1: only pools in the global segment are of interest.
  hsa_amd_segment_t pool_segment;
  hsa_status_t status =
      hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  RET_IF_HSA_ERR(status);
  if (pool_segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Criterion 2: the runtime must be allowed to allocate from the pool.
  bool alloc_allowed;
  status = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
                                        &alloc_allowed);
  RET_IF_HSA_ERR(status);
  if (!alloc_allowed) {
    return HSA_STATUS_SUCCESS;
  }

  // Criterion 3 (only when a peer was supplied): the peer must be able to
  // reach the pool.
  const bool has_peer = (query->peer_device.handle != 0);
  if (has_peer) {
    hsa_amd_memory_pool_access_t peer_access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
    status = hsa_amd_agent_memory_pool_get_info(query->peer_device, in_pool,
                                                HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
                                                &peer_access);
    RET_IF_HSA_ERR(status);
    if (peer_access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
      return HSA_STATUS_SUCCESS;
    }
  }

  // The pool qualifies: record its granule and the pool itself.
  status = hsa_amd_memory_pool_get_info(in_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
                                        &query->pool_info->granule);
  RET_IF_HSA_ERR(status);
  query->pool_info->pool = in_pool;
  return HSA_STATUS_INFO_BREAK;
}
// Find the least common multiple of 2 numbers.
//
// Returns 0 when either argument is 0. The product is formed in 64 bits and
// then narrowed, so a result that does not fit in 32 bits is truncated to its
// low 32 bits.
static uint32_t lcm(uint32_t a, uint32_t b) {
  // Stepping by repeated addition never terminates when exactly one argument
  // is 0, so handle that case up front.
  if (a == 0 || b == 0) {
    return 0;
  }
  // Euclid's algorithm: after the loop, `divisor` holds gcd(a, b).
  uint32_t divisor = a;
  uint32_t remainder = b;
  while (remainder != 0) {
    const uint32_t next = divisor % remainder;
    divisor = remainder;
    remainder = next;
  }
  // lcm(a, b) = (a / gcd) * b; dividing first keeps the intermediate small.
  const uint64_t multiple = static_cast<uint64_t>(a / divisor) * b;
  return static_cast<uint32_t>(multiple);
}
// Callback for hsa_iterate_agents(). Collects up to two GPU agents that each
// have a pool accepted by FindPool(). "data" must point to a struct
// callback_args; the first qualifying GPU is stored in gpu1, the second in
// gpu2. For the second GPU the pool must also be accessible to gpu1.
// Return values:
//  HSA_STATUS_INFO_BREAK -- Two GPUs have been found; iterator should stop
//  HSA_STATUS_SUCCESS -- Keep iterating (agent is not a GPU, has no usable
//                        pool, or only the first GPU has been found so far)
//  Other -- Some error occurred
static hsa_status_t FindGPUs(hsa_agent_t agent, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_type;
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  RET_IF_HSA_ERR(status);
  if (device_type != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_SUCCESS;
  }

  auto* found = static_cast<callback_args*>(data);
  // gpu1 is filled first; once it holds an agent, candidates go to gpu2.
  const bool filling_first_slot = (found->gpu1.dev.handle == 0);
  async_mem_cpy_agent* slot = filling_first_slot ? &found->gpu1 : &found->gpu2;

  async_mem_cpy_pool_query pool_query = {nullptr, {0}};
  if (!filling_first_slot) {
    // Check that gpu1 has peer access into the selected pool.
    pool_query.peer_device = found->gpu1.dev;
  }

  // Tentatively claim the slot; it is released again below if no pool fits.
  slot->dev = agent;
  pool_query.pool_info = slot;
  status = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query);
  if (status == HSA_STATUS_INFO_BREAK) {
    // A pool was found. Stop once the second GPU is in place, otherwise keep
    // looking for another GPU.
    return filling_first_slot ? HSA_STATUS_SUCCESS : HSA_STATUS_INFO_BREAK;
  }

  slot->dev = {0};
  RET_IF_HSA_ERR(status);
  // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
  return HSA_STATUS_SUCCESS;
}
// Callback for hsa_iterate_agents(). For each agent the iterator provides,
// this function checks whether it is a CPU agent. If so, it stores the agent
// in the struct async_mem_cpy_agent pointed to by "data" and looks for a
// usable memory pool on it with FindPool().
// Return values:
//  HSA_STATUS_INFO_BREAK -- CPU agent with a usable pool has been found and
//                           stored. Iterator should stop iterating
//  HSA_STATUS_SUCCESS -- CPU agent has not yet been found; iterator
//                        should keep iterating
//  Other -- Some error occurred
static hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_type;
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  RET_IF_HSA_ERR(status);
  if (device_type != HSA_DEVICE_TYPE_CPU) {
    // Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
    return HSA_STATUS_SUCCESS;
  }

  auto* cpu = static_cast<async_mem_cpy_agent*>(data);
  cpu->dev = agent;

  // No peer restriction applies when selecting the CPU pool.
  async_mem_cpy_pool_query pool_query;
  pool_query.pool_info = cpu;
  pool_query.peer_device.handle = 0;

  status = hsa_amd_agent_iterate_memory_pools(agent, FindPool, &pool_query);
  if (status != HSA_STATUS_INFO_BREAK) {
    // No usable pool on this agent (or an error): forget the agent again and
    // hand the iteration status back to the caller.
    cpu->dev = {0};
  }
  return status;
}
// This is the main test, showing various paths of async. copy. Source and
// destination agents and their respective pools should already be discovered.
// Additionally, buffers from the pools should already be allocated and
// available from the input parameters.
//
// Parameters:
//  dst, src -- destination and source of the copy (agent + buffer)
//  args     -- all discovered agents; args->cpu supplies the system buffer
//              used for verification
//  sz       -- number of bytes to copy
//  val      -- 32-bit pattern written to the source and expected afterwards
// Returns HSA_STATUS_SUCCESS when the data read back matches `val`,
// HSA_STATUS_ERROR on a signal or data mismatch, or the failing HSA status.
//
// The sequence of HSA calls is intentionally left as is: the tracer feature
// test compares the recorded API calls of this program against a golden trace.
static hsa_status_t AsyncCpyTest(async_mem_cpy_agent* dst, async_mem_cpy_agent* src,
                                 callback_args* args, size_t sz, uint32_t val) {
  hsa_status_t err;
  hsa_signal_t copy_signal;

  // Initialize the system and destination buffers with a value so we can later
  // validate it has been overwritten
  void* sysPtr = args->cpu.ptr;
  err = hsa_amd_memory_fill(sysPtr, kTestInitValue, sz / sizeof(uint32_t));
  RET_IF_HSA_ERR(err);
  if (dst->ptr != sysPtr) {
    err = hsa_amd_memory_fill(dst->ptr, kTestInitValue, sz / sizeof(uint32_t));
    RET_IF_HSA_ERR(err);
  }

  // Fill the source buffer with the provided uint32_t value
  err = hsa_amd_memory_fill(src->ptr, val, sz / sizeof(uint32_t));
  RET_IF_HSA_ERR(err);

  // Make sure the target and destination agents have access to the buffer.
  hsa_agent_t ag_list[2] = {dst->dev, src->dev};
  err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst->ptr);
  RET_IF_HSA_ERR(err);

  // Create a signal that will be used to inform us when the copy is done
  // NOTE(review): the signal is never destroyed; adding hsa_signal_destroy()
  // would change the recorded API trace, so it is left for a follow-up.
  err = hsa_signal_create(1, 0, NULL, &copy_signal);
  RET_IF_HSA_ERR(err);

  // Do the copy...
  err = hsa_amd_memory_async_copy(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL, copy_signal);
  RET_IF_HSA_ERR(err);

  // call following APIs to make sure we intercept hsa_amd_memory_async_copy_on_engine
  // The statuses of these two calls are deliberately not checked here.
  // NOTE(review): presumably because the engine query may report that no SDMA
  // engine is available -- confirm whether a failure should fail the test.
  uint32_t engine_ids_mask = 0;
  err = hsa_amd_memory_copy_engine_status(args->cpu.dev, args->gpu1.dev, &engine_ids_mask);
  hsa_amd_sdma_engine_id_t engine_id = HSA_AMD_SDMA_ENGINE_0;
  // NOTE(review): this second copy shares copy_signal (initial value 1) with
  // the copy above. If each completed copy decrements the signal, the wait
  // below can return after only one of them has finished, or observe a value
  // other than 0 -- confirm and consider giving each copy its own signal.
  err = hsa_amd_memory_async_copy_on_engine(dst->ptr, dst->dev, src->ptr, src->dev, sz, 0, NULL,
                                            copy_signal, engine_id, false);

  // Here we do a blocking wait. Alternatively, we could also use a
  // non-blocking wait in a loop, and do other work while waiting.
  if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1,
                              HSA_WAIT_STATE_BLOCKED) != 0) {
    printf("Async copy returned error value.\n");
    return HSA_STATUS_ERROR;
  }

  // Verify the copy was successful; copy from the dst buffer to the sysBuf,
  // (if the result is not already in sys. mem.) and check the sysBuf values
  if (dst->ptr != sysPtr) {
    if (src->ptr != sysPtr) {
      // In this case, we need to give the gpu dev that owns dst->ptr access
      // to the system memory we are going to copy to.
      hsa_agent_t ag_list_ck[2] = {dst->dev, args->cpu.dev};
      err = hsa_amd_agents_allow_access(2, ag_list_ck, NULL, sysPtr);
      RET_IF_HSA_ERR(err);
    }
    // Reset signal to 1
    hsa_signal_store_screlease(copy_signal, 1);
    err = hsa_amd_memory_async_copy(sysPtr, args->cpu.dev, dst->ptr, dst->dev, sz, 0, NULL,
                                    copy_signal);
    RET_IF_HSA_ERR(err);
    if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT, 1, -1,
                                HSA_WAIT_STATE_BLOCKED) != 0) {
      printf("Async copy returned error value.\n");
      return HSA_STATUS_ERROR;
    }
  }

  // Check that the contents of the buffer are what is expected.
  const uint32_t* sys_words = static_cast<const uint32_t*>(sysPtr);
  for (uint32_t i = 0; i < sz / sizeof(uint32_t); ++i) {
    if (sys_words[i] != val) {
      // The index is unsigned, so it is printed with %u rather than %d.
      fprintf(stdout, "Expected 0x%x but got 0x%x in buffer at index %u.\n", val, sys_words[i],
              i);
      return HSA_STATUS_ERROR;
    }
  }
  return HSA_STATUS_SUCCESS;
}
// Entry point of the copy test: discovers one CPU and one or two GPUs with
// usable memory pools, allocates one buffer per agent, and runs AsyncCpyTest
// for GPU1 -> system memory, system memory -> GPU1 and, when two GPUs are
// present, GPU1 -> GPU2.
//
// Returns 0 on success, -1 when the required agents cannot be found, or the
// failing HSA status value when an HSA call fails.
//
// The sequence of HSA calls is intentionally left as is: the tracer feature
// test compares the recorded API calls of this program against a golden trace.
int main() {
  hsa_status_t err;
  struct callback_args args;
  bool twoGPUs = false;

  err = hsa_init();
  RET_IF_HSA_ERR(err);

  // First, find the cpu agent and associated pool
  args.cpu = {0, 0, 0};
  err = hsa_iterate_agents(FindCPUDevice, reinterpret_cast<void*>(&args.cpu));
  assert(err == HSA_STATUS_INFO_BREAK);
  // Repeated as a runtime check so that builds with NDEBUG still bail out.
  if (err != HSA_STATUS_INFO_BREAK) {
    return -1;
  }

  // Now, find 1 or 2 (if possible) GPUs and associated pool(s) for our test
  args.gpu1 = {0, 0, 0};
  args.gpu2 = {0, 0, 0};
  err = hsa_iterate_agents(FindGPUs, &args);
  if (err == HSA_STATUS_INFO_BREAK) {
    twoGPUs = true;
  } else {
    // See if we at least have 1 GPU
    if (args.gpu1.dev.handle == 0) {
      fprintf(stdout, "GPU with accessible VRAM not found; at least 1 required. Exiting\n");
      return -1;
    }
    fprintf(stdout,
            "Only 1 GPU found with required VRAM. "
            "Peer-to-Peer copy will be skipped.\n");
  }

  // We will use the smallest amount of allocatable memory that works for all
  // potential sources and destinations of the copy
  // NOTE(review): lcm() takes uint32_t, so the size_t granules are narrowed
  // here -- confirm that granules always fit in 32 bits.
  size_t sz = lcm(args.cpu.granule, args.gpu1.granule);

  // Allocate memory on each source/destination
  if (twoGPUs) {
    sz = lcm(sz, args.gpu2.granule);
    err = hsa_amd_memory_pool_allocate(args.gpu2.pool, sz, 0,
                                       reinterpret_cast<void**>(&args.gpu2.ptr));
    RET_IF_HSA_ERR(err);
  }
  err = hsa_amd_memory_pool_allocate(args.cpu.pool, sz, 0, reinterpret_cast<void**>(&args.cpu.ptr));
  RET_IF_HSA_ERR(err);
  err =
      hsa_amd_memory_pool_allocate(args.gpu1.pool, sz, 0, reinterpret_cast<void**>(&args.gpu1.ptr));
  RET_IF_HSA_ERR(err);

  // Print the agent names. The statuses of these informational queries are
  // not checked.
  char name[64];
  err = hsa_agent_get_info(args.cpu.dev, HSA_AGENT_INFO_NAME, &name);
  fprintf(stdout, "CPU is \"%s\"\n", name);
  err = hsa_agent_get_info(args.gpu1.dev, HSA_AGENT_INFO_NAME, &name);
  fprintf(stdout, "GPU1 is \"%s\"\n", name);
  if (twoGPUs) {
    err = hsa_agent_get_info(args.gpu2.dev, HSA_AGENT_INFO_NAME, &name);
    fprintf(stdout, "GPU2 is \"%s\"\n", name);
  }

  // sz is a size_t, so it is printed with %zu rather than %lu.
  fprintf(stdout, "Copying %zu bytes from gpu1 memory to system memory...\n", sz);
  err = AsyncCpyTest(&args.cpu, &args.gpu1, &args, sz, kTestFillValue1);
  RET_IF_HSA_ERR(err);
  fprintf(stdout, "Success!\n");

  fprintf(stdout, "Copying %zu bytes from system memory to gpu1 memory...\n", sz);
  err = AsyncCpyTest(&args.gpu1, &args.cpu, &args, sz, kTestFillValue2);
  RET_IF_HSA_ERR(err);
  fprintf(stdout, "Success!\n");

  if (twoGPUs) {
    fprintf(stdout, "Copying %zu bytes from gpu1 memory to gpu2 memory...\n", sz);
    err = AsyncCpyTest(&args.gpu2, &args.gpu1, &args, sz, kTestFillValue3);
    RET_IF_HSA_ERR(err);
    fprintf(stdout, "Success!\n");
  }

  // Clean up
  err = hsa_amd_memory_pool_free(args.cpu.ptr);
  RET_IF_HSA_ERR(err);
  err = hsa_amd_memory_pool_free(args.gpu1.ptr);
  RET_IF_HSA_ERR(err);
  if (twoGPUs) {
    err = hsa_amd_memory_pool_free(args.gpu2.ptr);
    RET_IF_HSA_ERR(err);
  }
}
@@ -0,0 +1,5 @@
Record(68), Domain(HSA_OPS_DOMAIN), Begin(357395893062183), End(357395893066774) Correlation_ID(22)
Record(78), Domain(HSA_OPS_DOMAIN), Begin(357395893673708), End(357395893677559) Correlation_ID(24)
Record(103), Domain(HSA_OPS_DOMAIN), Begin(357395893769384), End(357395893773087) Correlation_ID(31)
Record(110), Domain(HSA_OPS_DOMAIN), Begin(357395893777826), End(357395893781529) Correlation_ID(33)
Record(116), Domain(HSA_OPS_DOMAIN), Begin(357395893796340), End(357395893799598) Correlation_ID(36)
@@ -21,4 +21,3 @@ Record(24), Domain(HIP_API_DOMAIN), Function(hipMemcpy), End(2995594192228011),
Record(26), Domain(HIP_API_DOMAIN), Function(hipFree), Begin(2995594192237078), Correlation_ID(9)
Record(27), Domain(HIP_API_DOMAIN), Function(hipFree), End(2995594192256085), Correlation_ID(9)
Record(29), Domain(HIP_API_DOMAIN), Function(hipFree), Begin(2995594192259622), Correlation_ID(10)
Record(30), Domain(HIP_API_DOMAIN), Function(hipFree), End(2995594192264101), Correlation_ID(10)
@@ -0,0 +1,78 @@
Record(1), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), Begin(357395885625757), Correlation_ID(1)
Record(2), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885636767), Correlation_ID(2)
Record(3), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885640407), Correlation_ID(2)
Record(5), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), Begin(357395885645827), Correlation_ID(3)
Record(6), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885650247), Correlation_ID(4)
Record(7), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885653007), Correlation_ID(4)
Record(9), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885655747), Correlation_ID(5)
Record(10), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885658277), Correlation_ID(5)
Record(12), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885660877), Correlation_ID(6)
Record(13), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885663467), Correlation_ID(6)
Record(15), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), End(357395885666197), Correlation_ID(3)
Record(17), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), End(357395885668847), Correlation_ID(1)
Record(19), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), Begin(357395885671437), Correlation_ID(7)
Record(20), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885674007), Correlation_ID(8)
Record(21), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885676607), Correlation_ID(8)
Record(23), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885679327), Correlation_ID(9)
Record(24), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885681837), Correlation_ID(9)
Record(26), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), Begin(357395885684537), Correlation_ID(10)
Record(27), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885687177), Correlation_ID(11)
Record(28), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885690227), Correlation_ID(11)
Record(30), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885692857), Correlation_ID(12)
Record(31), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885695397), Correlation_ID(12)
Record(33), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), Begin(357395885697917), Correlation_ID(13)
Record(34), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_get_info), End(357395885700357), Correlation_ID(13)
Record(36), Domain(HSA_API_DOMAIN), Function(hsa_amd_agent_iterate_memory_pools), End(357395885702907), Correlation_ID(10)
Record(38), Domain(HSA_API_DOMAIN), Function(hsa_iterate_agents), End(357395885705447), Correlation_ID(7)
Record(40), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), Begin(357395885713627), Correlation_ID(14)
Record(41), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), End(357395885737288), Correlation_ID(14)
Record(43), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), Begin(357395885740878), Correlation_ID(15)
Record(44), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_allocate), End(357395885804208), Correlation_ID(15)
Record(46), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885809478), Correlation_ID(16)
Record(47), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885812198), Correlation_ID(16)
Record(49), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), Begin(357395885819548), Correlation_ID(17)
Record(50), Domain(HSA_API_DOMAIN), Function(hsa_agent_get_info), End(357395885822698), Correlation_ID(17)
Record(52), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395885831788), Correlation_ID(18)
Record(53), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395885836338), Correlation_ID(18)
Record(55), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395885839008), Correlation_ID(19)
Record(56), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395892240457), Correlation_ID(19)
Record(58), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), Begin(357395892269217), Correlation_ID(20)
Record(59), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), End(357395892311137), Correlation_ID(20)
Record(61), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), Begin(357395892318827), Correlation_ID(21)
Record(62), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), End(357395892324097), Correlation_ID(21)
Record(64), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395892328127), Correlation_ID(22)
Record(65), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893033290), Correlation_ID(22)
Record(67), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), Begin(357395893039770), Correlation_ID(23)
Record(69), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), End(357395893655353), Correlation_ID(23)
Record(71), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), Begin(357395893661713), Correlation_ID(24)
Record(72), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), End(357395893668973), Correlation_ID(24)
Record(74), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893671633), Correlation_ID(25)
Record(75), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893673753), Correlation_ID(25)
Record(77), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893687933), Correlation_ID(26)
Record(79), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893714413), Correlation_ID(26)
Record(81), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893716553), Correlation_ID(27)
Record(82), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893728583), Correlation_ID(27)
Record(84), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), Begin(357395893730223), Correlation_ID(28)
Record(85), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_fill), End(357395893742133), Correlation_ID(28)
Record(87), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), Begin(357395893744023), Correlation_ID(29)
Record(88), Domain(HSA_API_DOMAIN), Function(hsa_amd_agents_allow_access), End(357395893747313), Correlation_ID(29)
Record(90), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), Begin(357395893749013), Correlation_ID(30)
Record(91), Domain(HSA_API_DOMAIN), Function(hsa_signal_create), End(357395893751313), Correlation_ID(30)
Record(93), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395893753503), Correlation_ID(31)
Record(94), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893759683), Correlation_ID(31)
Record(96), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), Begin(357395893762933), Correlation_ID(32)
Record(97), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_copy_engine_status), End(357395893764903), Correlation_ID(32)
Record(99), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), Begin(357395893766573), Correlation_ID(33)
Record(100), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy_on_engine), End(357395893770533), Correlation_ID(33)
Record(102), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893772203), Correlation_ID(34)
Record(104), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893775493), Correlation_ID(34)
Record(106), Domain(HSA_API_DOMAIN), Function(hsa_signal_store_screlease), Begin(357395893778003), Correlation_ID(35)
Record(107), Domain(HSA_API_DOMAIN), Function(hsa_signal_store_screlease), End(357395893779583), Correlation_ID(35)
Record(109), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), Begin(357395893781263), Correlation_ID(36)
Record(111), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_async_copy), End(357395893786634), Correlation_ID(36)
Record(113), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), Begin(357395893795734), Correlation_ID(37)
Record(114), Domain(HSA_API_DOMAIN), Function(hsa_signal_wait_relaxed), End(357395893797624), Correlation_ID(37)
Record(117), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), Begin(357395893802314), Correlation_ID(38)
Record(118), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), End(357395893829684), Correlation_ID(38)
Record(120), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), Begin(357395893833114), Correlation_ID(39)
Record(121), Domain(HSA_API_DOMAIN), Function(hsa_amd_memory_pool_free), End(357395893835624), Correlation_ID(39)
+152 -48
Dosyayı Görüntüle
@@ -19,6 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include <ostream>
#include <vector>
#include "tracer_gtest.h"
#include "../utils/test_utils.h"
@@ -33,8 +35,19 @@ void ApplicationParser::SetApplicationEnv(const char* app_name, const char* trac
hsa_tools_lib_path << app_path << "librocprofiler_tool.so";
setenv("LD_PRELOAD", hsa_tools_lib_path.str().c_str(), true);
// set --hip-api option
setenv("ROCPROFILER_HIP_API_TRACE", "1", true);
std::string trace_type{trace_option};
if (trace_type.find("hip") != std::string::npos) {
// set --hip-api option
setenv("ROCPROFILER_HIP_API_TRACE", "1", true);
}
if (trace_type.find("hsa") != std::string::npos) {
// set --hsa-api and --hsa-activity
setenv("ROCPROFILER_HSA_API_TRACE", "1", true);
setenv("ROCPROFILER_HSA_ACTIVITY_TRACE", "1", true);
}
std::stringstream os;
os << app_path << "tests/featuretests/tracer/apps/" << app_name;
@@ -42,32 +55,45 @@ void ApplicationParser::SetApplicationEnv(const char* app_name, const char* trac
}
/**
* Parses kernel-info after running profiler against curent application
* Parses kernel-info after running tracer against current application
* and saves them in a vector.
*/
void ApplicationParser::GetKernelInfoForRunningApplication(
std::vector<KernelInfo>* kernel_info_output) {
KernelInfo kinfo;
for (std::string line : output_lines) {
if (std::regex_match(line, std::regex("(Record)(.*)"))) {
int spos = line.find("[");
int epos = line.find("]", spos);
std::string sub = line.substr(spos + 1, epos - spos - 1);
// if (std::regex_match(line, std::regex("(Record)(.*)"))) {
// Record id
size_t found = line.find("Record");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("Record").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.record_id = sub;
kernel_info_output->push_back(kinfo);
// Kernel-Name
size_t found = line.find("Function");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("kernel-name").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.function = sub;
kernel_info_output->push_back(kinfo);
}
}
// Kernel-Name
found = line.find("Function");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("Function").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.function = sub;
}
// corealtion-ids
found = line.find("Correlation_ID");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("Correlation_ID").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.corelation_id = sub;
}
kernel_info_output->push_back(kinfo);
}
}
@@ -86,7 +112,7 @@ void ApplicationParser::GetKernelInfoForGoldenOutput(const char* app_name, std::
}
/**
* Runs a given appllication and saves profiler output.
* Runs a given application and saves tracer output.
* These output lines can later be parsed for kernel information
* i.e: kernel_names
*/
@@ -125,45 +151,70 @@ void ApplicationParser::ParseKernelInfoFields(const std::string& s,
std::ifstream golden_file(s);
while (!golden_file.eof()) {
getline(golden_file, line);
if (std::regex_match(line, std::regex("(Record)(.*)"))) {
int spos = line.find("[");
int epos = line.find("]", spos);
std::string sub = line.substr(spos + 1, epos - spos - 1);
// if (std::regex_match(line, std::regex("(Record)(.*)"))) {
// Record id
size_t found = line.find("Record");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("Record").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.record_id = sub;
kernel_info_output->push_back(kinfo);
// Kernel-Name
size_t found = line.find("Function");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("kernel-name").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.function = sub;
kernel_info_output->push_back(kinfo);
}
// kernel_info_output->push_back(kinfo);
}
// Function name: the text between "Function(" and the next ")".
found = line.find("Function");
if (found != std::string::npos) {
  int spos = found;
  int epos = line.find(")", spos);
  // Skip the key that was actually searched for ("Function"). Using the
  // length of the legacy "kernel-name" key here would start the substring
  // three characters too late and truncate the start of every function name.
  int length = std::string("Function").length();
  std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
  kinfo.function = sub;
  // kernel_info_output->push_back(kinfo);
}
// corealtion-ids
found = line.find("Correlation_ID");
if (found != std::string::npos) {
int spos = found;
int epos = line.find(")", spos);
int length = std::string("Correlation_ID").length();
std::string sub = line.substr(spos + length + 1, epos - spos - length - 1);
kinfo.corelation_id = sub;
// kernel_info_output->push_back(kinfo);
}
//}
kernel_info_output->push_back(kinfo);
}
golden_file.close();
}
/*
* ###################################################
* ############ HelloWorld HIP Tests ################
* ###################################################
*/
constexpr auto kGoldenOutputHelloworld = "hip_helloworld_golden_traces.txt";
class HelloWorldTest : public ProfilerTest {
class HelloWorldTest : public Tracertest {
protected:
std::vector<KernelInfo> golden_kernel_info;
void SetUp() {
ProfilerTest::SetUp("tracer_hip_helloworld", "--hip-api ");
Tracertest::SetUp("tracer_hip_helloworld", "--hip-api ");
GetKernelInfoForGoldenOutput("tracer_hip_helloworld", kGoldenOutputHelloworld,
&golden_kernel_info);
}
void TearDown() { output_lines.clear(); }
};
// Test:1 Compares total num of kernel-names in golden output against current
// profiler output
// tracer output
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelInfoMatchWithGoldenOutput) {
// kernel info in current profiler run
// kernel info in current profler run
std::vector<KernelInfo> current_kernel_info;
GetKernelInfoForRunningApplication(&current_kernel_info);
@@ -173,9 +224,9 @@ TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelInfoMatchWithGoldenOutp
}
// Test:2 Compares order of kernel-names in golden output against current
// profiler output
TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenFunctionNamessMatchWithGoldenOutput) {
// kernel info in current profiler run
// tracer output
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenFunctionNamessMatchWithGoldenOutput) {
// kernel info in current tracer run
std::vector<KernelInfo> current_kernel_info;
GetKernelInfoForRunningApplication(&current_kernel_info);
@@ -186,13 +237,66 @@ TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenFunctionNamessMatchWithGold
}
// Test:3 Compares order of kernel-names in golden output against current
// profiler output
TEST_F(HelloWorldTest, WhenRunningProfilerWithAppThenKernelDurationShouldBePositive) {
// kernel info in current profiler run
// tracer output
TEST_F(HelloWorldTest, WhenRunningTracerWithAppThenKernelDurationShouldBePositive) {
  // Records parsed from the current tracer run.
  std::vector<KernelInfo> current_kernel_info;
  GetKernelInfoForRunningApplication(&current_kernel_info);

  // NOTE(review): despite the test name, no duration is inspected here; the
  // test only verifies that at least one record was parsed.
  ASSERT_FALSE(current_kernel_info.empty());
  // Unsigned literal keeps the comparison size_t-vs-unsigned and avoids a
  // signed/unsigned comparison inside the EXPECT_GT helper.
  EXPECT_GT(current_kernel_info.size(), 0u);
}
/*
* ###################################################
* ############ Async Copy HSA Tests #################
* ###################################################
*/
/**
 * Fixture for the HSA async-copy tracer tests. Sets up the tracer test for
 * the "copy_on_engine" application with the HSA API and HSA activity trace
 * options.
 */
class AsyncCopyTest : public Tracertest {
 protected:
  // Forwards to the two-argument Tracertest::SetUp overload with this
  // suite's application name and trace options.
  void SetUp() {
    Tracertest::SetUp("copy_on_engine", "--hsa-api --hsa-activity");
  }

  // Discards the tracer output lines held by the fixture.
  void TearDown() {
    output_lines.clear();
  }
};
// Test:1 Checks that the tracer run for the async-copy application yields
// at least one parsed record
TEST_F(AsyncCopyTest, WhenRunningTracerWithAppThenAsyncCopyOutputIsgenerated) {
  // Parse the records of the current tracer run.
  std::vector<KernelInfo> traced_records;
  GetKernelInfoForRunningApplication(&traced_records);

  // The run must have yielded at least one parsed record.
  ASSERT_FALSE(traced_records.empty());
}
// Test:2 Matches correlation IDs
TEST_F(AsyncCopyTest, WhenRunningTracerWithAppThenAsyncCorelationCountIsCorrect) {
  // Parse the records of the current tracer run.
  std::vector<KernelInfo> traced_records;
  GetKernelInfoForRunningApplication(&traced_records);
  ASSERT_FALSE(traced_records.empty());

  // (record id, correlation id) for every record whose function name
  // contains "async_copy_on_engine".
  std::vector<std::pair<std::string, std::string>> engine_copy_ids;
  for (const auto& record : traced_records) {
    if (record.function.find("async_copy_on_engine") == std::string::npos) continue;
    engine_copy_ids.emplace_back(record.record_id, record.corelation_id);
  }
  ASSERT_FALSE(engine_copy_ids.empty());

  // For each collected pair, count the records that share its correlation id
  // but carry a different record id.
  uint32_t matching_records = 0;
  for (const auto& ids : engine_copy_ids) {
    for (const auto& record : traced_records) {
      const bool same_correlation = record.corelation_id == ids.second;
      const bool other_record = record.record_id != ids.first;
      if (same_correlation && other_record) ++matching_records;
    }
  }

  // The number of such matches must exceed the number of collected pairs.
  EXPECT_GT(matching_records, engine_copy_ids.size());
}
+6 -6
Dosyayı Görüntüle
@@ -35,7 +35,7 @@ THE SOFTWARE.
/* --------------------------------------------------------------------------*/
/**
* @Synopsis Implementation of a Parser class for Profiler output
* @Synopsis Implementation of a Parser class for Tracer output
* Parses pre-saved golden output for kernel info and saves them in a vector
* Executes appliaction(passed as param:app_name) and saves parsed kernel info
* in a vector.
@@ -60,7 +60,7 @@ class ApplicationParser : public ::testing::Test {
std::string function;
};
//!< saves lines of profiler output
//!< saves lines of tracer output
std::vector<std::string> output_lines;
public:
@@ -72,12 +72,12 @@ class ApplicationParser : public ::testing::Test {
void GetKernelInfoForGoldenOutput(const char* app_name, std::string filename,
std::vector<KernelInfo>* kernel_info_output);
//!< Parses kernel-info after running profiler against curent application
//!< Parses kernel-info after running tracer against current application
// and saves them in a vector.
void GetKernelInfoForRunningApplication(std::vector<KernelInfo>* kernel_info_output);
private:
//!< Runs a given appllication and saves profiler output.
//!< Runs a given application and saves tracer output.
// These output lines can later be parsed for kernel information
// i.e: kernel_names
void ProcessApplication(std::stringstream& ss);
@@ -89,12 +89,12 @@ class ApplicationParser : public ::testing::Test {
/* --------------------------------------------------------------------------*/
/**
* @Synopsis Implementation of a ProfilerTest
* @Synopsis Implementation of a Tracertest
* Subsequent tests can use this to parse different applications
*/
/* --------------------------------------------------------------------------*/
class ProfilerTest : public ApplicationParser {
class Tracertest : public ApplicationParser {
protected:
virtual void SetUp(const char* app_name, const char* trace_option) {
ApplicationParser::SetUp(app_name, trace_option);