Merge "standalone/sampling mode" into amd-master

[ROCm/rocprofiler commit: e56fc0adda]
Этот коммит содержится в:
Evgeny Shcherbakov
2018-09-19 16:33:09 -04:00
коммит произвёл Gerrit Code Review
родитель ad60f25d92 443b15bc97
Коммит e16847ef21
16 изменённых файлов: 368 добавлений и 127 удалений
+2 -3
Просмотреть файл
@@ -224,10 +224,9 @@ typedef struct {
// Profiling callback type
typedef hsa_status_t (*rocprofiler_callback_t)(
const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on
// the callback API id
const rocprofiler_callback_data_t* callback_data, // [in] callback data
void* user_data, // [in/out] user data passed to the callback
rocprofiler_group_t* group); // [out] profiling group
rocprofiler_group_t* group); // [out] returned profiling group
// Queue callbacks
typedef struct {
-9
Просмотреть файл
@@ -32,15 +32,6 @@ namespace rocprofiler {
class HsaQueue : public Queue {
public:
typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet);
enum {
LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t),
LEGACY_SLOT_SIZE_P = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
};
struct slot_pm4_t {
packet_word_t words[LEGACY_SLOT_SIZE_W];
};
HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {}
void Submit(const packet_t* packet) {
+1 -1
Просмотреть файл
@@ -195,7 +195,7 @@ class MetricsDict {
}
static hsa_ven_amd_aqlprofile_id_query_t Translate(const util::AgentInfo* agent_info, const std::string& block_name) {
hsa_ven_amd_aqlprofile_profile_t profile;
hsa_ven_amd_aqlprofile_profile_t profile{};
profile.agent = agent_info->dev_id;
hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0};
hsa_status_t status =
+107 -3
Просмотреть файл
@@ -56,6 +56,16 @@ THE SOFTWARE.
// Internal library methods
//
namespace rocprofiler {
hsa_status_t CreateQueuePro(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue);
decltype(hsa_queue_create)* hsa_queue_create_fn;
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
@@ -115,6 +125,11 @@ void RestoreHsaApi() {
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
}
void StandaloneIntercept() {
::HsaApiTable* table = kHsaApiTable;
table->core_->hsa_queue_create_fn = rocprofiler::CreateQueuePro;
}
typedef void (*tool_handler_t)();
typedef void (*tool_handler_prop_t)(rocprofiler_settings_t*);
void * tool_handle = NULL;
@@ -195,9 +210,7 @@ DESTRUCTOR_API void destructor() {
const MetricsDict* GetMetrics(const hsa_agent_t& agent) {
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
if (agent_info == NULL) {
EXC_RAISING(HSA_STATUS_ERROR, "agent is not found");
}
if (agent_info == NULL) EXC_RAISING(HSA_STATUS_ERROR, "agent is not found");
const MetricsDict* metrics = MetricsDict::Create(agent_info);
if (metrics == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed");
return metrics;
@@ -209,6 +222,94 @@ hsa_status_t GetExcStatus(const std::exception& e) {
: HSA_STATUS_ERROR;
}
inline size_t CreateEnableCmd(const hsa_agent_t& agent, packet_t* command, const size_t& slot_count) {
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
const rocprofiler::util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(agent);
const bool is_legacy = (strncmp(agent_info->name, "gfx8", 4) == 0);
const size_t packet_count = (is_legacy) ? Profile::LEGACY_SLOT_SIZE_PKT : 1;
if (packet_count > slot_count) EXC_RAISING(HSA_STATUS_ERROR, "packet_count > slot_count");
// AQLprofile object
hsa_ven_amd_aqlprofile_profile_t profile{};
profile.agent = agent_info->dev_id;
// Query for cmd buffer size
hsa_status_t status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, NULL);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).size exc");
if (profile.command_buffer.size == 0) EXC_RAISING(status, "get_info(ENABLE_CMD).size == 0");
// Allocate cmd buffer
const size_t aligment_mask = 0x100 - 1;
profile.command_buffer.ptr =
hsa_rsrc->AllocateSysMemory(agent_info, profile.command_buffer.size);
if ((reinterpret_cast<uintptr_t>(profile.command_buffer.ptr) & aligment_mask) != 0) {
EXC_RAISING(status, "profile.command_buffer.ptr bad alignment");
}
// Generating cmd packet
if (is_legacy) {
packet_t packet{};
// Query for cmd buffer data
status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, &packet);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc");
// Check for legacy GFXIP
status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_legacy_get_pm4(&packet, command);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
} else {
// Query for cmd buffer data
status = hsa_rsrc->AqlProfileApi()->hsa_ven_amd_aqlprofile_get_info(
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD, command);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "get_info(ENABLE_CMD).data exc");
}
// Return cmd packet data size
return (packet_count * sizeof(packet_t));
}
hsa_status_t CreateQueuePro(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue)
{
static packet_t enable_cmd_packet[Profile::LEGACY_SLOT_SIZE_PKT];
static size_t enable_cmd_size = 0;
static std::mutex enable_cmd_mutex;
// Create HSA queue
hsa_status_t status = hsa_queue_create_fn(
agent,
size,
type,
callback,
data,
private_segment_size,
group_segment_size,
queue);
if (status != HSA_STATUS_SUCCESS) return status;
// Create 'Enable' cmd packet
if (enable_cmd_size == 0) {
std::lock_guard<std::mutex> lck(enable_cmd_mutex);
if (enable_cmd_size == 0) {
enable_cmd_size = CreateEnableCmd(agent, enable_cmd_packet, Profile::LEGACY_SLOT_SIZE_PKT);
}
}
// Enable counters for the queue
rocprofiler::util::HsaRsrcFactory::Instance().Submit(*queue, enable_cmd_packet, enable_cmd_size);
return HSA_STATUS_SUCCESS;
}
rocprofiler_properties_t rocprofiler_properties;
uint32_t SqttProfile::output_buffer_size_ = 0x2000000; // 32M
bool SqttProfile::output_buffer_local_ = true;
@@ -261,7 +362,10 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
if (intercept_mode) {
rocprofiler::ProxyQueue::HsaIntercept(table);
rocprofiler::InterceptQueue::HsaIntercept(table);
} else {
rocprofiler::StandaloneIntercept();
}
return true;
}
+2 -2
Просмотреть файл
@@ -550,7 +550,7 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
}
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
const uint32_t slot_size_b = 0x40;
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
// adevance command queue
const uint64_t write_idx = hsa_queue_load_write_index_relaxed(queue);
@@ -578,7 +578,7 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
}
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) {
const uint32_t slot_size_b = 0x40;
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
if ((size_bytes & (slot_size_b - 1)) != 0) {
fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes);
abort();
+1
Просмотреть файл
@@ -154,6 +154,7 @@ class HsaTimer {
class HsaRsrcFactory {
public:
static const size_t CMD_SLOT_SIZE_B = 0x40;
typedef std::recursive_mutex mutex_t;
typedef HsaTimer::timestamp_t timestamp_t;
+19 -3
Просмотреть файл
@@ -35,19 +35,35 @@ endif ()
## Util sources
file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
## Standalone test sources
set ( STEXE_NAME "standalone_test" )
set ( STST_SRC
${TEST_DIR}/app/standalone_test.cpp
${TEST_DIR}/ctrl/test_hsa.cpp
)
## Test control sources
set ( CTRL_SRC
${TEST_DIR}/app/test.cpp
${TEST_DIR}/ctrl/test_hsa.cpp
)
## Test kernel sources
## Dummy kernel
set ( DUMMY_NAME dummy_kernel )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${DUMMY_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
## Test kernel
set ( TEST_NAME simple_convolution )
set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
## Building test executable
add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} )
## Building standalone test executable
add_executable ( ${STEXE_NAME} ${STST_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${STEXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${STEXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
## Building ctrl test executable
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
+118 -86
Просмотреть файл
@@ -22,97 +22,25 @@ THE SOFTWARE.
#include <hsa.h>
#include <string.h>
#include <unistd.h>
#include <iostream>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
#include "ctrl/test_hsa.h"
#include "inc/rocprofiler.h"
#include "dummy_kernel/dummy_kernel.h"
#include "simple_convolution/simple_convolution.h"
#include "util/test_assert.h"
int main(int argc, char** argv) {
bool ret_val = false;
// HSA status
hsa_status_t status = HSA_STATUS_ERROR;
// Profiling context
rocprofiler_t* context = NULL;
// Profiling properties
rocprofiler_properties_t properties;
// Number of context invocation
uint32_t invocation = 0;
#if 0
// Profiling info objects
const unsigned info_count = 1;
rocprofiler_info_t info[info_count];
// PMC events
memset(info, 0, sizeof(info));
info[0].type = ROCPROFILER_TYPE_METRIC;
info[0].name = "SQ_WAVES";
#else
// Profiling info objects
const unsigned info_count = 3;
rocprofiler_info_t info[info_count];
// PMC events
memset(info, 0, sizeof(info));
info[0].type = ROCPROFILER_TYPE_METRIC;
info[0].name = "SQ_WAVES";
info[1].type = ROCPROFILER_TYPE_METRIC;
info[1].name = "SQ_ITEMS";
// Tracing parameters
const unsigned parameter_count = 2;
rocprofiler_parameter_t parameters[parameter_count];
info[2].name = "THREAD_TRACE";
info[2].type = ROCPROFILER_TYPE_TRACE;
info[2].parameters = parameters;
info[2].parameter_count = parameter_count;
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
parameters[0].value = 0;
parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
parameters[1].value = 0;
#endif
// Creating profiling context
properties = {};
properties.queue_depth = 128;
status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context,
ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_OWNQUEUE, &properties);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
TestHsa::SetQueue(properties.queue);
// Adding dispatch observer
status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
// Querying the number of context invocation
status = rocprofiler_invocation(context, &invocation);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
// Dispatching profiled kernel n-times to collect all counter groups data
unsigned n = 0;
while (1) {
std::cout << "> " << n << "/" << invocation << std::endl;
#if 0
status = rocprofiler_start(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
status = rocprofiler_stop(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
#else
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
#endif
status = rocprofiler_sample(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
for (rocprofiler_info_t* p = info; p < info + info_count; ++p) {
std::cout << (p - info) << ": " << p->name;
void print_features(rocprofiler_feature_t* feature, uint32_t feature_count) {
for (rocprofiler_feature_t* p = feature; p < feature + feature_count; ++p) {
std::cout << (p - feature) << ": " << p->name;
switch (p->data.kind) {
case ROCPROFILER_DATA_KIND_INT64:
std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl;
std::cout << std::dec << " result64 (" << p->data.result_int64 << ")" << std::endl;
break;
case ROCPROFILER_BYTES: {
case ROCPROFILER_DATA_KIND_BYTES: {
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
uint64_t size = 0;
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
@@ -130,15 +58,119 @@ int main(int argc, char** argv) {
TEST_ASSERT(false);
}
}
}
++n;
if (n < invocation) {
status = rocprofiler_next(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
continue;
}
break;
void read_features(uint32_t n, rocprofiler_t* context, rocprofiler_feature_t* feature, const unsigned feature_count) {
std::cout << "read features" << std::endl;
hsa_status_t status = rocprofiler_read(context, n);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
std::cout << "read issue" << std::endl;
status = rocprofiler_get_data(context, n);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
status = rocprofiler_get_metrics(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
print_features(feature, feature_count);
}
int main() {
bool ret_val = false;
// HSA status
hsa_status_t status = HSA_STATUS_ERROR;
// Profiling context
rocprofiler_t* context = NULL;
// Profiling properties
rocprofiler_properties_t properties;
// Profiling feature objects
const unsigned feature_count = 9;
rocprofiler_feature_t feature[feature_count];
// PMC events
memset(feature, 0, sizeof(feature));
feature[0].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[0].name = "GRBM_COUNT";
feature[1].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[1].name = "GRBM_GUI_ACTIVE";
feature[2].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[2].name = "GPUBusy";
feature[3].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[3].name = "SQ_WAVES";
feature[4].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[4].name = "SQ_INSTS_VALU";
feature[5].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[5].name = "VALUInsts";
feature[6].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[6].name = "TCC_HIT_sum";
feature[7].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[7].name = "TCC_MISS_sum";
feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
feature[8].name = "WRITE_SIZE";
// feature[8].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// feature[8].name = "TCC_EA_WRREQ_sum";
// feature[9].kind = ROCPROFILER_FEATURE_KIND_METRIC;
// feature[9].name = "TCC_EA_WRREQ_64B_sum";
#if 0
// Tracing parameters
const unsigned parameter_count = 2;
rocprofiler_parameter_t parameters[parameter_count];
feature[2].name = "THREAD_TRACE";
feature[2].kind = ROCPROFILER_FEATURE_KIND_TRACE;
feature[2].parameters = parameters;
feature[2].parameter_count = parameter_count;
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
parameters[0].value = 0;
parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
parameters[1].value = 0;
#endif
// Instantiate HSA resources
HsaRsrcFactory::Create();
// Getting GPU device info
const AgentInfo* agent_info = NULL;
if (HsaRsrcFactory::Instance().GetGpuAgentInfo(0, &agent_info) == false) abort();
// Creating the queues pool
const unsigned queue_count = 16;
hsa_queue_t* queue[queue_count];
for (unsigned queue_ind = 0; queue_ind < queue_count; ++queue_ind) {
if (HsaRsrcFactory::Instance().CreateQueue(agent_info, 128, &queue[queue_ind]) == false) abort();
}
hsa_queue_t* prof_queue = queue[0];
// Creating profiling context
properties = {};
properties.queue = prof_queue;
status = rocprofiler_open(agent_info->dev_id, feature, feature_count, &context,
ROCPROFILER_MODE_STANDALONE, &properties);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
// Test initialization
TestHsa::SetQueue(prof_queue);
TestHsa::HsaInstantiate(0);
// Dispatching profiled kernel n-times to collect all counter groups data
const unsigned group_n = 0;
status = rocprofiler_start(context, group_n);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
std::cout << "start" << std::endl;
for (unsigned ind = 0; ind < 3; ++ind) {
#if 1
const unsigned queue_ind = ind % queue_count;
TestHsa::SetQueue(queue[queue_ind]);
// ret_val = RunKernel<DummyKernel, TestAql>();
ret_val = RunKernel<SimpleConvolution, TestAql>();
std::cout << "run kernel, queue " << queue_ind << std::endl;
#else
sleep(3);
#endif
read_features(group_n, context, feature, feature_count);
}
// Stop counters
status = rocprofiler_stop(context, group_n);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
std::cout << "stop" << std::endl;
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
+1 -1
Просмотреть файл
@@ -26,7 +26,7 @@ THE SOFTWARE.
#include "ctrl/test_hsa.h"
#include "util/test_assert.h"
template <class Kernel, class Test> bool RunKernel(int argc, char* argv[], int count = 1) {
template <class Kernel, class Test> bool RunKernel(int argc = 0, char* argv[] = NULL, int count = 1) {
bool ret_val = false;
// Create test kernel object
+5 -1
Просмотреть файл
@@ -71,7 +71,7 @@ void TestHsa::HsaShutdown() {
if (hsa_rsrc_) hsa_rsrc_->Destroy();
}
bool TestHsa::Initialize(int arg_cnt, char** arg_list) {
bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
std::clog << "TestHsa::Initialize :" << std::endl;
// Instantiate a Timer object
@@ -119,6 +119,8 @@ bool TestHsa::Setup() {
mem_map_t& mem_map = test_->GetMemMap();
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
mem_descr_t& des = it->second;
if (des.size == 0) continue;
switch (des.id) {
case TestKernel::LOCAL_DES_ID:
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
@@ -245,6 +247,8 @@ bool TestHsa::VerifyResults() {
const uint32_t size = test_->GetOutputSize();
bool suc = false;
if (size == 0) return true;
// Copy local kernel output buffers from local memory into host memory
if (test_->IsOutputLocal()) {
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
+1
Просмотреть файл
@@ -35,6 +35,7 @@ class TestHsa : public TestAql {
static HsaRsrcFactory* HsaInstantiate(const uint32_t agent_ind = agent_id_);
static void HsaShutdown();
static void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; }
static hsa_agent_t HsaAgent() { return agent_info_->dev_id; }
static uint32_t HsaAgentId() { return agent_id_; }
// Constructor
+28
Просмотреть файл
@@ -0,0 +1,28 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
/**
dummy kernel
*/
__kernel void DummyKernel() {
uint tid = get_global_id(0);
}
+71
Просмотреть файл
@@ -0,0 +1,71 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
#define TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
#include <map>
#include <vector>
#include "ctrl/test_kernel.h"
// Class implements DummyKernel kernel parameters
class DummyKernel : public TestKernel {
public:
// Kernel buffers IDs
enum { KERNARG_BUF_ID, LOCAL_BUF_ID };
// Constructor
DummyKernel() :
width_(64),
height_(64)
{
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0);
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0);
}
// Initialize method
void Init() {}
// Return compute grid size
uint32_t GetGridSize() const { return width_ * height_; }
// Print output
void PrintOutput(const void* ptr) const {}
// Return name
std::string Name() const { return std::string("DummyKernel"); }
private:
// Reference CPU implementation
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
const uint32_t width, const uint32_t height,
const uint32_t maskWidth, const uint32_t maskHeight) { return true; }
// Width of the Input array
const uint32_t width_;
// Height of the Input array
const uint32_t height_;
};
#endif // TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
Двоичные данные
Двоичный файл не отображается.
Двоичные данные
Двоичный файл не отображается.
+12 -18
Просмотреть файл
@@ -22,21 +22,21 @@
# THE SOFTWARE.
################################################################################
test_bin_dflt=./test/ctrl
# paths to ROC profiler and oher libraries
export LD_LIBRARY_PATH=$PWD
# enable error messages logging to '/tmp/rocprofiler_log.txt'
export ROCPROFILER_LOG=1
# ROC profiler library loaded by HSA runtime
export HSA_TOOLS_LIB=librocprofiler64.so
# tool library loaded by ROC profiler
export ROCP_TOOL_LIB=libtool.so
# enable error messages logging to '/tmp/rocprofiler_log.txt'
export ROCPROFILER_LOG=1
# ROC profiler metrics config file
unset ROCP_PROXY_QUEUE
# ROC profiler metrics config file
export ROCP_METRICS=metrics.xml
eval ./test/standalone_test
# tool library loaded by ROC profiler
export ROCP_TOOL_LIB=libtool.so
# ROC profiler kernels timing
export ROCP_TIMESTAMP_ON=1
# output directory for the tool library, for metrics results file 'results.txt'
@@ -47,21 +47,15 @@ if [ ! -e $ROCP_TOOL_LIB ] ; then
export ROCP_TOOL_LIB=test/libtool.so
fi
if [ -n "$1" ] ; then
tbin="$*"
else
tbin=$test_bin_dflt
fi
export ROCP_KITER=100
export ROCP_DITER=100
export ROCP_INPUT=input.xml
eval $tbin
eval ./test/ctrl
#export ROCP_KITER=1
#export ROCP_DITER=4
#export ROCP_INPUT=input1.xml
#eval $tbin
export ROCP_KITER=1
export ROCP_DITER=4
export ROCP_INPUT=input1.xml
eval ./test/ctrl
#valgrind --leak-check=full $tbin
#valgrind --tool=massif $tbin