memory allocation refactoring
Change-Id: Ic63b4f5ea44f2dc5e009e3e58652a661e957b7d6
[ROCm/rocprofiler commit: c9c0ecc976]
This commit is contained in:
@@ -166,9 +166,9 @@ typedef void rocprofiler_t;
|
||||
// Profiling group object
|
||||
typedef struct {
|
||||
unsigned index; // group index
|
||||
rocprofiler_t* context; // context object
|
||||
rocprofiler_feature_t** features; // profiling info array
|
||||
uint32_t feature_count; // profiling info count
|
||||
rocprofiler_t* context; // context object
|
||||
} rocprofiler_group_t;
|
||||
|
||||
// Profiling mode mask
|
||||
|
||||
@@ -121,9 +121,6 @@ class Group {
|
||||
Context* GetContext() { return context_; }
|
||||
uint32_t GetIndex() const { return index_; }
|
||||
|
||||
rocprofiler_group_t GetGroup() {
|
||||
return rocprofiler_group_t{index_, &info_vector_[0], (uint32_t)info_vector_.size(), context_};
|
||||
}
|
||||
void ResetRefs() { refs_ = n_profiles_; }
|
||||
uint32_t DecrRefs() {
|
||||
return (refs_ > 0) ? --refs_ : 0;
|
||||
@@ -279,15 +276,18 @@ class Context {
|
||||
|
||||
uint32_t GetGroupCount() const { return set_.size(); }
|
||||
|
||||
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
|
||||
rocprofiler::info_vector_t& info_vector = set_[index].GetInfoVector();
|
||||
rocprofiler_group_t GetGroupInfo(Group* g) {
|
||||
rocprofiler::info_vector_t& info_vector = g->GetInfoVector();
|
||||
rocprofiler_group_t group = {};
|
||||
group.feature_count = info_vector.size();
|
||||
group.features = &info_vector[0];
|
||||
group.index = g->GetIndex();
|
||||
group.context = reinterpret_cast<rocprofiler_t*>(this);
|
||||
group.index = index;
|
||||
group.features = &info_vector[0];
|
||||
group.feature_count = info_vector.size();
|
||||
return group;
|
||||
}
|
||||
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
|
||||
return GetGroupInfo(&set_[index]);
|
||||
}
|
||||
|
||||
const pkt_vector_t& StartPackets(const uint32_t& group_index) const {
|
||||
return set_[group_index].GetStartVector();
|
||||
@@ -391,11 +391,12 @@ class Context {
|
||||
|
||||
static bool Handler(hsa_signal_value_t value, void* arg) {
|
||||
Group* group = reinterpret_cast<Group*>(arg);
|
||||
group->GetContext()->mutex_.lock();
|
||||
Context* context = group->GetContext();
|
||||
context->mutex_.lock();
|
||||
uint32_t r = group->DecrRefs();
|
||||
group->GetContext()->mutex_.unlock();
|
||||
context->mutex_.unlock();
|
||||
if (r == 0) {
|
||||
return group->GetContext()->handler_(group->GetGroup(), group->GetContext()->handler_arg_);
|
||||
return context->handler_(context->GetGroupInfo(group), context->handler_arg_);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -427,8 +428,9 @@ class Context {
|
||||
if (rinfo->data.result_bytes.copy) {
|
||||
if (sample_id == 0) {
|
||||
const uint32_t output_buffer_size = profile->output_buffer.size;
|
||||
const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t);
|
||||
void* ptr = calloc(output_buffer_size64, sizeof(uint64_t));
|
||||
util::HsaRsrcFactory* hsa_rsrc = &util::HsaRsrcFactory::Instance();
|
||||
const util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(profile->agent);
|
||||
void* ptr = hsa_rsrc->AllocateSysMemory(agent_info, output_buffer_size);
|
||||
rinfo->data.result_bytes.size = output_buffer_size;
|
||||
rinfo->data.result_bytes.ptr = ptr;
|
||||
callback_data->ptr = reinterpret_cast<char*>(ptr);
|
||||
@@ -446,7 +448,7 @@ class Context {
|
||||
else EXC_RAISING(HSA_STATUS_ERROR, "SQTT data out of output buffer");
|
||||
}
|
||||
|
||||
const bool suc = util::HsaRsrcFactory::CopyToHost(dest, src, size);
|
||||
bool suc = util::HsaRsrcFactory::Memcpy(profile->agent, dest, src, size);
|
||||
if (suc) {
|
||||
*header = size;
|
||||
callback_data->ptr = dest + align_size(size, sizeof(uint32_t));
|
||||
|
||||
@@ -88,8 +88,8 @@ class Profile {
|
||||
}
|
||||
virtual ~Profile() {
|
||||
info_vector_.clear();
|
||||
if (profile_.command_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.command_buffer.ptr);
|
||||
if (profile_.output_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.output_buffer.ptr);
|
||||
if (profile_.command_buffer.ptr) util::HsaRsrcFactory::FreeMemory(profile_.command_buffer.ptr);
|
||||
if (profile_.output_buffer.ptr) util::HsaRsrcFactory::FreeMemory(profile_.output_buffer.ptr);
|
||||
if (profile_.events) free(const_cast<event_t*>(profile_.events));
|
||||
if (profile_.parameters) free(const_cast<parameter_t*>(profile_.parameters));
|
||||
if (completion_signal_.handle) {
|
||||
|
||||
@@ -359,9 +359,13 @@ bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t
|
||||
CHECK_STATUS("hsa_memory_copy", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
bool HsaRsrcFactory::Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length) {
|
||||
(void)agent;
|
||||
return CopyToHost(dest_buff, src_buff, length);
|
||||
}
|
||||
|
||||
// Free method
|
||||
bool HsaRsrcFactory::MemoryFree(void* ptr) {
|
||||
bool HsaRsrcFactory::FreeMemory(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
|
||||
@@ -217,9 +217,10 @@ class HsaRsrcFactory {
|
||||
|
||||
// Memcopy method
|
||||
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
|
||||
static bool Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length);
|
||||
|
||||
// Free method
|
||||
static bool MemoryFree(void* ptr);
|
||||
static bool FreeMemory(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
|
||||
@@ -57,22 +57,23 @@ template <class Kernel, class Test> bool RunKernel(int argc, char* argv[], int c
|
||||
return false;
|
||||
}
|
||||
|
||||
// Run test kernel
|
||||
// Kernel dspatch iterations
|
||||
for (int i = 0; i < count; ++i) {
|
||||
// Run test kernel
|
||||
ret_val = test_aql->Run();
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in running the test kernel" << std::endl;
|
||||
TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify the results of the execution
|
||||
ret_val = test_aql->VerifyResults();
|
||||
if (ret_val) {
|
||||
std::clog << "Test : Passed" << std::endl;
|
||||
} else {
|
||||
std::clog << "Test : Failed" << std::endl;
|
||||
// Verify the results of the execution
|
||||
ret_val = test_aql->VerifyResults();
|
||||
if (ret_val) {
|
||||
std::clog << "Test : Passed" << std::endl;
|
||||
} else {
|
||||
std::clog << "Test : Failed" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Print time taken by sample
|
||||
|
||||
@@ -115,11 +115,26 @@ bool TestHsa::Setup() {
|
||||
mem_map_t& mem_map = test_->GetMemMap();
|
||||
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
|
||||
mem_descr_t& des = it->second;
|
||||
void* ptr = (des.local) ? hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size)
|
||||
: hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
|
||||
des.ptr = ptr;
|
||||
TEST_ASSERT(ptr != NULL);
|
||||
if (ptr == NULL) return false;
|
||||
switch (des.id) {
|
||||
case TestKernel::LOCAL_DES_ID:
|
||||
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
|
||||
break;
|
||||
case TestKernel::KERNARG_DES_ID:
|
||||
des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, des.size);
|
||||
if (des.ptr) memset(des.ptr, 0, des.size);
|
||||
break;
|
||||
case TestKernel::SYS_DES_ID:
|
||||
des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
|
||||
if (des.ptr) memset(des.ptr, 0, des.size);
|
||||
break;
|
||||
case TestKernel::NULL_DES_ID:
|
||||
des.ptr = NULL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
TEST_ASSERT(des.ptr != NULL);
|
||||
if (des.ptr == NULL) return false;
|
||||
}
|
||||
test_->Init();
|
||||
|
||||
@@ -208,24 +223,41 @@ bool TestHsa::Run() {
|
||||
hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
std::clog << "> DONE, que_idx=" << que_idx << std::endl;
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(dispatch_timer_idx_);
|
||||
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
|
||||
total_time_taken_ += dispatch_time_taken_;
|
||||
|
||||
// Copy kernel buffers from local memory into system memory
|
||||
const bool suc = hsa_rsrc_->CopyToHost(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize());
|
||||
if (suc) test_->PrintOutput();
|
||||
|
||||
return suc;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::VerifyResults() {
|
||||
// Compare the results and see if they match
|
||||
const void* const refout_ptr = test_->GetRefoutPtr();
|
||||
const int32_t cmp_val =
|
||||
(refout_ptr != NULL) ? memcmp(test_->GetOutputPtr(), refout_ptr, test_->GetOutputSize()) : 0;
|
||||
return (cmp_val == 0);
|
||||
bool cmp = false;
|
||||
void* output = NULL;
|
||||
const uint32_t size = test_->GetOutputSize();
|
||||
bool suc = false;
|
||||
|
||||
// Copy local kernel output buffers from local memory into host memory
|
||||
if (test_->IsOutputLocal()) {
|
||||
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
|
||||
suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size);
|
||||
} else {
|
||||
output = test_->GetOutputPtr();;
|
||||
suc = true;
|
||||
}
|
||||
|
||||
if ((output != NULL) && suc) {
|
||||
// Print the test output
|
||||
test_->PrintOutput(output);
|
||||
// Compare the results and see if they match
|
||||
cmp = (memcmp(output, test_->GetRefOut(), size) == 0);
|
||||
}
|
||||
|
||||
if (test_->IsOutputLocal() && (output != NULL)) hsa_rsrc_->FreeMemory(output);
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
void TestHsa::PrintTime() {
|
||||
|
||||
@@ -28,22 +28,23 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef TEST_CTRL_TEST_KERNEL_H_
|
||||
#define TEST_CTRL_TEST_KERNEL_H_
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <map>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
// Class implements kernel test
|
||||
class TestKernel {
|
||||
public:
|
||||
// Exported buffers IDs
|
||||
enum buf_id_t { KERNARG_EXP_ID, OUTPUT_EXP_ID, REFOUT_EXP_ID };
|
||||
// Memory descriptors IDs
|
||||
enum { INPUT_DES_ID, OUTPUT_DES_ID, LOCAL_DES_ID, MASK_DES_ID, KERNARG_DES_ID, REFOUT_DES_ID };
|
||||
enum des_id_t { NULL_DES_ID, LOCAL_DES_ID, KERNARG_DES_ID, SYS_DES_ID, REFOUT_DES_ID };
|
||||
|
||||
// Memory descriptors vector declaration
|
||||
struct mem_descr_t {
|
||||
des_id_t id;
|
||||
void* ptr;
|
||||
uint32_t size;
|
||||
bool local;
|
||||
};
|
||||
|
||||
// Memory map declaration
|
||||
@@ -51,12 +52,7 @@ class TestKernel {
|
||||
typedef mem_map_t::iterator mem_it_t;
|
||||
typedef mem_map_t::const_iterator mem_const_it_t;
|
||||
|
||||
virtual ~TestKernel() {
|
||||
for (auto& entry : mem_map_) {
|
||||
void* ptr = entry.second.ptr;
|
||||
if (ptr != NULL) HsaRsrcFactory::MemoryFree(ptr);
|
||||
}
|
||||
}
|
||||
virtual ~TestKernel() {}
|
||||
|
||||
// Initialize method
|
||||
virtual void Init() = 0;
|
||||
@@ -65,50 +61,79 @@ class TestKernel {
|
||||
mem_map_t& GetMemMap() { return mem_map_; }
|
||||
|
||||
// Return NULL descriptor
|
||||
static mem_descr_t NullDescriptor() { return {NULL, 0, 0}; }
|
||||
static mem_descr_t NullDescriptor() { return {NULL_DES_ID, NULL, 0}; }
|
||||
|
||||
// Check if decripter is local
|
||||
bool IsLocal(const mem_descr_t& descr) const { return (descr.id == LOCAL_DES_ID); }
|
||||
|
||||
// Methods to get the kernel attributes
|
||||
void* GetKernargPtr() const { return GetDescr(KERNARG_DES_ID).ptr; }
|
||||
uint32_t GetKernargSize() const { return GetDescr(KERNARG_DES_ID).size; }
|
||||
void* GetOutputPtr() const { return GetDescr(OUTPUT_DES_ID).ptr; }
|
||||
uint32_t GetOutputSize() const { return GetDescr(OUTPUT_DES_ID).size; }
|
||||
void* GetLocalPtr() const { return GetDescr(LOCAL_DES_ID).ptr; }
|
||||
void* GetRefoutPtr() const { return GetDescr(REFOUT_DES_ID).ptr; }
|
||||
const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; };
|
||||
const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; };
|
||||
void* GetKernargPtr() { return GetKernargDescr().ptr; }
|
||||
uint32_t GetKernargSize() { return GetKernargDescr().size; }
|
||||
void* GetOutputPtr() { return GetOutputDescr().ptr; }
|
||||
uint32_t GetOutputSize() { return GetOutputDescr().size; }
|
||||
bool IsOutputLocal() { return IsLocal(GetOutputDescr()); }
|
||||
virtual uint32_t GetGridSize() const = 0;
|
||||
|
||||
// Return reference output
|
||||
void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; };
|
||||
|
||||
// Print output
|
||||
virtual void PrintOutput() const = 0;
|
||||
virtual void PrintOutput(const void* ptr) const = 0;
|
||||
|
||||
// Return name
|
||||
virtual std::string Name() const = 0;
|
||||
|
||||
protected:
|
||||
// Set system memory descriptor
|
||||
bool SetSysDescr(const uint32_t& id, const uint32_t& size) {
|
||||
return SetMemDescr(id, size, false);
|
||||
// Set buffer descriptor
|
||||
bool SetInDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
bool suc = SetMemDescr(buf_id, des_id, size);
|
||||
if (des_id == KERNARG_DES_ID) {
|
||||
test_map_[KERNARG_EXP_ID] = &mem_map_[buf_id];
|
||||
}
|
||||
return suc;
|
||||
}
|
||||
|
||||
// Set local memory descriptor
|
||||
bool SetLocalDescr(const uint32_t& id, const uint32_t& size) {
|
||||
return SetMemDescr(id, size, true);
|
||||
// Set results descriptor
|
||||
bool SetOutDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
bool suc = SetMemDescr(buf_id, des_id, size);
|
||||
test_map_[OUTPUT_EXP_ID] = &mem_map_[buf_id];
|
||||
return suc;
|
||||
}
|
||||
|
||||
// Set host descriptor
|
||||
bool SetHostDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
bool suc = SetMemDescr(buf_id, des_id, size);
|
||||
if (suc) {
|
||||
mem_descr_t& descr = mem_map_[buf_id];
|
||||
descr.ptr = malloc(size);
|
||||
if (des_id == REFOUT_DES_ID) {
|
||||
test_map_[REFOUT_EXP_ID] = &descr;
|
||||
}
|
||||
if (descr.ptr == NULL) suc = false;
|
||||
}
|
||||
return suc;
|
||||
}
|
||||
|
||||
// Get memory descriptor
|
||||
mem_descr_t GetDescr(const uint32_t& id) const {
|
||||
mem_const_it_t it = mem_map_.find(id);
|
||||
mem_descr_t GetDescr(const uint32_t& buf_id) const {
|
||||
mem_const_it_t it = mem_map_.find(buf_id);
|
||||
return (it != mem_map_.end()) ? it->second : NullDescriptor();
|
||||
}
|
||||
|
||||
private:
|
||||
// Set memory descriptor
|
||||
bool SetMemDescr(const uint32_t& id, const uint32_t& size, const bool& local) {
|
||||
const mem_descr_t des = {NULL, size, local};
|
||||
auto ret = mem_map_.insert(mem_map_t::value_type(id, des));
|
||||
bool SetMemDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
const mem_descr_t des = {des_id, NULL, size};
|
||||
auto ret = mem_map_.insert(mem_map_t::value_type(buf_id, des));
|
||||
return ret.second;
|
||||
}
|
||||
|
||||
// Kernel memory map object
|
||||
mem_map_t mem_map_;
|
||||
// Test memory map object
|
||||
std::map<uint32_t, mem_descr_t*> test_map_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_KERNEL_H_
|
||||
|
||||
@@ -266,16 +266,14 @@ hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
const uint32_t data_size = info_data->sqtt_data.size;
|
||||
const void* data_ptr = info_data->sqtt_data.ptr;
|
||||
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, data_size);
|
||||
#if 1
|
||||
dump_sqtt_trace(arg->label, info_data->sample_id, data_ptr, data_size);
|
||||
#else
|
||||
void* buffer = malloc(data_size);
|
||||
memset(buffer, 0, data_size);
|
||||
const bool suc = HsaRsrcFactory::Instance().CopyToHost(arg->agent, buffer, data_ptr, data_size);
|
||||
|
||||
HsaRsrcFactory* hsa_rsrc = &HsaRsrcFactory::Instance();
|
||||
const AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(arg->agent);
|
||||
void* buffer = hsa_rsrc->AllocateSysMemory(agent_info, data_size);
|
||||
const bool suc = HsaRsrcFactory::Memcpy(arg->agent, buffer, data_ptr, data_size);
|
||||
if (suc) dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size);
|
||||
else fatal("SQTT data memcopy to host failed");
|
||||
free(buffer);
|
||||
#endif
|
||||
HsaRsrcFactory::FreeMemory(buffer);
|
||||
} else
|
||||
status = HSA_STATUS_ERROR;
|
||||
return status;
|
||||
@@ -320,7 +318,7 @@ void output_results(const context_entry_t* entry, const char* label) {
|
||||
size += chunk_size;
|
||||
}
|
||||
fprintf(file, "size(%lu)\n", size);
|
||||
free(p->data.result_bytes.ptr);
|
||||
HsaRsrcFactory::FreeMemory(p->data.result_bytes.ptr);
|
||||
const_cast<rocprofiler_feature_t*>(p)->data.result_bytes.size = 0;
|
||||
} else {
|
||||
fprintf(file, "(\n");
|
||||
|
||||
@@ -67,7 +67,7 @@ __kernel void SimpleConvolution(__global uint * output,
|
||||
float sumFX = 0;
|
||||
|
||||
for(uint i = left; i <= right; ++i) {
|
||||
for(uint j = top ; j <= bottom; ++j) {
|
||||
for(uint j = top; j <= bottom; ++j) {
|
||||
// performing wighted sum within the mask boundaries
|
||||
uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep));
|
||||
uint index = j * width + i;
|
||||
|
||||
@@ -27,6 +27,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
|
||||
@@ -282,12 +283,11 @@ SimpleConvolution::SimpleConvolution() {
|
||||
const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t);
|
||||
const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float);
|
||||
|
||||
SetSysDescr(KERNARG_DES_ID, sizeof(kernel_args_t));
|
||||
SetSysDescr(INPUT_DES_ID, input_size_bytes);
|
||||
SetSysDescr(OUTPUT_DES_ID, input_size_bytes);
|
||||
SetLocalDescr(LOCAL_DES_ID, input_size_bytes);
|
||||
SetSysDescr(MASK_DES_ID, mask_size_bytes);
|
||||
SetSysDescr(REFOUT_DES_ID, input_size_bytes);
|
||||
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, sizeof(kernel_args_t));
|
||||
SetInDescr(INPUT_BUF_ID, SYS_DES_ID, input_size_bytes);
|
||||
SetInDescr(MASK_BUF_ID, SYS_DES_ID, mask_size_bytes);
|
||||
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, input_size_bytes);
|
||||
SetHostDescr(REFOUT_BUF_ID, REFOUT_DES_ID, input_size_bytes);
|
||||
|
||||
if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes);
|
||||
}
|
||||
@@ -295,14 +295,18 @@ SimpleConvolution::SimpleConvolution() {
|
||||
void SimpleConvolution::Init() {
|
||||
std::clog << "SimpleConvolution::init :" << std::endl;
|
||||
|
||||
mem_descr_t input_des = GetDescr(INPUT_DES_ID);
|
||||
mem_descr_t local_des = GetDescr(LOCAL_DES_ID);
|
||||
mem_descr_t mask_des = GetDescr(MASK_DES_ID);
|
||||
mem_descr_t refout_des = GetDescr(REFOUT_DES_ID);
|
||||
mem_descr_t kernarg_des = GetDescr(KERNARG_DES_ID);
|
||||
|
||||
mem_descr_t kernarg_des = GetDescr(KERNARG_BUF_ID);
|
||||
mem_descr_t input_des = GetDescr(INPUT_BUF_ID);
|
||||
mem_descr_t mask_des = GetDescr(MASK_BUF_ID);
|
||||
mem_descr_t output_des = GetDescr(LOCAL_BUF_ID);
|
||||
#if 0
|
||||
printf("kernarg_des %p 0x%x\n", kernarg_des.ptr, kernarg_des.size);
|
||||
printf("input_des %p 0x%x\n", input_des.ptr, input_des.size);
|
||||
printf("mask_des %p 0x%x\n", mask_des.ptr, mask_des.size);
|
||||
printf("output_des %p 0x%x\n", output_des.ptr, output_des.size);
|
||||
#endif
|
||||
uint32_t* input = (uint32_t*)input_des.ptr;
|
||||
uint32_t* output_local = (uint32_t*)local_des.ptr;
|
||||
uint32_t* output_local = (uint32_t*)output_des.ptr;
|
||||
float* mask = (float*)mask_des.ptr;
|
||||
kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr;
|
||||
|
||||
@@ -343,13 +347,12 @@ void SimpleConvolution::Init() {
|
||||
kernel_args->arg51 = mask_height_;
|
||||
|
||||
// Calculate the reference output
|
||||
memset(refout_des.ptr, 0, refout_des.size);
|
||||
ReferenceImplementation(reinterpret_cast<uint32_t*>(refout_des.ptr), input, mask, width_, height_,
|
||||
ReferenceImplementation(reinterpret_cast<uint32_t*>(GetRefOut()), input, mask, width_, height_,
|
||||
mask_width_, mask_height_);
|
||||
}
|
||||
|
||||
void SimpleConvolution::PrintOutput() const {
|
||||
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<uint32_t*>(GetOutputPtr()), width_, 1);
|
||||
void SimpleConvolution::PrintOutput(const void* ptr) const {
|
||||
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<const uint32_t*>(ptr), width_, 1);
|
||||
}
|
||||
|
||||
bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input,
|
||||
|
||||
@@ -36,6 +36,9 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
// Class implements SimpleConvolution kernel parameters
|
||||
class SimpleConvolution : public TestKernel {
|
||||
public:
|
||||
// Kernel buffers IDs
|
||||
enum { INPUT_BUF_ID, LOCAL_BUF_ID, MASK_BUF_ID, KERNARG_BUF_ID, REFOUT_BUF_ID };
|
||||
|
||||
// Constructor
|
||||
SimpleConvolution();
|
||||
|
||||
@@ -46,7 +49,7 @@ class SimpleConvolution : public TestKernel {
|
||||
uint32_t GetGridSize() const { return width_ * height_; }
|
||||
|
||||
// Print output
|
||||
void PrintOutput() const;
|
||||
void PrintOutput(const void* ptr) const;
|
||||
|
||||
// Return name
|
||||
std::string Name() const { return std::string("SimpleConvolution"); }
|
||||
|
||||
@@ -217,25 +217,17 @@ const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(gpu_list_.size());
|
||||
@@ -250,13 +242,9 @@ bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
|
||||
}
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(cpu_list_.size());
|
||||
@@ -271,15 +259,10 @@ bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
|
||||
hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
@@ -289,13 +272,9 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
|
||||
}
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
|
||||
hsa_status_t status;
|
||||
status = hsa_signal_create(value, 0, NULL, signal);
|
||||
@@ -305,13 +284,9 @@ bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status;
|
||||
uint8_t* buffer = NULL;
|
||||
@@ -332,14 +307,10 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
//
|
||||
// Allocate host memory.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
@@ -350,33 +321,40 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
|
||||
return AllocateSysMemory(agent_info, size);
|
||||
}
|
||||
|
||||
// Memcopy method
|
||||
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
|
||||
bool HsaRsrcFactory::Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length) {
|
||||
(void)agent;
|
||||
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
CHECK_STATUS("hsa_memory_copy", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dest_buff, const void* src_buff, uint32_t length) {
|
||||
(void)agent_info;
|
||||
return Memcpy(agent_info->dev_id, dest_buff, src_buff, length);
|
||||
}
|
||||
|
||||
// Free method
|
||||
bool HsaRsrcFactory::MemoryFree(void* ptr) {
|
||||
bool HsaRsrcFactory::FreeMemory(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
|
||||
const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
@@ -137,101 +137,73 @@ class HsaRsrcFactory {
|
||||
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfGpuAgents();
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfCpuAgents();
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
//
|
||||
// Allocate system memory.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Memcopy method
|
||||
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
|
||||
static bool Memcpy(const AgentInfo* agent_info, void* dest_buff, const void* src_buff, uint32_t length);
|
||||
static bool Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length);
|
||||
|
||||
// Free method
|
||||
static bool MemoryFree(void* ptr);
|
||||
static bool FreeMemory(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return true if successful, false otherwise
|
||||
//
|
||||
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
|
||||
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user