memory allocation refactoring

Change-Id: Ic63b4f5ea44f2dc5e009e3e58652a661e957b7d6


[ROCm/rocprofiler commit: c9c0ecc976]
This commit is contained in:
Evgeny
2018-04-27 20:00:20 -05:00
rodzic 68cfb95f04
commit e761eda5fa
14 zmienionych plików z 194 dodań i 175 usunięć
+1 -1
Wyświetl plik
@@ -166,9 +166,9 @@ typedef void rocprofiler_t;
// Profiling group object
typedef struct {
unsigned index; // group index
rocprofiler_t* context; // context object
rocprofiler_feature_t** features; // profiling info array
uint32_t feature_count; // profiling info count
rocprofiler_t* context; // context object
} rocprofiler_group_t;
// Profiling mode mask
+16 -14
Wyświetl plik
@@ -121,9 +121,6 @@ class Group {
Context* GetContext() { return context_; }
uint32_t GetIndex() const { return index_; }
rocprofiler_group_t GetGroup() {
return rocprofiler_group_t{index_, &info_vector_[0], (uint32_t)info_vector_.size(), context_};
}
void ResetRefs() { refs_ = n_profiles_; }
uint32_t DecrRefs() {
return (refs_ > 0) ? --refs_ : 0;
@@ -279,15 +276,18 @@ class Context {
uint32_t GetGroupCount() const { return set_.size(); }
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
rocprofiler::info_vector_t& info_vector = set_[index].GetInfoVector();
rocprofiler_group_t GetGroupInfo(Group* g) {
rocprofiler::info_vector_t& info_vector = g->GetInfoVector();
rocprofiler_group_t group = {};
group.feature_count = info_vector.size();
group.features = &info_vector[0];
group.index = g->GetIndex();
group.context = reinterpret_cast<rocprofiler_t*>(this);
group.index = index;
group.features = &info_vector[0];
group.feature_count = info_vector.size();
return group;
}
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
return GetGroupInfo(&set_[index]);
}
const pkt_vector_t& StartPackets(const uint32_t& group_index) const {
return set_[group_index].GetStartVector();
@@ -391,11 +391,12 @@ class Context {
static bool Handler(hsa_signal_value_t value, void* arg) {
Group* group = reinterpret_cast<Group*>(arg);
group->GetContext()->mutex_.lock();
Context* context = group->GetContext();
context->mutex_.lock();
uint32_t r = group->DecrRefs();
group->GetContext()->mutex_.unlock();
context->mutex_.unlock();
if (r == 0) {
return group->GetContext()->handler_(group->GetGroup(), group->GetContext()->handler_arg_);
return context->handler_(context->GetGroupInfo(group), context->handler_arg_);
}
return false;
}
@@ -427,8 +428,9 @@ class Context {
if (rinfo->data.result_bytes.copy) {
if (sample_id == 0) {
const uint32_t output_buffer_size = profile->output_buffer.size;
const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t);
void* ptr = calloc(output_buffer_size64, sizeof(uint64_t));
util::HsaRsrcFactory* hsa_rsrc = &util::HsaRsrcFactory::Instance();
const util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(profile->agent);
void* ptr = hsa_rsrc->AllocateSysMemory(agent_info, output_buffer_size);
rinfo->data.result_bytes.size = output_buffer_size;
rinfo->data.result_bytes.ptr = ptr;
callback_data->ptr = reinterpret_cast<char*>(ptr);
@@ -446,7 +448,7 @@ class Context {
else EXC_RAISING(HSA_STATUS_ERROR, "SQTT data out of output buffer");
}
const bool suc = util::HsaRsrcFactory::CopyToHost(dest, src, size);
bool suc = util::HsaRsrcFactory::Memcpy(profile->agent, dest, src, size);
if (suc) {
*header = size;
callback_data->ptr = dest + align_size(size, sizeof(uint32_t));
+2 -2
Wyświetl plik
@@ -88,8 +88,8 @@ class Profile {
}
virtual ~Profile() {
info_vector_.clear();
if (profile_.command_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.command_buffer.ptr);
if (profile_.output_buffer.ptr) util::HsaRsrcFactory::MemoryFree(profile_.output_buffer.ptr);
if (profile_.command_buffer.ptr) util::HsaRsrcFactory::FreeMemory(profile_.command_buffer.ptr);
if (profile_.output_buffer.ptr) util::HsaRsrcFactory::FreeMemory(profile_.output_buffer.ptr);
if (profile_.events) free(const_cast<event_t*>(profile_.events));
if (profile_.parameters) free(const_cast<parameter_t*>(profile_.parameters));
if (completion_signal_.handle) {
@@ -359,9 +359,13 @@ bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t
CHECK_STATUS("hsa_memory_copy", status);
return (status == HSA_STATUS_SUCCESS);
}
bool HsaRsrcFactory::Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length) {
(void)agent;
return CopyToHost(dest_buff, src_buff, length);
}
// Free method
bool HsaRsrcFactory::MemoryFree(void* ptr) {
bool HsaRsrcFactory::FreeMemory(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
return (status == HSA_STATUS_SUCCESS);
@@ -217,9 +217,10 @@ class HsaRsrcFactory {
// Memcopy method
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
static bool Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length);
// Free method
static bool MemoryFree(void* ptr);
static bool FreeMemory(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
@@ -57,22 +57,23 @@ template <class Kernel, class Test> bool RunKernel(int argc, char* argv[], int c
return false;
}
// Run test kernel
// Kernel dspatch iterations
for (int i = 0; i < count; ++i) {
// Run test kernel
ret_val = test_aql->Run();
if (ret_val == false) {
std::cerr << "Error in running the test kernel" << std::endl;
TEST_ASSERT(ret_val);
return false;
}
}
// Verify the results of the execution
ret_val = test_aql->VerifyResults();
if (ret_val) {
std::clog << "Test : Passed" << std::endl;
} else {
std::clog << "Test : Failed" << std::endl;
// Verify the results of the execution
ret_val = test_aql->VerifyResults();
if (ret_val) {
std::clog << "Test : Passed" << std::endl;
} else {
std::clog << "Test : Failed" << std::endl;
}
}
// Print time taken by sample
@@ -115,11 +115,26 @@ bool TestHsa::Setup() {
mem_map_t& mem_map = test_->GetMemMap();
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
mem_descr_t& des = it->second;
void* ptr = (des.local) ? hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size)
: hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
des.ptr = ptr;
TEST_ASSERT(ptr != NULL);
if (ptr == NULL) return false;
switch (des.id) {
case TestKernel::LOCAL_DES_ID:
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
break;
case TestKernel::KERNARG_DES_ID:
des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, des.size);
if (des.ptr) memset(des.ptr, 0, des.size);
break;
case TestKernel::SYS_DES_ID:
des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
if (des.ptr) memset(des.ptr, 0, des.size);
break;
case TestKernel::NULL_DES_ID:
des.ptr = NULL;
break;
default:
break;
};
TEST_ASSERT(des.ptr != NULL);
if (des.ptr == NULL) return false;
}
test_->Init();
@@ -208,24 +223,41 @@ bool TestHsa::Run() {
hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
HSA_WAIT_STATE_BLOCKED);
std::clog << "> DONE, que_idx=" << que_idx << std::endl;
// Stop the timer object
hsa_timer_.StopTimer(dispatch_timer_idx_);
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
total_time_taken_ += dispatch_time_taken_;
// Copy kernel buffers from local memory into system memory
const bool suc = hsa_rsrc_->CopyToHost(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize());
if (suc) test_->PrintOutput();
return suc;
return true;
}
bool TestHsa::VerifyResults() {
// Compare the results and see if they match
const void* const refout_ptr = test_->GetRefoutPtr();
const int32_t cmp_val =
(refout_ptr != NULL) ? memcmp(test_->GetOutputPtr(), refout_ptr, test_->GetOutputSize()) : 0;
return (cmp_val == 0);
bool cmp = false;
void* output = NULL;
const uint32_t size = test_->GetOutputSize();
bool suc = false;
// Copy local kernel output buffers from local memory into host memory
if (test_->IsOutputLocal()) {
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size);
} else {
output = test_->GetOutputPtr();;
suc = true;
}
if ((output != NULL) && suc) {
// Print the test output
test_->PrintOutput(output);
// Compare the results and see if they match
cmp = (memcmp(output, test_->GetRefOut(), size) == 0);
}
if (test_->IsOutputLocal() && (output != NULL)) hsa_rsrc_->FreeMemory(output);
return cmp;
}
void TestHsa::PrintTime() {
@@ -28,22 +28,23 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef TEST_CTRL_TEST_KERNEL_H_
#define TEST_CTRL_TEST_KERNEL_H_
#include <string.h>
#include <stdint.h>
#include <map>
#include "util/hsa_rsrc_factory.h"
// Class implements kernel test
class TestKernel {
public:
// Exported buffers IDs
enum buf_id_t { KERNARG_EXP_ID, OUTPUT_EXP_ID, REFOUT_EXP_ID };
// Memory descriptors IDs
enum { INPUT_DES_ID, OUTPUT_DES_ID, LOCAL_DES_ID, MASK_DES_ID, KERNARG_DES_ID, REFOUT_DES_ID };
enum des_id_t { NULL_DES_ID, LOCAL_DES_ID, KERNARG_DES_ID, SYS_DES_ID, REFOUT_DES_ID };
// Memory descriptors vector declaration
struct mem_descr_t {
des_id_t id;
void* ptr;
uint32_t size;
bool local;
};
// Memory map declaration
@@ -51,12 +52,7 @@ class TestKernel {
typedef mem_map_t::iterator mem_it_t;
typedef mem_map_t::const_iterator mem_const_it_t;
virtual ~TestKernel() {
for (auto& entry : mem_map_) {
void* ptr = entry.second.ptr;
if (ptr != NULL) HsaRsrcFactory::MemoryFree(ptr);
}
}
virtual ~TestKernel() {}
// Initialize method
virtual void Init() = 0;
@@ -65,50 +61,79 @@ class TestKernel {
mem_map_t& GetMemMap() { return mem_map_; }
// Return NULL descriptor
static mem_descr_t NullDescriptor() { return {NULL, 0, 0}; }
static mem_descr_t NullDescriptor() { return {NULL_DES_ID, NULL, 0}; }
// Check if decripter is local
bool IsLocal(const mem_descr_t& descr) const { return (descr.id == LOCAL_DES_ID); }
// Methods to get the kernel attributes
void* GetKernargPtr() const { return GetDescr(KERNARG_DES_ID).ptr; }
uint32_t GetKernargSize() const { return GetDescr(KERNARG_DES_ID).size; }
void* GetOutputPtr() const { return GetDescr(OUTPUT_DES_ID).ptr; }
uint32_t GetOutputSize() const { return GetDescr(OUTPUT_DES_ID).size; }
void* GetLocalPtr() const { return GetDescr(LOCAL_DES_ID).ptr; }
void* GetRefoutPtr() const { return GetDescr(REFOUT_DES_ID).ptr; }
const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; };
const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; };
void* GetKernargPtr() { return GetKernargDescr().ptr; }
uint32_t GetKernargSize() { return GetKernargDescr().size; }
void* GetOutputPtr() { return GetOutputDescr().ptr; }
uint32_t GetOutputSize() { return GetOutputDescr().size; }
bool IsOutputLocal() { return IsLocal(GetOutputDescr()); }
virtual uint32_t GetGridSize() const = 0;
// Return reference output
void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; };
// Print output
virtual void PrintOutput() const = 0;
virtual void PrintOutput(const void* ptr) const = 0;
// Return name
virtual std::string Name() const = 0;
protected:
// Set system memory descriptor
bool SetSysDescr(const uint32_t& id, const uint32_t& size) {
return SetMemDescr(id, size, false);
// Set buffer descriptor
bool SetInDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
bool suc = SetMemDescr(buf_id, des_id, size);
if (des_id == KERNARG_DES_ID) {
test_map_[KERNARG_EXP_ID] = &mem_map_[buf_id];
}
return suc;
}
// Set local memory descriptor
bool SetLocalDescr(const uint32_t& id, const uint32_t& size) {
return SetMemDescr(id, size, true);
// Set results descriptor
bool SetOutDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
bool suc = SetMemDescr(buf_id, des_id, size);
test_map_[OUTPUT_EXP_ID] = &mem_map_[buf_id];
return suc;
}
// Set host descriptor
bool SetHostDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
bool suc = SetMemDescr(buf_id, des_id, size);
if (suc) {
mem_descr_t& descr = mem_map_[buf_id];
descr.ptr = malloc(size);
if (des_id == REFOUT_DES_ID) {
test_map_[REFOUT_EXP_ID] = &descr;
}
if (descr.ptr == NULL) suc = false;
}
return suc;
}
// Get memory descriptor
mem_descr_t GetDescr(const uint32_t& id) const {
mem_const_it_t it = mem_map_.find(id);
mem_descr_t GetDescr(const uint32_t& buf_id) const {
mem_const_it_t it = mem_map_.find(buf_id);
return (it != mem_map_.end()) ? it->second : NullDescriptor();
}
private:
// Set memory descriptor
bool SetMemDescr(const uint32_t& id, const uint32_t& size, const bool& local) {
const mem_descr_t des = {NULL, size, local};
auto ret = mem_map_.insert(mem_map_t::value_type(id, des));
bool SetMemDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
const mem_descr_t des = {des_id, NULL, size};
auto ret = mem_map_.insert(mem_map_t::value_type(buf_id, des));
return ret.second;
}
// Kernel memory map object
mem_map_t mem_map_;
// Test memory map object
std::map<uint32_t, mem_descr_t*> test_map_;
};
#endif // TEST_CTRL_TEST_KERNEL_H_
+7 -9
Wyświetl plik
@@ -266,16 +266,14 @@ hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
const uint32_t data_size = info_data->sqtt_data.size;
const void* data_ptr = info_data->sqtt_data.ptr;
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, data_size);
#if 1
dump_sqtt_trace(arg->label, info_data->sample_id, data_ptr, data_size);
#else
void* buffer = malloc(data_size);
memset(buffer, 0, data_size);
const bool suc = HsaRsrcFactory::Instance().CopyToHost(arg->agent, buffer, data_ptr, data_size);
HsaRsrcFactory* hsa_rsrc = &HsaRsrcFactory::Instance();
const AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(arg->agent);
void* buffer = hsa_rsrc->AllocateSysMemory(agent_info, data_size);
const bool suc = HsaRsrcFactory::Memcpy(arg->agent, buffer, data_ptr, data_size);
if (suc) dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size);
else fatal("SQTT data memcopy to host failed");
free(buffer);
#endif
HsaRsrcFactory::FreeMemory(buffer);
} else
status = HSA_STATUS_ERROR;
return status;
@@ -320,7 +318,7 @@ void output_results(const context_entry_t* entry, const char* label) {
size += chunk_size;
}
fprintf(file, "size(%lu)\n", size);
free(p->data.result_bytes.ptr);
HsaRsrcFactory::FreeMemory(p->data.result_bytes.ptr);
const_cast<rocprofiler_feature_t*>(p)->data.result_bytes.size = 0;
} else {
fprintf(file, "(\n");
@@ -67,7 +67,7 @@ __kernel void SimpleConvolution(__global uint * output,
float sumFX = 0;
for(uint i = left; i <= right; ++i) {
for(uint j = top ; j <= bottom; ++j) {
for(uint j = top; j <= bottom; ++j) {
// performing wighted sum within the mask boundaries
uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep));
uint index = j * width + i;
@@ -27,6 +27,7 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
#include "simple_convolution/simple_convolution.h"
#include <assert.h>
#include <string.h>
#include <iostream>
@@ -282,12 +283,11 @@ SimpleConvolution::SimpleConvolution() {
const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t);
const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float);
SetSysDescr(KERNARG_DES_ID, sizeof(kernel_args_t));
SetSysDescr(INPUT_DES_ID, input_size_bytes);
SetSysDescr(OUTPUT_DES_ID, input_size_bytes);
SetLocalDescr(LOCAL_DES_ID, input_size_bytes);
SetSysDescr(MASK_DES_ID, mask_size_bytes);
SetSysDescr(REFOUT_DES_ID, input_size_bytes);
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, sizeof(kernel_args_t));
SetInDescr(INPUT_BUF_ID, SYS_DES_ID, input_size_bytes);
SetInDescr(MASK_BUF_ID, SYS_DES_ID, mask_size_bytes);
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, input_size_bytes);
SetHostDescr(REFOUT_BUF_ID, REFOUT_DES_ID, input_size_bytes);
if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes);
}
@@ -295,14 +295,18 @@ SimpleConvolution::SimpleConvolution() {
void SimpleConvolution::Init() {
std::clog << "SimpleConvolution::init :" << std::endl;
mem_descr_t input_des = GetDescr(INPUT_DES_ID);
mem_descr_t local_des = GetDescr(LOCAL_DES_ID);
mem_descr_t mask_des = GetDescr(MASK_DES_ID);
mem_descr_t refout_des = GetDescr(REFOUT_DES_ID);
mem_descr_t kernarg_des = GetDescr(KERNARG_DES_ID);
mem_descr_t kernarg_des = GetDescr(KERNARG_BUF_ID);
mem_descr_t input_des = GetDescr(INPUT_BUF_ID);
mem_descr_t mask_des = GetDescr(MASK_BUF_ID);
mem_descr_t output_des = GetDescr(LOCAL_BUF_ID);
#if 0
printf("kernarg_des %p 0x%x\n", kernarg_des.ptr, kernarg_des.size);
printf("input_des %p 0x%x\n", input_des.ptr, input_des.size);
printf("mask_des %p 0x%x\n", mask_des.ptr, mask_des.size);
printf("output_des %p 0x%x\n", output_des.ptr, output_des.size);
#endif
uint32_t* input = (uint32_t*)input_des.ptr;
uint32_t* output_local = (uint32_t*)local_des.ptr;
uint32_t* output_local = (uint32_t*)output_des.ptr;
float* mask = (float*)mask_des.ptr;
kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr;
@@ -343,13 +347,12 @@ void SimpleConvolution::Init() {
kernel_args->arg51 = mask_height_;
// Calculate the reference output
memset(refout_des.ptr, 0, refout_des.size);
ReferenceImplementation(reinterpret_cast<uint32_t*>(refout_des.ptr), input, mask, width_, height_,
ReferenceImplementation(reinterpret_cast<uint32_t*>(GetRefOut()), input, mask, width_, height_,
mask_width_, mask_height_);
}
void SimpleConvolution::PrintOutput() const {
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<uint32_t*>(GetOutputPtr()), width_, 1);
void SimpleConvolution::PrintOutput(const void* ptr) const {
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<const uint32_t*>(ptr), width_, 1);
}
bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input,
@@ -36,6 +36,9 @@ OF THE POSSIBILITY OF SUCH DAMAGE.
// Class implements SimpleConvolution kernel parameters
class SimpleConvolution : public TestKernel {
public:
// Kernel buffers IDs
enum { INPUT_BUF_ID, LOCAL_BUF_ID, MASK_BUF_ID, KERNARG_BUF_ID, REFOUT_BUF_ID };
// Constructor
SimpleConvolution();
@@ -46,7 +49,7 @@ class SimpleConvolution : public TestKernel {
uint32_t GetGridSize() const { return width_ * height_; }
// Print output
void PrintOutput() const;
void PrintOutput(const void* ptr) const;
// Return name
std::string Name() const { return std::string("SimpleConvolution"); }
@@ -217,25 +217,17 @@ const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
}
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(gpu_list_.size());
@@ -250,13 +242,9 @@ bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
}
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(cpu_list_.size());
@@ -271,15 +259,10 @@ bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
hsa_queue_t** queue) {
hsa_status_t status;
@@ -289,13 +272,9 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
}
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
@@ -305,13 +284,9 @@ bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
uint8_t* buffer = NULL;
@@ -332,14 +307,10 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Allocate memory tp pass kernel parameters.
//
// Allocate host memory.
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
@@ -350,33 +321,40 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Allocate memory tp pass kernel parameters.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
return AllocateSysMemory(agent_info, size);
}
// Memcopy method
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
bool HsaRsrcFactory::Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length) {
(void)agent;
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
CHECK_STATUS("hsa_memory_copy", status);
return (status == HSA_STATUS_SUCCESS);
}
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dest_buff, const void* src_buff, uint32_t length) {
(void)agent_info;
return Memcpy(agent_info->dev_id, dest_buff, src_buff, length);
}
// Free method
bool HsaRsrcFactory::MemoryFree(void* ptr) {
bool HsaRsrcFactory::FreeMemory(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
return (status == HSA_STATUS_SUCCESS);
}
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) {
hsa_status_t status = HSA_STATUS_ERROR;
@@ -137,101 +137,73 @@ class HsaRsrcFactory {
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t GetCountOfGpuAgents();
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t GetCountOfCpuAgents();
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory tp pass kernel parameters.
//
// Allocate system memory.
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory tp pass kernel parameters.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
// Memcopy method
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
static bool Memcpy(const AgentInfo* agent_info, void* dest_buff, const void* src_buff, uint32_t length);
static bool Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length);
// Free method
static bool MemoryFree(void* ptr);
static bool FreeMemory(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return true if successful, false otherwise
//
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);