Change-Id: Ie4a150ad0dc141226f6f1c571916c5a526dd723c
Этот коммит содержится в:
Evgeny
2018-04-29 03:24:46 -05:00
родитель c9c0ecc976
Коммит 9dec361cd4
12 изменённых файлов: 393 добавлений и 237 удалений
+1
Просмотреть файл
@@ -85,6 +85,7 @@ uint32_t rocprofiler_version_minor();
// Tool-configurable profiler settings. The loader fills this structure with
// the current values, hands it to the tool, and then applies whatever the
// tool left in it.
typedef struct {
// Non-zero enables intercept mode
uint32_t intercept_mode;
// Size of the SQTT output buffer
uint32_t sqtt_size;
// Non-zero: the SQTT output buffer is allocated with AllocateLocalMemory();
// zero: it is allocated with AllocateSysMemory()
uint32_t sqtt_local;
// Timeout applied to both Context and InterceptQueue
// NOTE(review): units are not visible here — confirm
uint64_t timeout;
// Non-zero turns the InterceptQueue tracker on
uint32_t timestamp_on;
} rocprofiler_settings_t;
+13 -4
Просмотреть файл
@@ -426,11 +426,14 @@ class Context {
rinfo->data.kind = ROCPROFILER_DATA_KIND_INT64;
} else if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
if (rinfo->data.result_bytes.copy) {
const bool sqtt_local = SqttProfile::IsLocal();
util::HsaRsrcFactory* hsa_rsrc = &util::HsaRsrcFactory::Instance();
if (sample_id == 0) {
const uint32_t output_buffer_size = profile->output_buffer.size;
util::HsaRsrcFactory* hsa_rsrc = &util::HsaRsrcFactory::Instance();
const uint32_t output_buffer_size64 = profile->output_buffer.size / sizeof(uint64_t);
const util::AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(profile->agent);
void* ptr = hsa_rsrc->AllocateSysMemory(agent_info, output_buffer_size);
void* ptr = (sqtt_local) ? hsa_rsrc->AllocateSysMemory(agent_info, output_buffer_size) :
calloc(output_buffer_size64, sizeof(uint64_t));
rinfo->data.result_bytes.size = output_buffer_size;
rinfo->data.result_bytes.ptr = ptr;
callback_data->ptr = reinterpret_cast<char*>(ptr);
@@ -448,13 +451,19 @@ class Context {
else EXC_RAISING(HSA_STATUS_ERROR, "SQTT data out of output buffer");
}
bool suc = util::HsaRsrcFactory::Memcpy(profile->agent, dest, src, size);
bool suc = true;
if (sqtt_local) {
suc = hsa_rsrc->Memcpy(profile->agent, dest, src, size);
} else {
memcpy(dest, src, size);
}
if (suc) {
*header = size;
callback_data->ptr = dest + align_size(size, sizeof(uint32_t));
rinfo->data.result_bytes.instance_count = sample_id + 1;
rinfo->data.kind = ROCPROFILER_DATA_KIND_BYTES;
}
} else
EXC_RAISING(HSA_STATUS_ERROR, "Agent Memcpy failed, dst(" << (void*)dest << ") src(" << (void*)src << ") size(" << size << ")");
} else {
if (sample_id == 0) {
rinfo->data.result_bytes.ptr = profile->output_buffer.ptr;
+8 -4
Просмотреть файл
@@ -202,7 +202,7 @@ class PmcProfile : public Profile {
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
profile_.command_buffer.ptr =
rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
profile_.output_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size);
return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS
: HSA_STATUS_ERROR;
@@ -213,6 +213,8 @@ class SqttProfile : public Profile {
public:
static inline void SetSize(const uint32_t& size) { output_buffer_size_ = size; }
static inline uint32_t GetSize() { return output_buffer_size_; }
static inline void SetLocal(const bool& b) { output_buffer_local_ = b; }
static inline bool IsLocal() { return output_buffer_local_; }
SqttProfile(const util::AgentInfo* agent_info) : Profile(agent_info) {
profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT;
@@ -227,16 +229,18 @@ class SqttProfile : public Profile {
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
profile_.command_buffer.ptr =
rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
profile_.output_buffer.size = output_buffer_size_;
profile_.output_buffer.ptr =
rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size);
profile_.output_buffer.ptr = (output_buffer_local_) ?
rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size) :
rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size);
return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS
: HSA_STATUS_ERROR;
}
private:
static uint32_t output_buffer_size_;
static bool output_buffer_local_;
};
} // namespace rocprofiler
+3
Просмотреть файл
@@ -127,6 +127,7 @@ bool LoadTool() {
rocprofiler_settings_t settings{};
settings.intercept_mode = (intercept_mode) ? 1 : 0;
settings.sqtt_size = SqttProfile::GetSize();
settings.sqtt_local = SqttProfile::IsLocal() ? 1: 0;
settings.timeout = Context::GetTimeout();
settings.timestamp_on = InterceptQueue::IsTrackerOn() ? 1 : 0;
@@ -135,6 +136,7 @@ bool LoadTool() {
intercept_mode = (settings.intercept_mode != 0);
SqttProfile::SetSize(settings.sqtt_size);
SqttProfile::SetLocal(settings.sqtt_local != 0);
Context::SetTimeout(settings.timeout);
InterceptQueue::SetTimeout(settings.timeout);
InterceptQueue::TrackerOn(settings.timestamp_on != 0);
@@ -186,6 +188,7 @@ const MetricsDict* GetMetrics(const hsa_agent_t& agent) {
rocprofiler_properties_t rocprofiler_properties;
uint64_t Context::timeout_ = UINT64_MAX;
uint32_t SqttProfile::output_buffer_size_ = 0x2000000; // 32M
bool SqttProfile::output_buffer_local_ = true;
Tracker::mutex_t Tracker::mutex_;
util::Logger::mutex_t util::Logger::mutex_;
util::Logger* util::Logger::instance_ = NULL;
+118 -72
Просмотреть файл
@@ -59,27 +59,59 @@ hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data)
return status;
}
// Callback function to find and bind kernarg region of an agent
hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) {
hsa_region_global_flag_t flags;
hsa_region_segment_t segment_id;
// This function checks to see if the provided
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
// the function adds an additional requirement that the pool have the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
// pools must NOT have this property.
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
// no pool was found meeting the requirements. If an error is encountered, we
// return that error.
static hsa_status_t
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
hsa_status_t err;
hsa_amd_segment_t segment;
uint32_t flag;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
if (nullptr == data) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
&segment);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
return HSA_STATUS_SUCCESS;
}
AgentInfo* agent_info = (AgentInfo*)data;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
agent_info->coarse_region = region;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
if ((karg_st == 0 && kern_arg) ||
(karg_st != 0 && !kern_arg)) {
return HSA_STATUS_SUCCESS;
}
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
agent_info->kernarg_region = region;
}
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
return HSA_STATUS_INFO_BREAK;
}
return HSA_STATUS_SUCCESS;
// Memory-pool iteration callback for hsa_amd_agent_iterate_memory_pools().
// Selects a pool in the HSA_AMD_SEGMENT_GLOBAL segment that does NOT carry the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag by delegating to
// FindGlobalPool() with kern_arg = false.
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool kern_arg = false;
  const hsa_status_t result = FindGlobalPool(pool, data, kern_arg);
  return result;
}
// Memory-pool iteration callback for hsa_amd_agent_iterate_memory_pools().
// Selects a pool in the HSA_AMD_SEGMENT_GLOBAL segment that DOES carry the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag by delegating to
// FindGlobalPool() with kern_arg = true.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool kern_arg = true;
  const hsa_status_t result = FindGlobalPool(pool, data, kern_arg);
  return result;
}
// Constructor of the class
@@ -172,7 +204,15 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
agent_info->dev_index = cpu_list_.size();
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(cpu pool)", status);
status = hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(kern arg pool)", status);
agent_info->gpu_pool = {};
cpu_list_.push_back(agent_info);
cpu_agents_.push_back(agent);
}
if (type == HSA_DEVICE_TYPE_GPU) {
@@ -192,16 +232,15 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se);
// Initialize memory regions to zero
agent_info->kernarg_region.handle = 0;
agent_info->coarse_region.handle = 0;
// Find and Bind Memory regions of the Gpu agent
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
agent_info->cpu_pool = {};
agent_info->kern_arg_pool = {};
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
// Set GPU index
agent_info->dev_index = gpu_list_.size();
gpu_list_.push_back(agent_info);
gpu_agents_.push_back(agent);
}
if (agent_info) agent_map_[agent.handle] = agent_info;
@@ -292,13 +331,9 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
}
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
@@ -306,65 +341,83 @@ bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
}
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// agent's memory region.
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (agent_info->coarse_region.handle != 0) {
// Allocate in local memory if it is available
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
if (status == HSA_STATUS_SUCCESS) {
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
}
} else {
// Allocate in system memory if local memory is not available
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, (void**)&buffer);
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Allocate memory tp pass kernel parameters.
//
// Allocate memory to pass kernel parameters.
// Memory is allocated accessible for all CPU agents and for the GPU given by the AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
if (!cpu_agents_.empty()) {
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, (void**)&buffer);
// Both the CPU and GPU can access the kernel arguments
if (status == HSA_STATUS_SUCCESS) {
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Memcopy method
bool HsaRsrcFactory::CopyToHost(void* dest_buff, const void* src_buff, uint32_t length) {
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
CHECK_STATUS("hsa_memory_copy", status);
// Allocate system memory accessible by both CPU and GPU.
// The buffer is taken from the standard pool of the first registered CPU agent
// and the agent given by 'agent_info' is then granted access to it.
// @param agent_info Agent that is granted access to the allocation
// @param size Size of memory in terms of bytes; rounded up to a multiple of
//             MEM_PAGE_BYTES before allocating
// @return uint8_t* Pointer to buffer, null if no CPU agent is registered,
//                  the allocation fails or access cannot be granted.
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
  // Without a CPU agent there is no pool to allocate from
  if (cpu_agents_.empty()) return NULL;

  size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;

  uint8_t* buffer = NULL;
  hsa_status_t status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, (void**)&buffer);
  if (status != HSA_STATUS_SUCCESS) return NULL;

  // Both the CPU and GPU can access the memory
  hsa_agent_t ag_list[1] = {agent_info->dev_id};
  status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
  if (status != HSA_STATUS_SUCCESS) {
    // The caller only ever sees NULL on failure, so the buffer must be
    // released here or it would be leaked.
    hsa_amd_memory_pool_free(buffer);
    return NULL;
  }

  return buffer;
}
// Copy data from GPU to host memory
bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
if (!cpu_agents_.empty()) {
hsa_signal_t s = {};
status = hsa_signal_create(1, 0, NULL, &s);
if (status == HSA_STATUS_SUCCESS) {
status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
if (status == HSA_STATUS_SUCCESS) {
if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
status = HSA_STATUS_ERROR;
}
}
status = hsa_signal_destroy(s);
}
}
return (status == HSA_STATUS_SUCCESS);
}
bool HsaRsrcFactory::Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length) {
(void)agent;
return CopyToHost(dest_buff, src_buff, length);
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
return Memcpy(agent_info->dev_id, dst, src, size);
}
// Free method
// Memory free method
bool HsaRsrcFactory::FreeMemory(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
@@ -372,18 +425,12 @@ bool HsaRsrcFactory::FreeMemory(void* ptr) {
}
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) {
hsa_status_t status = HSA_STATUS_ERROR;
@@ -448,7 +495,6 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl;
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
std::clog << ">> CU number : " << agent_info->cu_num << std::endl;
std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl;
std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl;
+34 -53
Просмотреть файл
@@ -22,10 +22,11 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_
#define SRC_UTIL_HSA_RSRC_FACTORY_H_
#ifndef _HSA_RSRC_FACTORY_H_
#define _HSA_RSRC_FACTORY_H_
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <hsa_ext_finalize.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <hsa_ven_amd_loader.h>
@@ -52,10 +53,18 @@ POSSIBILITY OF SUCH DAMAGE.
exit(1); \
}
// Checks the status returned by an HSA iteration call. Anything other than
// HSA_STATUS_INFO_BREAK prints "<msg>: <status string>" and terminates the
// process with exit code 1.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (safe inside if/else), and 'status' is evaluated exactly once.
#define CHECK_ITER_STATUS(msg, status)                                         \
  do {                                                                         \
    const hsa_status_t iter_status_ = (status);                                \
    if (iter_status_ != HSA_STATUS_INFO_BREAK) {                               \
      const char* emsg = 0;                                                    \
      hsa_status_string(iter_status_, &emsg);                                  \
      printf("%s: %s\n", (msg), emsg ? emsg : "<unknown error>");              \
      exit(1);                                                                 \
    }                                                                          \
  } while (0)
namespace rocprofiler {
namespace util {
static const unsigned MEM_PAGE_BYTES = 0x1000;
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
static const size_t MEM_PAGE_BYTES = 0x1000;
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
// Encapsulates information about a Hsa Agent such as its
@@ -88,11 +97,10 @@ struct AgentInfo {
// Hsail profile supported by agent
hsa_profile_t profile;
// Memory region supporting kernel parameters
hsa_region_t coarse_region;
// Memory region supporting kernel arguments
hsa_region_t kernarg_region;
// CPU/GPU/kern-arg memory pools
hsa_amd_memory_pool_t cpu_pool;
hsa_amd_memory_pool_t gpu_pool;
hsa_amd_memory_pool_t kern_arg_pool;
// The number of compute unit available in the agent.
uint32_t cu_num;
@@ -139,102 +147,73 @@ class HsaRsrcFactory {
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t GetCountOfGpuAgents();
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t GetCountOfCpuAgents();
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// Allocate local GPU memory
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory tp pass kernel parameters.
//
// Allocate memory to pass kernel parameters
// Memory is allocated accessible for all CPU agents and for the GPU given by the AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
// Allocate system memory accessible from both CPU and GPU
// Memory is allocated accessible to all CPU agents and to the agent given by the AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Memcopy method
static bool CopyToHost(void* dest_buff, const void* src_buff, uint32_t length);
static bool Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length);
// Copy data from GPU to host memory
bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
// Free method
// Memory free method
static bool FreeMemory(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return true if successful, false otherwise
//
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
@@ -279,9 +258,11 @@ class HsaRsrcFactory {
// Used to maintain a list of Hsa Gpu Agent Info
std::vector<const AgentInfo*> gpu_list_;
std::vector<hsa_agent_t> gpu_agents_;
// Used to maintain a list of Hsa Cpu Agent Info
std::vector<const AgentInfo*> cpu_list_;
std::vector<hsa_agent_t> cpu_agents_;
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
@@ -296,4 +277,4 @@ class HsaRsrcFactory {
} // namespace util
} // namespace rocprofiler
#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_
#endif // _HSA_RSRC_FACTORY_H_
Исполняемый файл
+5
Просмотреть файл
@@ -0,0 +1,5 @@
#!/bin/sh -x
# Regenerate the test copies of the HSA resource factory sources: every line
# containing "namespace" is dropped from the originals under src/util and the
# result is written to test/util.
# All expansions are quoted so that a checkout path containing spaces works.
BDIR=$(dirname "$0")
for name in hsa_rsrc_factory.h hsa_rsrc_factory.cpp ; do
  grep -v namespace "$BDIR/src/util/$name" > "$BDIR/test/util/$name"
done
+10 -2
Просмотреть файл
@@ -220,8 +220,15 @@ bool TestHsa::Run() {
// Wait on the dispatch signal until the kernel is finished.
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
HSA_WAIT_STATE_BLOCKED);
if (hsa_signal_wait_scacquire(
hsa_signal_,
HSA_SIGNAL_CONDITION_LT,
1,
UINT64_MAX,
HSA_WAIT_STATE_BLOCKED) != 0)
{
TEST_ASSERT("signal_wait failed");
}
std::clog << "> DONE, que_idx=" << que_idx << std::endl;
@@ -243,6 +250,7 @@ bool TestHsa::VerifyResults() {
if (test_->IsOutputLocal()) {
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size);
if (!suc) std::clog << "> VerifyResults: Memcpy failed" << std::endl << std::flush;
} else {
output = test_->GetOutputPtr();;
suc = true;
+28 -14
Просмотреть файл
@@ -93,6 +93,8 @@ static uint32_t CTX_OUTSTANDING_MAX = 0;
static uint32_t CTX_OUTSTANDING_MON = 0;
// to truncate kernel names
uint32_t to_truncate_names = 0;
// local SQTT buffer
bool is_sqtt_local = true;
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
@@ -263,17 +265,23 @@ hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_status_t status = HSA_STATUS_SUCCESS;
trace_data_arg_t* arg = reinterpret_cast<trace_data_arg_t*>(data);
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
const uint32_t data_size = info_data->sqtt_data.size;
const void* data_ptr = info_data->sqtt_data.ptr;
const uint32_t data_size = info_data->sqtt_data.size;
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, data_size);
HsaRsrcFactory* hsa_rsrc = &HsaRsrcFactory::Instance();
const AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(arg->agent);
void* buffer = hsa_rsrc->AllocateSysMemory(agent_info, data_size);
const bool suc = HsaRsrcFactory::Memcpy(arg->agent, buffer, data_ptr, data_size);
if (suc) dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size);
else fatal("SQTT data memcopy to host failed");
HsaRsrcFactory::FreeMemory(buffer);
if (is_sqtt_local) {
HsaRsrcFactory* hsa_rsrc = &HsaRsrcFactory::Instance();
const AgentInfo* agent_info = hsa_rsrc->GetAgentInfo(arg->agent);
const uint32_t mem_size = data_size;
void* buffer = hsa_rsrc->AllocateSysMemory(agent_info, mem_size);
if(!hsa_rsrc->Memcpy(agent_info, buffer, data_ptr, mem_size)) {
fatal("SQTT data memcopy to host failed");
}
dump_sqtt_trace(arg->label, info_data->sample_id, buffer, data_size);
HsaRsrcFactory::FreeMemory(buffer);
} else {
dump_sqtt_trace(arg->label, info_data->sample_id, data_ptr, data_size);
}
} else
status = HSA_STATUS_ERROR;
return status;
@@ -719,6 +727,8 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
if (multiplier != 1) str = str.substr(0, str.length() - 1);
settings->sqtt_size = strtoull(str.c_str(), NULL, 0) * multiplier;
}
it = opts.find("sqtt-local");
if (it != opts.end()) { settings->sqtt_local = (it->second == "on"); }
}
}
// Enable verbose mode
@@ -734,6 +744,10 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
check_env_var("ROCP_DATA_TIMEOUT", settings->timeout);
// Set SQTT size
check_env_var("ROCP_SQTT_SIZE", settings->sqtt_size);
// Set SQTT local buffer
check_env_var("ROCP_SQTT_LOCAL", settings->sqtt_local);
is_sqtt_local = settings->sqtt_local;
// Printing out info
char* info_symb = getenv("ROCP_INFO");
@@ -837,12 +851,6 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
}
if (name == "") fatal("ROCProfiler: Bad trace properties, name is not specified");
printf(" %s (", name.c_str());
features[index] = {};
features[index].kind = ROCPROFILER_FEATURE_KIND_TRACE;
features[index].name = strdup(name.c_str());
features[index].data.result_bytes.copy = to_copy_data;
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
parameters_dict["TARGET_CU"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
@@ -857,6 +865,12 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings)
parameters_dict["SE_MASK"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK;
printf(" %s (", name.c_str());
features[index] = {};
features[index].kind = ROCPROFILER_FEATURE_KIND_TRACE;
features[index].name = strdup(name.c_str());
features[index].data.result_bytes.copy = to_copy_data;
for (auto* params : params_list) {
const unsigned parameter_count = params->opts.size();
rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count];
+1 -1
Просмотреть файл
@@ -14,7 +14,7 @@
></metric>
# SQTT trace with parameters
<trace name=SQTT copy="true">
<trace name="SQTT">
<parameters
MASK=0x0f00
TOKEN_MASK=0x144b
+137 -63
Просмотреть файл
@@ -47,6 +47,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define AQL_PROFILE_READ_API_ENABLE 0
#endif
// Callback function to get the agents available in the system
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
hsa_status_t status = HSA_STATUS_ERROR;
@@ -56,27 +57,59 @@ hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data)
return status;
}
// Callback function to find and bind kernarg region of an agent
hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) {
hsa_region_global_flag_t flags;
hsa_region_segment_t segment_id;
// This function checks to see if the provided
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
// the function adds an additional requirement that the pool have the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
// pools must NOT have this property.
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
// no pool was found meeting the requirements. If an error is encountered, we
// return that error.
static hsa_status_t
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
hsa_status_t err;
hsa_amd_segment_t segment;
uint32_t flag;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
if (nullptr == data) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
&segment);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
return HSA_STATUS_SUCCESS;
}
AgentInfo* agent_info = (AgentInfo*)data;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
agent_info->coarse_region = region;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
if ((karg_st == 0 && kern_arg) ||
(karg_st != 0 && !kern_arg)) {
return HSA_STATUS_SUCCESS;
}
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
agent_info->kernarg_region = region;
}
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
return HSA_STATUS_INFO_BREAK;
}
return HSA_STATUS_SUCCESS;
// Memory-pool iteration callback for hsa_amd_agent_iterate_memory_pools().
// Selects a pool in the HSA_AMD_SEGMENT_GLOBAL segment that does NOT carry the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag by delegating to
// FindGlobalPool() with kern_arg = false.
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool kern_arg = false;
  const hsa_status_t result = FindGlobalPool(pool, data, kern_arg);
  return result;
}
// Memory-pool iteration callback for hsa_amd_agent_iterate_memory_pools().
// Selects a pool in the HSA_AMD_SEGMENT_GLOBAL segment that DOES carry the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT flag by delegating to
// FindGlobalPool() with kern_arg = true.
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
  const bool kern_arg = true;
  const hsa_status_t result = FindGlobalPool(pool, data, kern_arg);
  return result;
}
// Constructor of the class
@@ -169,7 +202,15 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
agent_info->dev_index = cpu_list_.size();
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(cpu pool)", status);
status = hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(kern arg pool)", status);
agent_info->gpu_pool = {};
cpu_list_.push_back(agent_info);
cpu_agents_.push_back(agent);
}
if (type == HSA_DEVICE_TYPE_GPU) {
@@ -189,16 +230,15 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num);
hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se);
// Initialize memory regions to zero
agent_info->kernarg_region.handle = 0;
agent_info->coarse_region.handle = 0;
// Find and Bind Memory regions of the Gpu agent
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
agent_info->cpu_pool = {};
agent_info->kern_arg_pool = {};
status = hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
// Set GPU index
agent_info->dev_index = gpu_list_.size();
gpu_list_.push_back(agent_info);
gpu_agents_.push_back(agent);
}
if (agent_info) agent_map_[agent.handle] = agent_info;
@@ -217,17 +257,25 @@ const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
}
// Report how many Hsa Gpu Agents are registered in the Gpu agent list
//
// @return uint32_t Number of entries in the Gpu agent list
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() {
  const auto gpu_count = gpu_list_.size();
  return uint32_t(gpu_count);
}
// Report how many Hsa Cpu Agents are registered in the Cpu agent list
//
// @return uint32_t Number of entries in the Cpu agent list
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() {
  const auto cpu_count = cpu_list_.size();
  return uint32_t(cpu_count);
}
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(gpu_list_.size());
@@ -242,9 +290,13 @@ bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
}
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(cpu_list_.size());
@@ -259,10 +311,15 @@ bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info)
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
hsa_queue_t** queue) {
hsa_status_t status;
@@ -282,66 +339,83 @@ bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
}
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
// agent's memory region.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
// NOTE(review): 'status' is declared twice and statements follow a return below, so this
// span appears to show an older region-based body together with a newer pool-based one.
// Confirm which lines are live before changing anything here.
hsa_status_t status;
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
// Round the request up to a multiple of MEM_PAGE_BYTES
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (agent_info->coarse_region.handle != 0) {
// Allocate in local memory if it is available
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
if (status == HSA_STATUS_SUCCESS) {
// Assign the new buffer to the agent with read/write permission
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
}
} else {
// Allocate in system memory if local memory is not available
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
CHECK_STATUS("hsa_memory_allocate", status);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
// Pool-based path: allocate from the gpu_pool of the given agent
status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, (void**)&buffer);
// NULL is returned when the allocation did not succeed
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Allocate host memory from the kernarg region of the given agent.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes, rounded up to whole pages
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
  // Round the request up to a multiple of the page size
  const size_t aligned_size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
  uint8_t* mem = NULL;
  hsa_status_t status = hsa_memory_allocate(agent_info->kernarg_region, aligned_size, (void**)&mem);
  CHECK_STATUS("hsa_memory_allocate", status);
  if (status != HSA_STATUS_SUCCESS) {
    return NULL;
  }
  return mem;
}
// Allocate memory tp pass kernel parameters.
// Allocate memory to pass kernel parameters.
// Memory is allocated so that it is accessible to all CPU agents and to the GPU given by the AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
// NOTE(review): this unconditional return precedes the pool-based path below, which looks
// like an older and a newer version of the body shown together. Confirm which one is live.
return AllocateSysMemory(agent_info, size);
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
// Without a registered CPU agent there is no kern-arg pool to allocate from
if (!cpu_agents_.empty()) {
// Round the request up to a multiple of MEM_PAGE_BYTES
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
// Allocate from the kern-arg pool of the first CPU agent
status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, (void**)&buffer);
// Both the CPU and GPU can access the kernel arguments
if (status == HSA_STATUS_SUCCESS) {
// Grant access to the agent given by agent_info
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
// NULL is returned if either the allocation or the access grant did not succeed
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Memcopy method
bool HsaRsrcFactory::Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length) {
(void)agent;
const hsa_status_t status = hsa_memory_copy(dest_buff, src_buff, length);
CHECK_STATUS("hsa_memory_copy", status);
// Allocate system memory accessible by both CPU and GPU
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (!cpu_agents_.empty()) {
status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, (void**)&buffer);
// Both the CPU and GPU can access the memory
if (status == HSA_STATUS_SUCCESS) {
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Copy data from GPU to host memory
bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
if (!cpu_agents_.empty()) {
hsa_signal_t s = {};
status = hsa_signal_create(1, 0, NULL, &s);
if (status == HSA_STATUS_SUCCESS) {
status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
if (status == HSA_STATUS_SUCCESS) {
if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
status = HSA_STATUS_ERROR;
}
}
status = hsa_signal_destroy(s);
}
}
return (status == HSA_STATUS_SUCCESS);
}
// Convenience overload: forwards to the hsa_agent_t overload using agent_info->dev_id.
// NOTE(review): two signatures of this overload appear back to back here (uint32_t length
// and size_t size). Confirm which one is live.
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dest_buff, const void* src_buff, uint32_t length) {
(void)agent_info;
return Memcpy(agent_info->dev_id, dest_buff, src_buff, length);
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
return Memcpy(agent_info->dev_id, dst, src, size);
}
// Free method
// Memory free method
bool HsaRsrcFactory::FreeMemory(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
@@ -419,7 +493,6 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl;
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
std::clog << ">> CU number : " << agent_info->cu_num << std::endl;
std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl;
std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl;
@@ -458,3 +531,4 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) {
HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL;
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
+35 -24
Просмотреть файл
@@ -22,10 +22,11 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef TEST_UTIL_HSA_RSRC_FACTORY_H_
#define TEST_UTIL_HSA_RSRC_FACTORY_H_
#ifndef _HSA_RSRC_FACTORY_H_
#define _HSA_RSRC_FACTORY_H_
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <hsa_ext_finalize.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <hsa_ven_amd_loader.h>
@@ -52,8 +53,16 @@ POSSIBILITY OF SUCH DAMAGE.
exit(1); \
}
static const unsigned MEM_PAGE_BYTES = 0x1000;
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
// Checks the result of an iteration call. Any status other than
// HSA_STATUS_INFO_BREAK is treated as a failure: the message and the HSA status
// string are printed and the process exits with code 1.
// NOTE(review): presumably the iteration call-backs return HSA_STATUS_INFO_BREAK
// when they find a match. Confirm against FindGlobalPool.
// The macro expands to a bare 'if' statement, so avoid using it as the body of an
// unbraced if/else.
#define CHECK_ITER_STATUS(msg, status) \
if (status != HSA_STATUS_INFO_BREAK) { \
const char* emsg = 0; \
hsa_status_string(status, &emsg); \
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
exit(1); \
}
static const size_t MEM_PAGE_BYTES = 0x1000;
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
// Encapsulates information about a Hsa Agent such as its
@@ -86,11 +95,10 @@ struct AgentInfo {
// Hsail profile supported by agent
hsa_profile_t profile;
// Memory region supporting kernel parameters
hsa_region_t coarse_region;
// Memory region supporting kernel arguments
hsa_region_t kernarg_region;
// CPU/GPU/kern-arg memory pools
hsa_amd_memory_pool_t cpu_pool;
hsa_amd_memory_pool_t gpu_pool;
hsa_amd_memory_pool_t kern_arg_pool;
// The number of compute unit available in the agent.
uint32_t cu_num;
@@ -170,31 +178,31 @@ class HsaRsrcFactory {
// @return bool true if successful, false otherwise
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
// Allocate local GPU memory
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
// Allocate system memory.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory to pass kernel parameters.
// Allocate memory to pass kernel parameters
// Memory is allocated so that it is accessible to all CPU agents and to the GPU given by the AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
// Memcopy method
static bool Memcpy(const AgentInfo* agent_info, void* dest_buff, const void* src_buff, uint32_t length);
static bool Memcpy(hsa_agent_t agent, void* dest_buff, const void* src_buff, uint32_t length);
// Allocate system memory accessible from both CPU and GPU
// Memory is allocated from the first CPU agent's pool and access is granted to the agent given by the AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Free method
// Copy data from GPU to host memory
bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
// Memory free method
static bool FreeMemory(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
@@ -248,9 +256,11 @@ class HsaRsrcFactory {
// Used to maintain a list of Hsa Gpu Agent Info
std::vector<const AgentInfo*> gpu_list_;
std::vector<hsa_agent_t> gpu_agents_;
// Used to maintain a list of Hsa Cpu Agent Info
std::vector<const AgentInfo*> cpu_list_;
std::vector<hsa_agent_t> cpu_agents_;
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
@@ -262,4 +272,5 @@ class HsaRsrcFactory {
hsa_ven_amd_loader_1_00_pfn_t loader_api_;
};
#endif // TEST_UTIL_HSA_RSRC_FACTORY_H_
#endif // _HSA_RSRC_FACTORY_H_