Files
rocm-systems/test/util/hsa_rsrc_factory.cpp
T
2018-01-04 20:11:30 -06:00

416 строки
15 KiB
C++

/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include "util/hsa_rsrc_factory.h"
#include <dlfcn.h>
#include <hsa.h>
#include <hsa_ext_finalize.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Callback function to find and bind kernarg region of an agent
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data) {
hsa_region_global_flag_t flags;
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
return HSA_STATUS_SUCCESS;
}
AgentInfo* agent_info = (AgentInfo*)data;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
agent_info->coarse_region = region;
}
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
agent_info->kernarg_region = region;
}
return HSA_STATUS_SUCCESS;
}
// Callback function to get the number of agents
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
// Copy handle of agent and increment number of agents reported
HsaRsrcFactory* rsrcFactory = reinterpret_cast<HsaRsrcFactory*>(data);
// Determine if device is a Gpu agent
hsa_status_t status;
hsa_device_type_t type;
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
if (type == HSA_DEVICE_TYPE_DSP) {
return HSA_STATUS_SUCCESS;
}
if (type == HSA_DEVICE_TYPE_CPU) {
AgentInfo* agent_info = reinterpret_cast<AgentInfo*>(malloc(sizeof(AgentInfo)));
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
rsrcFactory->AddAgentInfo(agent_info, false);
return HSA_STATUS_SUCCESS;
}
// Device is a Gpu agent, build an instance of AgentInfo
AgentInfo* agent_info = reinterpret_cast<AgentInfo*>(malloc(sizeof(AgentInfo)));
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
agent_info->max_wave_size = 0;
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
agent_info->max_queue_size = 0;
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
agent_info->profile = hsa_profile_t(108);
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
// Initialize memory regions to zero
agent_info->kernarg_region.handle = 0;
agent_info->coarse_region.handle = 0;
// Find and Bind Memory regions of the Gpu agent
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
// Save the instance of AgentInfo
rsrcFactory->AddAgentInfo(agent_info, true);
return HSA_STATUS_SUCCESS;
}
// Constructor of the class
HsaRsrcFactory::HsaRsrcFactory() {
// Initialize the Hsa Runtime
hsa_status_t status = hsa_init();
CHECK_STATUS("Error in hsa_init", status);
// Discover the set of Gpu devices available on the platform
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
// Get AqlProfile API table
aqlprofile_api_ = {0};
#ifdef ROCP_LD_AQLPROFILE
status = LoadAqlProfileLib(&aqlprofile_api_);
#else
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_);
#endif
CHECK_STATUS("aqlprofile API table load failed", status);
}
// Destructor of the class
HsaRsrcFactory::~HsaRsrcFactory() {
hsa_status_t status = hsa_shut_down();
CHECK_STATUS("Error in hsa_shut_down", status);
}
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
if (handle == NULL) {
fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
return HSA_STATUS_ERROR;
}
dlerror(); /* Clear any existing error */
api->hsa_ven_amd_aqlprofile_error_string =
(decltype(::hsa_ven_amd_aqlprofile_error_string)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_error_string");
api->hsa_ven_amd_aqlprofile_validate_event =
(decltype(::hsa_ven_amd_aqlprofile_validate_event)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event");
api->hsa_ven_amd_aqlprofile_start =
(decltype(::hsa_ven_amd_aqlprofile_start)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_start");
api->hsa_ven_amd_aqlprofile_stop =
(decltype(::hsa_ven_amd_aqlprofile_stop)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_stop");
api->hsa_ven_amd_aqlprofile_legacy_get_pm4 =
(decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
api->hsa_ven_amd_aqlprofile_get_info =
(decltype(::hsa_ven_amd_aqlprofile_get_info)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_get_info");
api->hsa_ven_amd_aqlprofile_iterate_data =
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data");
return HSA_STATUS_SUCCESS;
}
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(gpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = gpu_list_[idx];
return true;
}
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(cpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = cpu_list_[idx];
return true;
}
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) {
hsa_status_t status;
status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);
return (status == HSA_STATUS_SUCCESS);
}
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
return (status == HSA_STATUS_SUCCESS);
}
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (agent_info->coarse_region.handle != 0) {
// Allocate in local memory if it is available
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
if (status == HSA_STATUS_SUCCESS) {
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
}
} else {
// Allocate in system memory if local memory is not available
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Allocate memory tp pass kernel parameters.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
uint8_t* buffer = NULL;
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Transfer data method
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
bool host_to_dev) {
hsa_status_t status;
status = hsa_memory_copy(dest_buff, src_buff, length);
return (status == HSA_STATUS_SUCCESS);
}
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::LoadAndFinalize(AgentInfo* agent_info, const char* brig_path,
char* kernel_name, hsa_executable_symbol_t* code_desc) {
// Finalize the Hsail object into code object
hsa_status_t status;
hsa_code_object_t code_object;
// Build the code object filename
std::string filename(brig_path);
std::clog << "Code object filename: " << filename << std::endl;
// Open the file containing code object
std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate);
if (!codeStream) {
std::cerr << "Error: failed to load " << filename << std::endl;
assert(false);
return false;
}
// Allocate memory to read in code object from file
size_t size = std::string::size_type(codeStream.tellg());
char* codeBuff = (char*)AllocateSysMemory(agent_info, size);
if (!codeBuff) {
std::cerr << "Error: failed to allocate memory for code object." << std::endl;
assert(false);
return false;
}
// Read the code object into allocated memory
codeStream.seekg(0, std::ios::beg);
std::copy(std::istreambuf_iterator<char>(codeStream), std::istreambuf_iterator<char>(), codeBuff);
// De-Serialize the code object that has been read into memory
status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object);
if (status != HSA_STATUS_SUCCESS) {
std::cerr << "Failed to deserialize code object" << std::endl;
return false;
}
// Create executable.
hsa_executable_t hsaExecutable;
status =
hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
CHECK_STATUS("Error in creating executable object", status);
// Load code object.
status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, "");
CHECK_STATUS("Error in loading executable object", status);
// Freeze executable.
status = hsa_executable_freeze(hsaExecutable, "");
CHECK_STATUS("Error in freezing executable object", status);
// Get symbol handle.
hsa_executable_symbol_t kernelSymbol;
status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0,
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
// Update output parameter
*code_desc = kernelSymbol;
return true;
}
// Add an instance of AgentInfo representing a Hsa Gpu agent
void HsaRsrcFactory::AddAgentInfo(AgentInfo* agent_info, bool gpu) {
// Add input to Gpu list
if (gpu) {
gpu_list_.push_back(agent_info);
return;
}
// Add input to Cpu list
cpu_list_.push_back(agent_info);
}
// Print the various fields of Hsa Gpu Agents
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
std::clog << header << " :" << std::endl;
AgentInfo* agent_info;
int size = uint32_t(gpu_list_.size());
for (int idx = 0; idx < size; idx++) {
agent_info = gpu_list_[idx];
std::clog << "> agent[" << idx << "] :" << std::endl;
std::clog << ">> Name : " << agent_info->name << std::endl;
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
}
return true;
}
HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL;
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;