2b2b8329b5
Signed-off-by: Alysa Liu <Alysa.Liu@amd.com>
325 wiersze
9.7 KiB
C++
325 wiersze
9.7 KiB
C++
/*
|
|
* Copyright © Advanced Micro Devices, Inc., or its affiliates.
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "hsa_base_util.h"
|
|
#include "HSAILAmdExt.h"
|
|
|
|
|
|
void HSA_UTIL::GetHsailNameAndKernelName(char * file_name_full, char *file_name_base, char *kernel_name)
|
|
{
|
|
strcpy(hail_file_name_full, file_name_full);
|
|
strcpy(hail_file_name_base, file_name_base);
|
|
strcpy(hsa_kernel_name, kernel_name);
|
|
}
|
|
|
|
HSA_UTIL::HSA_UTIL()
|
|
{
|
|
#ifdef TIME
|
|
base_kernel_time_idx = base_timer.CreateTimer();
|
|
base_setup_time_idx = base_timer.CreateTimer();
|
|
#endif
|
|
}
|
|
|
|
HSA_UTIL::~HSA_UTIL()
|
|
{
|
|
|
|
}
|
|
|
|
|
|
bool HSA_UTIL::HsaInit()
|
|
{
|
|
#ifdef TIME
|
|
base_timer.StartTimer(base_setup_time_idx);
|
|
#endif
|
|
|
|
err = hsa_init();
|
|
check(Initializing the hsa runtime, err);
|
|
|
|
/*
|
|
* Iterate over the agents and pick the gpu agent using
|
|
* the find_gpu callback.
|
|
*/
|
|
err = hsa_iterate_agents(find_gpu, &device);
|
|
check(Calling hsa_iterate_agents, err);
|
|
|
|
err = (device.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
|
check(Checking if the GPU device is non-zero, err);
|
|
|
|
if (err == HSA_STATUS_ERROR)
|
|
return false;
|
|
|
|
/*
|
|
* Query the maximum size of the queue.
|
|
*/
|
|
err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
|
|
check(Querying the device maximum queue size, err);
|
|
|
|
/*
|
|
* Create a queue using the maximum size.
|
|
*/
|
|
err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &command_queue);
|
|
check(Creating the queue, err);
|
|
|
|
profile = hsa_profile_t(108);
|
|
hsa_agent_get_info(device, HSA_AGENT_INFO_PROFILE, &profile);
|
|
|
|
if (profile == HSA_PROFILE_BASE)
|
|
{
|
|
memset(hail_file_name_full, 0, sizeof(char)*128);
|
|
cout << "Loading base profile!!!" << endl;
|
|
strcpy(hail_file_name_full, hail_file_name_base); //overwrite full hsail file name with base
|
|
}
|
|
|
|
amd::hsail::registerExtensions();
|
|
if (!tool.assembleFromFile(hail_file_name_full))
|
|
{
|
|
std::cout << tool.output();
|
|
return false;
|
|
}
|
|
module = tool.brigModule();
|
|
|
|
// Create hsail program.
|
|
err = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, profile, HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO, NULL, &hsa_program);
|
|
check("Error in creating program object", err);
|
|
|
|
// Add hsail module.
|
|
//cout << "hsail file name = " << hail_file_name_full << endl;
|
|
err = hsa_ext_program_add_module(hsa_program, module);
|
|
check("Error in adding module to program object", err);
|
|
|
|
// Finalize hsail program.
|
|
hsa_isa_t isa = {0};
|
|
err = hsa_agent_get_info(device, HSA_AGENT_INFO_ISA, &isa);
|
|
check("Get hsa agent info isa", err);
|
|
|
|
hsa_ext_control_directives_t control_directives;
|
|
memset(&control_directives, 0, sizeof(hsa_ext_control_directives_t));
|
|
|
|
err = hsa_ext_program_finalize(hsa_program,
|
|
isa,
|
|
0,
|
|
control_directives,
|
|
NULL, //"-g -O0 -dump-isa",
|
|
HSA_CODE_OBJECT_TYPE_PROGRAM,
|
|
&code_object);
|
|
check("Error in finalizing program object", err);
|
|
|
|
// Create executable.
|
|
err = hsa_executable_create(profile, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
|
|
check("Error in creating executable object", err);
|
|
|
|
// Load code object.
|
|
err = hsa_executable_load_code_object(hsaExecutable, device, code_object, "");
|
|
check("Error in loading executable object", err);
|
|
|
|
// Freeze executable.
|
|
err = hsa_executable_freeze(hsaExecutable, "");
|
|
check("Error in freezing executable object", err);
|
|
|
|
// Get symbol handle.
|
|
err = hsa_executable_get_symbol(hsaExecutable, NULL, hsa_kernel_name, device, 0, &kernelSymbol);
|
|
check("get symbol handle", err);
|
|
|
|
// Get code handle.
|
|
|
|
err = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle);
|
|
check("Get code handle", err);
|
|
|
|
#ifdef TIME
|
|
base_timer.StopTimer(base_setup_time_idx);
|
|
#endif
|
|
|
|
|
|
//hsa_region_t local_kernarg_region;
|
|
mem_region.kernarg_region.handle = 0;
|
|
mem_region.coarse_region.handle = 0;
|
|
|
|
hsa_agent_iterate_regions(device, get_memory_region, &mem_region);
|
|
err = (mem_region.kernarg_region.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
|
check(Finding a kernarg memory region, err);
|
|
|
|
return true;
|
|
}
|
|
|
|
double HSA_UTIL::Run(int dim, int group_x, int group_y, int group_z, int s_size, int grid_x, int grid_y, int grid_z, void* kernel_args, int kernel_args_size)
|
|
{
|
|
#ifdef TIME
|
|
base_timer.StartTimer(base_kernel_time_idx);
|
|
#endif
|
|
|
|
/*
|
|
* Create a signal to wait for the dispatch to finish.
|
|
*/
|
|
hsa_signal_t local_signal;
|
|
err=hsa_signal_create(1, 0, NULL, &local_signal);
|
|
check(Creating a HSA_UTIL signal, err);
|
|
|
|
/* Initialize the dispatch packet */
|
|
hsa_kernel_dispatch_packet_t local_dispatch_packet;
|
|
memset(&local_dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
|
|
/*
|
|
* Setup the dispatch information.
|
|
*/
|
|
local_dispatch_packet.completion_signal=local_signal;
|
|
local_dispatch_packet.setup |= dim<< HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
|
local_dispatch_packet.workgroup_size_x = group_x;
|
|
local_dispatch_packet.workgroup_size_y = group_y;
|
|
local_dispatch_packet.workgroup_size_z = group_z;
|
|
local_dispatch_packet.group_segment_size = s_size;
|
|
local_dispatch_packet.grid_size_x = grid_x;
|
|
local_dispatch_packet.grid_size_y = grid_y;
|
|
local_dispatch_packet.grid_size_z = grid_z;
|
|
local_dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
|
//local_dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
|
//local_dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
|
local_dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
|
local_dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
|
local_dispatch_packet.kernel_object = codeHandle;
|
|
|
|
// Specify amount of private segment size (in bytes) that is needed per work-item
|
|
// Retrieve the amount of private memory needed
|
|
uint32_t private_mem_size = 0;
|
|
hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &private_mem_size);
|
|
local_dispatch_packet.private_segment_size = private_mem_size;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
/*
|
|
* Find a memory region that supports kernel arguments.
|
|
*/
|
|
|
|
|
|
/*
|
|
kernarg_region.handle = 0;
|
|
|
|
hsa_agent_iterate_regions(device, get_kernarg, &kernarg_region);
|
|
err = (kernarg_region.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
|
check(Finding a kernarg memory region, err);
|
|
|
|
*/
|
|
|
|
void* local_kernel_arg_buffer = NULL;
|
|
/*
|
|
* Allocate the kernel argument buffer from the correct region.
|
|
*/
|
|
err = hsa_memory_allocate(mem_region.kernarg_region, kernel_args_size, &local_kernel_arg_buffer);
|
|
check(Allocating kernel argument memory buffer, err);
|
|
memcpy(local_kernel_arg_buffer, kernel_args, kernel_args_size);
|
|
local_dispatch_packet.kernarg_address = local_kernel_arg_buffer;
|
|
|
|
/*
|
|
* Obtain the current queue write index.
|
|
*/
|
|
uint64_t index = hsa_queue_load_write_index_relaxed(command_queue);
|
|
|
|
/*
|
|
* Write the aql packet at the calculated queue index address.
|
|
*/
|
|
const uint32_t queueMask = command_queue->size - 1;
|
|
((hsa_kernel_dispatch_packet_t*)(command_queue->base_address))[index&queueMask]=local_dispatch_packet;
|
|
|
|
/*
|
|
* Increment the write index and ring the doorbell to dispatch the kernel.
|
|
*/
|
|
hsa_queue_store_write_index_relaxed(command_queue, index+1);
|
|
hsa_signal_store_release(command_queue->doorbell_signal, index);
|
|
|
|
/*
|
|
* Wait on the dispatch signal until all kernel are finished.
|
|
*/
|
|
while (hsa_signal_wait_acquire(local_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0);
|
|
|
|
#ifdef TIME
|
|
base_timer.StopTimer(base_kernel_time_idx);
|
|
#endif
|
|
|
|
/*
|
|
* Cleanup all allocated resources.
|
|
*/
|
|
|
|
err = hsa_memory_free(local_kernel_arg_buffer);
|
|
check(Deallocate memory, err);
|
|
|
|
err=hsa_signal_destroy(local_signal);
|
|
check(Destroying the local_signal, err);
|
|
|
|
return 0;
|
|
}
|
|
|
|
double HSA_UTIL::GetKernelTime()
|
|
{
|
|
return base_timer.ReadTimer(base_kernel_time_idx);
|
|
}
|
|
|
|
double HSA_UTIL::GetSetupTime()
|
|
{
|
|
return base_timer.ReadTimer(base_setup_time_idx);
|
|
}
|
|
|
|
void HSA_UTIL::Close()
|
|
{
|
|
err = hsa_executable_destroy(hsaExecutable);
|
|
check(Destroying the hsaExecutable, err)
|
|
|
|
err = hsa_code_object_destroy(code_object);
|
|
check(Destroying the code_object, err);
|
|
|
|
err=hsa_queue_destroy(command_queue);
|
|
check(Destroying the queue, err);
|
|
|
|
err=hsa_shut_down();
|
|
check(Shutting down the runtime, err);
|
|
}
|
|
|
|
void* HSA_UTIL::AllocateLocalMemory(size_t size)
|
|
{
|
|
void *buffer = NULL;
|
|
|
|
// Allocate in local memory only if it is available
|
|
if (mem_region.coarse_region.handle != 0)
|
|
{
|
|
cout << "Allocating in local memory" << endl;
|
|
err = hsa_memory_allocate(mem_region.coarse_region, size, (void **)&buffer);
|
|
check(hsa memory allocation in local memory, err);
|
|
|
|
// register agent
|
|
err = hsa_memory_assign_agent(buffer, device, HSA_ACCESS_PERMISSION_RW);
|
|
return (err == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
|
}
|
|
|
|
// Allocate in system memory if local memory is not available
|
|
cout << "Allocating in system memory" << endl;
|
|
err = hsa_memory_allocate(mem_region.kernarg_region, size, (void **)&buffer);
|
|
return (err == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
|
}
|
|
|
|
void* HSA_UTIL::AllocateSysMemory( size_t size)
|
|
{
|
|
void *buffer = NULL;
|
|
err = hsa_memory_allocate(mem_region.kernarg_region, size, (void **)&buffer);
|
|
return (err == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
|
}
|
|
|
|
bool HSA_UTIL::TransferData(void *dest, void *src, uint length, bool host_to_dev)
|
|
{
|
|
|
|
hsa_status_t status;
|
|
|
|
void *buffer = (host_to_dev) ? dest : src;
|
|
err = hsa_memory_assign_agent(buffer, device, HSA_ACCESS_PERMISSION_RW);
|
|
if (err != HSA_STATUS_SUCCESS)
|
|
{
|
|
return false;
|
|
}
|
|
err = hsa_memory_copy(dest, src, length); // first is dest, second is src
|
|
return (err == HSA_STATUS_SUCCESS);
|
|
|
|
}
|
|
|
|
|
|
|