ECR #333755 - Move Hsa Sample BinarySearch from Hsa Sdk project to current Runtime/Samples

[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1126024]


[ROCm/ROCR-Runtime commit: 26575ed9ba]
This commit is contained in:
Ramesh Errabolu (xN/A) TX
2015-02-27 19:29:01 -05:00
parent 303c030f57
commit 6b478ae5cc
7 changed files with 1304 additions and 1 deletions
+6 -1
View File
@@ -2,6 +2,11 @@ OPENCL_DEPTH = ../..
include $(OPENCL_DEPTH)/runtimenew/runtimedefs
SUBDIRS = MatrixTranspose MatrixMultiplication DwtHarr1D BitionicSort NBody
SUBDIRS = NBody
SUBDIRS += DwtHarr1D
SUBDIRS += BitionicSort
SUBDIRS += BinarySearch
SUBDIRS += MatrixTranspose
SUBDIRS += MatrixMultiplication
include $(OPENCL_DEPTH)/runtimenew/runtimerules
+386
View File
@@ -0,0 +1,386 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include "helper_funcs.hpp"
#ifndef _WIN32
#include <unistd.h>
#endif
/*
* Prints the header on its own line, followed by every element of the
* given array: one output line per row, `width` space-separated values
* per line, `height` lines in total. No element-count limit is applied.
*/
// Writes `header` and then the width x height contents of `data` to
// std::cout. Each row goes on its own line and every value is followed by
// a single space; a blank line is emitted before the header and after the
// last row.
template<typename T>
void printArray(
    const std::string header,
    const T * data,
    const int width,
    const int height)
{
    std::cout << "\n" << header << "\n";

    int row = 0;
    while(row < height)
    {
        int col = 0;
        while(col < width)
        {
            const T & cell = data[row*width + col];
            std::cout << cell << " ";
            ++col;
        }
        std::cout << "\n";
        ++row;
    }

    std::cout << "\n";
}
// Fills a width x height array with pseudo-random values derived from
// rand(), offset from rangeMin and scaled by (rangeMax - rangeMin) + 1.
//
// arrayPtr  destination buffer; a null pointer is reported via error()
// seed      value passed to srand(); 0 selects time(NULL) instead
//
// Returns HSA_SDK_SUCCESS, or HSA_SDK_FAILURE when arrayPtr is null.
template<typename T>
int fillRandom(
    T * arrayPtr,
    const int width,
    const int height,
    const T rangeMin,
    const T rangeMax,
    unsigned int seed)
{
    if(arrayPtr == NULL)
    {
        error("Cannot fill array. NULL pointer.");
        return HSA_SDK_FAILURE;
    }

    // A zero seed means "pick one from the wall clock".
    if(seed == 0)
    {
        seed = (unsigned int)time(NULL);
    }
    srand(seed);

    const double span = double(rangeMax - rangeMin) + 1.0;

    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col)
        {
            const int slot = row*width + col;
            arrayPtr[slot] = rangeMin + T(span*rand()/(RAND_MAX + 1.0));
        }
    }

    return HSA_SDK_SUCCESS;
}
// Fills a width x height array so that every element holds its own linear
// (row-major) position: arrayPtr[i*width + j] = i*width + j.
//
// arrayPtr  destination buffer; a null pointer is reported via error()
//
// Returns HSA_SDK_SUCCESS, or HSA_SDK_FAILURE when arrayPtr is null.
template<typename T>
int fillPos(
    T * arrayPtr,
    const int width,
    const int height)
{
    if(!arrayPtr)
    {
        error("Cannot fill array. NULL pointer.");
        return HSA_SDK_FAILURE;
    }

    /* initialisation of input with positions */
    // The counters and the index are plain ints (as in fillRandom and
    // fillConstant) rather than T: with a narrow or unsigned T the loop
    // counter could overflow or never reach `height`, and the element
    // index itself could wrap. Only the stored value is converted to T.
    for(int i = 0; i < height; i++)
        for(int j = 0; j < width; j++)
        {
            const int index = i*width + j;
            arrayPtr[index] = static_cast<T>(index);
        }

    return HSA_SDK_SUCCESS;
}
// Sets every element of a width x height array to `val`.
//
// arrayPtr  destination buffer; a null pointer is reported via error()
//
// Returns HSA_SDK_SUCCESS, or HSA_SDK_FAILURE when arrayPtr is null.
template<typename T>
int fillConstant(
    T * arrayPtr,
    const int width,
    const int height,
    const T val)
{
    if(arrayPtr == NULL)
    {
        error("Cannot fill array. NULL pointer.");
        return HSA_SDK_FAILURE;
    }

    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col)
        {
            arrayPtr[row*width + col] = val;
        }
    }

    return HSA_SDK_SUCCESS;
}
// Rounds `val` up to the next power of two; a value that already is a
// power of two is returned unchanged. An input of 0 yields 0.
//
// Works by smearing the highest set bit of (val - 1) into every lower bit
// and adding one. The smear needs shifts of 1, 2, 4, ... up to half the
// bit width of T. The previous loop ran sizeof(T) times, which stopped too
// early for 2- and 4-byte types and shifted by >= the type width for
// 8-byte types.
template<typename T>
T roundToPowerOf2(T val)
{
    const int bits = static_cast<int>(sizeof(T) * 8);

    val--;
    for(int shift = 1; shift < bits; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}
// Tests whether `val` is a non-zero power of two.
//
// NOTE: the result is a status code, not a boolean. HSA_SDK_SUCCESS is
// returned when val IS a power of two and HSA_SDK_FAILURE otherwise.
template<typename T>
int isPowerOf2(T val)
{
    const long long wide = val;

    // (wide & -wide) isolates the lowest set bit; it equals the whole
    // value only when exactly one bit is set.
    const bool singleBit = ((wide & (-wide)) - wide == 0) && (wide != 0);

    return singleBit ? HSA_SDK_SUCCESS : HSA_SDK_FAILURE;
}
// Compares `input` with `reference`. On a mismatch `message` is printed
// through error() and false is returned; on a match nothing is printed and
// true is returned. `isAPIerror` is accepted but not used by this
// implementation.
template<typename T>
bool checkVal(
    T input,
    T reference,
    std::string message,
    bool isAPIerror)
{
    const bool matched = (input==reference);
    if(!matched)
    {
        error(message);
    }
    return matched;
}
// Formats `t` into a std::string after applying the stream manipulator
// `r` (for example std::dec or std::hex) to the output stream.
template<typename T>
std::string toString(T t, std::ios_base &(*r)(std::ios_base&))
{
    std::ostringstream formatter;
    formatter << r;
    formatter << t;
    return formatter.str();
}
// Checks `data` against `refData` using the relative L2-norm error
//   sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
// over all `length` elements, and returns true when that error is below
// `epsilon`.
//
// Returns false when the sum of squared reference values is (almost) zero,
// because the relative error is undefined in that case.
//
// The summation starts at element 0; it previously started at 1, so a
// mismatch in the first element was never detected.
bool
compare(const float *refData, const float *data,
        const int length, const float epsilon)
{
    float errorSum = 0.0f;
    float refSum = 0.0f;

    for(int i = 0; i < length; ++i)
    {
        const float diff = refData[i] - data[i];
        errorSum += diff * diff;
        refSum += refData[i] * refData[i];
    }

    if (::fabs((float) refSum) < 1e-7f) {
        return false;
    }

    const float normRef = ::sqrtf((float) refSum);
    const float normError = ::sqrtf((float) errorSum);

    return (normError / normRef) < epsilon;
}
// Double-precision overload: checks `data` against `refData` using the
// relative L2-norm error
//   sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
// over all `length` elements, and returns true when that error is below
// `epsilon`.
//
// Returns false when the sum of squared reference values is (almost) zero,
// because the relative error is undefined in that case.
//
// The summation starts at element 0; it previously started at 1, so a
// mismatch in the first element was never detected.
bool
compare(const double *refData, const double *data,
        const int length, const double epsilon)
{
    double errorSum = 0.0;
    double refSum = 0.0;

    for(int i = 0; i < length; ++i)
    {
        const double diff = refData[i] - data[i];
        errorSum += diff * diff;
        refSum += refData[i] * refData[i];
    }

    if (::fabs((double) refSum) < 1e-7) {
        return false;
    }

    const double normRef = ::sqrt((double) refSum);
    const double normError = ::sqrt((double) errorSum);

    return (normError / normRef) < epsilon;
}
// Prints "Error: <errorMsg>" on std::cout and flushes the stream
// (C-string overload).
void
error(const char* errorMsg)
{
    std::cout << "Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}

// Prints "Error: <errorMsg>" on std::cout and flushes the stream
// (std::string overload).
void
error(std::string errorMsg)
{
    std::cout << "Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}
// Prints "Expected Error: <errorMsg>" on std::cout and flushes the stream
// (C-string overload).
void
expectedError(const char* errorMsg)
{
    std::cout << "Expected Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}

// Prints "Expected Error: <errorMsg>" on std::cout and flushes the stream
// (std::string overload).
void
expectedError(std::string errorMsg)
{
    std::cout << "Expected Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}
/////////////////////////////////////////////////////////////////
// Template Instantiations
/////////////////////////////////////////////////////////////////
// The function templates above are defined in this .cpp file, so each
// element type that should be usable from other translation units is
// explicitly instantiated here.

// printArray: short, unsigned char, unsigned int, int, long, float, double
template
void printArray<short>(const std::string,
const short*, int, int);
template
void printArray<unsigned char>(const std::string,
const unsigned char *, int, int);
template
void printArray<unsigned int>(const std::string,
const unsigned int *, int, int);
template
void printArray<int>(const std::string,
const int *, int, int);
template
void printArray<long>(const std::string,
const long*, int, int);
template
void printArray<float>(const std::string,
const float*, int, int);
template
void printArray<double>(const std::string,
const double*, int, int);
// fillRandom: unsigned char, unsigned int, int, long, float, double
template
int fillRandom<unsigned char>(unsigned char* arrayPtr,
const int width, const int height,
unsigned char rangeMin, unsigned char rangeMax, unsigned int seed);
template
int fillRandom<unsigned int>(unsigned int* arrayPtr,
const int width, const int height,
unsigned int rangeMin, unsigned int rangeMax, unsigned int seed);
template
int fillRandom<int>(int* arrayPtr,
const int width, const int height,
int rangeMin, int rangeMax, unsigned int seed);
template
int fillRandom<long>(long* arrayPtr,
const int width, const int height,
long rangeMin, long rangeMax, unsigned int seed);
template
int fillRandom<float>(float* arrayPtr,
const int width, const int height,
float rangeMin, float rangeMax, unsigned int seed);
template
int fillRandom<double>(double* arrayPtr,
const int width, const int height,
double rangeMin, double rangeMax, unsigned int seed);
// roundToPowerOf2: short, unsigned int, int, long
template
short roundToPowerOf2<short>(short val);
template
unsigned int roundToPowerOf2<unsigned int>(unsigned int val);
template
int roundToPowerOf2<int>(int val);
template
long roundToPowerOf2<long>(long val);
// isPowerOf2: short, unsigned int, int, long
template
int isPowerOf2<short>(short val);
template
int isPowerOf2<unsigned int>(unsigned int val);
template
int isPowerOf2<int>(int val);
template
int isPowerOf2<long>(long val);
template<>
int fillPos<short>(short * arrayPtr, const int width, const int height);
template<>
int fillPos<unsigned int>(unsigned int * arrayPtr, const int width, const int height);
template<>
int fillPos<int>(int * arrayPtr, const int width, const int height);
template<>
int fillPos<long>(long * arrayPtr, const int width, const int height);
template<>
int fillConstant<short>(short * arrayPtr,
const int width, const int height,
const short val);
template<>
int fillConstant(unsigned int * arrayPtr,
const int width, const int height,
const unsigned int val);
template<>
int fillConstant(int * arrayPtr,
const int width, const int height,
const int val);
template<>
int fillConstant(long * arrayPtr,
const int width, const int height,
const long val);
template<>
int fillConstant(long * arrayPtr,
const int width, const int height,
const long val);
template<>
int fillConstant(long * arrayPtr,
const int width, const int height,
const long val);
// Explicit instantiations of checkVal for char, bool, std::string, short,
// unsigned int, int and long.
template
bool checkVal<char>(char input, char reference, std::string message, bool isAPIerror);
template
bool checkVal<bool>(bool input, bool reference, std::string message, bool isAPIerror);
template
bool checkVal<std::string>(std::string input, std::string reference, std::string message, bool isAPIerror);
template
bool checkVal<short>(short input, short reference, std::string message, bool isAPIerror);
template
bool checkVal<unsigned int>(unsigned int input, unsigned int reference, std::string message, bool isAPIerror);
template
bool checkVal<int>(int input, int reference, std::string message, bool isAPIerror);
template
bool checkVal<long>(long input, long reference, std::string message, bool isAPIerror);
// Explicit instantiations of toString for char, short, unsigned int, int,
// long, float and double.
template
std::string toString<char>(char t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<short>(short t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<unsigned int>(unsigned int t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<int>(int t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<long>(long t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<float>(float t, std::ios_base &(*r)(std::ios_base&));
template
std::string toString<double>(double t, std::ios_base &(*r)(std::ios_base&));
+159
View File
@@ -0,0 +1,159 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef HELPER_FUNCS_HPP_
#define HELPER_FUNCS_HPP_
#define HSA_SDK_SUCCESS 0
#define HSA_SDK_FAILURE 1
#define HSA_SDK_EXPECTED_FAILURE 2
#include <iostream>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <string>
#include <ctime>
#include <cmath>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
#include <malloc.h>
/**
* error
* constant function, Prints error messages
* @param errorMsg char* message
*/
void error(const char* errorMsg);
/**
* error
* constant function, Prints error messages
* @param errorMsg std::string message
*/
void error(std::string errorMsg);
/**
* expectedError
* constant function, Prints error messages
* @param errorMsg char* message
*/
void expectedError(const char* errorMsg);
/**
* expectedError
* constant function, Prints error messages
* @param errorMsg string message
*/
void expectedError(std::string errorMsg);
/**
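* compare (float and double overloads)
* Returns true when the relative L2-norm error of data against refData
* is below epsilon; returns false when the reference norm is (almost) zero.
* @param refData reference values
* @param data values to check against the reference
* @param length number of values to compare
* @param epsilon maximum accepted relative error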
*/
bool compare(const float *refData, const float *data,
const int length, const float epsilon = 1e-6f);
bool compare(const double *refData, const double *data,
const int length, const double epsilon = 1e-6);
/**
* printArray
* prints the header followed by a width x height array on std::cout
*/
template<typename T>
void printArray(
const std::string header,
const T * data,
const int width,
const int height);
/**
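* fillRandom
* fill array with random values derived from rangeMin and rangeMax;
* a seed of 0 seeds the generator from the current time instead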
*/
template<typename T>
int fillRandom(
T * arrayPtr,
const int width,
const int height,
const T rangeMin,
const T rangeMax,
unsigned int seed=123);
/**
* fillPos
* fill each element of the array with its own linear (row-major) index
*/
template<typename T>
int fillPos(
T * arrayPtr,
const int width,
const int height);
/**
* fillConstant
* fill the array with constant value
*/
template<typename T>
int fillConstant(
T * arrayPtr,
const int width,
const int height,
const T val);
/**
* roundToPowerOf2
* rounds to a power of 2
*/
template<typename T>
T roundToPowerOf2(T val);
/**
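* isPowerOf2
* checks if input is a power of 2; returns HSA_SDK_SUCCESS (0) when it is
* and HSA_SDK_FAILURE otherwise, so the result is a status code, not a bool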
*/
template<typename T>
int isPowerOf2(T val);
/**
* checkVal
* Returns true when input equals reference; otherwise prints message and
* returns false. Set the defaulted parameter isAPIerror to false
* if checkVal is used to check something other than an API error code
*/
template<typename T>
bool checkVal(
T input,
T reference,
std::string message, bool isAPIerror = true);
/**
* toString
* convert a T type to string
*/
template<typename T>
std::string toString(T t, std::ios_base & (*r)(std::ios_base&));
#endif
+422
View File
@@ -0,0 +1,422 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <x86intrin.h>
#include <string.h>
#include <cassert>
#include <iostream>
#include <vector>
#include <string>
#include "hsa.h"
#include "elf_utils.h"
#include "hsa_rsrc_factory.hpp"
using namespace std;
// Provide access to command line arguments passed in by user.
// These globals are read by HsaRsrcFactory::ProcessCmdline(), which the
// HsaRsrcFactory constructor calls. Index 0 is skipped by the parser,
// which reads keys starting at index 1.
// NOTE(review): presumably filled from main()'s argc/argv before a factory
// is constructed - confirm against the sample's main().
uint32_t hsa_cmdline_arg_cnt;
char **hsa_cmdline_arg_list;
// Region-iteration callback used to locate an agent's kernarg region.
//
// @param region Region currently being visited
//
// @param data Pointer to the AgentInfo whose kernarg_region is updated
//
// @return hsa_status_t Always HSA_STATUS_SUCCESS, so iteration continues;
// if several regions qualify, the last one visited is the one recorded.
//
static hsa_status_t find_kernarg(hsa_region_t region, void *data) {
  // Only regions in the Global segment can carry the Kernarg flag
  hsa_region_segment_t segment;
  hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment);

  if (segment == HSA_REGION_SEGMENT_GLOBAL) {
    hsa_region_global_flag_t region_flags;
    hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &region_flags);

    if (region_flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
      AgentInfo *owner = (AgentInfo *)data;
      owner->kernarg_region = region;
    }
  }

  return HSA_STATUS_SUCCESS;
}
// Agent-iteration callback that records every Gpu agent in the
// HsaRsrcFactory passed through `data`.
//
// @param agent Agent currently being visited
//
// @param data Pointer to the HsaRsrcFactory collecting the agents
//
// @return hsa_status_t HSA_STATUS_SUCCESS for non-Gpu agents and for Gpu
// agents that were recorded; the failing status if the device type cannot
// be queried; HSA_STATUS_ERROR if `data` is null or the AgentInfo cannot
// be allocated. A non-success value stops the iteration.
//
static hsa_status_t get_gpu_agents(hsa_agent_t agent, void *data) {
  HsaRsrcFactory *rsrcFactory = reinterpret_cast<HsaRsrcFactory *>(data);
  if (rsrcFactory == NULL) {
    return HSA_STATUS_ERROR;
  }

  // Determine if device is a Gpu agent. The query result must be checked
  // before `type` is read, otherwise an uninitialized value is compared.
  hsa_device_type_t type;
  hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  if (type != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_SUCCESS;
  }

  // Device is a Gpu agent, build an instance of AgentInfo. calloc() zeroes
  // every field, so the name, the sizes and the kernarg region handle have
  // a defined value even if one of the queries below fails or no kernarg
  // region is found.
  AgentInfo *agent_info = reinterpret_cast<AgentInfo *>(calloc(1, sizeof(AgentInfo)));
  if (agent_info == NULL) {
    return HSA_STATUS_ERROR;
  }
  agent_info->dev_id = agent;

  // The remaining queries are best effort: on failure the zeroed default
  // is kept.
  hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
  hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);

  // Find and Bind Kernarg regions of the Gpu agent
  hsa_agent_iterate_regions(agent, find_kernarg, agent_info);

  // Save the instance of AgentInfo
  rsrcFactory->AddAgentInfo(agent_info);
  return HSA_STATUS_SUCCESS;
}
// Finds the offset of the kernel directive named `symbol_name` in the code
// section of the specified brig_module.
//
// @param brig_module Module whose code and data sections are searched
//
// @param symbol_name Null-terminated kernel name to look for
//
// @param offset Output parameter updated with the code-section offset
//
// @return hsa_status_t HSA_STATUS_SUCCESS if the symbol is found,
// HSA_STATUS_ERROR otherwise (including when an argument is null).
//
// NOTE(review): the name comparison only covers strlen(symbol_name) bytes,
// so a symbol that is a prefix of a longer kernel name also matches.
// Confirm whether callers rely on this before tightening it.
//
hsa_status_t hsa_find_symbol_offset(hsa_ext_brig_module_t *brig_module,
                                    char *symbol_name,
                                    hsa_ext_brig_code_section_offset32_t *offset) {
  if (brig_module == NULL || symbol_name == NULL || offset == NULL) {
    return HSA_STATUS_ERROR;
  }

  // Get the data section
  hsa_ext_brig_section_header_t *data_hdr = brig_module->section[HSA_EXT_BRIG_SECTION_DATA];

  // Get the code section
  hsa_ext_brig_section_header_t *code_hdr = brig_module->section[HSA_EXT_BRIG_SECTION_CODE];

  // The symbol length does not change while scanning; compute it once
  const size_t symbol_len = strlen(symbol_name);

  // Walk the entries of the BRIG code section, starting right after the
  // section header. Using '<' instead of '!=' stops the walk even if an
  // entry size would step past the end of the section.
  BrigCodeOffset32_t code_offset = code_hdr->header_byte_count;
  while (code_offset < code_hdr->byte_count) {
    BrigBase *code_entry = (BrigBase *)((char *)code_hdr + code_offset);

    if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) {
      // Now find the kernel's name in the data section
      BrigDirectiveExecutable *directive_kernel = (BrigDirectiveExecutable *)(code_entry);
      BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
      BrigData *data_entry = (BrigData *)((char *)data_hdr + data_name_offset);

      if (!strncmp(symbol_name, (char *)data_entry->bytes, symbol_len)) {
        *offset = code_offset;
        return HSA_STATUS_SUCCESS;
      }
    }

    // A zero-sized entry would never advance the offset; treat the module
    // as malformed instead of looping forever.
    if (code_entry->byteCount == 0) {
      break;
    }
    code_offset += code_entry->byteCount;
  }

  return HSA_STATUS_ERROR;
}
// Definitions for Static Data members of the class.
// The counters have no explicit initializer and are therefore
// zero-initialized; they keep that value unless ProcessCmdline() finds the
// corresponding key on the command line.
char* HsaRsrcFactory::brig_path_ = NULL;
uint32_t HsaRsrcFactory::num_cus_;
uint32_t HsaRsrcFactory::num_waves_;
uint32_t HsaRsrcFactory::num_workitems_;
uint32_t HsaRsrcFactory::kernel_loop_count_;
bool HsaRsrcFactory::print_debug_info_ = false;
// Constructor of the class. Initializes the Hsa Runtime, collects the Gpu
// agents of the platform and parses the command line arguments.
//
// A failure of hsa_init or hsa_iterate_agents prints a message and
// terminates the process through the check() macro. hsa_init was
// previously guarded by assert() only, which is compiled out in NDEBUG
// builds and let a failed initialization go unnoticed.
HsaRsrcFactory::HsaRsrcFactory( )
    : num_queues_(0), num_signals_(0), num_agents_(0) {
  // Initialize the Hsa Runtime
  hsa_status_t status = hsa_init();
  check("Error Calling hsa_init", status);

  // Discover the set of Gpu devices available on the platform
  status = hsa_iterate_agents(get_gpu_agents, this);
  check("Error Calling hsa_iterate_agents", status);

  // Process command line arguments
  ProcessCmdline( );
}
// Destructor of the class. Intentionally empty: nothing is released here.
// NOTE(review): the AgentInfo objects stored in gpu_list_ are allocated
// in get_gpu_agents() and are not freed here, and the Hsa Runtime is not
// shut down - confirm whether cleanup is expected elsewhere.
HsaRsrcFactory::~HsaRsrcFactory( ) {
}
// Reports how many Hsa Gpu Agents were recorded for the platform
//
// @return uint32_t Number of entries in the Gpu agent list
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents( ) {
  const uint32_t agent_count = gpu_list_.size();
  return agent_count;
}
// Look up the AgentInfo of the Gpu device stored at a given index
//
// @param idx Position of the Gpu Agent in the agent list
//
// @param agent_info Output parameter; written only when idx is valid
//
// @return bool true if idx refers to a recorded agent, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info) {
  const uint32_t agent_count = gpu_list_.size();
  const bool in_range = (idx < agent_count);

  if (in_range) {
    *agent_info = gpu_list_[idx];
  }
  return in_range;
}
// Creates a multi-producer Hsa queue on the given agent, sized to hold the
// requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if hsa_queue_create succeeded, false otherwise
//
bool HsaRsrcFactory::CreateQueue(AgentInfo *agent_info,
                                 uint32_t num_pkts, hsa_queue_t **queue) {
  const hsa_status_t result =
      hsa_queue_create(agent_info->dev_id, num_pkts,
                       HSA_QUEUE_TYPE_MULTI, NULL, NULL,
                       UINT32_MAX, UINT32_MAX, queue);
  if (result != HSA_STATUS_SUCCESS) {
    return false;
  }
  return true;
}
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t *signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
return (status == HSA_STATUS_SUCCESS);
}
// Allocates `size` bytes from the kernarg region recorded for the given
// agent (the Global-segment region whose Kernarg flag is set).
//
// @param agent_info Agent from whose kernarg region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateMemory(AgentInfo *agent_info, size_t size) {
  uint8_t *block = NULL;
  const hsa_status_t result =
      hsa_memory_allocate(agent_info->kernarg_region, size, (void **)&block);

  if (result != HSA_STATUS_SUCCESS) {
    return NULL;
  }
  return block;
}
// Loads an assembled Brig file, finalizes the named kernel for the given
// Gpu device and returns the resulting code descriptor.
//
// Every step is guarded by check()/check_build(), which print a message
// and terminate the process on failure; the function therefore only ever
// returns true.
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful
//
bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info,
                                     const char *brig_path, char *kernel_name,
                                     hsa_ext_code_descriptor_t **code_desc) {
  // Step 1: read the ELF-wrapped BRIG file into a BRIG module
  hsa_ext_brig_module_t *module = NULL;
  hsa_ext_brig_module_t **module_out = &module;
  status_t load_status = (status_t)create_brig_module_from_brig_file(brig_path, module_out);
  check_build("Error in creating the brig module from brig file", load_status);

  // Step 2: make sure the module actually contains the requested kernel
  hsa_ext_brig_code_section_offset32_t kernel_offset;
  hsa_status_t hsa_result = hsa_find_symbol_offset(module, kernel_name, &kernel_offset);
  check("Error in Finding the Symbol Offset for the Kernel", hsa_result);

  // Step 3: create an Hsa program targeting this single agent
  hsa_ext_program_handle_t hsa_program;
  hsa_result = hsa_ext_program_create(&agent_info->dev_id, 1,
                                      HSA_EXT_BRIG_MACHINE_LARGE,
                                      HSA_EXT_BRIG_PROFILE_FULL, &hsa_program);
  check("Error in Creating Hsa Program", hsa_result);

  // Step 4: attach the BRIG module to the program
  hsa_ext_brig_module_handle_t module_handle;
  hsa_result = hsa_ext_add_module(hsa_program, module, &module_handle);
  check("Error in Adding Brig Module to the Program", hsa_result);

  // Step 5: describe the single kernel that has to be finalized
  hsa_ext_finalization_request_t request;
  request.module = module_handle;
  request.symbol = kernel_offset;
  request.program_call_convention = 0;

  // Step 6: finalize the program for the agent
  hsa_result = hsa_ext_finalize_program(hsa_program, agent_info->dev_id,
                                        1, &request, NULL, NULL, 0, NULL, 0);
  check("Error in Finalizing the Hsa Program", hsa_result);

  // Step 7: the BRIG module is no longer needed once the program has been
  // created, the kernel symbol located and the program finalized
  destroy_brig_module(module);

  // Step 8: fetch the address of the finalized kernel's code descriptor
  hsa_result = hsa_ext_query_kernel_descriptor_address(hsa_program, module_handle,
                                                       kernel_offset, code_desc);
  check("Error Querying the Kernel Descriptor Address", hsa_result);

  return true;
}
// Add an instance of AgentInfo representing a Hsa Gpu agent.
// The pointer is appended to gpu_list_ as-is (no copy is made); it is
// called from the get_gpu_agents() callback during construction.
void HsaRsrcFactory::AddAgentInfo(AgentInfo *agent_info) {
gpu_list_.push_back(agent_info);
}
// Print the various fields of Hsa Gpu Agents
bool HsaRsrcFactory::PrintGpuAgents( ) {
AgentInfo *agent_info;
int size = gpu_list_.size();
for (int idx = 0; idx < size; idx++) {
agent_info = gpu_list_[idx];
std::cout << std::endl;
std::cout << "Hsa Gpu Agent Id: " << agent_info->dev_id.handle << std::endl;
std::cout << "Hsa Gpu Agent Name: " << agent_info->name << std::endl;
std::cout << "Hsa Gpu Agent Max Wave Size: " << agent_info->max_wave_size << std::endl;
std::cout << "Hsa Gpu Agent Max Queue Size: " << agent_info->max_queue_size << std::endl;
std::cout << "Hsa Gpu Agent Kernarg Region Id: " << agent_info->kernarg_region.handle << std::endl;
std::cout << std::endl;
}
return true;
}
// Returns the file path where brig files is located. Value is
// available only after an instance has been built.
char* HsaRsrcFactory::GetBrigPath( ) {
return HsaRsrcFactory::brig_path_;
}
// Returns the number of compute units present on platform
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetNumOfCUs( ) {
return HsaRsrcFactory::num_cus_;
}
// Returns the maximum number of waves that can be launched
// per compute unit. The actual number that can be launched
// is affected by resource availability
//
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetNumOfWavesPerCU( ) {
return HsaRsrcFactory::num_waves_;
}
// Returns the number of work-items that can execute per wave
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetNumOfWorkItemsPerWave( ) {
return HsaRsrcFactory::num_workitems_;
}
// Returns the number of times kernel loop body should execute.
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetKernelLoopCount() {
return HsaRsrcFactory::kernel_loop_count_;
}
// Returns boolean flag to indicate if debug info should be printed
// Value is available only after an instance has been built.
uint32_t HsaRsrcFactory::GetPrintDebugInfo() {
return HsaRsrcFactory::print_debug_info_;
}
// Process command line arguments. The method captures the various user
// command line parameters for tests to use.
//
// Arguments are read from hsa_cmdline_arg_list as "key value" pairs
// starting at index 1, advancing two entries per step. The print-debug
// key carries no value of its own, but the two-entry stride is kept so
// existing command lines are parsed exactly as before.
//
// A key that needs a value but is the last argument is ignored; the
// value slot was previously read without a bounds check.
void HsaRsrcFactory::ProcessCmdline( ) {
  for (uint32_t idx = 1; idx < hsa_cmdline_arg_cnt; idx += 2) {
    const uint32_t arg_idx = GetArgIndex((char *)hsa_cmdline_arg_list[idx]);

    // The debug flag is a pure switch, it never reads a value
    if (arg_idx == 5) {
      HsaRsrcFactory::print_debug_info_ = true;
      continue;
    }

    // Every other key needs the following argument as its value
    if (idx + 1 >= hsa_cmdline_arg_cnt) {
      continue;
    }
    char *arg_value = hsa_cmdline_arg_list[idx + 1];

    switch(arg_idx) {
      case 0:
        HsaRsrcFactory::brig_path_ = arg_value;
        break;
      case 1:
        HsaRsrcFactory::num_cus_ = atoi(arg_value);
        break;
      case 2:
        HsaRsrcFactory::num_waves_ = atoi(arg_value);
        break;
      case 3:
        HsaRsrcFactory::num_workitems_ = atoi(arg_value);
        break;
      case 4:
        HsaRsrcFactory::kernel_loop_count_ = atoi(arg_value);
        break;
      default:
        // Unknown key: ignored, as before
        break;
    }
  }
}
// Maps a command line key to the index used by ProcessCmdline():
//   0 brig file path        1 number of compute units
//   2 waves per CU          3 workitems per wave
//   4 kernel loop count     5 print debug info
//
// @param arg_value Key string taken from the command line
//
// @return uint32_t Index of the matching key, 108 if the key is unknown
//
uint32_t HsaRsrcFactory::GetArgIndex(char *arg_value ) {
  // Keys listed in index order; the position in the table is the result
  const char *const known_keys[] = {
    HsaRsrcFactory::brig_path_key_,
    HsaRsrcFactory::num_cus_key_,
    HsaRsrcFactory::num_waves_key_,
    HsaRsrcFactory::num_workitems_key_,
    HsaRsrcFactory::kernel_loop_count_key_,
    HsaRsrcFactory::print_debug_key_
  };
  const uint32_t key_count = sizeof(known_keys) / sizeof(known_keys[0]);

  for (uint32_t code = 0; code < key_count; ++code) {
    if (strcmp(known_keys[code], arg_value) == 0) {
      return code;
    }
  }

  return 108;
}
// Prints the command line keys understood by ProcessCmdline() to
// std::cout, one per line. The print-debug key, which GetArgIndex() also
// recognizes, is now listed as well.
void HsaRsrcFactory::PrintHelpMsg( ) {
  std::cout << "Key for passing Brig filepath: " << HsaRsrcFactory::brig_path_key_ << std::endl;
  std::cout << "Key for passing Number of Compute Units: " << HsaRsrcFactory::num_cus_key_ << std::endl;
  std::cout << "Key for passing Number of Waves per CU: " << HsaRsrcFactory::num_waves_key_ << std::endl;
  std::cout << "Key for passing Number of Workitems per Wave: " << HsaRsrcFactory::num_workitems_key_ << std::endl;
  std::cout << "Key for passing Kernel Loop Count: " << HsaRsrcFactory::kernel_loop_count_key_ << std::endl;
  std::cout << "Key for enabling Print of Debug Info: " << HsaRsrcFactory::print_debug_key_ << std::endl;
}
+274
View File
@@ -0,0 +1,274 @@
#ifndef HSA_RSRC_FACTORY_H_
#define HSA_RSRC_FACTORY_H_
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <x86intrin.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
#include "hsatimer.h"
#include "hsa.h"
#include "hsa_ext_finalize.h"
#define HSA_ARGUMENT_ALIGN_BYTES 16
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64
// Prints `msg` and terminates the process with exit code 1 when `status`
// is not HSA_STATUS_SUCCESS.
//
// Wrapped in do { } while (0) so the macro behaves as one statement (safe
// inside an unbraced if/else); the arguments are parenthesized so that
// expressions can be passed.
#define check(msg, status)                    \
  do {                                        \
    if ((status) != HSA_STATUS_SUCCESS) {     \
      printf("%s\n", (msg));                  \
      exit(1);                                \
    }                                         \
  } while (0)

// Same as check(), but for the status_t values compared against
// STATUS_SUCCESS.
#define check_build(msg, status)              \
  do {                                        \
    if ((status) != STATUS_SUCCESS) {         \
      printf("%s\n", (msg));                  \
      exit(1);                                \
    }                                         \
  } while (0)
// Define required BRIG data structures.
// Only the subset needed by hsa_find_symbol_offset() is declared here: the
// common entry header, the kernel directive and the string data entry.
// NOTE(review): field order and widths presumably mirror the BRIG binary
// layout of the targeted HSAIL version - do not reorder; confirm against
// the BRIG specification.

// Fixed-width aliases for BRIG offsets, kinds and modifier bytes
typedef uint32_t BrigCodeOffset32_t;
typedef uint32_t BrigDataOffset32_t;
typedef uint16_t BrigKinds16_t;
typedef uint8_t BrigLinkage8_t;
typedef uint8_t BrigExecutableModifier8_t;
typedef BrigDataOffset32_t BrigDataOffsetString32_t;
// Entry kinds; hsa_find_symbol_offset() only looks for
// BRIG_KIND_DIRECTIVE_KERNEL
enum BrigKinds {
BRIG_KIND_NONE = 0x0000,
BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
};
// Common header of every code-section entry: its size in bytes (used to
// step to the next entry) and its kind
typedef struct BrigBase BrigBase;
struct BrigBase {
uint16_t byteCount;
BrigKinds16_t kind;
};
// Modifier bits of an executable directive
typedef struct BrigExecutableModifier BrigExecutableModifier;
struct BrigExecutableModifier {
BrigExecutableModifier8_t allBits;
};
// Executable (kernel) directive; `name` is an offset into the data section
// where the symbol's string is stored
typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
struct BrigDirectiveExecutable {
uint16_t byteCount;
BrigKinds16_t kind;
BrigDataOffsetString32_t name;
uint16_t outArgCount;
uint16_t inArgCount;
BrigCodeOffset32_t firstInArg;
BrigCodeOffset32_t firstCodeBlockEntry;
BrigCodeOffset32_t nextModuleEntry;
uint32_t codeBlockEntryCount;
BrigExecutableModifier modifier;
BrigLinkage8_t linkage;
uint16_t reserved;
};
// Data-section entry: a byte count followed by the bytes themselves
// (declared with one element, the payload extends past the struct)
typedef struct BrigData BrigData;
struct BrigData {
uint32_t byteCount;
uint8_t bytes[1];
};
// Provide access to command line arguments passed in by user
extern uint32_t hsa_cmdline_arg_cnt;
extern char **hsa_cmdline_arg_list;
// Encapsulates information about a Hsa Agent such as its
// handle, name, max queue size, max wavefront size, etc.
// Instances are filled in by the get_gpu_agents() callback and stored by
// pointer in HsaRsrcFactory.
typedef struct {
// Handle of Agent
hsa_agent_t dev_id;
// Name of Agent whose length is less than 64
// (queried with HSA_AGENT_INFO_NAME)
char name[64];
// Wavefront size of the agent (queried with HSA_AGENT_INFO_WAVEFRONT_SIZE)
uint32_t max_wave_size;
// Max size of Queue buffer (queried with HSA_AGENT_INFO_QUEUE_MAX_SIZE)
uint32_t max_queue_size;
// Memory region supporting kernel arguments: the Global-segment region
// whose Kernarg flag is set, as found by find_kernarg()
hsa_region_t kernarg_region;
} AgentInfo;
class HsaRsrcFactory {
public:
// Constructor of the class. Will initialize the Hsa Runtime and
// query the system topology to get the list of Cpu and Gpu devices
HsaRsrcFactory( );
// Destructor of the class
~HsaRsrcFactory( );
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t GetCountOfGpuAgents( );
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info);
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool CreateQueue(AgentInfo *agent_info,
uint32_t num_pkts, hsa_queue_t **queue);
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool CreateSignal(uint32_t value, hsa_signal_t *signal);
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateMemory(AgentInfo *agent_info, size_t size);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool LoadAndFinalize(AgentInfo *agent_info,
const char *brig_path, char *kernel_name,
hsa_ext_code_descriptor_t **code_desc);
// Add an instance of AgentInfo representing a Hsa Gpu agent
void AddAgentInfo(AgentInfo *agent_info);
// Returns the file path where the brig files are located
static char* GetBrigPath( );
// Returns the number of compute units present on platform
static uint32_t GetNumOfCUs( );
// Returns the maximum number of waves that can be launched
// per compute unit. The actual number that can be launched
// is affected by resource availability
static uint32_t GetNumOfWavesPerCU( );
// Returns the number of work-items that can execute per wave
static uint32_t GetNumOfWorkItemsPerWave( );
// Returns the number of times kernel loop body should execute.
static uint32_t GetKernelLoopCount();
// Returns boolean flag to indicate if debug info should be printed
static uint32_t GetPrintDebugInfo();
private:
// Number of queues to create
uint32_t num_queues_;
// Used to maintain a list of Hsa Queue handles
std::vector<hsa_queue_t *> queue_list_;
// Number of Signals to create
uint32_t num_signals_;
// Used to maintain a list of Hsa Signal handles
std::vector<hsa_signal_t *> signal_list_;
// Number of agents reported by platform
uint32_t num_agents_;
// Used to maintain a list of Hsa Gpu Agent Info
std::vector<AgentInfo *> gpu_list_;
// Records the file path where Brig file is located.
// Value is available only after an instance has been built.
static char* brig_path_;
static constexpr char* brig_path_key_ = "brig_path";
// Records the number of Compute units present on system.
// Value is available only after an instance has been built.
static uint32_t num_cus_;
static constexpr char* num_cus_key_ = "num_cus";
// Records the number of waves that can be launched per Compute unit
// Value is available only after an instance has been built.
static uint32_t num_waves_;
static constexpr char* num_waves_key_ = "waves_per_cu";
// Records the number of work-items that can be packed into a wave
// Value is available only after an instance has been built.
static uint32_t num_workitems_;
static constexpr char* num_workitems_key_ = "workitems_per_wave";
// Records the number of times kernel loop body should run. Value
// is available only after an instance has been built.
static uint32_t kernel_loop_count_;
static constexpr char* kernel_loop_count_key_ = "kernel_loop_count";
// Records whether debug info should be printed. Value
// is available only after an instance has been built.
static bool print_debug_info_;
static constexpr char* print_debug_key_ = "print_debug";
// Print the various fields of Hsa Gpu Agents
bool PrintGpuAgents( );
// Process command line arguments. The method will capture
// various user command line parameters for tests to use
static void ProcessCmdline( );
// Prints the help banner on user arg keys
static void PrintHelpMsg( );
// Maps an index for the user argument
static uint32_t GetArgIndex(char *arg_value);
};
#endif // HSA_RSRC_FACTORY_H_
+45
View File
@@ -0,0 +1,45 @@
#ifdef _WIN32 // Compiling for Windows Platform
#include <stdlib.h>
#include <Windows.h>
#include "os.h"
#include <stdio.h>
// Set an environment variable (Windows build).
//
// @param env_var_name Name of the environment variable to set
//
// @param env_var_value Value to assign to the variable
//
// On failure a message is printed to stdout and the process is
// terminated with exit code 1; the function returns only on success.
//
void SetEnv(const char* env_var_name, const char* env_var_value) {
  // SetEnvironmentVariable yields a nonzero result when it succeeds
  const bool succeeded = SetEnvironmentVariable(env_var_name, env_var_value);
  if (succeeded) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}
char* GetEnv(const char* env_var_name){
char* buff;
DWORD char_count = GetEnvironmentVariable(env_var_name, NULL, 0);
if (char_count == 0) return NULL;
buff = (char*)malloc(sizeof(char) * char_count);
GetEnvironmentVariable(env_var_name, buff, char_count);
buff[char_count - 1] = '\0';
return buff;
}
#elif defined(__linux__)
#include "os.h"
#include <stdlib.h>
// Set an environment variable (Linux build).
//
// @param env_var_name Name of the environment variable to set
//
// @param env_var_value Value to assign to the variable
//
// An existing value is overwritten. On failure a message is printed to
// stdout and the process is terminated with exit code 1; the function
// returns only on success.
//
void SetEnv(const char* env_var_name, const char* env_var_value) {
  // Third argument (1) requests that an existing value be replaced
  const int status = setenv(env_var_name, env_var_value, 1);
  if (status == 0) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}
// Get the value of an environment variable (Linux build).
//
// @param env_var_name Name of the environment variable to query
//
// @return char* Pointer returned by getenv for the variable, NULL if
// getenv finds no such variable. The pointer is not allocated by this
// function, unlike the _WIN32 variant which returns a malloc'd buffer.
//
char* GetEnv(const char* env_var_name) {
  char* const env_value = getenv(env_var_name);
  return env_value;
}
#endif
+12
View File
@@ -0,0 +1,12 @@
#ifndef HSA_PERF_SRC_UTILS_OS_H_
#define HSA_PERF_SRC_UTILS_OS_H_
#include <stdio.h>
// Set an environment variable
void SetEnv(const char* env_var_name, const char* env_var_value);
// Get the value of an environment variable
char* GetEnv(const char* env_var_name);
#endif