From 6b478ae5cc546d6fc3c2aa1a8618e2bf61206fed Mon Sep 17 00:00:00 2001 From: "Ramesh Errabolu (xN/A) TX" Date: Fri, 27 Feb 2015 19:29:01 -0500 Subject: [PATCH] ECR #333755 - Move Hsa Sample BinarySearch from Hsa Sdk project to current Runtime/Samples [git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1126024] [ROCm/ROCR-Runtime commit: 26575ed9ba228ae7306d74eaf26632cb9f208f6d] --- projects/rocr-runtime/samples/Makefile | 7 +- .../samples/common/helper_funcs.cpp | 386 ++++++++++++++++ .../samples/common/helper_funcs.hpp | 159 +++++++ .../samples/common/hsa_rsrc_factory.cpp | 422 ++++++++++++++++++ .../samples/common/hsa_rsrc_factory.hpp | 274 ++++++++++++ projects/rocr-runtime/samples/common/os.cpp | 45 ++ projects/rocr-runtime/samples/common/os.h | 12 + 7 files changed, 1304 insertions(+), 1 deletion(-) create mode 100755 projects/rocr-runtime/samples/common/helper_funcs.cpp create mode 100755 projects/rocr-runtime/samples/common/helper_funcs.hpp create mode 100755 projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp create mode 100755 projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp create mode 100755 projects/rocr-runtime/samples/common/os.cpp create mode 100755 projects/rocr-runtime/samples/common/os.h diff --git a/projects/rocr-runtime/samples/Makefile b/projects/rocr-runtime/samples/Makefile index 4037c071c1..7b1ccb4d85 100644 --- a/projects/rocr-runtime/samples/Makefile +++ b/projects/rocr-runtime/samples/Makefile @@ -2,6 +2,11 @@ OPENCL_DEPTH = ../.. 
include $(OPENCL_DEPTH)/runtimenew/runtimedefs -SUBDIRS = MatrixTranspose MatrixMultiplication DwtHarr1D BitionicSort NBody +SUBDIRS = NBody +SUBDIRS += DwtHarr1D +SUBDIRS += BitionicSort +SUBDIRS += BinarySearch +SUBDIRS += MatrixTranspose +SUBDIRS += MatrixMultiplication include $(OPENCL_DEPTH)/runtimenew/runtimerules diff --git a/projects/rocr-runtime/samples/common/helper_funcs.cpp b/projects/rocr-runtime/samples/common/helper_funcs.cpp new file mode 100755 index 0000000000..8d23b98738 --- /dev/null +++ b/projects/rocr-runtime/samples/common/helper_funcs.cpp @@ -0,0 +1,386 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+********************************************************************/ + +#include "helper_funcs.hpp" + +#ifndef _WIN32 +#include +#endif + + +/* + * Prints no more than 256 elements of the given array. + * Prints full array if length is less than 256. + * Prints Array name followed by elements. + */ +template +void printArray( + const std::string header, + const T * data, + const int width, + const int height) +{ + std::cout<<"\n"< +int fillRandom( + T * arrayPtr, + const int width, + const int height, + const T rangeMin, + const T rangeMax, + unsigned int seed) +{ + if(!arrayPtr) + { + error("Cannot fill array. NULL pointer."); + return HSA_SDK_FAILURE; + } + + if(!seed) + seed = (unsigned int)time(NULL); + + srand(seed); + double range = double(rangeMax - rangeMin) + 1.0; + + /* random initialisation of input */ + for(int i = 0; i < height; i++) + for(int j = 0; j < width; j++) + { + int index = i*width + j; + arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0)); + } + + return HSA_SDK_SUCCESS; +} + +template +int fillPos( + T * arrayPtr, + const int width, + const int height) +{ + if(!arrayPtr) + { + error("Cannot fill array. NULL pointer."); + return HSA_SDK_FAILURE; + } + + /* initialisation of input with positions*/ + for(T i = 0; i < height; i++) + for(T j = 0; j < width; j++) + { + T index = i*width + j; + arrayPtr[index] = index; + } + + return HSA_SDK_SUCCESS; +} + +template +int fillConstant( + T * arrayPtr, + const int width, + const int height, + const T val) +{ + if(!arrayPtr) + { + error("Cannot fill array. 
NULL pointer."); + return HSA_SDK_FAILURE; + } + + /* initialisation of input with constant value*/ + for(int i = 0; i < height; i++) + for(int j = 0; j < width; j++) + { + int index = i*width + j; + arrayPtr[index] = val; + } + + return HSA_SDK_SUCCESS; +} + +template +T roundToPowerOf2(T val) +{ + int bytes = sizeof(T); + + val--; + for(int i = 0; i < bytes; i++) + val |= val >> (1< +int isPowerOf2(T val) +{ + long long _val = val; + if((_val & (-_val))-_val == 0 && _val != 0) + return HSA_SDK_SUCCESS; + else + return HSA_SDK_FAILURE; +} + + + +template +bool checkVal( + T input, + T reference, + std::string message, + bool isAPIerror) +{ + if(input==reference) + { + return true; + } + else + { + error(message); + return false; + } +} + + +template +std::string toString(T t, std::ios_base &(*r)(std::ios_base&)) +{ + std::ostringstream output; + output << r << t; + return output.str(); +} + + +bool +compare(const float *refData, const float *data, + const int length, const float epsilon) +{ + float error = 0.0f; + float ref = 0.0f; + + for(int i = 1; i < length; ++i) + { + float diff = refData[i] - data[i]; + error += diff * diff; + ref += refData[i] * refData[i]; + } + + float normRef =::sqrtf((float) ref); + if (::fabs((float) ref) < 1e-7f) { + return false; + } + float normError = ::sqrtf((float) error); + error = normError / normRef; + + return error < epsilon; +} + +bool +compare(const double *refData, const double *data, + const int length, const double epsilon) +{ + double error = 0.0; + double ref = 0.0; + + for(int i = 1; i < length; ++i) + { + double diff = refData[i] - data[i]; + error += diff * diff; + ref += refData[i] * refData[i]; + } + + double normRef =::sqrt((double) ref); + if (::fabs((double) ref) < 1e-7) { + return false; + } + double normError = ::sqrt((double) error); + error = normError / normRef; + + return error < epsilon; +} + +void +error(const char* errorMsg) +{ + std::cout<<"Error: "<(const std::string, + const short*, int, int); 
+template +void printArray(const std::string, + const unsigned char *, int, int); +template +void printArray(const std::string, + const unsigned int *, int, int); +template +void printArray(const std::string, + const int *, int, int); +template +void printArray(const std::string, + const long*, int, int); +template +void printArray(const std::string, + const float*, int, int); +template +void printArray(const std::string, + const double*, int, int); + +template +int fillRandom(unsigned char* arrayPtr, + const int width, const int height, + unsigned char rangeMin, unsigned char rangeMax, unsigned int seed); +template +int fillRandom(unsigned int* arrayPtr, + const int width, const int height, + unsigned int rangeMin, unsigned int rangeMax, unsigned int seed); +template +int fillRandom(int* arrayPtr, + const int width, const int height, + int rangeMin, int rangeMax, unsigned int seed); +template +int fillRandom(long* arrayPtr, + const int width, const int height, + long rangeMin, long rangeMax, unsigned int seed); +template +int fillRandom(float* arrayPtr, + const int width, const int height, + float rangeMin, float rangeMax, unsigned int seed); +template +int fillRandom(double* arrayPtr, + const int width, const int height, + double rangeMin, double rangeMax, unsigned int seed); + +template +short roundToPowerOf2(short val); +template +unsigned int roundToPowerOf2(unsigned int val); +template +int roundToPowerOf2(int val); +template +long roundToPowerOf2(long val); + +template +int isPowerOf2(short val); +template +int isPowerOf2(unsigned int val); +template +int isPowerOf2(int val); +template +int isPowerOf2(long val); + +template<> +int fillPos(short * arrayPtr, const int width, const int height); +template<> +int fillPos(unsigned int * arrayPtr, const int width, const int height); +template<> +int fillPos(int * arrayPtr, const int width, const int height); +template<> +int fillPos(long * arrayPtr, const int width, const int height); + +template<> +int 
fillConstant(short * arrayPtr, + const int width, const int height, + const short val); +template<> +int fillConstant(unsigned int * arrayPtr, + const int width, const int height, + const unsigned int val); +template<> +int fillConstant(int * arrayPtr, + const int width, const int height, + const int val); +template<> +int fillConstant(long * arrayPtr, + const int width, const int height, + const long val); +template<> +int fillConstant(long * arrayPtr, + const int width, const int height, + const long val); +template<> +int fillConstant(long * arrayPtr, + const int width, const int height, + const long val); + + +template +bool checkVal(char input, char reference, std::string message, bool isAPIerror); +template +bool checkVal(bool input, bool reference, std::string message, bool isAPIerror); +template +bool checkVal(std::string input, std::string reference, std::string message, bool isAPIerror); +template +bool checkVal(short input, short reference, std::string message, bool isAPIerror); +template +bool checkVal(unsigned int input, unsigned int reference, std::string message, bool isAPIerror); +template +bool checkVal(int input, int reference, std::string message, bool isAPIerror); +template +bool checkVal(long input, long reference, std::string message, bool isAPIerror); + + +template +std::string toString(char t, std::ios_base &(*r)(std::ios_base&)); +template +std::string toString(short t, std::ios_base &(*r)(std::ios_base&)); +template +std::string toString(unsigned int t, std::ios_base &(*r)(std::ios_base&)); +template +std::string toString(int t, std::ios_base &(*r)(std::ios_base&)); +template +std::string toString(long t, std::ios_base &(*r)(std::ios_base&)); +template +std::string toString(float t, std::ios_base &(*r)(std::ios_base&)); +template +std::string toString(double t, std::ios_base &(*r)(std::ios_base&)); + diff --git a/projects/rocr-runtime/samples/common/helper_funcs.hpp b/projects/rocr-runtime/samples/common/helper_funcs.hpp new file mode 
100755 index 0000000000..ae00f3e27c --- /dev/null +++ b/projects/rocr-runtime/samples/common/helper_funcs.hpp @@ -0,0 +1,159 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+********************************************************************/ +#ifndef HELPER_FUNCS_HPP_ +#define HELPER_FUNCS_HPP_ + +#define HSA_SDK_SUCCESS 0 +#define HSA_SDK_FAILURE 1 +#define HSA_SDK_EXPECTED_FAILURE 2 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * error + * constant function, Prints error messages + * @param errorMsg char* message + */ +void error(const char* errorMsg); + +/** + * error + * constant function, Prints error messages + * @param errorMsg std::string message + */ +void error(std::string errorMsg); + +/** + * expectedError + * constant function, Prints error messages + * @param errorMsg char* message + */ +void expectedError(const char* errorMsg); + +/** + * expectedError + * constant function, Prints error messages + * @param errorMsg string message + */ +void expectedError(std::string errorMsg); + +/** + * compare template version + * compare data to check error + * @param refData templated input + * @param data templated input + * @param length number of values to compare + * @param epsilon errorWindow + */ +bool compare(const float *refData, const float *data, + const int length, const float epsilon = 1e-6f); +bool compare(const double *refData, const double *data, + const int length, const double epsilon = 1e-6); + +/** + * printArray + * displays a array on std::out + */ +template +void printArray( + const std::string header, + const T * data, + const int width, + const int height); + + +/** + * fillRandom + * fill array with random values + */ +template +int fillRandom( + T * arrayPtr, + const int width, + const int height, + const T rangeMin, + const T rangeMax, + unsigned int seed=123); + +/** + * fillPos + * fill the specified positions + */ +template +int fillPos( + T * arrayPtr, + const int width, + const int height); + +/** + * fillConstant + * fill the array with constant value + */ +template +int fillConstant( + T * arrayPtr, + const int 
width, + const int height, + const T val); + + +/** + * roundToPowerOf2 + * rounds to a power of 2 + */ +template +T roundToPowerOf2(T val); + +/** + * isPowerOf2 + * checks if input is a power of 2 + */ +template +int isPowerOf2(T val); + +/** + * checkVal + * Set default(isAPIerror) parameter to false + * if checkVaul is used to check otherthan OpenCL API error code + */ +template +bool checkVal( + T input, + T reference, + std::string message, bool isAPIerror = true); + +/** + * toString + * convert a T type to string + */ +template +std::string toString(T t, std::ios_base & (*r)(std::ios_base&)); + + + + +#endif diff --git a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp new file mode 100755 index 0000000000..ea8ca3f2da --- /dev/null +++ b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp @@ -0,0 +1,422 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "hsa.h" +#include "elf_utils.h" +#include "hsa_rsrc_factory.hpp" + +using namespace std; + +// Provide access to command line arguments passed in by user +uint32_t hsa_cmdline_arg_cnt; +char **hsa_cmdline_arg_list; + +// Callback function to find and bind kernarg region of an agent +static hsa_status_t find_kernarg(hsa_region_t region, void *data) { + + hsa_region_global_flag_t flags; + hsa_region_segment_t segment_id; + + hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); + if (segment_id != HSA_REGION_SEGMENT_GLOBAL) { + return HSA_STATUS_SUCCESS; + } + + hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) { + AgentInfo *agent_info = (AgentInfo *)data; + agent_info->kernarg_region = region; + } + + return HSA_STATUS_SUCCESS; +} + +// Callback function to get the number of agents +static hsa_status_t get_gpu_agents(hsa_agent_t agent, void *data) { + + // Copy handle of agent and increment number of 
agents reported + HsaRsrcFactory *rsrcFactory = reinterpret_cast(data); + + // Determine if device is a Gpu agent + hsa_status_t status; + hsa_device_type_t type; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + if (type != HSA_DEVICE_TYPE_GPU) { + return HSA_STATUS_SUCCESS; + } + + // Device is a Gpu agent, build an instance of AgentInfo + AgentInfo *agent_info = reinterpret_cast(malloc(sizeof(AgentInfo))); + agent_info->dev_id = agent; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); + agent_info->max_wave_size = 0; + hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); + agent_info->max_queue_size = 0; + hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); + + // Find and Bind Kernarg regions of the Gpu agent + hsa_agent_iterate_regions(agent, find_kernarg, agent_info); + + // Save the instance of AgentInfo + rsrcFactory->AddAgentInfo(agent_info); + return HSA_STATUS_SUCCESS; +} + +// Finds the specified symbols offset in the specified brig_module. +// If the symbol is found the function returns HSA_STATUS_SUCCESS, +// otherwise it returns HSA_STATUS_ERROR. 
+//
+// A kernel directive matches only when its name is exactly symbol_name;
+// a name that merely starts with symbol_name is not accepted.
+//
+// @param brig_module BRIG module whose code and data sections are searched
+//
+// @param symbol_name Null terminated name of the kernel to look up
+//
+// @param offset Output parameter updated with the byte offset of the
+// matching kernel directive, measured from the start of the code section
+//
+// @return hsa_status_t HSA_STATUS_SUCCESS if found, HSA_STATUS_ERROR if
+// the kernel is absent or any argument / section pointer is null
+//
+hsa_status_t hsa_find_symbol_offset(hsa_ext_brig_module_t *brig_module,
+                                    char *symbol_name,
+                                    hsa_ext_brig_code_section_offset32_t *offset) {
+
+  // Reject missing arguments instead of dereferencing them
+  if (brig_module == NULL || symbol_name == NULL || offset == NULL) {
+    return HSA_STATUS_ERROR;
+  }
+
+  // Get the data section
+  hsa_ext_brig_section_header_t *data_hdr = brig_module->section[HSA_EXT_BRIG_SECTION_DATA];
+
+  // Get the code section
+  hsa_ext_brig_section_header_t *code_hdr = brig_module->section[HSA_EXT_BRIG_SECTION_CODE];
+  if (data_hdr == NULL || code_hdr == NULL) {
+    return HSA_STATUS_ERROR;
+  }
+
+  // Length of the requested name, computed once for every comparison below
+  const size_t name_len = strlen(symbol_name);
+
+  // Walk the code section entry by entry, starting right after its header.
+  // Using "<" keeps the walk inside the section even if the entry sizes do
+  // not add up to exactly byte_count.
+  BrigCodeOffset32_t code_offset = code_hdr->header_byte_count;
+  while (code_offset < code_hdr->byte_count) {
+    BrigBase *code_entry =
+        reinterpret_cast<BrigBase *>(reinterpret_cast<char *>(code_hdr) + code_offset);
+
+    if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) {
+
+      // Now find the kernel name in the data section
+      BrigDirectiveExecutable *directive_kernel =
+          reinterpret_cast<BrigDirectiveExecutable *>(code_entry);
+      BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
+      BrigData *data_entry =
+          reinterpret_cast<BrigData *>(reinterpret_cast<char *>(data_hdr) + data_name_offset);
+
+      // The stored name carries its own length in byteCount; requiring the
+      // lengths to agree turns the bounded compare into an exact match and
+      // keeps it from reading beyond the stored bytes.
+      if (data_entry->byteCount == name_len &&
+          !strncmp(symbol_name, reinterpret_cast<char *>(data_entry->bytes), name_len)) {
+        *offset = code_offset;
+        return HSA_STATUS_SUCCESS;
+      }
+    }
+
+    // An entry reporting a size of zero would never advance the walk
+    if (code_entry->byteCount == 0) {
+      break;
+    }
+    code_offset += code_entry->byteCount;
+  }
+  return HSA_STATUS_ERROR;
+}
+
+// Definitions for Static Data members of the class. They are filled in by
+// ProcessCmdline() when an instance is constructed.
+char* HsaRsrcFactory::brig_path_ = NULL;
+uint32_t HsaRsrcFactory::num_cus_;
+uint32_t HsaRsrcFactory::num_waves_;
+uint32_t HsaRsrcFactory::num_workitems_;
+uint32_t HsaRsrcFactory::kernel_loop_count_;
+bool HsaRsrcFactory::print_debug_info_ = false;
+
+// Constructor of the class. Initializes the Hsa Runtime, collects the Gpu
+// agents of the platform and captures the user's command line parameters.
+// Any runtime failure prints a message and terminates the process.
+HsaRsrcFactory::HsaRsrcFactory( ) {
+
+  // Initialize the Hsa Runtime. check() is used instead of assert() so the
+  // failure is still detected when the sample is compiled with NDEBUG.
+  hsa_status_t status = hsa_init();
+  check("Error Calling hsa_init", status);
+
+  // Discover the set of Gpu devices available on the platform
+  status = hsa_iterate_agents(get_gpu_agents, this);
+  check("Error Calling hsa_iterate_agents", status);
+
+  // Process command line arguments
+  ProcessCmdline( );
+}
+
+// Destructor of the class
+//
+// NOTE(review): the AgentInfo records stored in gpu_list_ by get_gpu_agents
+// are obtained with malloc and are not released here, and the runtime is not
+// shut down. Confirm who owns those records before adding cleanup, since
+// AddAgentInfo() accepts pointers from any caller.
+HsaRsrcFactory::~HsaRsrcFactory( ) {
+
+}
+
+// Get the count of Hsa Gpu Agents available on the platform
+//
+// @return uint32_t Number of Gpu agents on platform
+//
+uint32_t HsaRsrcFactory::GetCountOfGpuAgents( ) {
+  return static_cast<uint32_t>(gpu_list_.size());
+}
+
+// Get the AgentInfo handle of a Gpu device
+//
+// @param idx Gpu Agent at specified index
+//
+// @param agent_info Output parameter updated with AgentInfo. It is left
+// untouched when the call fails.
+//
+// @return bool true if successful, false if idx is out of range or
+// agent_info is null
+//
+bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info) {
+
+  // Determine if request is valid: there must be somewhere to store the
+  // result and the index must name a discovered agent
+  if (agent_info == NULL || idx >= gpu_list_.size()) {
+    return false;
+  }
+
+  // Copy AgentInfo from specified index
+  *agent_info = gpu_list_[idx];
+  return true;
+}
+
+// Create a Queue object and return its handle. The queue object is expected
+// to support user requested number of Aql dispatch packets.
+//
+// @param agent_info Gpu Agent on which to create a queue object
+//
+// @param num_pkts Number of packets to be held by queue
+//
+// @param queue Output parameter updated with handle of queue object
+//
+// @return bool true if successful, false if the runtime reports an error
+// or if agent_info / queue is null
+//
+bool HsaRsrcFactory::CreateQueue(AgentInfo *agent_info,
+                                 uint32_t num_pkts, hsa_queue_t **queue) {
+
+  // Both pointers are dereferenced (here or by the runtime), so refuse
+  // the request up front when either one is missing
+  if (agent_info == NULL || queue == NULL) {
+    return false;
+  }
+
+  // Multi-producer queue, no callback, no service queue
+  hsa_status_t status = hsa_queue_create(agent_info->dev_id, num_pkts,
+                                         HSA_QUEUE_TYPE_MULTI, NULL, NULL,
+                                         UINT32_MAX, UINT32_MAX, queue);
+  return (status == HSA_STATUS_SUCCESS);
+}
+
+// Create a Signal object and return its handle.
+//
+// @param value Initial value of signal object
+//
+// @param signal Output parameter updated with handle of signal object
+//
+// @return bool true if successful, false otherwise
+//
+bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t *signal) {
+
+  // The consumer count / consumer list arguments are passed as 0 / NULL
+  // (presumably "no restriction on consumers" - confirm against the Hsa spec)
+  hsa_status_t status;
+  status = hsa_signal_create(value, 0, NULL, signal);
+  return (status == HSA_STATUS_SUCCESS);
+}
+
+// Allocate memory of the specified size from the kernarg region recorded
+// in the specified agent's AgentInfo. Currently supports only the Global
+// segment region whose Kernarg flag is set.
+//
+// @param agent_info Agent from whose memory region to allocate
+//
+// @param size Size of memory in terms of bytes
+//
+// @return uint8_t* Pointer to buffer, null if allocation fails.
+//
+uint8_t* HsaRsrcFactory::AllocateMemory(AgentInfo *agent_info, size_t size) {
+
+  hsa_status_t status;
+  uint8_t *buffer = NULL;
+  status = hsa_memory_allocate(agent_info->kernarg_region, size, (void **)&buffer);
+
+  // Hand back null rather than whatever the runtime left in buffer on failure
+  return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
+}
+
+
+
+// Loads an Assembled Brig file and Finalizes it into Device Isa
+//
+// Every step is verified with check() / check_build(), which print the
+// given message and call exit(1) on failure. The method therefore either
+// returns true or terminates the process; it never returns false.
+//
+// @param agent_info Gpu device for which to finalize
+//
+// @param brig_path File path of the Assembled Brig file
+//
+// @param kernel_name Name of the kernel to finalize
+//
+// @param code_desc Handle of finalized Code Descriptor that could
+// be used to submit for execution
+//
+// @return bool true if successful
+//
+bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info,
+                                     const char *brig_path, char *kernel_name,
+                                     hsa_ext_code_descriptor_t **code_desc) {
+
+  // Load BRIG, encapsulated in an ELF container, into a BRIG module.
+  status_t build_err;
+  hsa_ext_brig_module_t *brig_obj;
+  build_err = (status_t)create_brig_module_from_brig_file(brig_path, &brig_obj);
+  check_build("Error in creating the brig module from brig file", build_err);
+
+  // Determine whether the Brig module has the kernel symbol
+  hsa_status_t status;
+  hsa_ext_brig_code_section_offset32_t kernel_symbol;
+  status = hsa_find_symbol_offset(brig_obj, kernel_name, &kernel_symbol);
+  check("Error in Finding the Symbol Offset for the Kernel", status);
+
+  // Create Hsa Program for this single agent (large machine model, full profile)
+  hsa_ext_program_handle_t program;
+  status = hsa_ext_program_create(&agent_info->dev_id, 1,
+                                  HSA_EXT_BRIG_MACHINE_LARGE,
+                                  HSA_EXT_BRIG_PROFILE_FULL, &program);
+  check("Error in Creating Hsa Program", status);
+
+  // Add the BRIG module to hsa program.
+  hsa_ext_brig_module_handle_t brig_handle;
+  status = hsa_ext_add_module(program, brig_obj, &brig_handle);
+  check("Error in Adding Brig Module to the Program", status);
+
+  // Construct finalization request list. It holds exactly one entry: the
+  // kernel located above inside the module that was just added.
+  hsa_ext_finalization_request_t finalize_request;
+  finalize_request.module = brig_handle;
+  finalize_request.symbol = kernel_symbol;
+  finalize_request.program_call_convention = 0;
+
+  // Finalize the Hsa Program.
+  status = hsa_ext_finalize_program(program, agent_info->dev_id,
+                                    1, &finalize_request, NULL, NULL, 0, NULL, 0);
+  check("Error in Finalizing the Hsa Program", status);
+
+  // Destroy the brig module. The program was successfully created, the kernel
+  // symbol was found and the program was finalized, so it is no longer needed.
+  destroy_brig_module(brig_obj);
+
+  // Get the hsa code descriptor address.
+  status = hsa_ext_query_kernel_descriptor_address(program, brig_handle, kernel_symbol, code_desc);
+  check("Error Querying the Kernel Descriptor Address", status);
+
+  return true;
+}
+
+// Add an instance of AgentInfo representing a Hsa Gpu agent. The pointer
+// itself is stored in the list of Gpu agents; no copy is made.
+void HsaRsrcFactory::AddAgentInfo(AgentInfo *agent_info) {
+  gpu_list_.push_back(agent_info);
+}
+
+// Print the various fields of every Hsa Gpu Agent in the list to std::cout
+//
+// @return bool Always true
+//
+bool HsaRsrcFactory::PrintGpuAgents( ) {
+
+  AgentInfo *agent_info;
+  int size = gpu_list_.size();
+  for (int idx = 0; idx < size; idx++) {
+    agent_info = gpu_list_[idx];
+    std::cout << std::endl;
+    std::cout << "Hsa Gpu Agent Id: " << agent_info->dev_id.handle << std::endl;
+    std::cout << "Hsa Gpu Agent Name: " << agent_info->name << std::endl;
+    std::cout << "Hsa Gpu Agent Max Wave Size: " << agent_info->max_wave_size << std::endl;
+    std::cout << "Hsa Gpu Agent Max Queue Size: " << agent_info->max_queue_size << std::endl;
+    std::cout << "Hsa Gpu Agent Kernarg Region Id: " << agent_info->kernarg_region.handle << std::endl;
+    std::cout << std::endl;
+  }
+  return true;
+}
+
+// Returns the file path where the brig file is located, as given on the
+// command line; null if the user did not pass one. Value is available
+// only after an instance has been built.
+char* HsaRsrcFactory::GetBrigPath( ) {
+  return HsaRsrcFactory::brig_path_;
+}
+
+// Returns the number of compute units as given on the command line.
+// Value is available only after an instance has been built.
+uint32_t HsaRsrcFactory::GetNumOfCUs( ) {
+  return HsaRsrcFactory::num_cus_;
+}
+
+// Returns the maximum number of waves that can be launched
+// per compute unit, as given on the command line. The actual
+// number that can be launched is affected by resource availability.
+//
+// Value is available only after an instance has been built.
+uint32_t HsaRsrcFactory::GetNumOfWavesPerCU( ) {
+  return HsaRsrcFactory::num_waves_;
+}
+
+// Returns the number of work-items that can execute per wave
+// Value is available only after an instance has been built.
+uint32_t HsaRsrcFactory::GetNumOfWorkItemsPerWave( ) {
+  return HsaRsrcFactory::num_workitems_;
+}
+
+// Returns the number of times kernel loop body should execute.
+// Value is available only after an instance has been built.
+uint32_t HsaRsrcFactory::GetKernelLoopCount() {
+  return HsaRsrcFactory::kernel_loop_count_;
+}
+
+// Returns boolean flag (as 0 or 1) to indicate if debug info should be
+// printed. Value is available only after an instance has been built.
+uint32_t HsaRsrcFactory::GetPrintDebugInfo() {
+  return HsaRsrcFactory::print_debug_info_;
+}
+
+// Process command line arguments. The method will capture
+// various user command line parameters for tests to use.
+//
+// Arguments are read from hsa_cmdline_arg_list as "key value" pairs,
+// starting at index one and advancing two entries at a time. Unknown
+// keys are ignored. A key that needs a value but is the last entry of
+// the list is ignored as well, instead of reading beyond the list.
+void HsaRsrcFactory::ProcessCmdline( ) {
+
+  // Nothing to do when no argument list was published
+  if (hsa_cmdline_arg_list == NULL) {
+    return;
+  }
+
+  for (uint32_t idx = 1; idx < hsa_cmdline_arg_cnt; idx += 2) {
+    const uint32_t arg_idx = GetArgIndex(hsa_cmdline_arg_list[idx]);
+
+    // The debug key is a pure flag: it never looks at the entry after it
+    if (arg_idx == 5) {
+      HsaRsrcFactory::print_debug_info_ = true;
+      continue;
+    }
+
+    // Every remaining key consumes the next entry as its value, so that
+    // entry must exist and must not be null before it is parsed
+    if (idx + 1 >= hsa_cmdline_arg_cnt) {
+      break;
+    }
+    char *value = hsa_cmdline_arg_list[idx + 1];
+    if (value == NULL) {
+      continue;
+    }
+
+    switch (arg_idx) {
+      case 0:
+        HsaRsrcFactory::brig_path_ = value;
+        break;
+      case 1:
+        HsaRsrcFactory::num_cus_ = atoi(value);
+        break;
+      case 2:
+        HsaRsrcFactory::num_waves_ = atoi(value);
+        break;
+      case 3:
+        HsaRsrcFactory::num_workitems_ = atoi(value);
+        break;
+      case 4:
+        HsaRsrcFactory::kernel_loop_count_ = atoi(value);
+        break;
+      default:
+        // Unrecognized key: skip it together with its value
+        break;
+    }
+  }
+
+}
+
+// Map a command line key to the index used by ProcessCmdline
+//
+// @param arg_value Key as typed by the user
+//
+// @return uint32_t 0 to 5 for a known key (in the order brig path,
+// compute units, waves per CU, workitems per wave, kernel loop count,
+// print debug); 108 for a null or unknown key
+//
+uint32_t HsaRsrcFactory::GetArgIndex(char *arg_value ) {
+
+  // A missing key cannot match anything
+  if (arg_value == NULL) {
+    return 108;
+  }
+
+  // Map Brig file path to index zero
+  if (!strcmp(HsaRsrcFactory::brig_path_key_, arg_value)) {
+    return 0;
+  }
+
+  // Map Number of Compute Units to index one
+  if (!strcmp(HsaRsrcFactory::num_cus_key_, arg_value)) {
+    return 1;
+  }
+
+  // Map Number of Waves per CU to index two
+  if (!strcmp(HsaRsrcFactory::num_waves_key_, arg_value)) {
+    return 2;
+  }
+
+  // Map Number of Workitems per Wave to index three
+  if (!strcmp(HsaRsrcFactory::num_workitems_key_, arg_value)) {
+    return 3;
+  }
+
+  // Map Kernel Loop Count to index four
+  if (!strcmp(HsaRsrcFactory::kernel_loop_count_key_, arg_value)) {
+    return 4;
+  }
+
+  // Map print debug info parameter to index five
+  if (!strcmp(HsaRsrcFactory::print_debug_key_, arg_value)) {
+    return 5;
+  }
+
+  // Sentinel for "not a known key"
+  return 108;
+
+}
+
+// Print the command line keys understood by ProcessCmdline to std::cout
+void HsaRsrcFactory::PrintHelpMsg( ) {
+
+  std::cout << "Key for passing Brig filepath: " << HsaRsrcFactory::brig_path_key_ << std::endl;
+  std::cout << "Key for passing Number of Compute Units: " << HsaRsrcFactory::num_cus_key_ << std::endl;
+  std::cout << "Key for passing Number of Waves per CU: " << HsaRsrcFactory::num_waves_key_ << std::endl;
+  std::cout << "Key for passing Number of Workitems per Wave: " << HsaRsrcFactory::num_workitems_key_ << std::endl;
+  std::cout << "Key for passing Kernel Loop Count: " << HsaRsrcFactory::kernel_loop_count_key_ << std::endl;
+  std::cout << "Key for enabling print of Debug Info: " << HsaRsrcFactory::print_debug_key_ << std::endl;
+
+}
diff --git a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp
new file mode 100755
index 0000000000..72ac8f9ca5
--- /dev/null
+++ b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp
@@ -0,0 +1,274 @@
+#ifndef HSA_RSRC_FACTORY_H_
+#define HSA_RSRC_FACTORY_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "hsatimer.h"
+#include "hsa.h"
+#include "hsa_ext_finalize.h"
+
+
+#define HSA_ARGUMENT_ALIGN_BYTES 16
+#define HSA_QUEUE_ALIGN_BYTES 64
+#define HSA_PACKET_ALIGN_BYTES 64
+
+#define check(msg, status) \
+if (status != HSA_STATUS_SUCCESS) { \
+    printf("%s\n", msg); \
+    exit(1); \
+}
+
+#define check_build(msg, status) \
+if (status != STATUS_SUCCESS) { \
+    printf("%s\n", msg); \
+    exit(1); \
+}
+
+// Define required BRIG data structures.
+typedef uint32_t BrigCodeOffset32_t;
+typedef uint32_t BrigDataOffset32_t;
+typedef uint16_t BrigKinds16_t;
+typedef uint8_t BrigLinkage8_t;
+typedef uint8_t BrigExecutableModifier8_t;
+typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+
+enum BrigKinds {
+  BRIG_KIND_NONE = 0x0000,
+  BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
+  BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
+};
+
+typedef struct BrigBase BrigBase;
+struct BrigBase {
+  uint16_t byteCount;
+  BrigKinds16_t kind;
+};
+
+typedef struct BrigExecutableModifier BrigExecutableModifier;
+struct BrigExecutableModifier {
+  BrigExecutableModifier8_t allBits;
+};
+
+typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
+struct BrigDirectiveExecutable {
+  uint16_t byteCount;
+  BrigKinds16_t kind;
+  BrigDataOffsetString32_t name;
+  uint16_t outArgCount;
+  uint16_t inArgCount;
+  BrigCodeOffset32_t firstInArg;
+  BrigCodeOffset32_t firstCodeBlockEntry;
+  BrigCodeOffset32_t nextModuleEntry;
+  uint32_t codeBlockEntryCount;
+  BrigExecutableModifier modifier;
+  BrigLinkage8_t linkage;
+  uint16_t reserved;
+};
+
+typedef struct BrigData BrigData;
+struct BrigData {
+  uint32_t byteCount;
+  uint8_t bytes[1];
+};
+
+// Provide access to command line arguments passed in by user
+extern uint32_t hsa_cmdline_arg_cnt;
+extern char **hsa_cmdline_arg_list;
+
+// Encapsulates information about a Hsa Agent such as its
+// handle, name, max queue size, max wavefront size, etc.
+typedef struct {
+
+  // Handle of Agent
+  hsa_agent_t dev_id;
+
+  // Name of Agent whose length is less than 64
+  char name[64];
+
+  // Max size of a Wavefront
+  uint32_t max_wave_size;
+
+  // Max size of Queue buffer
+  uint32_t max_queue_size;
+
+  // Memory region supporting kernel arguments
+  hsa_region_t kernarg_region;
+} AgentInfo;
+
+class HsaRsrcFactory {
+
+ public:
+
+  // Constructor of the class. Will initialize the Hsa Runtime and
+  // query the system topology to get the list of Cpu and Gpu devices
+  HsaRsrcFactory( );
+
+  // Destructor of the class
+  ~HsaRsrcFactory( );
+
+  // Get the count of Hsa Gpu Agents available on the platform
+  //
+  // @return uint32_t Number of Gpu agents on platform
+  //
+  uint32_t GetCountOfGpuAgents( );
+
+  // Get the AgentInfo handle of a Gpu device
+  //
+  // @param idx Gpu Agent at specified index
+  //
+  // @param agent_info Output parameter updated with AgentInfo
+  //
+  // @return bool true if successful, false otherwise
+  //
+  bool GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info);
+
+  // Create a Queue object and return its handle. The queue object is expected
+  // to support user requested number of Aql dispatch packets.
+  //
+  // @param agent_info Gpu Agent on which to create a queue object
+  //
+  // @param num_pkts Number of packets to be held by queue
+  //
+  // @param queue Output parameter updated with handle of queue object
+  //
+  // @return bool true if successful, false otherwise
+  //
+  bool CreateQueue(AgentInfo *agent_info,
+                   uint32_t num_pkts, hsa_queue_t **queue);
+
+  // Create a Signal object and return its handle.
+  //
+  // @param value Initial value of signal object
+  //
+  // @param signal Output parameter updated with handle of signal object
+  //
+  // @return bool true if successful, false otherwise
+  //
+  bool CreateSignal(uint32_t value, hsa_signal_t *signal);
+
+  // Allocate memory for use by a kernel of specified size in specified
+  // agent's memory region. Currently supports Global segment whose Kernarg
+  // flag is set.
+  //
+  // @param agent_info Agent from whose memory region to allocate
+  //
+  // @param size Size of memory in terms of bytes
+  //
+  // @return uint8_t* Pointer to buffer, null if allocation fails.
+  //
+  uint8_t* AllocateMemory(AgentInfo *agent_info, size_t size);
+
+  // Loads an Assembled Brig file and Finalizes it into Device Isa
+  //
+  // @param agent_info Gpu device for which to finalize
+  //
+  // @param brig_path File path of the Assembled Brig file
+  //
+  // @param kernel_name Name of the kernel to finalize
+  //
+  // @param code_desc Handle of finalized Code Descriptor that could
+  // be used to submit for execution
+  //
+  // @return bool true if successful, false otherwise
+  //
+  bool LoadAndFinalize(AgentInfo *agent_info,
+                       const char *brig_path, char *kernel_name,
+                       hsa_ext_code_descriptor_t **code_desc);
+
+  // Add an instance of AgentInfo representing a Hsa Gpu agent
+  void AddAgentInfo(AgentInfo *agent_info);
+
+  // Returns the file path where the brig file is located
+  static char* GetBrigPath( );
+
+  // Returns the number of compute units present on platform
+  static uint32_t GetNumOfCUs( );
+
+  // Returns the maximum number of waves that can be launched
+  // per compute unit. The actual number that can be launched
+  // is affected by resource availability
+  static uint32_t GetNumOfWavesPerCU( );
+
+  // Returns the number of work-items that can execute per wave
+  static uint32_t GetNumOfWorkItemsPerWave( );
+
+  // Returns the number of times kernel loop body should execute.
+  static uint32_t GetKernelLoopCount();
+
+  // Returns flag (typed uint32_t) to indicate if debug info should be printed
+  static uint32_t GetPrintDebugInfo();
+
+ private:
+
+  // Number of queues to create
+  uint32_t num_queues_;
+
+  // Used to maintain a list of Hsa Queue handles
+  std::vector queue_list_;  // NOTE(review): no element type visible here - confirm
+
+  // Number of Signals to create
+  uint32_t num_signals_;
+
+  // Used to maintain a list of Hsa Signal handles
+  std::vector signal_list_;  // NOTE(review): no element type visible here - confirm
+
+  // Number of agents reported by platform
+  uint32_t num_agents_;
+
+  // Used to maintain a list of Hsa Gpu Agent Info
+  std::vector gpu_list_;  // NOTE(review): no element type visible here - confirm
+
+  // Records the file path where Brig file is located.
+  // Value is available only after an instance has been built.
+  static char* brig_path_;
+  static constexpr const char* brig_path_key_ = "brig_path";
+
+  // Records the number of Compute units present on system.
+  // Value is available only after an instance has been built.
+  static uint32_t num_cus_;
+  static constexpr const char* num_cus_key_ = "num_cus";
+
+  // Records the number of waves that can be launched per Compute unit
+  // Value is available only after an instance has been built.
+  static uint32_t num_waves_;
+  static constexpr const char* num_waves_key_ = "waves_per_cu";
+
+  // Records the number of work-items that can be packed into a wave
+  // Value is available only after an instance has been built.
+  static uint32_t num_workitems_;
+  static constexpr const char* num_workitems_key_ = "workitems_per_wave";
+
+  // Records the number of times kernel loop body should run. Value
+  // is available only after an instance has been built.
+  static uint32_t kernel_loop_count_;
+  static constexpr const char* kernel_loop_count_key_ = "kernel_loop_count";
+
+  // Records whether debug info should be printed. Value
+  // is available only after an instance has been built.
+  static bool print_debug_info_;
+  static constexpr const char* print_debug_key_ = "print_debug";
+
+  // Print the various fields of Hsa Gpu Agents
+  bool PrintGpuAgents( );
+
+  // Process command line arguments. The method will capture
+  // various user command line parameters for tests to use
+  static void ProcessCmdline( );
+
+  // Prints the help banner on user arg keys
+  static void PrintHelpMsg( );
+
+  // Maps an index for the user argument
+  static uint32_t GetArgIndex(char *arg_value);
+
+};
+
+#endif // HSA_RSRC_FACTORY_H_
diff --git a/projects/rocr-runtime/samples/common/os.cpp b/projects/rocr-runtime/samples/common/os.cpp
new file mode 100755
index 0000000000..6ec575fb99
--- /dev/null
+++ b/projects/rocr-runtime/samples/common/os.cpp
@@ -0,0 +1,45 @@
+#ifdef _WIN32 // Compiling for Windows Platform
+
+#include  // NOTE(review): header names are missing on the bare #include lines - confirm
+#include
+#include "os.h"
+#include
+
+void SetEnv(const char* env_var_name, const char* env_var_value) {
+  const bool ok = SetEnvironmentVariable(env_var_name, env_var_value) != 0;  // zero means failure
+  if (!ok) {
+    printf("Set environment variable failed!\n");
+    exit(1);
+  }
+  return;
+}
+
+char* GetEnv(const char* env_var_name){
+  DWORD char_count = GetEnvironmentVariable(env_var_name, NULL, 0);  // size query: no buffer supplied
+  if (char_count == 0) return NULL;
+  char* buff = (char*)malloc(sizeof(char) * char_count);
+  if (buff == NULL) return NULL;  // allocation failed, nothing to fill
+  GetEnvironmentVariable(env_var_name, buff, char_count);
+  buff[char_count - 1] = '\0';
+  return buff;  // NOTE(review): malloc'ed here, unlike the getenv() pointer returned on Linux
+}
+
+#elif defined(__linux__)
+
+#include "os.h"
+#include  // NOTE(review): header name is missing on this #include line - confirm
+
+void SetEnv(const char* env_var_name, const char* env_var_value){
+  int err = setenv(env_var_name, env_var_value, 1);  // third argument 1: overwrite an existing value
+  if(0 != err){
+    printf("Set environment variable failed!\n");
+    exit(1);
+  }
+  return;
+}
+
+char* GetEnv(const char* env_var_name) {
+  return getenv(env_var_name);
+}
+
+#endif
diff --git a/projects/rocr-runtime/samples/common/os.h b/projects/rocr-runtime/samples/common/os.h
new file mode 100755
index 0000000000..69dd7fa61c
--- /dev/null
+++ b/projects/rocr-runtime/samples/common/os.h
@@ -0,0 +1,12 @@
+#ifndef HSA_PERF_SRC_UTILS_OS_H_
+#define HSA_PERF_SRC_UTILS_OS_H_
+
+#include  // NOTE(review): header name is missing on this #include line - confirm
+
+// Set environment variable
+void SetEnv(const char* env_var_name, const char* env_var_value);
+
+// Get the value of environment variable
+char* GetEnv(const char* env_var_name);
+
+#endif