ECR #333755 - Move Hsa Sample BinarySearch from Hsa Sdk project to current Runtime/Samples
[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1126024]
[ROCm/ROCR-Runtime commit: 26575ed9ba]
This commit is contained in:
@@ -2,6 +2,11 @@ OPENCL_DEPTH = ../..
|
||||
|
||||
include $(OPENCL_DEPTH)/runtimenew/runtimedefs
|
||||
|
||||
SUBDIRS = MatrixTranspose MatrixMultiplication DwtHarr1D BitionicSort NBody
|
||||
SUBDIRS = NBody
|
||||
SUBDIRS += DwtHarr1D
|
||||
SUBDIRS += BitionicSort
|
||||
SUBDIRS += BinarySearch
|
||||
SUBDIRS += MatrixTranspose
|
||||
SUBDIRS += MatrixMultiplication
|
||||
|
||||
include $(OPENCL_DEPTH)/runtimenew/runtimerules
|
||||
|
||||
+386
@@ -0,0 +1,386 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#include "helper_funcs.hpp"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Writes a blank line, then `header` on its own line, then the
 * width x height array `data` in row-major order to std::cout,
 * one row per output line with a space after every element.
 * Every element is printed; no limit on the element count is applied.
 */
template<typename T>
void printArray(
    const std::string header,
    const T * data,
    const int width,
    const int height)
{
    std::cout << "\n" << header << "\n";

    int row = 0;
    while(row < height)
    {
        // Offset of the first element of the current row
        const int rowStart = row * width;
        for(int col = 0; col < width; ++col)
        {
            std::cout << data[rowStart + col] << " ";
        }
        std::cout << "\n";
        ++row;
    }

    std::cout << "\n";
}
|
||||
|
||||
template<typename T>
|
||||
int fillRandom(
|
||||
T * arrayPtr,
|
||||
const int width,
|
||||
const int height,
|
||||
const T rangeMin,
|
||||
const T rangeMax,
|
||||
unsigned int seed)
|
||||
{
|
||||
if(!arrayPtr)
|
||||
{
|
||||
error("Cannot fill array. NULL pointer.");
|
||||
return HSA_SDK_FAILURE;
|
||||
}
|
||||
|
||||
if(!seed)
|
||||
seed = (unsigned int)time(NULL);
|
||||
|
||||
srand(seed);
|
||||
double range = double(rangeMax - rangeMin) + 1.0;
|
||||
|
||||
/* random initialisation of input */
|
||||
for(int i = 0; i < height; i++)
|
||||
for(int j = 0; j < width; j++)
|
||||
{
|
||||
int index = i*width + j;
|
||||
arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0));
|
||||
}
|
||||
|
||||
return HSA_SDK_SUCCESS;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int fillPos(
|
||||
T * arrayPtr,
|
||||
const int width,
|
||||
const int height)
|
||||
{
|
||||
if(!arrayPtr)
|
||||
{
|
||||
error("Cannot fill array. NULL pointer.");
|
||||
return HSA_SDK_FAILURE;
|
||||
}
|
||||
|
||||
/* initialisation of input with positions*/
|
||||
for(T i = 0; i < height; i++)
|
||||
for(T j = 0; j < width; j++)
|
||||
{
|
||||
T index = i*width + j;
|
||||
arrayPtr[index] = index;
|
||||
}
|
||||
|
||||
return HSA_SDK_SUCCESS;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int fillConstant(
|
||||
T * arrayPtr,
|
||||
const int width,
|
||||
const int height,
|
||||
const T val)
|
||||
{
|
||||
if(!arrayPtr)
|
||||
{
|
||||
error("Cannot fill array. NULL pointer.");
|
||||
return HSA_SDK_FAILURE;
|
||||
}
|
||||
|
||||
/* initialisation of input with constant value*/
|
||||
for(int i = 0; i < height; i++)
|
||||
for(int j = 0; j < width; j++)
|
||||
{
|
||||
int index = i*width + j;
|
||||
arrayPtr[index] = val;
|
||||
}
|
||||
|
||||
return HSA_SDK_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Rounds `val` up to the nearest power of two; a value that already is a
 * power of two is returned unchanged.
 *
 * Works by smearing the highest set bit of (val - 1) into all lower bits
 * and adding one. The smear needs the shifts 1, 2, 4, ... up to half the
 * bit width of T. Deriving the iteration count from sizeof(T) in bytes
 * performs too few shifts for 2- and 4-byte types and shifts by at least
 * the type width for 8-byte types, so the shift amount is driven by the
 * bit width instead.
 */
template<typename T>
T roundToPowerOf2(T val)
{
    const int bits = int(sizeof(T)) * 8;

    val--;
    for(int shift = 1; shift < bits; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}
|
||||
|
||||
template<typename T>
|
||||
int isPowerOf2(T val)
|
||||
{
|
||||
long long _val = val;
|
||||
if((_val & (-_val))-_val == 0 && _val != 0)
|
||||
return HSA_SDK_SUCCESS;
|
||||
else
|
||||
return HSA_SDK_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename T>
|
||||
bool checkVal(
|
||||
T input,
|
||||
T reference,
|
||||
std::string message,
|
||||
bool isAPIerror)
|
||||
{
|
||||
if(input==reference)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
error(message);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Formats `t` as a string after applying the stream manipulator `r`
 * (for example std::dec or std::hex) to the output stream.
 */
template<typename T>
std::string toString(T t, std::ios_base &(*r)(std::ios_base&))
{
    std::ostringstream formatted;
    formatted << r;
    formatted << t;
    const std::string text = formatted.str();
    return text;
}
|
||||
|
||||
|
||||
/*
 * Compares `data` against `refData` using the relative L2 error
 *     sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
 * over all `length` elements and returns true when it is below `epsilon`.
 *
 * The sums cover indices 0 .. length-1. Starting at index 1 would leave
 * the first element unchecked.
 *
 * Returns false when the squared reference norm is below 1e-7, since the
 * relative error is not defined for an (almost) all-zero reference.
 */
bool
compare(const float *refData, const float *data,
        const int length, const float epsilon)
{
    float errorSq = 0.0f;
    float refSq = 0.0f;

    for(int i = 0; i < length; ++i)
    {
        const float diff = refData[i] - data[i];
        errorSq += diff * diff;
        refSq += refData[i] * refData[i];
    }

    if (std::fabs(refSq) < 1e-7f) {
        return false;
    }

    const float normRef = std::sqrt(refSq);
    const float normError = std::sqrt(errorSq);

    return (normError / normRef) < epsilon;
}
|
||||
|
||||
/*
 * Double-precision overload: compares `data` against `refData` using the
 * relative L2 error
 *     sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
 * over all `length` elements and returns true when it is below `epsilon`.
 *
 * The sums cover indices 0 .. length-1. Starting at index 1 would leave
 * the first element unchecked.
 *
 * Returns false when the squared reference norm is below 1e-7, since the
 * relative error is not defined for an (almost) all-zero reference.
 */
bool
compare(const double *refData, const double *data,
        const int length, const double epsilon)
{
    double errorSq = 0.0;
    double refSq = 0.0;

    for(int i = 0; i < length; ++i)
    {
        const double diff = refData[i] - data[i];
        errorSq += diff * diff;
        refSq += refData[i] * refData[i];
    }

    if (std::fabs(refSq) < 1e-7) {
        return false;
    }

    const double normRef = std::sqrt(refSq);
    const double normError = std::sqrt(errorSq);

    return (normError / normRef) < epsilon;
}
|
||||
|
||||
// Prints "Error: " followed by the C-string message on std::cout and
// ends the line with a flush.
void
error(const char* errorMsg)
{
    std::cout << "Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}
|
||||
|
||||
// Prints "Error: " followed by the std::string message on std::cout and
// ends the line with a flush.
void
error(std::string errorMsg)
{
    std::cout << "Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}
|
||||
|
||||
// Prints "Expected Error: " followed by the C-string message on std::cout
// and ends the line with a flush.
void
expectedError(const char* errorMsg)
{
    std::cout << "Expected Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}
|
||||
|
||||
// Prints "Expected Error: " followed by the std::string message on
// std::cout and ends the line with a flush.
void
expectedError(std::string errorMsg)
{
    std::cout << "Expected Error: ";
    std::cout << errorMsg;
    std::cout << std::endl;
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Template Instantiations
|
||||
/////////////////////////////////////////////////////////////////
|
||||
template
|
||||
void printArray<short>(const std::string,
|
||||
const short*, int, int);
|
||||
template
|
||||
void printArray<unsigned char>(const std::string,
|
||||
const unsigned char *, int, int);
|
||||
template
|
||||
void printArray<unsigned int>(const std::string,
|
||||
const unsigned int *, int, int);
|
||||
template
|
||||
void printArray<int>(const std::string,
|
||||
const int *, int, int);
|
||||
template
|
||||
void printArray<long>(const std::string,
|
||||
const long*, int, int);
|
||||
template
|
||||
void printArray<float>(const std::string,
|
||||
const float*, int, int);
|
||||
template
|
||||
void printArray<double>(const std::string,
|
||||
const double*, int, int);
|
||||
|
||||
template
|
||||
int fillRandom<unsigned char>(unsigned char* arrayPtr,
|
||||
const int width, const int height,
|
||||
unsigned char rangeMin, unsigned char rangeMax, unsigned int seed);
|
||||
template
|
||||
int fillRandom<unsigned int>(unsigned int* arrayPtr,
|
||||
const int width, const int height,
|
||||
unsigned int rangeMin, unsigned int rangeMax, unsigned int seed);
|
||||
template
|
||||
int fillRandom<int>(int* arrayPtr,
|
||||
const int width, const int height,
|
||||
int rangeMin, int rangeMax, unsigned int seed);
|
||||
template
|
||||
int fillRandom<long>(long* arrayPtr,
|
||||
const int width, const int height,
|
||||
long rangeMin, long rangeMax, unsigned int seed);
|
||||
template
|
||||
int fillRandom<float>(float* arrayPtr,
|
||||
const int width, const int height,
|
||||
float rangeMin, float rangeMax, unsigned int seed);
|
||||
template
|
||||
int fillRandom<double>(double* arrayPtr,
|
||||
const int width, const int height,
|
||||
double rangeMin, double rangeMax, unsigned int seed);
|
||||
|
||||
template
|
||||
short roundToPowerOf2<short>(short val);
|
||||
template
|
||||
unsigned int roundToPowerOf2<unsigned int>(unsigned int val);
|
||||
template
|
||||
int roundToPowerOf2<int>(int val);
|
||||
template
|
||||
long roundToPowerOf2<long>(long val);
|
||||
|
||||
template
|
||||
int isPowerOf2<short>(short val);
|
||||
template
|
||||
int isPowerOf2<unsigned int>(unsigned int val);
|
||||
template
|
||||
int isPowerOf2<int>(int val);
|
||||
template
|
||||
int isPowerOf2<long>(long val);
|
||||
|
||||
template<>
|
||||
int fillPos<short>(short * arrayPtr, const int width, const int height);
|
||||
template<>
|
||||
int fillPos<unsigned int>(unsigned int * arrayPtr, const int width, const int height);
|
||||
template<>
|
||||
int fillPos<int>(int * arrayPtr, const int width, const int height);
|
||||
template<>
|
||||
int fillPos<long>(long * arrayPtr, const int width, const int height);
|
||||
|
||||
template<>
|
||||
int fillConstant<short>(short * arrayPtr,
|
||||
const int width, const int height,
|
||||
const short val);
|
||||
template<>
|
||||
int fillConstant(unsigned int * arrayPtr,
|
||||
const int width, const int height,
|
||||
const unsigned int val);
|
||||
template<>
|
||||
int fillConstant(int * arrayPtr,
|
||||
const int width, const int height,
|
||||
const int val);
|
||||
template<>
|
||||
int fillConstant(long * arrayPtr,
|
||||
const int width, const int height,
|
||||
const long val);
|
||||
template<>
|
||||
int fillConstant(long * arrayPtr,
|
||||
const int width, const int height,
|
||||
const long val);
|
||||
template<>
|
||||
int fillConstant(long * arrayPtr,
|
||||
const int width, const int height,
|
||||
const long val);
|
||||
|
||||
|
||||
template
|
||||
bool checkVal<char>(char input, char reference, std::string message, bool isAPIerror);
|
||||
template
|
||||
bool checkVal<bool>(bool input, bool reference, std::string message, bool isAPIerror);
|
||||
template
|
||||
bool checkVal<std::string>(std::string input, std::string reference, std::string message, bool isAPIerror);
|
||||
template
|
||||
bool checkVal<short>(short input, short reference, std::string message, bool isAPIerror);
|
||||
template
|
||||
bool checkVal<unsigned int>(unsigned int input, unsigned int reference, std::string message, bool isAPIerror);
|
||||
template
|
||||
bool checkVal<int>(int input, int reference, std::string message, bool isAPIerror);
|
||||
template
|
||||
bool checkVal<long>(long input, long reference, std::string message, bool isAPIerror);
|
||||
|
||||
|
||||
template
|
||||
std::string toString<char>(char t, std::ios_base &(*r)(std::ios_base&));
|
||||
template
|
||||
std::string toString<short>(short t, std::ios_base &(*r)(std::ios_base&));
|
||||
template
|
||||
std::string toString<unsigned int>(unsigned int t, std::ios_base &(*r)(std::ios_base&));
|
||||
template
|
||||
std::string toString<int>(int t, std::ios_base &(*r)(std::ios_base&));
|
||||
template
|
||||
std::string toString<long>(long t, std::ios_base &(*r)(std::ios_base&));
|
||||
template
|
||||
std::string toString<float>(float t, std::ios_base &(*r)(std::ios_base&));
|
||||
template
|
||||
std::string toString<double>(double t, std::ios_base &(*r)(std::ios_base&));
|
||||
|
||||
+159
@@ -0,0 +1,159 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
• Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
• Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
#ifndef HELPER_FUNCS_HPP_
|
||||
#define HELPER_FUNCS_HPP_
|
||||
|
||||
#define HSA_SDK_SUCCESS 0
|
||||
#define HSA_SDK_FAILURE 1
|
||||
#define HSA_SDK_EXPECTED_FAILURE 2
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <ctime>
|
||||
#include <cmath>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <malloc.h>
|
||||
|
||||
/**
|
||||
* error
|
||||
* constant function, Prints error messages
|
||||
* @param errorMsg char* message
|
||||
*/
|
||||
void error(const char* errorMsg);
|
||||
|
||||
/**
|
||||
* error
|
||||
* constant function, Prints error messages
|
||||
* @param errorMsg std::string message
|
||||
*/
|
||||
void error(std::string errorMsg);
|
||||
|
||||
/**
|
||||
* expectedError
|
||||
* constant function, Prints error messages
|
||||
* @param errorMsg char* message
|
||||
*/
|
||||
void expectedError(const char* errorMsg);
|
||||
|
||||
/**
|
||||
* expectedError
|
||||
* constant function, Prints error messages
|
||||
* @param errorMsg string message
|
||||
*/
|
||||
void expectedError(std::string errorMsg);
|
||||
|
||||
/**
|
||||
* compare (float and double overloads)
|
||||
* compare data to check error
|
||||
* @param refData templated input
|
||||
* @param data templated input
|
||||
* @param length number of values to compare
|
||||
* @param epsilon errorWindow
|
||||
*/
|
||||
bool compare(const float *refData, const float *data,
|
||||
const int length, const float epsilon = 1e-6f);
|
||||
bool compare(const double *refData, const double *data,
|
||||
const int length, const double epsilon = 1e-6);
|
||||
|
||||
/**
|
||||
* printArray
|
||||
* displays an array on std::cout
|
||||
*/
|
||||
template<typename T>
|
||||
void printArray(
|
||||
const std::string header,
|
||||
const T * data,
|
||||
const int width,
|
||||
const int height);
|
||||
|
||||
|
||||
/**
|
||||
* fillRandom
|
||||
* fill array with random values
|
||||
*/
|
||||
template<typename T>
|
||||
int fillRandom(
|
||||
T * arrayPtr,
|
||||
const int width,
|
||||
const int height,
|
||||
const T rangeMin,
|
||||
const T rangeMax,
|
||||
unsigned int seed=123);
|
||||
|
||||
/**
|
||||
* fillPos
|
||||
* fill each element with its own linear (row-major) position
|
||||
*/
|
||||
template<typename T>
|
||||
int fillPos(
|
||||
T * arrayPtr,
|
||||
const int width,
|
||||
const int height);
|
||||
|
||||
/**
|
||||
* fillConstant
|
||||
* fill the array with constant value
|
||||
*/
|
||||
template<typename T>
|
||||
int fillConstant(
|
||||
T * arrayPtr,
|
||||
const int width,
|
||||
const int height,
|
||||
const T val);
|
||||
|
||||
|
||||
/**
|
||||
* roundToPowerOf2
|
||||
* rounds up to the nearest power of 2
|
||||
*/
|
||||
template<typename T>
|
||||
T roundToPowerOf2(T val);
|
||||
|
||||
/**
|
||||
* isPowerOf2
|
||||
* checks if input is a power of 2
|
||||
*/
|
||||
template<typename T>
|
||||
int isPowerOf2(T val);
|
||||
|
||||
/**
|
||||
* checkVal
|
||||
* Set default(isAPIerror) parameter to false
|
||||
* if checkVal is used to check something other than an OpenCL API error code
|
||||
*/
|
||||
template<typename T>
|
||||
bool checkVal(
|
||||
T input,
|
||||
T reference,
|
||||
std::string message, bool isAPIerror = true);
|
||||
|
||||
/**
|
||||
* toString
|
||||
* convert a T type to string
|
||||
*/
|
||||
template<typename T>
|
||||
std::string toString(T t, std::ios_base & (*r)(std::ios_base&));
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
+422
@@ -0,0 +1,422 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
#include <cassert>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "hsa.h"
|
||||
#include "elf_utils.h"
|
||||
#include "hsa_rsrc_factory.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Provide access to command line arguments passed in by user
|
||||
uint32_t hsa_cmdline_arg_cnt;
|
||||
char **hsa_cmdline_arg_list;
|
||||
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
static hsa_status_t find_kernarg(hsa_region_t region, void *data) {
|
||||
|
||||
hsa_region_global_flag_t flags;
|
||||
hsa_region_segment_t segment_id;
|
||||
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
|
||||
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
|
||||
AgentInfo *agent_info = (AgentInfo *)data;
|
||||
agent_info->kernarg_region = region;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Callback function to get the number of agents
|
||||
static hsa_status_t get_gpu_agents(hsa_agent_t agent, void *data) {
|
||||
|
||||
// Copy handle of agent and increment number of agents reported
|
||||
HsaRsrcFactory *rsrcFactory = reinterpret_cast<HsaRsrcFactory *>(data);
|
||||
|
||||
// Determine if device is a Gpu agent
|
||||
hsa_status_t status;
|
||||
hsa_device_type_t type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
if (type != HSA_DEVICE_TYPE_GPU) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Device is a Gpu agent, build an instance of AgentInfo
|
||||
AgentInfo *agent_info = reinterpret_cast<AgentInfo *>(malloc(sizeof(AgentInfo)));
|
||||
agent_info->dev_id = agent;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
agent_info->max_wave_size = 0;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
|
||||
agent_info->max_queue_size = 0;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
|
||||
// Find and Bind Kernarg regions of the Gpu agent
|
||||
hsa_agent_iterate_regions(agent, find_kernarg, agent_info);
|
||||
|
||||
// Save the instance of AgentInfo
|
||||
rsrcFactory->AddAgentInfo(agent_info);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Finds the specified symbols offset in the specified brig_module.
|
||||
// If the symbol is found the function returns HSA_STATUS_SUCCESS,
|
||||
// otherwise it returns HSA_STATUS_ERROR.
|
||||
hsa_status_t hsa_find_symbol_offset(hsa_ext_brig_module_t *brig_module,
|
||||
char *symbol_name,
|
||||
hsa_ext_brig_code_section_offset32_t *offset) {
|
||||
|
||||
// Get the data section
|
||||
hsa_ext_brig_section_header_t *data_hdr = brig_module->section[HSA_EXT_BRIG_SECTION_DATA];
|
||||
|
||||
// Get the code section
|
||||
hsa_ext_brig_section_header_t* code_hdr = brig_module->section[HSA_EXT_BRIG_SECTION_CODE];
|
||||
|
||||
// First entry into the BRIG code section
|
||||
BrigCodeOffset32_t code_offset = code_hdr->header_byte_count;
|
||||
BrigBase* code_entry = (BrigBase*) ((char*)code_hdr + code_offset);
|
||||
while (code_offset != code_hdr->byte_count) {
|
||||
if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) {
|
||||
|
||||
// Now find the data in the data section
|
||||
BrigDirectiveExecutable* directive_kernel = (BrigDirectiveExecutable*) (code_entry);
|
||||
BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
|
||||
BrigData* data_entry = (BrigData*)((char*) data_hdr + data_name_offset);
|
||||
if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name))) {
|
||||
*offset = code_offset;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
code_offset += code_entry->byteCount;
|
||||
code_entry = (BrigBase*) ((char*)code_hdr + code_offset);
|
||||
}
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// Definitions for Static Data members of the class
|
||||
char* HsaRsrcFactory::brig_path_ = NULL;
|
||||
uint32_t HsaRsrcFactory::num_cus_;
|
||||
uint32_t HsaRsrcFactory::num_waves_;
|
||||
uint32_t HsaRsrcFactory::num_workitems_;
|
||||
uint32_t HsaRsrcFactory::kernel_loop_count_;
|
||||
bool HsaRsrcFactory::print_debug_info_ = false;
|
||||
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory( ) {
|
||||
|
||||
// Initialize the Hsa Runtime
|
||||
hsa_status_t status = hsa_init();
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_iterate_agents(get_gpu_agents, this);
|
||||
check("Error Calling hsa_iterate_agents", status);
|
||||
|
||||
// Process command line arguments
|
||||
ProcessCmdline( );
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory( ) {
|
||||
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfGpuAgents( ) {
|
||||
return gpu_list_.size();
|
||||
}
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info) {
|
||||
|
||||
// Determine if request is valid
|
||||
uint32_t size = gpu_list_.size();
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = gpu_list_[idx];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateQueue(AgentInfo *agent_info,
|
||||
uint32_t num_pkts, hsa_queue_t **queue) {
|
||||
|
||||
hsa_status_t status;
|
||||
status = hsa_queue_create(agent_info->dev_id, num_pkts,
|
||||
HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t *signal) {
|
||||
|
||||
hsa_status_t status;
|
||||
status = hsa_signal_create(value, 0, NULL, signal);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateMemory(AgentInfo *agent_info, size_t size) {
|
||||
|
||||
hsa_status_t status;
|
||||
uint8_t *buffer = NULL;
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void **)&buffer);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info,
|
||||
const char *brig_path, char *kernel_name,
|
||||
hsa_ext_code_descriptor_t **code_desc) {
|
||||
|
||||
// Load BRIG, encapsulated in an ELF container, into a BRIG module.
|
||||
status_t build_err;
|
||||
hsa_ext_brig_module_t *brig_obj;
|
||||
build_err = (status_t)create_brig_module_from_brig_file(brig_path, &brig_obj);
|
||||
check_build("Error in creating the brig module from brig file", build_err);
|
||||
|
||||
// Determine the Brig module has the kernel symbol
|
||||
hsa_status_t status;
|
||||
hsa_ext_brig_code_section_offset32_t kernel_symbol;
|
||||
status = hsa_find_symbol_offset(brig_obj, kernel_name, &kernel_symbol);
|
||||
check("Error in Finding the Symbol Offset for the Kernel", status);
|
||||
|
||||
// Create Hsa Program
|
||||
hsa_ext_program_handle_t program;
|
||||
status = hsa_ext_program_create(&agent_info->dev_id, 1,
|
||||
HSA_EXT_BRIG_MACHINE_LARGE,
|
||||
HSA_EXT_BRIG_PROFILE_FULL, &program);
|
||||
check("Error in Creating Hsa Program", status);
|
||||
|
||||
// Add the BRIG module to hsa program.
|
||||
hsa_ext_brig_module_handle_t brig_handle;
|
||||
status = hsa_ext_add_module(program, brig_obj, &brig_handle);
|
||||
check("Error in Adding Brig Module to the Program", status);
|
||||
|
||||
// Construct finalization request list.
|
||||
hsa_ext_finalization_request_t finalize_request;
|
||||
finalize_request.module = brig_handle;
|
||||
finalize_request.symbol = kernel_symbol;
|
||||
finalize_request.program_call_convention = 0;
|
||||
|
||||
// Finalize the Hsa Program.
|
||||
status = hsa_ext_finalize_program(program, agent_info->dev_id,
|
||||
1, &finalize_request, NULL, NULL, 0, NULL, 0);
|
||||
check("Error in Finalizing the Hsa Program", status);
|
||||
|
||||
// Destroy the brig module. The program was successfully created the kernel
|
||||
// symbol was found and the program was finalized, so it is no longer needed.
|
||||
destroy_brig_module(brig_obj);
|
||||
|
||||
// Get the hsa code descriptor address.
|
||||
status = hsa_ext_query_kernel_descriptor_address(program, brig_handle, kernel_symbol, code_desc);
|
||||
check("Error Querying the Kernel Descriptor Address", status);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
void HsaRsrcFactory::AddAgentInfo(AgentInfo *agent_info) {
|
||||
gpu_list_.push_back(agent_info);
|
||||
}
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool HsaRsrcFactory::PrintGpuAgents( ) {
|
||||
|
||||
AgentInfo *agent_info;
|
||||
int size = gpu_list_.size();
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
agent_info = gpu_list_[idx];
|
||||
std::cout << std::endl;
|
||||
std::cout << "Hsa Gpu Agent Id: " << agent_info->dev_id.handle << std::endl;
|
||||
std::cout << "Hsa Gpu Agent Name: " << agent_info->name << std::endl;
|
||||
std::cout << "Hsa Gpu Agent Max Wave Size: " << agent_info->max_wave_size << std::endl;
|
||||
std::cout << "Hsa Gpu Agent Max Queue Size: " << agent_info->max_queue_size << std::endl;
|
||||
std::cout << "Hsa Gpu Agent Kernarg Region Id: " << agent_info->kernarg_region.handle << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the file path where brig files is located. Value is
|
||||
// available only after an instance has been built.
|
||||
char* HsaRsrcFactory::GetBrigPath( ) {
|
||||
return HsaRsrcFactory::brig_path_;
|
||||
}
|
||||
|
||||
// Returns the number of compute units present on platform
|
||||
// Value is available only after an instance has been built.
|
||||
uint32_t HsaRsrcFactory::GetNumOfCUs( ) {
|
||||
return HsaRsrcFactory::num_cus_;
|
||||
}
|
||||
|
||||
// Returns the maximum number of waves that can be launched
|
||||
// per compute unit. The actual number that can be launched
|
||||
// is affected by resource availability
|
||||
//
|
||||
// Value is available only after an instance has been built.
|
||||
uint32_t HsaRsrcFactory::GetNumOfWavesPerCU( ) {
|
||||
return HsaRsrcFactory::num_waves_;
|
||||
}
|
||||
|
||||
// Returns the number of work-items that can execute per wave
|
||||
// Value is available only after an instance has been built.
|
||||
uint32_t HsaRsrcFactory::GetNumOfWorkItemsPerWave( ) {
|
||||
return HsaRsrcFactory::num_workitems_;
|
||||
}
|
||||
|
||||
// Returns the number of times kernel loop body should execute.
|
||||
// Value is available only after an instance has been built.
|
||||
uint32_t HsaRsrcFactory::GetKernelLoopCount() {
|
||||
return HsaRsrcFactory::kernel_loop_count_;
|
||||
}
|
||||
|
||||
// Returns boolean flag to indicate if debug info should be printed
|
||||
// Value is available only after an instance has been built.
|
||||
uint32_t HsaRsrcFactory::GetPrintDebugInfo() {
|
||||
return HsaRsrcFactory::print_debug_info_;
|
||||
}
|
||||
|
||||
// Process command line arguments. The method will capture
|
||||
// various user command line parameters for tests to use
|
||||
void HsaRsrcFactory::ProcessCmdline( ) {
|
||||
|
||||
// Command line arguments are given
|
||||
uint32_t idx;
|
||||
uint32_t arg_idx;
|
||||
for (idx = 1; idx < hsa_cmdline_arg_cnt; idx += 2) {
|
||||
arg_idx = GetArgIndex((char *)hsa_cmdline_arg_list[idx]);
|
||||
switch(arg_idx) {
|
||||
case 0:
|
||||
HsaRsrcFactory::brig_path_ = hsa_cmdline_arg_list[idx + 1];
|
||||
break;
|
||||
case 1:
|
||||
HsaRsrcFactory::num_cus_ = atoi(hsa_cmdline_arg_list[idx + 1]);
|
||||
break;
|
||||
case 2:
|
||||
HsaRsrcFactory::num_waves_ = atoi(hsa_cmdline_arg_list[idx + 1]);
|
||||
break;
|
||||
case 3:
|
||||
HsaRsrcFactory::num_workitems_ = atoi(hsa_cmdline_arg_list[idx + 1]);
|
||||
break;
|
||||
case 4:
|
||||
HsaRsrcFactory::kernel_loop_count_ = atoi(hsa_cmdline_arg_list[idx + 1]);
|
||||
break;
|
||||
case 5:
|
||||
HsaRsrcFactory::print_debug_info_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
uint32_t HsaRsrcFactory::GetArgIndex(char *arg_value ) {
|
||||
|
||||
// Map Brig file path to index zero
|
||||
if (!strcmp(HsaRsrcFactory::brig_path_key_, arg_value)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Map Number of Compute Units to index one
|
||||
if (!strcmp(HsaRsrcFactory::num_cus_key_, arg_value)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Map Number of Waves per CU to index two
|
||||
if (!strcmp(HsaRsrcFactory::num_waves_key_, arg_value)) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// Map Number of Workitems per Wave to index three
|
||||
if (!strcmp(HsaRsrcFactory::num_workitems_key_, arg_value)) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
// Map Kernel Loop Count to index four
|
||||
if (!strcmp(HsaRsrcFactory::kernel_loop_count_key_, arg_value)) {
|
||||
return 4;
|
||||
}
|
||||
|
||||
// Map print debug info parameter
|
||||
if (!strcmp(HsaRsrcFactory::print_debug_key_, arg_value)) {
|
||||
return 5;
|
||||
}
|
||||
|
||||
return 108;
|
||||
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::PrintHelpMsg( ) {
|
||||
|
||||
std::cout << "Key for passing Brig filepath: " << HsaRsrcFactory::brig_path_key_ << std::endl;
|
||||
std::cout << "Key for passing Number of Compute Units: " << HsaRsrcFactory::num_cus_key_ << std::endl;
|
||||
std::cout << "Key for passing Number of Waves per CU: " << HsaRsrcFactory::num_waves_key_ << std::endl;
|
||||
std::cout << "Key for passing Number of Workitems per Wave: " << HsaRsrcFactory::num_workitems_key_ << std::endl;
|
||||
std::cout << "Key for passing Kernel Loop Count: " << HsaRsrcFactory::kernel_loop_count_key_ << std::endl;
|
||||
|
||||
}
|
||||
+274
@@ -0,0 +1,274 @@
|
||||
#ifndef HSA_RSRC_FACTORY_H_
|
||||
#define HSA_RSRC_FACTORY_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "hsatimer.h"
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
// Terminates the program when a status value reports a failure.
//
// @param msg Text printed, followed by a newline, before exiting
//
// @param status Expression compared against HSA_STATUS_SUCCESS. The
// argument is parenthesized so that operators inside of it cannot
// bind to the comparison
//
// The macro expands to a plain if statement, hence it should not be
// used as the unbraced body of an if that has an else branch.
#define check(msg, status)                 \
  if ((status) != HSA_STATUS_SUCCESS) {    \
    printf("%s\n", (msg));                 \
    exit(1);                               \
  }
|
||||
|
||||
// Terminates the program when a build status reports a failure.
//
// @param msg Text printed, followed by a newline, before exiting
//
// @param status Expression compared against STATUS_SUCCESS. The
// argument is parenthesized so that operators inside of it cannot
// bind to the comparison
//
// The macro expands to a plain if statement, hence it should not be
// used as the unbraced body of an if that has an else branch.
#define check_build(msg, status)           \
  if ((status) != STATUS_SUCCESS) {        \
    printf("%s\n", (msg));                 \
    exit(1);                               \
  }
|
||||
|
||||
// Define required BRIG data structures.

// 32-bit offsets
using BrigCodeOffset32_t = uint32_t;
using BrigDataOffset32_t = uint32_t;
using BrigDataOffsetString32_t = BrigDataOffset32_t;

// 16-bit storage for a BrigKinds value
using BrigKinds16_t = uint16_t;

// 8-bit storage types
using BrigLinkage8_t = uint8_t;
using BrigExecutableModifier8_t = uint8_t;
|
||||
|
||||
// Kinds of BRIG entries that are referenced by this header
enum BrigKinds {
  BRIG_KIND_NONE             = 0x0000,
  BRIG_KIND_DIRECTIVE_BEGIN  = 0x1000,
  BRIG_KIND_DIRECTIVE_KERNEL = 0x1008
};
|
||||
|
||||
typedef struct BrigBase BrigBase;
|
||||
struct BrigBase {
|
||||
uint16_t byteCount;
|
||||
BrigKinds16_t kind;
|
||||
};
|
||||
|
||||
typedef struct BrigExecutableModifier BrigExecutableModifier;
|
||||
struct BrigExecutableModifier {
|
||||
BrigExecutableModifier8_t allBits;
|
||||
};
|
||||
|
||||
typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
|
||||
struct BrigDirectiveExecutable {
|
||||
uint16_t byteCount;
|
||||
BrigKinds16_t kind;
|
||||
BrigDataOffsetString32_t name;
|
||||
uint16_t outArgCount;
|
||||
uint16_t inArgCount;
|
||||
BrigCodeOffset32_t firstInArg;
|
||||
BrigCodeOffset32_t firstCodeBlockEntry;
|
||||
BrigCodeOffset32_t nextModuleEntry;
|
||||
uint32_t codeBlockEntryCount;
|
||||
BrigExecutableModifier modifier;
|
||||
BrigLinkage8_t linkage;
|
||||
uint16_t reserved;
|
||||
};
|
||||
|
||||
// A byte count followed by the first element of a byte array
struct BrigData {
  uint32_t byteCount;
  uint8_t  bytes[1];
};
|
||||
|
||||
// Provide access to command line arguments passed in by user
|
||||
extern uint32_t hsa_cmdline_arg_cnt;
|
||||
extern char **hsa_cmdline_arg_list;
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
typedef struct {
|
||||
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
|
||||
// Name of Agent whose length is less than 64
|
||||
char name[64];
|
||||
|
||||
// Max size of Wavefront size
|
||||
uint32_t max_wave_size;
|
||||
|
||||
// Max size of Queue buffer
|
||||
uint32_t max_queue_size;
|
||||
|
||||
// Memory region supporting kernel arguments
|
||||
hsa_region_t kernarg_region;
|
||||
} AgentInfo;
|
||||
|
||||
class HsaRsrcFactory {
|
||||
|
||||
public:
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
HsaRsrcFactory( );
|
||||
|
||||
// Destructor of the class
|
||||
~HsaRsrcFactory( );
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfGpuAgents( );
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetGpuAgentInfo(uint32_t idx, AgentInfo **agent_info);
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateQueue(AgentInfo *agent_info,
|
||||
uint32_t num_pkts, hsa_queue_t **queue);
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateSignal(uint32_t value, hsa_signal_t *signal);
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateMemory(AgentInfo *agent_info, size_t size);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool LoadAndFinalize(AgentInfo *agent_info,
|
||||
const char *brig_path, char *kernel_name,
|
||||
hsa_ext_code_descriptor_t **code_desc);
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
void AddAgentInfo(AgentInfo *agent_info);
|
||||
|
||||
// Returns the file path where brig files is located
|
||||
static char* GetBrigPath( );
|
||||
|
||||
// Returns the number of compute units present on platform
|
||||
static uint32_t GetNumOfCUs( );
|
||||
|
||||
// Returns the maximum number of waves that can be launched
|
||||
// per compute unit. The actual number that can be launched
|
||||
// is affected by resource availability
|
||||
static uint32_t GetNumOfWavesPerCU( );
|
||||
|
||||
// Returns the number of work-items that can execute per wave
|
||||
static uint32_t GetNumOfWorkItemsPerWave( );
|
||||
|
||||
// Returns the number of times kernel loop body should execute.
|
||||
static uint32_t GetKernelLoopCount();
|
||||
|
||||
// Returns boolean flag to indicate if debug info should be printed
|
||||
static uint32_t GetPrintDebugInfo();
|
||||
|
||||
private:
|
||||
|
||||
// Number of queues to create
|
||||
uint32_t num_queues_;
|
||||
|
||||
// Used to maintain a list of Hsa Queue handles
|
||||
std::vector<hsa_queue_t *> queue_list_;
|
||||
|
||||
// Number of Signals to create
|
||||
uint32_t num_signals_;
|
||||
|
||||
// Used to maintain a list of Hsa Signal handles
|
||||
std::vector<hsa_signal_t *> signal_list_;
|
||||
|
||||
// Number of agents reported by platform
|
||||
uint32_t num_agents_;
|
||||
|
||||
// Used to maintain a list of Hsa Gpu Agent Info
|
||||
std::vector<AgentInfo *> gpu_list_;
|
||||
|
||||
// Records the file path where Brig file is located.
|
||||
// Value is available only after an instance has been built.
|
||||
static char* brig_path_;
|
||||
static constexpr char* brig_path_key_ = "brig_path";
|
||||
|
||||
// Records the number of Compute units present on system.
|
||||
// Value is available only after an instance has been built.
|
||||
static uint32_t num_cus_;
|
||||
static constexpr char* num_cus_key_ = "num_cus";
|
||||
|
||||
// Records the number of waves that can be launched per Compute unit
|
||||
// Value is available only after an instance has been built.
|
||||
static uint32_t num_waves_;
|
||||
static constexpr char* num_waves_key_ = "waves_per_cu";
|
||||
|
||||
// Records the number of work-items that can be packed into a wave
|
||||
// Value is available only after an instance has been built.
|
||||
static uint32_t num_workitems_;
|
||||
static constexpr char* num_workitems_key_ = "workitems_per_wave";
|
||||
|
||||
// Records the number of times kernel loop body should run. Value
|
||||
// is available only after an instance has been built.
|
||||
static uint32_t kernel_loop_count_;
|
||||
static constexpr char* kernel_loop_count_key_ = "kernel_loop_count";
|
||||
|
||||
// Records the number of times kernel loop body should run. Value
|
||||
// is available only after an instance has been built.
|
||||
static bool print_debug_info_;
|
||||
static constexpr char* print_debug_key_ = "print_debug";
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents( );
|
||||
|
||||
// Process command line arguments. The method will capture
|
||||
// various user command line parameters for tests to use
|
||||
static void ProcessCmdline( );
|
||||
|
||||
// Prints the help banner on user arg keys
|
||||
static void PrintHelpMsg( );
|
||||
|
||||
// Maps an index for the user argument
|
||||
static uint32_t GetArgIndex(char *arg_value);
|
||||
|
||||
};
|
||||
|
||||
#endif // HSA_RSRC_FACTORY_H_
|
||||
Executable
+45
@@ -0,0 +1,45 @@
|
||||
#ifdef _WIN32 // Compiling for Windows Platform
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <Windows.h>
|
||||
#include "os.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void SetEnv(const char* env_var_name, const char* env_var_value) {
|
||||
bool err = SetEnvironmentVariable(env_var_name, env_var_value);
|
||||
if(false == err){
|
||||
printf("Set environment variable failed!\n");
|
||||
exit(1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
char* GetEnv(const char* env_var_name){
|
||||
char* buff;
|
||||
DWORD char_count = GetEnvironmentVariable(env_var_name, NULL, 0);
|
||||
if (char_count == 0) return NULL;
|
||||
buff = (char*)malloc(sizeof(char) * char_count);
|
||||
GetEnvironmentVariable(env_var_name, buff, char_count);
|
||||
buff[char_count - 1] = '\0';
|
||||
return buff;
|
||||
}
|
||||
|
||||
#elif defined(__linux__)
|
||||
|
||||
#include "os.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
// Sets an environment variable of the current process through setenv,
// replacing any value the variable already has. Prints a message and
// exits the program with status one if setenv reports failure.
void SetEnv(const char* env_var_name, const char* env_var_value) {
  const bool failed = (setenv(env_var_name, env_var_value, 1) != 0);
  if (!failed) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}
|
||||
|
||||
// Gets the value of an environment variable by forwarding to getenv.
// The pointer returned is whatever getenv returns, it is NULL when
// the variable is not set.
char* GetEnv(const char* env_var_name) {
  char* const value = getenv(env_var_name);
  return value;
}
|
||||
|
||||
#endif
|
||||
Executable
+12
@@ -0,0 +1,12 @@
|
||||
#ifndef HSA_PERF_SRC_UTILS_OS_H_
|
||||
#define HSA_PERF_SRC_UTILS_OS_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Set an environment variable
|
||||
void SetEnv(const char* env_var_name, const char* env_var_value);
|
||||
|
||||
// Get the value of an environment variable
|
||||
char* GetEnv(const char* env_var_name);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user