ECR #333755 - Added two HSA samples: MatrixMultiplication, MatrixTranspose
[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1124951]
Bu işleme şunda yer alıyor:
@@ -0,0 +1,7 @@
|
||||
# Relative path from this directory to the tree root used by the includes below.
OPENCL_DEPTH = ../..
|
||||
|
||||
# Shared definitions for the runtime build.
include $(OPENCL_DEPTH)/runtimenew/runtimedefs
|
||||
|
||||
# Sample directories built from this level.
SUBDIRS = MatrixTranspose MatrixMultiplication
|
||||
|
||||
# Shared rules for the runtime build.
include $(OPENCL_DEPTH)/runtimenew/runtimerules
|
||||
@@ -0,0 +1,9 @@
|
||||
# Relative path from this directory to the tree root used by the includes below.
OPENCL_DEPTH = ../../..
|
||||
|
||||
# Shared HSA build definitions.
include $(OPENCL_DEPTH)/hsadefs
|
||||
|
||||
# Build one sub-directory per default target.
BUILD_SUBDIRS = $(DEFAULT_TARGETS)
|
||||
# Name of the makefile handed to the per-target builds.
BUILD_MAKEFILE = Makefile.samples
|
||||
|
||||
|
||||
# Shared HSA build rules.
include $(OPENCL_DEPTH)/hsarules
|
||||
@@ -0,0 +1,53 @@
|
||||
# Shared build description for the samples. OPENCL_DEPTH and COMPONENT_DEPTH
# are expected to be set by the makefile that includes this one.
include $(OPENCL_DEPTH)/hsadefs
|
||||
|
||||
# Name of the executable produced for each sample.
EXE_TARGET = Test
|
||||
# Look for sources in the component directory and compile every .cpp found there.
vpath %.cpp $(COMPONENT_DEPTH)
|
||||
CPPFILES := $(notdir $(wildcard $(COMPONENT_DEPTH)/*.cpp))
|
||||
|
||||
# Bitness: 64-bit builds use a "64" library suffix; NBITS is set to 32 only
# for non-Windows 32-bit builds.
ifdef ATI_BITS_64
|
||||
LIB_SUFFIX = 64
|
||||
NBITS = 64
|
||||
else
|
||||
LIB_SUFFIX =
|
||||
ifndef ATI_OS_WINDOWS
|
||||
NBITS := 32
|
||||
endif
|
||||
endif
|
||||
|
||||
# Shared-library flavour and library name prefix per OS.
ifdef ATI_OS_WINDOWS
|
||||
CORE_LIB = dll
|
||||
LFLAGS += /subsystem:console
|
||||
LIB_PREFIX =
|
||||
else
|
||||
CORE_LIB = so
|
||||
LIB_PREFIX = lib
|
||||
endif
|
||||
|
||||
# Linux: drop -fno-rtti / -fno-exceptions from the compiler options and add
# the system libraries to the link line.
ifdef ATI_OS_LINUX
|
||||
GCXXOPTS := $(filter-out -fno-rtti,$(GCXXOPTS))
|
||||
GCXXOPTS := $(filter-out -fno-exceptions,$(GCXXOPTS))
|
||||
LFLAGS += -L$(DIST_LIB_DEST) -lpthread $(LIBSTDCXX) -lm -ldl -lrt
|
||||
endif
|
||||
|
||||
# Exported to sub-makes.
export BUILD_HSA_TARGET=yes
|
||||
|
||||
# Include directories.
LCINCS := $(INCSWITCH) "$(OPENCL_DEPTH)/compiler/finalizer/HSAIL/hsail-tools/libHSAIL"
|
||||
LCINCS += $(INCSWITCH) "$(OPENCL_DEPTH)/compiler/finalizer/HSAIL/hsail-tools/libHSAIL/$(FULL_BUILD_DIR)"
|
||||
LCINCS += $(INCSWITCH) "$(OPENCL_DEPTH)/compiler/finalizer/Interface"
|
||||
LCINCS += $(INCSWITCH) "$(OPENCL_DEPTH)/contrib/gtest-1.6.0/include"
|
||||
LCINCS += $(INCSWITCH) "$(OPENCL_DEPTH)/runtime/inc"
|
||||
|
||||
# Libraries linked by full path.
LLLIBS := $(OPENCL_DEPTH)/contrib/gtest-1.6.0/$(FULL_BUILD_DIR)/libgtest$(LIB_EXT)
|
||||
LLLIBS += $(OPENCL_DEPTH)/runtime/test/common/$(FULL_BUILD_DIR)/testcommon$(LIB_EXT)
|
||||
LLLIBS += $(OPENCL_DEPTH)/runtime/test/gcommon/$(FULL_BUILD_DIR)/gtestcommon$(LIB_EXT)
|
||||
LLLIBS += $(OPENCL_DEPTH)/compiler/finalizer/HSAIL/hsail-tools/libHSAIL/$(FULL_BUILD_DIR)/libhsail$(LIB_EXT)
|
||||
|
||||
# Sub-path of the runtime core build output for this OS / library flavour.
RUNTIME_BUILD = build/$(OS_TYPE)/$(CORE_LIB)/$(BUILD_DIR)
|
||||
|
||||
# The HSA runtime is linked with -L/-l on Linux and by full path otherwise.
ifdef ATI_OS_LINUX
|
||||
LFLAGS += -L$(OPENCL_DEPTH)/runtime/core/$(RUNTIME_BUILD) -lhsa-runtime$(LIB_SUFFIX)
|
||||
else
|
||||
LLLIBS += $(OPENCL_DEPTH)/runtime/core/$(RUNTIME_BUILD)/hsa-runtime$(LIB_SUFFIX)$(LIB_EXT)
|
||||
endif
|
||||
|
||||
# Shared HSA build rules.
include $(OPENCL_DEPTH)/hsarules
|
||||
@@ -0,0 +1,11 @@
|
||||
# Generated Makefile lnx
# Sets the relative tree paths and target names for the lnx build, then
# includes the shared sample makefile.
|
||||
|
||||
OPENCL_DEPTH = ..$(BUILD_DIR_DEPTH)/../../..
|
||||
SCLIB_DEPTH = ..$(BUILD_DIR_DEPTH)/../../../../../drivers/hsa/compiler/finalizer
|
||||
MODULE_DEPTH = ../..$(BUILD_DIR_DEPTH)
|
||||
# The component directory is the module directory itself.
COMPONENT_DEPTH = $(MODULE_DEPTH)
|
||||
|
||||
OPENCL_TARGET = lnx
|
||||
SCLIB_TARGET = lnx
|
||||
|
||||
# Shared sample build description.
include $(MODULE_DEPTH)/build/Makefile.samples
|
||||
@@ -0,0 +1,11 @@
|
||||
# Generated Makefile lnx64a
# Sets the relative tree paths and target names for the lnx64a build, then
# includes the shared sample makefile.
|
||||
|
||||
OPENCL_DEPTH = ..$(BUILD_DIR_DEPTH)/../../..
|
||||
SCLIB_DEPTH = ..$(BUILD_DIR_DEPTH)/../../../../../drivers/hsa/compiler/finalizer
|
||||
MODULE_DEPTH = ../..$(BUILD_DIR_DEPTH)
|
||||
# The component directory is the module directory itself.
COMPONENT_DEPTH = $(MODULE_DEPTH)
|
||||
|
||||
OPENCL_TARGET = lnx64a
|
||||
SCLIB_TARGET = lnx64a
|
||||
|
||||
# Shared sample build description.
include $(MODULE_DEPTH)/build/Makefile.samples
|
||||
@@ -0,0 +1,7 @@
|
||||
# Relative path from this directory to the tree root used by the includes below.
OPENCL_DEPTH = ../../..
|
||||
|
||||
# Shared definitions for the runtime build.
include $(OPENCL_DEPTH)/runtimenew/runtimedefs
|
||||
|
||||
# Only the build sub-directory is descended into from here.
SUBDIRS = build
|
||||
|
||||
# Shared rules for the runtime build.
include $(OPENCL_DEPTH)/runtimenew/runtimerules
|
||||
@@ -0,0 +1,67 @@
|
||||
#include "common_utility.h"
|
||||
|
||||
|
||||
/**
 * Returns the median of the given samples.
 *
 * The vector is taken by value, so it is sorted locally without touching the
 * caller's data. For an even number of samples the two middle values are
 * averaged.
 *
 * @param scores samples in any order.
 * @return the median, or 0.0 when scores is empty.
 */
double CalcMedian(std::vector<double> scores)
{
    // An empty vector has no middle element; size / 2 - 1 would wrap around.
    if (scores.empty())
        return 0.0;

    std::sort(scores.begin(), scores.end());

    const size_t size = scores.size();
    const size_t mid = size / 2;

    if (size % 2 == 0)
        return (scores[mid - 1] + scores[mid]) / 2;

    return scores[mid];
}
|
||||
|
||||
/**
 * Returns the arithmetic mean of the given samples.
 *
 * @param scores samples to average.
 * @return sum(scores) / scores.size(), or 0.0 when scores is empty.
 */
double CalcMean(std::vector<double> scores)
{
    const size_t count = scores.size();
    if (count == 0)
        return 0.0;

    // The accumulator must start at zero; it was previously left uninitialised.
    double sum = 0.0;
    for (size_t i = 0; i < count; ++i)
        sum += scores[i];

    return sum / count;
}
|
||||
|
||||
|
||||
/**
 * Returns the population standard deviation of the samples around score_mean.
 *
 * @param scores     samples.
 * @param score_mean mean to measure deviation from. NOTE(review): the
 *                   parameter is an int, so a fractional mean is truncated by
 *                   the call; widening it needs a matching header change.
 * @return sqrt(sum((x - score_mean)^2) / N), or 0.0 when scores is empty.
 */
double CalcStdDeviation(std::vector<double> scores, int score_mean)
{
    const size_t count = scores.size();
    // Avoid 0/0 (NaN) for an empty sample set.
    if (count == 0)
        return 0.0;

    double sum_sq = 0.0;
    for (size_t i = 0; i < count; ++i)
    {
        const double delta = scores[i] - score_mean;
        sum_sq += delta * delta;
    }

    return std::sqrt(sum_sq / count);
}
|
||||
|
||||
/**
 * Counts the entries whose measured time is within 10% of (or above) the
 * ideal halving sequence.
 *
 * Entry i is expected to take scores[0] / 2^i. Every expected value is
 * printed to stdout; entries 1..N-1 are then counted when
 * (expected - measured) < 0.1 * expected.
 *
 * @param scores measured times; scores[0] is the baseline.
 * @return number of entries after the first that satisfy the condition.
 */
int CalcConcurrentQueues(std::vector<double> scores)
{
    const size_t count = scores.size();
    std::vector<double> expected_exec_time;
    expected_exec_time.reserve(count);

    for (size_t i = 0; i < count; ++i)
    {
        // ldexp scales by 2^-i exactly like the division did, without the
        // undefined behaviour of (1 << i) once i reaches the width of int.
        expected_exec_time.push_back(std::ldexp(scores[0], -static_cast<int>(i)));
        std::cout << "expected exe time = " << expected_exec_time[i] << std::endl;
    }

    int num_of_concurrent_queues = 0;
    for (size_t i = 1; i < count; ++i)
    {
        if ((expected_exec_time[i] - scores[i]) < 0.1 * expected_exec_time[i])
            ++num_of_concurrent_queues;
    }

    return num_of_concurrent_queues;
}
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
// Arithmetic mean of scores.
double CalcMean(vector<double> scores);
|
||||
// Median of scores.
double CalcMedian(vector<double> scores);
|
||||
// Standard deviation of scores around score_mean.
// NOTE(review): score_mean is an int, so a fractional mean is truncated.
double CalcStdDeviation(vector<double> scores, int score_mean);
|
||||
// Counts entries that meet the expected scores[0] / 2^i timing.
int CalcConcurrentQueues(vector<double> scores);
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,222 @@
|
||||
/* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
|
||||
*
|
||||
* HSAF is granting you permission to use this software and documentation (if
|
||||
* any) (collectively, the "Materials") pursuant to the terms and conditions
|
||||
* of the Software License Agreement included with the Materials. If you do
|
||||
* not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <libelf.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "hsa.h"
|
||||
#include "elf_utils.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
|
||||
/* Identifiers of the BRIG sections looked up in the ELF container. */
enum {
  SECTION_HSA_DATA = 0,
  SECTION_HSA_CODE,
  SECTION_HSA_OPERAND,
};

/*
 * Describes one section: its identifier and the two ELF section names
 * (BRIG naming and BIF naming) under which it may be stored.
 */
typedef struct SectionDesc SectionDesc;
struct SectionDesc {
  int sectionId;
  const char *brigName;
  const char *bifName;
};

/* Table of the predefined sections, searched by get_section_desc(). */
struct SectionDesc sectionDescs[] = {
  { SECTION_HSA_DATA,    "hsa_data",    ".brig_hsa_data"    },
  { SECTION_HSA_CODE,    "hsa_code",    ".brig_hsa_code"    },
  { SECTION_HSA_OPERAND, "hsa_operand", ".brig_hsa_operand" },
};
|
||||
|
||||
/* Explicit declaration of fileno(), which read_binary() uses to obtain the
 * descriptor handed to elf_begin(). Presumably <stdio.h> does not expose it
 * under the language mode this file is built with -- TODO confirm. */
extern int fileno(FILE* stream);
|
||||
|
||||
const SectionDesc* get_section_desc(int sectionId) {
|
||||
const int NUM_PREDEFINED_SECTIONS = sizeof(sectionDescs)/sizeof(sectionDescs[0]);
|
||||
for(int i=0; i<NUM_PREDEFINED_SECTIONS; ++i) {
|
||||
if (sectionDescs[i].sectionId == sectionId) {
|
||||
return §ionDescs[i];
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static Elf_Scn* extract_elf_section (Elf *elfP,
|
||||
Elf_Data *secHdr,
|
||||
const SectionDesc* desc) {
|
||||
int cnt = 0;
|
||||
Elf_Scn* scn = NULL;
|
||||
Elf32_Shdr* shdr = NULL;
|
||||
char* sectionName = NULL;
|
||||
|
||||
/* Iterate thru the elf sections */
|
||||
for (cnt = 1, scn = NULL; scn = elf_nextscn(elfP, scn); cnt++) {
|
||||
if (((shdr = elf32_getshdr(scn)) == NULL)) {
|
||||
return NULL;
|
||||
}
|
||||
sectionName = (char *)secHdr->d_buf + shdr->sh_name;
|
||||
if (sectionName &&
|
||||
((strcmp(sectionName, desc->brigName) == 0) ||
|
||||
(strcmp(sectionName, desc->bifName) == 0))) {
|
||||
return scn;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Extract section and copy into HsaBrig */
|
||||
static status_t extract_section_and_copy (Elf *elfP,
|
||||
Elf_Data *secHdr,
|
||||
const SectionDesc* desc,
|
||||
hsa_ext_brig_module_t* brig_module,
|
||||
hsa_ext_brig_section_id_t section_id) {
|
||||
Elf_Scn* scn = NULL;
|
||||
Elf_Data* data = NULL;
|
||||
void* address_to_copy;
|
||||
size_t section_size=0;
|
||||
|
||||
scn = extract_elf_section(elfP, secHdr, desc);
|
||||
|
||||
if (scn) {
|
||||
if ((data = elf_getdata(scn, NULL)) == NULL) {
|
||||
return STATUS_UNKNOWN;
|
||||
}
|
||||
section_size = data->d_size;
|
||||
if (section_size > 0) {
|
||||
address_to_copy = malloc(section_size);
|
||||
memcpy(address_to_copy, data->d_buf, section_size);
|
||||
}
|
||||
}
|
||||
|
||||
if ((!scn || section_size == 0)) {
|
||||
return STATUS_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Create a section header */
|
||||
brig_module->section[section_id] = (hsa_ext_brig_section_header_t*) address_to_copy;
|
||||
|
||||
return STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/* Reads binary of BRIG and BIF format */
|
||||
status_t read_binary(hsa_ext_brig_module_t **brig_module_t, FILE* binary) {
|
||||
/* Create the brig_module */
|
||||
uint32_t number_of_sections = 3;
|
||||
hsa_ext_brig_module_t* brig_module;
|
||||
|
||||
brig_module = (hsa_ext_brig_module_t*)
|
||||
(malloc (sizeof(hsa_ext_brig_module_t) + sizeof(void*)*number_of_sections));
|
||||
brig_module->section_count = number_of_sections;
|
||||
|
||||
status_t status;
|
||||
Elf* elfP = NULL;
|
||||
Elf32_Ehdr* ehdr = NULL;
|
||||
Elf_Data *secHdr = NULL;
|
||||
Elf_Scn* scn = NULL;
|
||||
int fd;
|
||||
|
||||
if (elf_version ( EV_CURRENT ) == EV_NONE) {
|
||||
return STATUS_KERNEL_ELF_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
fd = fileno(binary);
|
||||
if ((elfP = elf_begin(fd, ELF_C_READ, (Elf *)0)) == NULL) {
|
||||
return STATUS_KERNEL_INVALID_ELF_CONTAINER;
|
||||
}
|
||||
|
||||
if (elf_kind (elfP) != ELF_K_ELF) {
|
||||
return STATUS_KERNEL_INVALID_ELF_CONTAINER;
|
||||
}
|
||||
|
||||
if (((ehdr = elf32_getehdr(elfP)) == NULL) ||
|
||||
((scn = elf_getscn(elfP, ehdr->e_shstrndx)) == NULL) ||
|
||||
((secHdr = elf_getdata(scn, NULL)) == NULL)) {
|
||||
return STATUS_KERNEL_INVALID_SECTION_HEADER;
|
||||
}
|
||||
|
||||
status = extract_section_and_copy(elfP,
|
||||
secHdr,
|
||||
get_section_desc(SECTION_HSA_DATA),
|
||||
brig_module,
|
||||
HSA_EXT_BRIG_SECTION_DATA);
|
||||
|
||||
if (status != STATUS_SUCCESS) {
|
||||
return STATUS_KERNEL_MISSING_DATA_SECTION;
|
||||
}
|
||||
|
||||
status = extract_section_and_copy(elfP,
|
||||
secHdr,
|
||||
get_section_desc(SECTION_HSA_CODE),
|
||||
brig_module,
|
||||
HSA_EXT_BRIG_SECTION_CODE);
|
||||
|
||||
if (status != STATUS_SUCCESS) {
|
||||
return STATUS_KERNEL_MISSING_CODE_SECTION;
|
||||
}
|
||||
|
||||
status = extract_section_and_copy(elfP,
|
||||
secHdr,
|
||||
get_section_desc(SECTION_HSA_OPERAND),
|
||||
brig_module,
|
||||
HSA_EXT_BRIG_SECTION_OPERAND);
|
||||
|
||||
if (status != STATUS_SUCCESS) {
|
||||
return STATUS_KERNEL_MISSING_OPERAND_SECTION;
|
||||
}
|
||||
|
||||
elf_end(elfP);
|
||||
*brig_module_t = brig_module;
|
||||
|
||||
return STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Opens file_name and builds a BRIG module from the ELF container it holds.
 *
 * file_name:   path of the BRIG/BIF file.
 * brig_module: receives the module on success.
 *
 * Returns the status of read_binary(), or STATUS_UNKNOWN when the file
 * cannot be opened. A diagnostic is printed to stdout on every failure.
 */
status_t create_brig_module_from_brig_file(const char* file_name, hsa_ext_brig_module_t** brig_module) {
  FILE *fp = NULL;
  status_t status;

  /* A failed fopen used to flow into fileno()/fclose() with a NULL stream. */
  if (file_name == NULL || (fp = fopen(file_name, "rb")) == NULL) {
    printf("Could not open BRIG file: %s\n", file_name ? file_name : "(null)");
    return STATUS_UNKNOWN;
  }

  status = read_binary(brig_module, fp);
  fclose(fp);

  if (status != STATUS_SUCCESS) {
    printf("Could not create BRIG module: %d\n", status);
    if (status == STATUS_KERNEL_INVALID_SECTION_HEADER ||
        status == STATUS_KERNEL_ELF_INITIALIZATION_FAILED ||
        status == STATUS_KERNEL_INVALID_ELF_CONTAINER) {
      printf("The ELF file is invalid or possibley corrupted.\n");
    }
    if (status == STATUS_KERNEL_MISSING_DATA_SECTION ||
        status == STATUS_KERNEL_MISSING_CODE_SECTION ||
        status == STATUS_KERNEL_MISSING_OPERAND_SECTION) {
      printf("One or more ELF sections are missing. Use readelf command to \
to check if hsa_data, hsa_code and hsa_operands exist.\n");
    }
  }

  return status;
}
|
||||
|
||||
/*
 * Releases a module created by create_brig_module_from_brig_file(): frees
 * every section copy, then the module itself. A NULL module is ignored.
 */
void destroy_brig_module(hsa_ext_brig_module_t* brig_module) {
  if (brig_module == NULL) {
    return;
  }
  /* Unsigned index to match section_count, which is assigned from a
   * uint32_t in read_binary(). */
  for (uint32_t i = 0; i < brig_module->section_count; i++) {
    free (brig_module->section[i]);
  }
  free (brig_module);
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
|
||||
*
|
||||
* HSAF is granting you permission to use this software and documentation (if
|
||||
* any) (collectively, the "Materials") pursuant to the terms and conditions
|
||||
* of the Software License Agreement included with the Materials. If you do
|
||||
* not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "hsa_ext_finalize.h"
|
||||
|
||||
//typedef enum status_t status_t;
|
||||
/*
 * Result codes of the ELF/BRIG loading helpers. The numeric values are
 * spelled out because callers print them and cast them to other status
 * types.
 */
typedef enum status_t {
  STATUS_SUCCESS                          = 0, /* module loaded */
  STATUS_KERNEL_INVALID_SECTION_HEADER    = 1, /* ELF header or section-name table unreadable */
  STATUS_KERNEL_ELF_INITIALIZATION_FAILED = 2, /* elf_version() rejected EV_CURRENT */
  STATUS_KERNEL_INVALID_ELF_CONTAINER     = 3, /* not an ELF file, or elf_begin() failed */
  STATUS_KERNEL_MISSING_DATA_SECTION      = 4, /* data section could not be extracted */
  STATUS_KERNEL_MISSING_CODE_SECTION      = 5, /* code section could not be extracted */
  STATUS_KERNEL_MISSING_OPERAND_SECTION   = 6, /* operand section could not be extracted */
  STATUS_UNKNOWN                          = 7, /* any other failure */
} status_t;
|
||||
|
||||
/* Loads the BRIG module stored in the ELF file file_name into *brig_module.
 * Returns STATUS_SUCCESS or a status_t describing the failure. */
status_t create_brig_module_from_brig_file(const char* file_name, hsa_ext_brig_module_t** brig_module);
|
||||
|
||||
/* Frees a module returned by create_brig_module_from_brig_file(), including
 * its section copies. */
void destroy_brig_module(hsa_ext_brig_module_t* brig_module);
|
||||
@@ -0,0 +1,225 @@
|
||||
#include "hsa_base.h"
|
||||
|
||||
void HSA::SetBrigFileAndKernelName(char * brig_file_name, char *kernel_name)
|
||||
{
|
||||
strcpy(hsa_brig_file_name, brig_file_name);
|
||||
strcpy(hsa_kernel_name, kernel_name);
|
||||
}
|
||||
|
||||
/*
 * Puts every member into a defined state. Nothing talks to the runtime
 * here; that happens in HsaInit().
 */
HSA::HSA()
    : err(HSA_STATUS_SUCCESS),
      queue_size(0),
      command_queue(NULL),
      brig_module(NULL),
      hsa_code_descriptor(NULL)
{
    // The handle and packet members are plain structs declared by the HSA
    // headers; zero them so nothing reads indeterminate values.
    memset(&device, 0, sizeof(device));
    memset(hsa_brig_file_name, 0, sizeof(hsa_brig_file_name));
    memset(hsa_kernel_name, 0, sizeof(hsa_kernel_name));
    memset(&hsa_signal, 0, sizeof(hsa_signal));
    memset(&module, 0, sizeof(module));
    memset(&hsa_program, 0, sizeof(hsa_program));
    memset(&dispatch_packet, 0, sizeof(dispatch_packet));
    memset(&hsa_kernarg_region, 0, sizeof(hsa_kernarg_region));
}
|
||||
|
||||
/*
 * Releases nothing: the runtime is shut down explicitly through Close().
 */
HSA::~HSA() {}
|
||||
|
||||
|
||||
bool HSA::HsaInit()
|
||||
{
|
||||
err = hsa_init();
|
||||
check(Initializing the hsa runtime, err);
|
||||
|
||||
/*
|
||||
* Iterate over the agents and pick the gpu agent using
|
||||
* the find_gpu callback.
|
||||
*/
|
||||
err = hsa_iterate_agents(find_gpu, &device);
|
||||
check(Calling hsa_iterate_agents, err);
|
||||
|
||||
err = (device.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
||||
check(Checking if the GPU device is non-zero, err);
|
||||
|
||||
if (err == HSA_STATUS_ERROR)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Query the maximum size of the queue.
|
||||
*/
|
||||
err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
|
||||
check(Querying the device maximum queue size, err);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
double HSA::Run(int dim, int group_x, int group_y, int group_z, int s_size, int grid_x, int grid_y, int grid_z, void* kernel_args, int kernel_args_size)
|
||||
{
|
||||
hsa_queue_t* local_command_queue;
|
||||
/*
|
||||
* Create a queue using the maximum size.
|
||||
*/
|
||||
err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &local_command_queue);
|
||||
check(Creating the queue, err);
|
||||
|
||||
/*
|
||||
* Load BRIG, encapsulated in an ELF container, into a BRIG module.
|
||||
*/
|
||||
//char file_name[128] = "transpose_kernel.brig";
|
||||
hsa_ext_brig_module_t* local_brig_module;
|
||||
err = (hsa_status_t)create_brig_module_from_brig_file(hsa_brig_file_name, &local_brig_module);
|
||||
check(Creating the brig module from vector_copy.brig, err);
|
||||
|
||||
/*
|
||||
* Create hsa program.
|
||||
*/
|
||||
hsa_ext_program_handle_t local_hsa_program;
|
||||
err = hsa_ext_program_create(&device, 1, HSA_EXT_BRIG_MACHINE_LARGE, HSA_EXT_BRIG_PROFILE_FULL, &local_hsa_program);
|
||||
check(Creating the hsa program, err);
|
||||
|
||||
/*
|
||||
* Add the BRIG module to hsa program.
|
||||
*/
|
||||
hsa_ext_brig_module_handle_t local_module;
|
||||
err = hsa_ext_add_module(local_hsa_program, local_brig_module, &local_module);
|
||||
check(Adding the local brig module to the program, err);
|
||||
|
||||
/*
|
||||
* Construct finalization request list.
|
||||
*/
|
||||
hsa_ext_finalization_request_t local_finalization_request_list;
|
||||
local_finalization_request_list.module = local_module;
|
||||
local_finalization_request_list.program_call_convention = 0;
|
||||
//char kernel_name[128] = "&__OpenCL_matrixTranspose_kernel";
|
||||
err = find_symbol_offset(local_brig_module, hsa_kernel_name, &local_finalization_request_list.symbol);
|
||||
check(Finding the symbol offset for the kernel, err);
|
||||
|
||||
/*
|
||||
* Finalize the hsa program.
|
||||
*/
|
||||
err = hsa_ext_finalize_program(local_hsa_program, device, 1, &local_finalization_request_list, NULL, NULL, 0, NULL, 0);
|
||||
check(Finalizing the program, err);
|
||||
|
||||
/*
|
||||
* Destroy the brig module. The program was successfully created the kernel
|
||||
* symbol was found and the program was finalized, so it is no longer needed.
|
||||
*/
|
||||
destroy_brig_module(local_brig_module);
|
||||
|
||||
/*
|
||||
* Get the hsa code descriptor address.
|
||||
*/
|
||||
hsa_ext_code_descriptor_t *local_hsa_code_descriptor;
|
||||
err = hsa_ext_query_kernel_descriptor_address(local_hsa_program, local_module, local_finalization_request_list.symbol, &local_hsa_code_descriptor);
|
||||
check(Querying the kernel descriptor address, err);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
* Create a signal to wait for the dispatch to finish.
|
||||
*/
|
||||
hsa_signal_t local_signal;
|
||||
err=hsa_signal_create(1, 0, NULL, &local_signal);
|
||||
check(Creating a HSA signal, err);
|
||||
|
||||
|
||||
/* Initialize the dispatch packet */
|
||||
hsa_kernel_dispatch_packet_t local_dispatch_packet;
|
||||
memset(&local_dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
|
||||
/*
|
||||
* Setup the dispatch information.
|
||||
*/
|
||||
local_dispatch_packet.completion_signal=local_signal;
|
||||
local_dispatch_packet.setup |= dim<< HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
local_dispatch_packet.workgroup_size_x = group_x;
|
||||
local_dispatch_packet.workgroup_size_y = group_y;
|
||||
local_dispatch_packet.workgroup_size_z = group_z;
|
||||
local_dispatch_packet.group_segment_size = s_size;
|
||||
local_dispatch_packet.grid_size_x = grid_x;
|
||||
local_dispatch_packet.grid_size_y = grid_y;
|
||||
local_dispatch_packet.grid_size_z = grid_z;
|
||||
local_dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
local_dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
||||
local_dispatch_packet.header |= HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* Find a memory region that supports kernel arguments.
|
||||
*/
|
||||
hsa_region_t local_kernarg_region;
|
||||
local_kernarg_region.handle = 0;
|
||||
|
||||
hsa_agent_iterate_regions(device, get_kernarg, &local_kernarg_region);
|
||||
err = (local_kernarg_region.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
|
||||
check(Finding a kernarg memory region, err);
|
||||
void* local_kernel_arg_buffer = NULL;
|
||||
|
||||
size_t local_kernel_arg_buffer_size = local_hsa_code_descriptor->kernarg_segment_byte_size;
|
||||
|
||||
/*
|
||||
* Allocate the kernel argument buffer from the correct region.
|
||||
*/
|
||||
err = hsa_memory_allocate(local_kernarg_region, local_kernel_arg_buffer_size, &local_kernel_arg_buffer);
|
||||
check(Allocating kernel argument memory buffer, err);
|
||||
memcpy(local_kernel_arg_buffer, kernel_args, kernel_args_size);
|
||||
|
||||
local_dispatch_packet.kernel_object = local_hsa_code_descriptor->code.handle;
|
||||
// Assume our kernel receives no arguments
|
||||
local_dispatch_packet.kernarg_address = local_kernel_arg_buffer;
|
||||
|
||||
/*
|
||||
* Register the memory region for the argument buffer.
|
||||
*/
|
||||
err = hsa_memory_register(kernel_args, kernel_args_size);
|
||||
|
||||
check(Registering the argument buffer, err);
|
||||
|
||||
/*
|
||||
* Obtain the current queue write index.
|
||||
*/
|
||||
uint64_t index = hsa_queue_load_write_index_relaxed(local_command_queue);
|
||||
|
||||
/*
|
||||
* Write the aql packet at the calculated queue index address.
|
||||
*/
|
||||
const uint32_t queueMask = local_command_queue->size - 1;
|
||||
((hsa_kernel_dispatch_packet_t*)(local_command_queue->base_address))[index&queueMask]=local_dispatch_packet;
|
||||
|
||||
/*
|
||||
* Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
*/
|
||||
hsa_queue_store_write_index_relaxed(local_command_queue, index+1);
|
||||
|
||||
#ifdef TIME
|
||||
PerfTimer perf_timer_0;
|
||||
int timer_idx_0 = perf_timer_0.CreateTimer();
|
||||
perf_timer_0.StartTimer(timer_idx_0);
|
||||
#endif
|
||||
hsa_signal_store_release(local_command_queue->doorbell_signal, index);
|
||||
|
||||
/*
|
||||
* Wait on the dispatch signal until all kernel are finished.
|
||||
*/
|
||||
while (hsa_signal_wait_acquire(local_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0);
|
||||
|
||||
#ifdef TIME
|
||||
perf_timer_0.StopTimer(timer_idx_0);
|
||||
#endif
|
||||
/*
|
||||
* Cleanup all allocated resources.
|
||||
*/
|
||||
|
||||
err=hsa_signal_destroy(local_signal);
|
||||
check(Destroying the local_signal, err);
|
||||
|
||||
err=hsa_ext_program_destroy(local_hsa_program);
|
||||
check(Destroying the program, err);
|
||||
|
||||
err=hsa_queue_destroy(local_command_queue);
|
||||
check(Destroying the queue, err);
|
||||
|
||||
#ifdef TIME
|
||||
double ret = perf_timer_0.ReadTimer(timer_idx_0);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void HSA::Close()
|
||||
{
|
||||
err=hsa_shut_down();
|
||||
check(Shutting down the runtime, err);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
#ifndef __HSA_BASE__
|
||||
#define __HSA_BASE__
|
||||
|
||||
|
||||
#include <vector>
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include "elf_utils.h"
|
||||
#include "hsatimer.h"
|
||||
#include "utilities.h"
|
||||
|
||||
class HSA{
|
||||
public:
|
||||
HSA();
|
||||
~HSA();
|
||||
|
||||
public:
|
||||
void SetBrigFileAndKernelName(char *brig_file_name, char *kernel_name);
|
||||
bool HsaInit();
|
||||
void Close();
|
||||
double Run(int dim, int group_x, int group_y, int group_z, int s_size, int grid_x, int grid_y, int grid_z, void* kernel_args, int kernel_args_size);
|
||||
|
||||
public:
|
||||
hsa_status_t err;
|
||||
uint32_t queue_size;
|
||||
hsa_agent_t device;
|
||||
|
||||
char hsa_brig_file_name[128];
|
||||
char hsa_kernel_name[128];
|
||||
|
||||
hsa_queue_t* command_queue;
|
||||
hsa_signal_t hsa_signal;
|
||||
hsa_ext_brig_module_t* brig_module;
|
||||
hsa_ext_brig_module_handle_t module;
|
||||
hsa_ext_program_handle_t hsa_program;
|
||||
hsa_ext_code_descriptor_t *hsa_code_descriptor;
|
||||
hsa_kernel_dispatch_packet_t dispatch_packet; // needs to be set manually each time
|
||||
hsa_region_t hsa_kernarg_region;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
#include "hsatimer.h"
|
||||
|
||||
/*
 * Measures the TSC frequency once at construction; StopTimer() uses it to
 * convert tick counts when _AMD is defined.
 */
PerfTimer::PerfTimer()
    : freq_in_100mhz(MeasureTSCFreqHz())
{
}
|
||||
|
||||
/*
 * Deletes every Timer created through CreateTimer(), newest first, and
 * empties the list.
 */
PerfTimer::~PerfTimer()
{
    for (std::vector<Timer*>::reverse_iterator it = _timers.rbegin(); it != _timers.rend(); ++it)
        delete *it;
    _timers.clear();
}
|
||||
|
||||
//a new cretaed timer instantance index will be returned
|
||||
int PerfTimer::CreateTimer()
|
||||
{
|
||||
Timer *newTimer = new Timer;
|
||||
newTimer->_start = 0;
|
||||
newTimer->_clocks = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->_freq);
|
||||
#else
|
||||
newTimer->_freq = (long long)1.0E3;
|
||||
#endif
|
||||
|
||||
/* Push back the address of new Timer instance created */
|
||||
_timers.push_back(newTimer);
|
||||
return (int)(_timers.size() - 1);
|
||||
}
|
||||
|
||||
int PerfTimer::StartTimer(int index)
|
||||
{
|
||||
if(index >= (int)_timers.size())
|
||||
{
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return HSA_FAILURE;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// General Windows timing method
|
||||
#ifndef _AMD
|
||||
long long tmpStart;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
|
||||
_timers[index]->_start = (double)tmpStart;
|
||||
#else
|
||||
// AMD Windows timing method
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
_timers[index]->_start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
|
||||
#else
|
||||
|
||||
// AMD timing method
|
||||
|
||||
unsigned int unused;
|
||||
_timers[index]->_start = __rdtscp(&unused);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return HSA_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int PerfTimer::StopTimer(int index)
|
||||
{
|
||||
double n=0;
|
||||
if(index >= (int)_timers.size())
|
||||
{
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return HSA_FAILURE;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
#ifndef _AMD
|
||||
long long n1;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
|
||||
n = (double) n1;
|
||||
#else
|
||||
|
||||
// AMD Window Timing
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
n = s.tv_sec * 1.0E3+ (double)(s.tv_usec/1.0E3);
|
||||
#else
|
||||
// AMD Linux timing
|
||||
|
||||
unsigned int unused;
|
||||
n = __rdtscp(&unused);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
n -= _timers[index]->_start;
|
||||
_timers[index]->_start = 0;
|
||||
|
||||
#ifndef _AMD
|
||||
_timers[index]->_clocks += n;
|
||||
#else
|
||||
//_timers[index]->_clocks += 10 * n /freq_in_100mhz; // unit is ns
|
||||
_timers[index]->_clocks += 1.0E-6 * 10 * n /freq_in_100mhz; // convert to ms
|
||||
cout << "_AMD is enabled!!!" << endl;
|
||||
#endif
|
||||
|
||||
return HSA_SUCCESS;
|
||||
}
|
||||
|
||||
void PerfTimer::Error(string str)
|
||||
{
|
||||
cout << str << endl;
|
||||
}
|
||||
|
||||
|
||||
double PerfTimer::ReadTimer(int index)
|
||||
{
|
||||
|
||||
if(index >= (int)_timers.size())
|
||||
{
|
||||
Error("Cannot read timer. Invalid handle.");
|
||||
return HSA_FAILURE;
|
||||
}
|
||||
|
||||
double reading = double(_timers[index]->_clocks);
|
||||
|
||||
reading = double(reading / _timers[index]->_freq);
|
||||
|
||||
return reading;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns a timestamp in microseconds: from the performance counter on
 * Windows, from CLOCK_MONOTONIC_RAW elsewhere.
 */
uint64_t PerfTimer::CoarseTimestampUs()
{
#ifdef _WIN32
    uint64_t frequencyHz;
    uint64_t counter;
    QueryPerformanceFrequency((LARGE_INTEGER *)&frequencyHz);
    QueryPerformanceCounter((LARGE_INTEGER *)&counter);

    // Scale numerator and divisor until (counter * 1000000) fits in uint64_t.
    for (; counter > (1ULL << 44); counter /= 16)
        frequencyHz /= 16;

    return (counter * 1000000) / frequencyHz;
#else
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC_RAW, &now);

    const uint64_t wholeSecondsUs = uint64_t(now.tv_sec) * 1000000;
    return wholeSecondsUs + now.tv_nsec / 1000;
#endif
}
|
||||
|
||||
uint64_t PerfTimer::MeasureTSCFreqHz()
|
||||
{
|
||||
// Make a coarse interval measurement of TSC ticks for 1 gigacycles.
|
||||
unsigned int unused;
|
||||
uint64_t tscTicksEnd;
|
||||
|
||||
uint64_t coarseBeginUs = CoarseTimestampUs();
|
||||
uint64_t tscTicksBegin = __rdtscp(&unused);
|
||||
do
|
||||
{
|
||||
tscTicksEnd = __rdtscp(&unused);
|
||||
}
|
||||
while (tscTicksEnd - tscTicksBegin < 1000000000);
|
||||
|
||||
uint64_t coarseEndUs = CoarseTimestampUs();
|
||||
|
||||
// Compute the TSC frequency and round to nearest 100MHz.
|
||||
uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000;
|
||||
uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin;
|
||||
return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
#ifndef __MYTIME__
|
||||
#define __MYTIME__
|
||||
|
||||
// Will use AMD timer and general Linux timer based on users' need --> compilation flag
|
||||
|
||||
// need to consider platform is Windows or Linux
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
// Return codes of the PerfTimer member functions.
#define HSA_FAILURE 1
|
||||
#define HSA_SUCCESS 0
|
||||
|
||||
/*
 * Collection of stopwatch-style timers addressed by integer handle.
 * The clock source is selected at compile time (_WIN32 / _AMD).
 */
class PerfTimer {
private:
    struct Timer
    {
        std::string name;   /* name of the timer object */
        long long _freq;    /* divisor applied when the timer is read */
        double _clocks;     /* accumulated elapsed value */
        double _start;      /* value captured by StartTimer() */
    };

    std::vector<Timer*> _timers;    /* owned Timer objects, indexed by handle */
    double freq_in_100mhz;          /* TSC frequency measured at construction */

    // AMD timing method
    uint64_t CoarseTimestampUs();
    uint64_t MeasureTSCFreqHz();

public:
    PerfTimer();
    ~PerfTimer();

    int CreateTimer();
    int StartTimer(int index);
    int StopTimer(int index);

    // retrieve time
    double ReadTimer(int index);
    // write into a file
    double WriteTimer(int index);

    void Error(std::string str);
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
#include "utilities.h"
|
||||
|
||||
/*
 * Writes a 2-D array to standard output.
 *
 * Output layout: an empty line, the header text, then one line per row in
 * which every element is followed by a single space, then a final empty line.
 * All width * height elements are printed; the output is not truncated.
 *
 *   header  text printed above the data
 *   data    row-major array holding width * height elements
 *   width   number of elements per row
 *   height  number of rows
 */
template<typename T>
void PrintArray(
    std::string header,
    const T * data,
    const int width,
    const int height)
{
    std::cout << "\n" << header << "\n";

    // Walk the row-major storage with a single running pointer.
    const T * cell = data;
    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col, ++cell)
        {
            std::cout << *cell << " ";
        }
        std::cout << "\n";
    }

    std::cout << "\n";
}
|
||||
|
||||
/*
 * Tests whether val is a power of two.
 *
 * The value is converted to long long before testing, so fractional parts of
 * floating-point arguments are discarded and values outside the long long
 * range are not handled.
 *
 * Returns 0 when val is a positive power of two, -1 otherwise (including
 * zero and every negative value).
 */
template<typename T>
int IsPowerOf2(T val)
{
    const long long _val = val;

    // Reject zero and negatives up front; this also avoids negating
    // LLONG_MIN, which would be signed-overflow undefined behaviour.
    if(_val <= 0)
        return -1;

    // A power of two has exactly one bit set, so clearing the lowest set bit
    // leaves zero.
    return ((_val & (_val - 1)) == 0) ? 0 : -1;
}
|
||||
|
||||
|
||||
/*
 * Rounds val up to the next power of two; a value that already is a power of
 * two is returned unchanged.
 *
 * Works by smearing the highest set bit of (val - 1) into every lower bit
 * position and then adding one. The shift distance doubles each step
 * (1, 2, 4, ...) and stops before it reaches the bit width of T, so every
 * bit is covered and no shift is ever as wide as the type itself.
 *
 * T is expected to be an integer type.
 */
template<typename T>
T RoundToPowerOf2(T val)
{
    const int bits = int(sizeof(T)) * 8;   // width of T in bits

    val--;
    for(int shift = 1; shift < bits; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}
|
||||
|
||||
|
||||
/*
 * Fills a width x height row-major array with pseudo-random values drawn
 * from [rangeMin, rangeMax] using srand()/rand().
 *
 *   arrayPtr  destination array (width * height elements)
 *   width     elements per row
 *   height    number of rows
 *   rangeMin  smallest value to generate
 *   rangeMax  largest value to generate
 *   seed      seed passed to srand(); 0 selects a time-based seed.
 *             The default value (123) is supplied by the declaration in
 *             utilities.h; a default argument must not be repeated on the
 *             definition.
 *
 * Returns 0 on success, -1 when arrayPtr is NULL.
 */
template<typename T>
int FillRandom(
    T * arrayPtr,
    const int width,
    const int height,
    const T rangeMin,
    const T rangeMax,
    unsigned int seed)
{
    if(!arrayPtr)
    {
        printf("Cannot fill array. NULL pointer.");
        return -1;
    }

    if(!seed)
        seed = (unsigned int)time(NULL);

    srand(seed);
    const double range = double(rangeMax - rangeMin) + 1.0;

    /* random initialisation of input */
    for(int i = 0; i < height; i++)
        for(int j = 0; j < width; j++)
        {
            const int index = i*width + j;
            // rand()/(RAND_MAX + 1.0) lies in [0, 1), so the scaled offset
            // stays below range.
            arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0));
        }

    return 0;
}
|
||||
|
||||
|
||||
//get a memory region that can be used for global memory allocations.
|
||||
hsa_status_t get_global_region(hsa_region_t region, void* data)
|
||||
{
|
||||
hsa_region_segment_t segment;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment);
|
||||
if (HSA_REGION_SEGMENT_GLOBAL == segment)
|
||||
{
|
||||
hsa_region_t* ret = (hsa_region_t*) data;
|
||||
*ret = region;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Finds the specified symbols offset in the specified brig_module.
|
||||
* If the symbol is found the function returns HSA_STATUS_SUCCESS,
|
||||
* otherwise it returns HSA_STATUS_ERROR.
|
||||
*/
|
||||
hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module,
|
||||
char* symbol_name,
|
||||
hsa_ext_brig_code_section_offset32_t* offset)
|
||||
{
|
||||
|
||||
/*
|
||||
* Get the data section
|
||||
*/
|
||||
hsa_ext_brig_section_header_t* data_section_header =
|
||||
brig_module->section[HSA_EXT_BRIG_SECTION_DATA];
|
||||
/*
|
||||
* Get the code section
|
||||
*/
|
||||
hsa_ext_brig_section_header_t* code_section_header =
|
||||
brig_module->section[HSA_EXT_BRIG_SECTION_CODE];
|
||||
|
||||
/*
|
||||
* First entry into the BRIG code section
|
||||
*/
|
||||
BrigCodeOffset32_t code_offset = code_section_header->header_byte_count;
|
||||
BrigBase* code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
|
||||
while (code_offset != code_section_header->byte_count)
|
||||
{
|
||||
if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL)
|
||||
{
|
||||
/*
|
||||
* Now find the data in the data section
|
||||
*/
|
||||
BrigDirectiveExecutable* directive_kernel = (BrigDirectiveExecutable*) (code_entry);
|
||||
BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
|
||||
BrigData* data_entry = (BrigData*)((char*) data_section_header + data_name_offset);
|
||||
if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name)))
|
||||
{
|
||||
*offset = code_offset;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
code_offset += code_entry->byteCount;
|
||||
code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
|
||||
}
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determines if the given agent is of type HSA_DEVICE_TYPE_GPU
|
||||
* and sets the value of data to the agent handle if it is.
|
||||
*/
|
||||
hsa_status_t find_gpu(hsa_agent_t agent, void *data)
|
||||
{
|
||||
if (data == NULL)
|
||||
{
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
hsa_device_type_t device_type;
|
||||
hsa_status_t stat = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
||||
if (stat != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
return stat;
|
||||
}
|
||||
if (device_type == HSA_DEVICE_TYPE_GPU)
|
||||
{
|
||||
*((hsa_agent_t *)data) = agent;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determines if a memory region can be used for kernarg
|
||||
* allocations.
|
||||
*/
|
||||
hsa_status_t get_kernarg(hsa_region_t region, void* data)
|
||||
{
|
||||
hsa_region_global_flag_t flags;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
|
||||
{
|
||||
hsa_region_t* ret = (hsa_region_t*) data;
|
||||
*ret = region;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,207 @@
|
||||
#ifndef __HSA_UTILITY__
|
||||
#define __HSA_UTILITY__
|
||||
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include "elf_utils.h"
|
||||
|
||||
#include <string.h>
|
||||
#include<iostream>
|
||||
using namespace std;
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
|
||||
#define SDK_FAILURE 1
|
||||
#define SDK_SUCCESS 0
|
||||
|
||||
/*
|
||||
#define check(msg, status) \
|
||||
if (status != HSA_STATUS_SUCCESS) { \
|
||||
printf("%s failed.\n", #msg); \
|
||||
exit(1); \
|
||||
} else { \
|
||||
printf("%s succeeded.\n", #msg); \
|
||||
}
|
||||
*/
|
||||
/*
 * check(msg, status): terminates the program with exit(1) after printing
 * "<msg> failed." when status differs from HSA_STATUS_SUCCESS; does nothing
 * otherwise. msg is stringified, so it is written without quotes.
 *
 * status is parenthesised so that an expression argument (for example
 * `a & b`) is evaluated as a whole before the comparison.
 *
 * NOTE(review): the expansion is a bare if/else statement, so the macro
 * should not be used as the unbraced body of another if that has its own
 * else branch.
 */
#define check(msg, status) \
if ((status) != HSA_STATUS_SUCCESS) { \
    printf("%s failed.\n", #msg); \
    exit(1); \
} else { \
    ; \
}
|
||||
|
||||
/*
 * Scalar type aliases used by the BRIG data structures below.
 */
using BrigCodeOffset32_t        = uint32_t;   // byte offset into the code section
using BrigDataOffset32_t        = uint32_t;   // byte offset into the data section
using BrigKinds16_t             = uint16_t;   // storage type for a BrigKinds value
using BrigLinkage8_t            = uint8_t;
using BrigExecutableModifier8_t = uint8_t;

// Data-section offset that refers to a string entry.
using BrigDataOffsetString32_t  = BrigDataOffset32_t;
|
||||
|
||||
/* Entry kinds stored in BrigBase::kind; only the values used here are listed. */
enum BrigKinds {
    BRIG_KIND_NONE             = 0x0000,
    BRIG_KIND_DIRECTIVE_BEGIN  = 0x1000,
    BRIG_KIND_DIRECTIVE_KERNEL = 0x1008   // kernel directive entry
};
|
||||
|
||||
typedef struct BrigBase BrigBase;
|
||||
struct BrigBase {
|
||||
uint16_t byteCount;
|
||||
BrigKinds16_t kind;
|
||||
};
|
||||
|
||||
typedef struct BrigExecutableModifier BrigExecutableModifier;
|
||||
struct BrigExecutableModifier {
|
||||
BrigExecutableModifier8_t allBits;
|
||||
};
|
||||
|
||||
typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
|
||||
struct BrigDirectiveExecutable {
|
||||
uint16_t byteCount;
|
||||
BrigKinds16_t kind;
|
||||
BrigDataOffsetString32_t name;
|
||||
uint16_t outArgCount;
|
||||
uint16_t inArgCount;
|
||||
BrigCodeOffset32_t firstInArg;
|
||||
BrigCodeOffset32_t firstCodeBlockEntry;
|
||||
BrigCodeOffset32_t nextModuleEntry;
|
||||
uint32_t codeBlockEntryCount;
|
||||
BrigExecutableModifier modifier;
|
||||
BrigLinkage8_t linkage;
|
||||
uint16_t reserved;
|
||||
};
|
||||
|
||||
/*
 * Variable-length entry of the BRIG data section: a byte count followed by
 * the payload. bytes is declared with one element and addressed beyond it.
 */
struct BrigData {
    uint32_t byteCount;   // number of payload bytes
    uint8_t  bytes[1];    // first payload byte
};
|
||||
|
||||
/*
 * Two-component float vector with component-wise arithmetic.
 *
 * The type stays a plain aggregate (no constructors). All operators are
 * const and take their vector operand by const reference, so they accept
 * const objects and temporaries, e.g. a * (b + c).
 */
struct float2
{
    float s0;
    float s1;

    /* Component-wise product. */
    float2 operator * (const float2 &fl) const
    {
        float2 temp;
        temp.s0 = s0 * fl.s0;
        temp.s1 = s1 * fl.s1;
        return temp;
    }

    /* Scales both components by scalar. */
    float2 operator * (float scalar) const
    {
        float2 temp;
        temp.s0 = s0 * scalar;
        temp.s1 = s1 * scalar;
        return temp;
    }

    /* Component-wise sum. */
    float2 operator + (const float2 &fl) const
    {
        float2 temp;
        temp.s0 = s0 + fl.s0;
        temp.s1 = s1 + fl.s1;
        return temp;
    }

    /* Component-wise difference. */
    float2 operator - (const float2 &fl) const
    {
        float2 temp;
        temp.s0 = s0 - fl.s0;
        temp.s1 = s1 - fl.s1;
        return temp;
    }
};
|
||||
|
||||
|
||||
/*
 * Two-component unsigned-integer vector with component-wise arithmetic.
 *
 * The type stays a plain aggregate (no constructors). Components are spelled
 * `unsigned int` rather than the non-standard `uint` alias. All operators
 * are const and take their vector operand by const reference, so they accept
 * const objects and temporaries, e.g. a * (b + c).
 */
struct uint2
{
    unsigned int s0;
    unsigned int s1;

    /* Component-wise product. */
    uint2 operator * (const uint2 &fl) const
    {
        uint2 temp;
        temp.s0 = s0 * fl.s0;
        temp.s1 = s1 * fl.s1;
        return temp;
    }

    /* Scales both components by scalar; each product is computed in float
       and converted back to unsigned int on assignment. */
    uint2 operator * (float scalar) const
    {
        uint2 temp;
        temp.s0 = s0 * scalar;
        temp.s1 = s1 * scalar;
        return temp;
    }

    /* Component-wise sum. */
    uint2 operator + (const uint2 &fl) const
    {
        uint2 temp;
        temp.s0 = s0 + fl.s0;
        temp.s1 = s1 + fl.s1;
        return temp;
    }

    /* Component-wise difference. */
    uint2 operator - (const uint2 &fl) const
    {
        uint2 temp;
        temp.s0 = s0 - fl.s0;
        temp.s1 = s1 - fl.s1;
        return temp;
    }
};
|
||||
|
||||
|
||||
/*
|
||||
* Prints no more than 256 elements of the given array.
|
||||
* Prints full array if length is less than 256.
|
||||
* Prints Array name followed by elements.
|
||||
*/
|
||||
template<typename T> void PrintArray(string header, const T * data, const int width, const int height);
|
||||
|
||||
template<typename T> int IsPowerOf2(T val);
|
||||
|
||||
template<typename T> T RoundToPowerOf2(T val);
|
||||
|
||||
template<typename T> int FillRandom(T * arrayPtr, const int width, const int height, const T rangeMin, const T rangeMax, unsigned int seed=123);
|
||||
|
||||
//get a memory region that can be used for global memory allocations.
|
||||
hsa_status_t get_global_region(hsa_region_t region, void* data);
|
||||
|
||||
/*
|
||||
* Finds the specified symbols offset in the specified brig_module.
|
||||
* If the symbol is found the function returns HSA_STATUS_SUCCESS,
|
||||
* otherwise it returns HSA_STATUS_ERROR.
|
||||
*/
|
||||
hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, char* symbol_name, hsa_ext_brig_code_section_offset32_t* offset);
|
||||
|
||||
/*
|
||||
* Determines if the given agent is of type HSA_DEVICE_TYPE_GPU
|
||||
* and sets the value of data to the agent handle if it is.
|
||||
*/
|
||||
hsa_status_t find_gpu(hsa_agent_t agent, void *data);
|
||||
|
||||
/*
|
||||
* Determines if a memory region can be used for kernarg
|
||||
* allocations.
|
||||
*/
|
||||
hsa_status_t get_kernarg(hsa_region_t region, void* data);
|
||||
|
||||
#endif
|
||||
Yeni konuda referans
Bir kullanıcı engelle