diff --git a/projects/rocr-runtime/samples/GetInfo/Makefile b/projects/rocr-runtime/samples/GetInfo/Makefile index 11141a08a8..ae780dfdcf 100644 --- a/projects/rocr-runtime/samples/GetInfo/Makefile +++ b/projects/rocr-runtime/samples/GetInfo/Makefile @@ -3,5 +3,5 @@ OPENCL_DEPTH = ../../.. include $(OPENCL_DEPTH)/runtimenew/runtimedefs SUBDIRS = build - + include $(OPENCL_DEPTH)/runtimenew/runtimerules diff --git a/projects/rocr-runtime/samples/GetInfo/build/Makefile.get_info b/projects/rocr-runtime/samples/GetInfo/build/Makefile.get_info index fb14b708b4..211dc86d4b 100644 --- a/projects/rocr-runtime/samples/GetInfo/build/Makefile.get_info +++ b/projects/rocr-runtime/samples/GetInfo/build/Makefile.get_info @@ -16,7 +16,7 @@ ifdef ATI_BITS_64 LIB_SUFFIX=64 BITS=64 CMPILERBITS=64 -else +else LIB_SUFFIX= BITS= ifdef ATI_OS_WINDOWS @@ -41,7 +41,7 @@ endif LCXXDEFS += -DAMD_INTERNAL_BUILD -vpath %.cpp $(COMPONENT_DEPTH) +vpath %.cpp $(COMPONENT_DEPTH) CPPFILES := $(notdir $(wildcard $(COMPONENT_DEPTH)/*.cpp)) TOOLS_TEST_COMMON=$(COMPONENT_DEPTH)/../common @@ -52,7 +52,7 @@ CPPFILES += $(notdir $(wildcard $(TOOLS_TEST_COMMON)/*.cpp)) ifdef ATI_OS_LINUX GCXXOPTS := $(filter-out -fno-rtti,$(GCXXOPTS)) GCXXOPTS := $(filter-out -fno-exceptions,$(GCXXOPTS)) - LFLAGS += -L$(DIST_LIB_DEST) -lpthread $(LIBSTDCXX) -lm -ldl -lrt -lstdc++ + LFLAGS += -L$(DIST_LIB_DEST) -lpthread $(LIBSTDCXX) -lm -ldl -lrt -lstdc++ #LFLAGS += -L$(DIST_LIB_DEST) -lpthread $(LIBSTDCXX) -lm -ldl -lrt endif diff --git a/projects/rocr-runtime/samples/GetInfo/get_info.cpp b/projects/rocr-runtime/samples/GetInfo/get_info.cpp index c77171acc5..dfc29cda37 100644 --- a/projects/rocr-runtime/samples/GetInfo/get_info.cpp +++ b/projects/rocr-runtime/samples/GetInfo/get_info.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "get_info.h" #include diff --git a/projects/rocr-runtime/samples/GetInfo/get_info.h b/projects/rocr-runtime/samples/GetInfo/get_info.h index 8d3ca421e6..6b0104a9f4 100644 --- a/projects/rocr-runtime/samples/GetInfo/get_info.h +++ b/projects/rocr-runtime/samples/GetInfo/get_info.h @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef GET_INFO_H #define GET_INFO_H diff --git a/projects/rocr-runtime/samples/Makefile b/projects/rocr-runtime/samples/Makefile index 03a6afad33..8c5c5b92ac 100644 --- a/projects/rocr-runtime/samples/Makefile +++ b/projects/rocr-runtime/samples/Makefile @@ -7,7 +7,7 @@ SUBDIRS += DwtHarr1D SUBDIRS += BitonicSort SUBDIRS += BinarySearch SUBDIRS += BlackScholes -SUBDIRS += FloydWarshall +SUBDIRS += FloydWarshall SUBDIRS += FastWalshTransform SUBDIRS += MatrixTranspose SUBDIRS += MatrixMultiplication diff --git a/projects/rocr-runtime/samples/common/common.cpp b/projects/rocr-runtime/samples/common/common.cpp index c0980fb8a1..f747dfa3c8 100644 --- a/projects/rocr-runtime/samples/common/common.cpp +++ b/projects/rocr-runtime/samples/common/common.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "common.hpp" void ErrorCheck(hsa_status_t hsa_error_code) { diff --git a/projects/rocr-runtime/samples/common/common.hpp b/projects/rocr-runtime/samples/common/common.hpp index 24925fa599..8fab3b05f4 100644 --- a/projects/rocr-runtime/samples/common/common.hpp +++ b/projects/rocr-runtime/samples/common/common.hpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef COMMON_COMMON_HPP #define COMMON_COMMON_HPP diff --git a/projects/rocr-runtime/samples/common/common_utility.cpp b/projects/rocr-runtime/samples/common/common_utility.cpp index 1474657b1a..f4b3a54462 100644 --- a/projects/rocr-runtime/samples/common/common_utility.cpp +++ b/projects/rocr-runtime/samples/common/common_utility.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "common_utility.h" @@ -14,7 +14,7 @@ double CalcMedian(vector scores) if (size % 2 == 0) median = (scores[size / 2 - 1] + scores[size / 2]) / 2; - else + else median = scores[size / 2]; return median; @@ -49,13 +49,13 @@ int CalcConcurrentQueues(vector scores) { int num_of_concurrent_queues = 0; vectorexecpted_exec_time_array; - + for (int i=0; i +template void printArray( - const std::string header, - const T * data, + const std::string header, + const T * data, const int width, const int height) { @@ -45,9 +45,9 @@ void printArray( std::cout<<"\n"; } -template +template int fillRandom( - T * arrayPtr, + T * arrayPtr, const int width, const int height, const T rangeMin, @@ -64,22 +64,22 @@ int fillRandom( seed = (unsigned int)time(NULL); srand(seed); - double range = double(rangeMax - rangeMin) + 1.0; + double range = double(rangeMax - rangeMin) + 1.0; /* random initialisation of input */ for(int i = 0; i < height; i++) for(int j = 0; j < width; j++) { int index = i*width + j; - arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0)); + arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0)); } return HSA_SDK_SUCCESS; } -template +template int fillPos( - T * arrayPtr, + T * arrayPtr, const int width, const int height) { @@ -100,9 +100,9 @@ int fillPos( return HSA_SDK_SUCCESS; } -template +template int fillConstant( - T * arrayPtr, + T * arrayPtr, const int width, const int height, const T val) @@ -131,7 +131,7 @@ T roundToPowerOf2(T val) val--; for(int i = 0; i < bytes; i++) - val |= val >> (1<> (1< bool checkVal( - T input, - T reference, + T input, + T reference, std::string message, bool isAPIerror) { @@ -162,7 +162,7 @@ bool checkVal( } else { - error(message); + error(message); return false; } } @@ -178,13 +178,13 @@ std::string toString(T t, std::ios_base &(*r)(std::ios_base&)) bool -compare(const float *refData, const float *data, +compare(const float *refData, const float *data, const int length, const float epsilon) { float error = 0.0f; float ref = 0.0f; - for(int i = 1; i < length; ++i) + for(int i = 1; i < length; ++i) { float diff = refData[i] - data[i]; error += diff * diff; @@ -202,13 +202,13 @@ compare(const float *refData, const float *data, } bool -compare(const double *refData, const double *data, +compare(const double *refData, const double *data, const int length, const double epsilon) { double error = 0.0; double ref = 0.0; - for(int i = 1; i < length; ++i) + for(int i = 1; i < length; ++i) { double diff = refData[i] - data[i]; error += diff * diff; @@ -225,25 +225,25 @@ compare(const double *refData, const double *data, return error < epsilon; } -void +void error(const char* errorMsg) { std::cout<<"Error: "<(const std::string, +template +void printArray(const std::string, const short*, int, int); -template -void printArray(const std::string, +template +void printArray(const std::string, const unsigned char *, int, int); -template -void printArray(const std::string, +template +void printArray(const std::string, const unsigned int *, int, int); -template -void printArray(const std::string, +template +void printArray(const std::string, const int *, int, int); -template -void printArray(const std::string, +template +void printArray(const std::string, const long*, int, int); -template -void printArray(const std::string, +template +void printArray(const std::string, const float*, int, int); -template -void printArray(const std::string, +template +void printArray(const std::string, const double*, int, int); -template -int fillRandom(unsigned char* arrayPtr, - const int width, const int height, - unsigned char rangeMin, unsigned char rangeMax, unsigned int seed); -template -int fillRandom(unsigned int* arrayPtr, - const int width, const int height, - unsigned int rangeMin, unsigned int rangeMax, unsigned int seed); -template -int fillRandom(int* arrayPtr, - const int width, const int height, - int rangeMin, int rangeMax, unsigned int seed); -template -int fillRandom(long* arrayPtr, - const int width, const int height, - long rangeMin, long rangeMax, unsigned int seed); -template -int fillRandom(float* arrayPtr, - const int width, const int height, - float rangeMin, float rangeMax, unsigned int seed); -template -int fillRandom(double* arrayPtr, - const int width, const int height, - double rangeMin, double rangeMax, unsigned int seed); +template +int fillRandom(unsigned char* arrayPtr, + const int width, const int height, + unsigned char rangeMin, unsigned char rangeMax, unsigned int seed); +template +int fillRandom(unsigned int* arrayPtr, + const int width, const int height, + unsigned int rangeMin, unsigned int rangeMax, unsigned int seed); +template +int fillRandom(int* arrayPtr, + const int width, const int height, + int rangeMin, int rangeMax, unsigned int seed); +template +int fillRandom(long* arrayPtr, + const int width, const int height, + long rangeMin, long rangeMax, unsigned int seed); +template +int fillRandom(float* arrayPtr, + const int width, const int height, + float rangeMin, float rangeMax, unsigned int seed); +template +int fillRandom(double* arrayPtr, + const int width, const int height, + double rangeMin, double rangeMax, unsigned int seed); -template +template short roundToPowerOf2(short val); -template +template unsigned int roundToPowerOf2(unsigned int val); -template +template int roundToPowerOf2(int val); -template +template long roundToPowerOf2(long val); template @@ -318,38 +318,38 @@ int isPowerOf2(int val); template int isPowerOf2(long val); -template<> +template<> int fillPos(short * arrayPtr, const int width, const int height); -template<> +template<> int fillPos(unsigned int * arrayPtr, const int width, const int height); -template<> +template<> int fillPos(int * arrayPtr, const int width, const int height); -template<> +template<> int fillPos(long * arrayPtr, const int width, const int height); -template<> -int fillConstant(short * arrayPtr, - const int width, const int height, +template<> +int fillConstant(short * arrayPtr, + const int width, const int height, const short val); -template<> -int fillConstant(unsigned int * arrayPtr, - const int width, const int height, +template<> +int fillConstant(unsigned int * arrayPtr, + const int width, const int height, const unsigned int val); -template<> -int fillConstant(int * arrayPtr, - const int width, const int height, +template<> +int fillConstant(int * arrayPtr, + const int width, const int height, const int val); -template<> -int fillConstant(long * arrayPtr, - const int width, const int height, +template<> +int fillConstant(long * arrayPtr, + const int width, const int height, const long val); -template<> -int fillConstant(long * arrayPtr, - const int width, const int height, +template<> +int fillConstant(long * arrayPtr, + const int width, const int height, const long val); -template<> -int fillConstant(long * arrayPtr, - const int width, const int height, +template<> +int fillConstant(long * arrayPtr, + const int width, const int height, const long val); diff --git a/projects/rocr-runtime/samples/common/helper_funcs.hpp b/projects/rocr-runtime/samples/common/helper_funcs.hpp index ae00f3e27c..a3eb3ab9e8 100755 --- a/projects/rocr-runtime/samples/common/helper_funcs.hpp +++ b/projects/rocr-runtime/samples/common/helper_funcs.hpp @@ -35,28 +35,28 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND /** * error - * constant function, Prints error messages + * constant function, Prints error messages * @param errorMsg char* message */ -void error(const char* errorMsg); +void error(const char* errorMsg); /** * error - * constant function, Prints error messages + * constant function, Prints error messages * @param errorMsg std::string message */ void error(std::string errorMsg); /** * expectedError - * constant function, Prints error messages + * constant function, Prints error messages * @param errorMsg char* message */ -void expectedError(const char* errorMsg); +void expectedError(const char* errorMsg); /** * expectedError - * constant function, Prints error messages + * constant function, Prints error messages * @param errorMsg string message */ void expectedError(std::string errorMsg); @@ -69,19 +69,19 @@ void expectedError(std::string errorMsg); * @param length number of values to compare * @param epsilon errorWindow */ -bool compare(const float *refData, const float *data, - const int length, const float epsilon = 1e-6f); -bool compare(const double *refData, const double *data, - const int length, const double epsilon = 1e-6); +bool compare(const float *refData, const float *data, + const int length, const float epsilon = 1e-6f); +bool compare(const double *refData, const double *data, + const int length, const double epsilon = 1e-6); /** * printArray * displays a array on std::out */ -template +template void printArray( const std::string header, - const T * data, + const T * data, const int width, const int height); @@ -90,37 +90,37 @@ void printArray( * fillRandom * fill array with random values */ -template +template int fillRandom( - T * arrayPtr, + T * arrayPtr, const int width, const int height, const T rangeMin, const T rangeMax, - unsigned int seed=123); - + unsigned int seed=123); + /** * fillPos * fill the specified positions */ -template +template int fillPos( - T * arrayPtr, + T * arrayPtr, const int width, const int height); - + /** * fillConstant * fill the array with constant value */ -template +template int fillConstant( - T * arrayPtr, + T * arrayPtr, const int width, const int height, const T val); - + /** * roundToPowerOf2 * rounds to a power of 2 @@ -134,16 +134,16 @@ T roundToPowerOf2(T val); */ template int isPowerOf2(T val); - + /** * checkVal - * Set default(isAPIerror) parameter to false - * if checkVaul is used to check otherthan OpenCL API error code + * Set default(isAPIerror) parameter to false + * if checkVaul is used to check otherthan OpenCL API error code */ -template +template bool checkVal( - T input, - T reference, + T input, + T reference, std::string message, bool isAPIerror = true); /** @@ -151,7 +151,7 @@ bool checkVal( * convert a T type to string */ template -std::string toString(T t, std::ios_base & (*r)(std::ios_base&)); +std::string toString(T t, std::ios_base & (*r)(std::ios_base&)); diff --git a/projects/rocr-runtime/samples/common/hsa_base_util.cpp b/projects/rocr-runtime/samples/common/hsa_base_util.cpp index bd2d9ee55f..a33e9124ef 100644 --- a/projects/rocr-runtime/samples/common/hsa_base_util.cpp +++ b/projects/rocr-runtime/samples/common/hsa_base_util.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "hsa_base_util.h" #include "HSAILAmdExt.h" @@ -38,8 +38,8 @@ bool HSA_UTIL::HsaInit() err = hsa_init(); check(Initializing the hsa runtime, err); - /* - * Iterate over the agents and pick the gpu agent using + /* + * Iterate over the agents and pick the gpu agent using * the find_gpu callback. */ err = hsa_iterate_agents(find_gpu, &device); @@ -57,7 +57,7 @@ bool HSA_UTIL::HsaInit() err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); check(Querying the device maximum queue size, err); - /* + /* * Create a queue using the maximum size. */ err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, 0, 0, &command_queue); @@ -66,15 +66,15 @@ bool HSA_UTIL::HsaInit() profile = hsa_profile_t(108); hsa_agent_get_info(device, HSA_AGENT_INFO_PROFILE, &profile); - if (profile == HSA_PROFILE_BASE) + if (profile == HSA_PROFILE_BASE) { memset(hail_file_name_full, 0, sizeof(char)*128); cout << "Loading base profile!!!" << endl; - strcpy(hail_file_name_full, hail_file_name_base); //overwrite full hsail file name with base - } - + strcpy(hail_file_name_full, hail_file_name_base); //overwrite full hsail file name with base + } + amd::hsail::registerExtensions(); - if (!tool.assembleFromFile(hail_file_name_full)) + if (!tool.assembleFromFile(hail_file_name_full)) { std::cout << tool.output(); return false; @@ -124,7 +124,7 @@ bool HSA_UTIL::HsaInit() check("get symbol handle", err); // Get code handle. - + err = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle); check("Get code handle", err); @@ -199,7 +199,7 @@ double HSA_UTIL::Run(int dim, int group_x, int group_y, int group_z, int s_size, hsa_agent_iterate_regions(device, get_kernarg, &kernarg_region); err = (kernarg_region.handle== 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS; check(Finding a kernarg memory region, err); - + */ void* local_kernel_arg_buffer = NULL; @@ -211,24 +211,24 @@ double HSA_UTIL::Run(int dim, int group_x, int group_y, int group_z, int s_size, memcpy(local_kernel_arg_buffer, kernel_args, kernel_args_size); local_dispatch_packet.kernarg_address = local_kernel_arg_buffer; - /* + /* * Obtain the current queue write index. */ uint64_t index = hsa_queue_load_write_index_relaxed(command_queue); - /* + /* * Write the aql packet at the calculated queue index address. */ const uint32_t queueMask = command_queue->size - 1; ((hsa_kernel_dispatch_packet_t*)(command_queue->base_address))[index&queueMask]=local_dispatch_packet; - /* + /* * Increment the write index and ring the doorbell to dispatch the kernel. */ hsa_queue_store_write_index_relaxed(command_queue, index+1); hsa_signal_store_release(command_queue->doorbell_signal, index); - /* + /* * Wait on the dispatch signal until all kernel are finished. */ while (hsa_signal_wait_acquire(local_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0); @@ -262,7 +262,7 @@ double HSA_UTIL::GetSetupTime() void HSA_UTIL::Close() { - err = hsa_executable_destroy(hsaExecutable); + err = hsa_executable_destroy(hsaExecutable); check(Destroying the hsaExecutable, err) err = hsa_code_object_destroy(code_object); @@ -275,12 +275,12 @@ void HSA_UTIL::Close() check(Shutting down the runtime, err); } -void* HSA_UTIL::AllocateLocalMemory(size_t size) +void* HSA_UTIL::AllocateLocalMemory(size_t size) { void *buffer = NULL; // Allocate in local memory only if it is available - if (mem_region.coarse_region.handle != 0) + if (mem_region.coarse_region.handle != 0) { cout << "Allocating in local memory" << endl; err = hsa_memory_allocate(mem_region.coarse_region, size, (void **)&buffer); @@ -304,18 +304,18 @@ void* HSA_UTIL::AllocateSysMemory( size_t size) return (err == HSA_STATUS_SUCCESS) ? buffer : NULL; } -bool HSA_UTIL::TransferData(void *dest, void *src, uint length, bool host_to_dev) +bool HSA_UTIL::TransferData(void *dest, void *src, uint length, bool host_to_dev) { hsa_status_t status; void *buffer = (host_to_dev) ? dest : src; err = hsa_memory_assign_agent(buffer, device, HSA_ACCESS_PERMISSION_RW); - if (err != HSA_STATUS_SUCCESS) + if (err != HSA_STATUS_SUCCESS) { return false; } - err = hsa_memory_copy(dest, src, length); // first is dest, second is src + err = hsa_memory_copy(dest, src, length); // first is dest, second is src return (err == HSA_STATUS_SUCCESS); } diff --git a/projects/rocr-runtime/samples/common/hsa_base_util.h b/projects/rocr-runtime/samples/common/hsa_base_util.h index dbf40a06e4..b0e7ac7943 100644 --- a/projects/rocr-runtime/samples/common/hsa_base_util.h +++ b/projects/rocr-runtime/samples/common/hsa_base_util.h @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef __HSA_BASE__ #define __HSA_BASE__ @@ -31,7 +31,7 @@ class HSA_UTIL{ void* AllocateLocalMemory(size_t size) ; void* AllocateSysMemory(size_t size); bool TransferData(void *dest, void *src, uint length, bool host_to_dev) ; - + double Run(int dim, int group_x, int group_y, int group_z, int s_size, int grid_x, int grid_y, int grid_z, void* kernel_args, int kernel_args_size); public: @@ -58,7 +58,7 @@ class HSA_UTIL{ hsa_code_object_t code_object; uint64_t codeHandle; hsa_signal_t hsa_signal; - hsa_kernel_dispatch_packet_t dispatch_packet; + hsa_kernel_dispatch_packet_t dispatch_packet; hsa_region_t hsa_kernarg_region; PerfTimer base_timer; diff --git a/projects/rocr-runtime/samples/common/hsa_perf_cntrs.cpp b/projects/rocr-runtime/samples/common/hsa_perf_cntrs.cpp index be1c8f7407..313f79f600 100644 --- a/projects/rocr-runtime/samples/common/hsa_perf_cntrs.cpp +++ b/projects/rocr-runtime/samples/common/hsa_perf_cntrs.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include #include #include @@ -89,32 +89,32 @@ hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) { // Add SQ counter for number of waves CntrInfo* info = NULL; cntrList_.reserve(23); - + char *cntrChoice = getenv("IOMMU"); if (cntrChoice == NULL) { // Event for number of Waves info = new CntrInfo(0x4, "SQ_SQ_PERF_SEL_WAVES", NULL, 0x0E, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact); cntrList_.push_back(info); - + // Event for number of Threads info = new CntrInfo(0xE, "SQ_SQ_PERF_SEL_ITEMS", NULL, 0x0E, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_Exact); cntrList_.push_back(info); - + } else { // Program to collect event number 4 info = new CntrInfo(0x4, "Iommu_Cntr_4", NULL, 0x63, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_None); cntrList_.push_back(info); - + // Program to collect event number 6 info = new CntrInfo(0x6, "Iommu_Cntr_6", NULL, 0x63, NULL, 0x00, 0xFFFFFFFF, CntrValCnf_None); cntrList_.push_back(info); } - + // Create an instance of Perf Mgr hsa_status_t status; @@ -127,7 +127,7 @@ hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) { uint32_t size = GetNumPerfCntrs(); for (uint32_t idx = 0; idx < size; idx++) { info = GetPerfCntr(idx); - + // Obtain the handle of perf block if (info->blkHndl == NULL) { status = hsa_ext_tools_get_counter_block_by_id(perfMgr_, info->blkId, &info->blkHndl); @@ -154,7 +154,7 @@ hsa_status_t RocrPerfCntrApp::Init(hsa_agent_t agent) { // Register Pre and Post dispatch callbacks void RocrPerfCntrApp::RegisterCallbacks(hsa_queue_t *queue){ - + hsa_status_t status; status = hsa_ext_tools_set_callback_functions(queue, PreDispatchCallback, PostDispatchCallback); assert((status == HSA_STATUS_SUCCESS) && "Error in registering Pre & Post Dispatch Callbacks"); diff --git a/projects/rocr-runtime/samples/common/hsa_perf_cntrs.hpp b/projects/rocr-runtime/samples/common/hsa_perf_cntrs.hpp index b01ac6a59e..c693e1a295 100644 --- a/projects/rocr-runtime/samples/common/hsa_perf_cntrs.hpp +++ b/projects/rocr-runtime/samples/common/hsa_perf_cntrs.hpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef ROCR_PERF_CNTR_APP_H_ #define ROCR_PERF_CNTR_APP_H_ @@ -20,7 +20,7 @@ #include "tools/inc/hsa_ext_profiler.h" typedef enum CntrValCnfType { - + ///< no counter value validation should be performed CntrValCnf_None, @@ -29,7 +29,7 @@ typedef enum CntrValCnfType { ///< counter value should be greater than expectedResult CntrValCnf_GreaterThan, - + ///< counter value should be less than expectedResult CntrValCnf_LessThan @@ -37,28 +37,28 @@ typedef enum CntrValCnfType { /// Struct used to encapsulate Counter Info typedef struct CntrInfo { - + ///< Id of counter in hardware block uint32_t cntrId; - + ///< Name of counter char cntrName[72]; - + ///< Handle of perf counter hsa_ext_tools_counter_t cntrHndl; - + ///< Id of hardware block containing the counter uint32_t blkId; - + ///< Handle of counter block hsa_ext_tools_counter_block_t blkHndl; - + ///< Expected value of perf counte uint64_t expectedResult; ///< Value of perf counter expected uint64_t cntrResult; - + ///< Type of validation upon completion of dispatch CntrValCnfType cnfType; @@ -74,7 +74,7 @@ typedef struct CntrInfo { this->cnfType = cnfType; memcpy(this->cntrName, cntrName, strlen(cntrName)); } - + } CntrInfo; class RocrPerfCntrApp { @@ -108,9 +108,9 @@ class RocrPerfCntrApp { // Validate perf counter values hsa_status_t Validate(); - + private: - + // Number of queues to create std::vector cntrList_; diff --git a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp index 57eb457ee9..ea365844ee 100755 --- a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp +++ b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include #include #include @@ -73,7 +73,7 @@ static hsa_status_t get_hsa_agents(hsa_agent_t agent, void *data) { rsrcFactory->AddAgentInfo(agent_info, false); return HSA_STATUS_SUCCESS; } - + // Device is a Gpu agent, build an instance of AgentInfo AgentInfo *agent_info = reinterpret_cast(malloc(sizeof(AgentInfo))); agent_info->dev_id = agent; @@ -89,7 +89,7 @@ static hsa_status_t get_hsa_agents(hsa_agent_t agent, void *data) { // Initialize memory regions to zero agent_info->kernarg_region.handle = 0; agent_info->coarse_region.handle = 0; - + // Find and Bind Memory regions of the Gpu agent hsa_agent_iterate_regions(agent, find_memregions, agent_info); @@ -343,7 +343,7 @@ bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info, return false; } brig_module_v3 = tool.brigModule(); - + // Create hsail program. hsa_ext_program_t hsailProgram; status = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, @@ -396,7 +396,7 @@ bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info, hsa_executable_symbol_t kernelSymbol; status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0, &kernelSymbol); - + // Update output parameter *code_desc = kernelSymbol; return true; @@ -440,7 +440,7 @@ bool HsaRsrcFactory::LoadAndFinalize(AgentInfo *agent_info, // Add an instance of AgentInfo representing a Hsa Gpu agent void HsaRsrcFactory::AddAgentInfo(AgentInfo *agent_info, bool gpu) { - + // Add input to Gpu list if (gpu) { gpu_list_.push_back(agent_info); @@ -511,7 +511,7 @@ uint32_t HsaRsrcFactory::GetPrintDebugInfo() { // Process command line arguments. The method will capture // various user command line parameters for tests to use void HsaRsrcFactory::ProcessCmdline( ) { - + // Command line arguments are given uint32_t idx; uint32_t arg_idx; @@ -572,7 +572,7 @@ uint32_t HsaRsrcFactory::GetArgIndex(char *arg_value ) { if (!strcmp(HsaRsrcFactory::print_debug_key_, arg_value)) { return 5; } - + return 108; } diff --git a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp index a57981ed72..6b3bcc4ce2 100755 --- a/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp +++ b/projects/rocr-runtime/samples/common/hsa_rsrc_factory.hpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef HSA_RSRC_FACTORY_H_ #define HSA_RSRC_FACTORY_H_ @@ -99,7 +99,7 @@ typedef struct { // Handle of Agent hsa_agent_t dev_id; - + // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 uint32_t dev_type; @@ -249,33 +249,33 @@ class HsaRsrcFactory { // Returns the number of work-items that can execute per wave static uint32_t GetNumOfWorkItemsPerWave( ); - + // Returns the number of times kernel loop body should execute. static uint32_t GetKernelLoopCount(); - + // Returns boolean flag to indicate if debug info should be printed static uint32_t GetPrintDebugInfo(); private: - + // Number of queues to create uint32_t num_queues_; // Used to maintain a list of Hsa Queue handles std::vector queue_list_; - + // Number of Signals to create uint32_t num_signals_; - + // Used to maintain a list of Hsa Signal handles std::vector signal_list_; - + // Number of agents reported by platform uint32_t num_agents_; - + // Used to maintain a list of Hsa Gpu Agent Info std::vector gpu_list_; - + // Used to maintain a list of Hsa Cpu Agent Info std::vector cpu_list_; @@ -311,11 +311,11 @@ class HsaRsrcFactory { // Print the various fields of Hsa Gpu Agents bool PrintGpuAgents( ); - + // Process command line arguments. The method will capture // various user command line parameters for tests to use static void ProcessCmdline( ); - + // Prints the help banner on user arg keys static void PrintHelpMsg( ); diff --git a/projects/rocr-runtime/samples/common/hsa_test.cpp b/projects/rocr-runtime/samples/common/hsa_test.cpp index 1250a12df7..1da3495353 100644 --- a/projects/rocr-runtime/samples/common/hsa_test.cpp +++ b/projects/rocr-runtime/samples/common/hsa_test.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "hsa_test.h" #include diff --git a/projects/rocr-runtime/samples/common/hsa_test.h b/projects/rocr-runtime/samples/common/hsa_test.h index b251e1839e..09b331788a 100644 --- a/projects/rocr-runtime/samples/common/hsa_test.h +++ b/projects/rocr-runtime/samples/common/hsa_test.h @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef HSA_TEST_H #define HSA_TEST_H diff --git a/projects/rocr-runtime/samples/common/hsatimer.cpp b/projects/rocr-runtime/samples/common/hsatimer.cpp index dbacdba2f7..46c9df72b9 100644 --- a/projects/rocr-runtime/samples/common/hsatimer.cpp +++ b/projects/rocr-runtime/samples/common/hsatimer.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "hsatimer.h" PerfTimer::PerfTimer() @@ -29,7 +29,7 @@ int PerfTimer::CreateTimer() newTimer->_clocks = 0; #ifdef _WIN32 - QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->_freq); + QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->_freq); #else newTimer->_freq = (long long)1.0E3; #endif @@ -46,7 +46,7 @@ int PerfTimer::StartTimer(int index) Error("Cannot reset timer. Invalid handle."); return HSA_FAILURE; } - + #ifdef _WIN32 // General Windows timing method #ifndef _AMD @@ -54,16 +54,16 @@ int PerfTimer::StartTimer(int index) QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart)); _timers[index]->_start = (double)tmpStart; #else - // AMD Windows timing method + // AMD Windows timing method #endif - + #else // General Linux timing method #ifndef _AMD struct timeval s; gettimeofday(&s, 0); - _timers[index]->_start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3)); + _timers[index]->_start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3)); #else // AMD timing method @@ -72,7 +72,7 @@ int PerfTimer::StartTimer(int index) _timers[index]->_start = __rdtscp(&unused); #endif - + #endif return HSA_SUCCESS; @@ -93,11 +93,11 @@ int PerfTimer::StopTimer(int index) QueryPerformanceCounter((LARGE_INTEGER*)&(n1)); n = (double) n1; #else - + // AMD Window Timing - + #endif - + #else // General Linux timing method #ifndef _AMD @@ -110,7 +110,7 @@ int PerfTimer::StopTimer(int index) unsigned int unused; n = __rdtscp(&unused); #endif - + #endif n -= _timers[index]->_start; @@ -123,7 +123,7 @@ int PerfTimer::StopTimer(int index) _timers[index]->_clocks += 1.0E-6 * 10 * n /freq_in_100mhz; // convert to ms cout << "_AMD is enabled!!!" << endl; #endif - + return HSA_SUCCESS; } @@ -141,16 +141,16 @@ double PerfTimer::ReadTimer(int index) Error("Cannot read timer. Invalid handle."); return HSA_FAILURE; } - + double reading = double(_timers[index]->_clocks); - + reading = double(reading / _timers[index]->_freq); - + return reading; } -uint64_t PerfTimer::CoarseTimestampUs() +uint64_t PerfTimer::CoarseTimestampUs() { #ifdef _WIN32 uint64_t freqHz, ticks; @@ -166,12 +166,12 @@ uint64_t PerfTimer::CoarseTimestampUs() return (ticks * 1000000) / freqHz; #else struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000; #endif } -uint64_t PerfTimer::MeasureTSCFreqHz() +uint64_t PerfTimer::MeasureTSCFreqHz() { // Make a coarse interval measurement of TSC ticks for 1 gigacycles. unsigned int unused; @@ -179,12 +179,12 @@ uint64_t PerfTimer::MeasureTSCFreqHz() uint64_t coarseBeginUs = CoarseTimestampUs(); uint64_t tscTicksBegin = __rdtscp(&unused); - do + do { tscTicksEnd = __rdtscp(&unused); - } + } while (tscTicksEnd - tscTicksBegin < 1000000000); - + uint64_t coarseEndUs = CoarseTimestampUs(); // Compute the TSC frequency and round to nearest 100MHz. diff --git a/projects/rocr-runtime/samples/common/hsatimer.h b/projects/rocr-runtime/samples/common/hsatimer.h index fb6ae8b3e6..eb8f0d4e9c 100644 --- a/projects/rocr-runtime/samples/common/hsatimer.h +++ b/projects/rocr-runtime/samples/common/hsatimer.h @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef __MYTIME__ #define __MYTIME__ diff --git a/projects/rocr-runtime/samples/common/os.cpp b/projects/rocr-runtime/samples/common/os.cpp index 81e0809af8..f6c20c573b 100755 --- a/projects/rocr-runtime/samples/common/os.cpp +++ b/projects/rocr-runtime/samples/common/os.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifdef _WIN32 // Compiling for Windows Platform #include diff --git a/projects/rocr-runtime/samples/common/os.h b/projects/rocr-runtime/samples/common/os.h index 27e1e5e6f7..052c6c9a49 100755 --- a/projects/rocr-runtime/samples/common/os.h +++ b/projects/rocr-runtime/samples/common/os.h @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef HSA_PERF_SRC_UTILS_OS_H_ #define HSA_PERF_SRC_UTILS_OS_H_ diff --git a/projects/rocr-runtime/samples/common/utilities.cpp b/projects/rocr-runtime/samples/common/utilities.cpp index efa39671fd..f120b49d24 100644 --- a/projects/rocr-runtime/samples/common/utilities.cpp +++ b/projects/rocr-runtime/samples/common/utilities.cpp @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #include "utilities.h" /* @@ -11,12 +11,12 @@ * Prints full array if length is less than 256. * Prints Array name followed by elements. */ -template +template void PrintArray( - string header, - const T * data, + string header, + const T * data, const int width, - const int height) + const int height) { cout<<"\n"<> (1<> (1< +template int FillRandom( - T * arrayPtr, + T * arrayPtr, const int width, const int height, const T rangeMin, @@ -73,14 +73,14 @@ int FillRandom( seed = (unsigned int)time(NULL); srand(seed); - double range = double(rangeMax - rangeMin) + 1.0; + double range = double(rangeMax - rangeMin) + 1.0; /* random initialisation of input */ for(int i = 0; i < height; i++) for(int j = 0; j < width; j++) { int index = i*width + j; - arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0)); + arrayPtr[index] = rangeMin + T(range*rand()/(RAND_MAX + 1.0)); } return 0; @@ -88,11 +88,11 @@ int FillRandom( #if 0 //get a memory region that can be used for global memory allocations. -hsa_status_t get_global_region(hsa_region_t region, void* data) +hsa_status_t get_global_region(hsa_region_t region, void* data) { hsa_region_segment_t segment; hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment); - if (HSA_REGION_SEGMENT_GLOBAL == segment) + if (HSA_REGION_SEGMENT_GLOBAL == segment) { hsa_region_t* ret = (hsa_region_t*) data; *ret = region; @@ -103,41 +103,41 @@ hsa_status_t get_global_region(hsa_region_t region, void* data) /* * Finds the specified symbols offset in the specified brig_module. - * If the symbol is found the function returns HSA_STATUS_SUCCESS, + * If the symbol is found the function returns HSA_STATUS_SUCCESS, * otherwise it returns HSA_STATUS_ERROR. */ -hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, +hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, char* symbol_name, - hsa_ext_brig_code_section_offset32_t* offset) + hsa_ext_brig_code_section_offset32_t* offset) { - /* - * Get the data section + /* + * Get the data section */ - hsa_ext_brig_section_header_t* data_section_header = + hsa_ext_brig_section_header_t* data_section_header = brig_module->section[HSA_EXT_BRIG_SECTION_DATA]; - /* + /* * Get the code section */ hsa_ext_brig_section_header_t* code_section_header = brig_module->section[HSA_EXT_BRIG_SECTION_CODE]; - /* + /* * First entry into the BRIG code section */ BrigCodeOffset32_t code_offset = code_section_header->header_byte_count; BrigBase* code_entry = (BrigBase*) ((char*)code_section_header + code_offset); - while (code_offset != code_section_header->byte_count) + while (code_offset != code_section_header->byte_count) { - if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) + if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) { - /* + /* * Now find the data in the data section */ BrigDirectiveExecutable* directive_kernel = (BrigDirectiveExecutable*) (code_entry); BrigDataOffsetString32_t data_name_offset = directive_kernel->name; BrigData* data_entry = (BrigData*)((char*) data_section_header + data_name_offset); - if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name))) + if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name))) { *offset = code_offset; return HSA_STATUS_SUCCESS; @@ -145,7 +145,7 @@ hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, } code_offset += code_entry->byteCount; code_entry = (BrigBase*) ((char*)code_section_header + code_offset); - } + } return HSA_STATUS_ERROR; } #endif @@ -154,22 +154,22 @@ hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, * Determines if the given agent is of type HSA_DEVICE_TYPE_GPU * and sets the value of data to the agent handle if it is. */ -hsa_status_t find_gpu(hsa_agent_t agent, void *data) +hsa_status_t find_gpu(hsa_agent_t agent, void *data) { - if (data == NULL) + if (data == NULL) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; - } + } hsa_device_type_t device_type; hsa_status_t stat = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); - if (stat != HSA_STATUS_SUCCESS) + if (stat != HSA_STATUS_SUCCESS) { return stat; - } - if (device_type == HSA_DEVICE_TYPE_GPU) + } + if (device_type == HSA_DEVICE_TYPE_GPU) { *((hsa_agent_t *)data) = agent; - } + } return HSA_STATUS_SUCCESS; } @@ -178,22 +178,22 @@ hsa_status_t find_gpu(hsa_agent_t agent, void *data) * Determines if a memory region can be used for kernarg * allocations. */ -hsa_status_t get_memory_region(hsa_region_t region, void* data) +hsa_status_t get_memory_region(hsa_region_t region, void* data) { hsa_region_global_flag_t flags; hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); MemRegion *my_mem_region = (MemRegion *)data; - + if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) { my_mem_region->coarse_region = region; } - - if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) + + if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) { my_mem_region->kernarg_region= region; - } - + } + return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/samples/common/utilities.h b/projects/rocr-runtime/samples/common/utilities.h index 16f1bdf642..c3ffea6ff1 100644 --- a/projects/rocr-runtime/samples/common/utilities.h +++ b/projects/rocr-runtime/samples/common/utilities.h @@ -1,9 +1,9 @@ /* - * Copyright © Advanced Micro Devices, Inc., or its affiliates. - * + * Copyright © Advanced Micro Devices, Inc., or its affiliates. + * * SPDX-License-Identifier: MIT */ - + #ifndef __HSA_UTILITY__ #define __HSA_UTILITY__ @@ -152,7 +152,7 @@ struct float2 temp.s1 = (this->s1) + fl.s1; return temp; } - + float2 operator - (float2 fl) { float2 temp; @@ -192,7 +192,7 @@ struct uint2 temp.s1 = (this->s1) + fl.s1; return temp; } - + uint2 operator - (uint2 fl) { uint2 temp; @@ -217,14 +217,14 @@ template T RoundToPowerOf2(T val); template int FillRandom(T * arrayPtr, const int width, const int height, const T rangeMin, const T rangeMax, unsigned int seed=123); //get a memory region that can be used for global memory allocations. -hsa_status_t get_global_region(hsa_region_t region, void* data); +hsa_status_t get_global_region(hsa_region_t region, void* data); /* * Finds the specified symbols offset in the specified brig_module. - * If the symbol is found the function returns HSA_STATUS_SUCCESS, + * If the symbol is found the function returns HSA_STATUS_SUCCESS, * otherwise it returns HSA_STATUS_ERROR. */ - + //hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, char* symbol_name, hsa_ext_brig_code_section_offset32_t* offset); /*