SWDEV-307394: Fixing Download HSA-Class Issue

Exchanging the git clone of the hsa-class to a local downloaded version pushed to the roctracer repo

Change-Id: Id45a38b2d355102c2e0dee1e4bfde50398369047
This commit is contained in:
Ammar ELWazir
2022-03-29 22:27:25 +00:00
rodzic bfea525ff3
commit 7ee4f87b73
28 zmienionych plików z 3886 dodań i 4 usunięć
-1
Wyświetl plik
@@ -5,6 +5,5 @@
*.swp
*.Po
build
test/hsa
test/MatrixTranspose/MatrixTranspose
test/MatrixTranspose_test/MatrixTranspose
-3
Wyświetl plik
@@ -44,7 +44,6 @@ endif ()
## Path to HSA test
set ( HSA_TEST_DIR "${TEST_DIR}/hsa/test" )
set ( HSA_REV "f8b3870" )
## test run script
set ( RUN_SCRIPT "${TEST_DIR}/run.sh" )
@@ -70,8 +69,6 @@ add_custom_target( mytest
)
## Build HSA test
execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" )
execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout ${HSA_REV}; fi" )
set ( TMP ${TEST_DIR} )
set ( TEST_DIR ${HSA_TEST_DIR} )
add_subdirectory ( ${HSA_TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa )
+20
Wyświetl plik
@@ -0,0 +1,20 @@
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
[MITx11 license]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+4
Wyświetl plik
@@ -0,0 +1,4 @@
# HSA-class
```
HSA high level C++ API
```
+80
Wyświetl plik
@@ -0,0 +1,80 @@
#!/bin/sh -x
SO_EXT="hsaco"
TEST_NAME=$1
DST_DIR=$2
ROCM_DIR=$3
TGT_LIST=$4
if [ -z "$TEST_NAME" ] ; then
echo "Usage: $0 <test name> <dst dir>"
echo " Will look for <test name>.cl and will build <test name>.$SO_EXT dynamic code object library"
exit 1
fi
OBJ_NAME=$(echo "_$(basename $TEST_NAME)" | sed -e 's/_./\U&\E/g' -e 's/_//g')
if [ -z "$DST_DIR" ] ; then
DST_DIR=$(dirname TEST_NAME)
fi
if [ -z "$ROCM_DIR" ] ; then
ROCM_DIR=/opt/rocm
fi
if [ -z "$TGT_LIST" ] ; then
TGT_LIST=`$ROCM_DIR/bin/rocminfo | grep "amdgcn-amd-amdhsa--" | head -n 1 | sed -n "s/^.*amdgcn-amd-amdhsa--\(\w*\).*$/\1/p"`
fi
if [ -z "$TGT_LIST" ] ; then
echo "Error: GPU targets not found"
exit 1
fi
OCL_VER="2.0"
if [ -e $ROCM_DIR/llvm ] ; then
LLVM_DIR=$ROCM_DIR/llvm
LIB_DIR=$ROCM_DIR/lib
else
LLVM_DIR=$ROCM_DIR/hcc
LIB_DIR=$LLVM_DIR/lib
fi
# Determine whether using new or old device-libs layout
if [ -e $LIB_DIR/bitcode/opencl.amdgcn.bc ]; then
BC_DIR=$LIB_DIR/bitcode
elif [ -e $LIB_DIR/opencl.amdgcn.bc ]; then
BC_DIR=$LIB_DIR
elif [ -e $ROCM_DIR/amdgcn/bitcode/opencl.bc ]; then
BC_DIR=$ROCM_DIR/amdgcn/bitcode
else
echo "Error: Cannot find amdgcn bitcode directory"
exit 1
fi
CLANG_ROOT=$LLVM_DIR/lib/clang
CLANG_DIR=`ls -d $CLANG_ROOT/* | head -n 1`
if [ "$CLANG_DIR" = "" ] ; then
echo "Error: LLVM clang library was not found"
exit 1
fi
BIN_DIR=$LLVM_DIR/bin
INC_DIR=$CLANG_DIR/include
if [ -e $BC_DIR/opencl.amdgcn.bc ]; then
BITCODE_OPTS="-nogpulib \
-Xclang -mlink-bitcode-file -Xclang $BC_DIR/opencl.amdgcn.bc \
-Xclang -mlink-bitcode-file -Xclang $BC_DIR/ockl.amdgcn.bc \
-Xclang -mlink-bitcode-file -Xclang $BC_DIR/ocml.amdgcn.bc"
else
BITCODE_OPTS="--hip-device-lib-path=$BC_DIR"
fi
for GFXIP in $TGT_LIST ; do
OBJ_PREF=$GFXIP
OBJ_FILE="${OBJ_PREF}_${OBJ_NAME}.$SO_EXT"
$BIN_DIR/clang -cl-std=CL$OCL_VER -include $INC_DIR/opencl-c.h $BITCODE_OPTS -target amdgcn-amd-amdhsa -mcpu=$GFXIP $TEST_NAME.cl -o $DST_DIR/$OBJ_FILE
echo "'$OBJ_FILE' generated"
done
exit 0
+761
Wyświetl plik
@@ -0,0 +1,761 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include "util/hsa_rsrc_factory.h"
#include <dlfcn.h>
#include <fcntl.h>
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <hsa_ext_finalize.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <atomic>
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Callback function to get available in the system agents
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
hsa_status_t status = HSA_STATUS_ERROR;
HsaRsrcFactory* hsa_rsrc = reinterpret_cast<HsaRsrcFactory*>(data);
const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent);
if (agent_info != NULL) status = HSA_STATUS_SUCCESS;
return status;
}
// This function checks to see if the provided
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
// the function adds an additional requirement that the pool have the
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
// pools must NOT have this property.
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
// no pool was found meeting the requirements. If an error is encountered, we
// return that error.
static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
hsa_status_t err;
hsa_amd_segment_t segment;
uint32_t flag;
if (nullptr == data) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
return HSA_STATUS_SUCCESS;
}
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) {
return HSA_STATUS_SUCCESS;
}
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
return HSA_STATUS_INFO_BREAK;
}
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
return FindGlobalPool(pool, data, false);
}
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
return FindGlobalPool(pool, data, true);
}
// Constructor of the class
HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) {
hsa_status_t status;
cpu_pool_ = NULL;
kern_arg_pool_ = NULL;
InitHsaApiTable(NULL);
// Initialize the Hsa Runtime
if (initialize_hsa_) {
status = hsa_api_.hsa_init();
CHECK_STATUS("Error in hsa_init", status);
}
// Discover the set of Gpu devices available on the platform
status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this);
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
// Get AqlProfile API table
aqlprofile_api_ = {0};
#ifdef ROCP_LD_AQLPROFILE
status = LoadAqlProfileLib(&aqlprofile_api_);
#else
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_);
#endif
CHECK_STATUS("aqlprofile API table load failed", status);
// Get Loader API table
loader_api_ = {0};
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_);
CHECK_STATUS("loader API table query failed", status);
// Instantiate HSA timer
timer_ = new HsaTimer(&hsa_api_);
CHECK_STATUS("HSA timer allocation failed",
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
// Time correlation
const uint32_t corr_iters = 1000;
CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters);
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
// System timeout
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
}
// Destructor of the class
HsaRsrcFactory::~HsaRsrcFactory() {
delete timer_;
for (auto p : cpu_list_) delete p;
for (auto p : gpu_list_) delete p;
if (initialize_hsa_) {
hsa_status_t status = hsa_api_.hsa_shut_down();
CHECK_STATUS("Error in hsa_shut_down", status);
}
}
void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
std::lock_guard<mutex_t> lck(mutex_);
if (hsa_api_.hsa_init == NULL) {
if (table != NULL) {
hsa_api_.hsa_init = table->core_->hsa_init_fn;
hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn;
hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn;
hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn;
hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn;
hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn;
hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn;
hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn;
hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn;
hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn;
hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn;
hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;
hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn;
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn;
hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn;
hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn;
hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn;
hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn;
hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
} else {
hsa_api_.hsa_init = hsa_init;
hsa_api_.hsa_shut_down = hsa_shut_down;
hsa_api_.hsa_agent_get_info = hsa_agent_get_info;
hsa_api_.hsa_iterate_agents = hsa_iterate_agents;
hsa_api_.hsa_queue_create = hsa_queue_create;
hsa_api_.hsa_queue_destroy = hsa_queue_destroy;
hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed;
hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed;
hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed;
hsa_api_.hsa_signal_create = hsa_signal_create;
hsa_api_.hsa_signal_destroy = hsa_signal_destroy;
hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed;
hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed;
hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire;
hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease;
hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file;
hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
hsa_api_.hsa_system_get_info = hsa_system_get_info;
hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools;
hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info;
hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate;
hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access;
hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy;
hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler;
hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled;
hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time;
hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time;
}
}
}
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
if (handle == NULL) {
fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
return HSA_STATUS_ERROR;
}
dlerror(); /* Clear any existing error */
api->hsa_ven_amd_aqlprofile_error_string =
(decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym(
handle, "hsa_ven_amd_aqlprofile_error_string");
api->hsa_ven_amd_aqlprofile_validate_event =
(decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym(
handle, "hsa_ven_amd_aqlprofile_validate_event");
api->hsa_ven_amd_aqlprofile_start =
(decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start");
api->hsa_ven_amd_aqlprofile_stop =
(decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop");
#ifdef AQLPROF_NEW_API
api->hsa_ven_amd_aqlprofile_read =
(decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read");
#endif
api->hsa_ven_amd_aqlprofile_legacy_get_pm4 =
(decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym(
handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym(
handle, "hsa_ven_amd_aqlprofile_get_info");
api->hsa_ven_amd_aqlprofile_iterate_data =
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym(
handle, "hsa_ven_amd_aqlprofile_iterate_data");
return HSA_STATUS_SUCCESS;
}
// Add system agent info
const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
// Determine if device is a Gpu agent
hsa_status_t status;
AgentInfo* agent_info = NULL;
hsa_device_type_t type;
status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
if (type == HSA_DEVICE_TYPE_CPU) {
agent_info = new AgentInfo{};
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
agent_info->dev_index = cpu_list_.size();
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool;
agent_info->gpu_pool = {};
cpu_list_.push_back(agent_info);
cpu_agents_.push_back(agent);
}
if (type == HSA_DEVICE_TYPE_GPU) {
agent_info = new AgentInfo{};
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
strncpy(agent_info->gfxip, agent_info->name, 4);
agent_info->gfxip[4] = '\0';
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
&agent_info->cu_num);
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
&agent_info->waves_per_cu);
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
&agent_info->simds_per_cu);
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
&agent_info->se_num);
hsa_api_.hsa_agent_get_info(agent,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
&agent_info->shader_arrays_per_se);
agent_info->cpu_pool = {};
agent_info->kern_arg_pool = {};
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
// GFX8 and GFX9 SGPR/VGPR block sizes
agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2;
agent_info->sgpr_block_size = 8;
agent_info->vgpr_block_size = 4;
// Set GPU index
agent_info->dev_index = gpu_list_.size();
gpu_list_.push_back(agent_info);
gpu_agents_.push_back(agent);
}
if (agent_info) agent_map_[agent.handle] = agent_info;
return agent_info;
}
// Return systen agent info
const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
const AgentInfo* agent_info = NULL;
auto it = agent_map_.find(agent.handle);
if (it != agent_map_.end()) {
agent_info = it->second;
}
return agent_info;
}
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(gpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = gpu_list_[idx];
return true;
}
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(cpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = cpu_list_[idx];
return true;
}
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
hsa_queue_t** queue) {
hsa_status_t status;
status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);
return (status == HSA_STATUS_SUCCESS);
}
// Create a Signal object and return its handle.
// @param value Initial value of signal object
// @param signal Output parameter updated with handle of signal object
// @return bool true if successful, false otherwise
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_api_.hsa_signal_create(value, 0, NULL, signal);
return (status == HSA_STATUS_SUCCESS);
}
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Allocate memory to pass kernel parameters.
// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
if (!cpu_agents_.empty()) {
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast<void**>(&buffer));
// Both the CPU and GPU can access the kernel arguments
if (status == HSA_STATUS_SUCCESS) {
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Allocate system memory accessible by both CPU and GPU
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (!cpu_agents_.empty()) {
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast<void**>(&buffer));
// Both the CPU and GPU can access the memory
if (status == HSA_STATUS_SUCCESS) {
hsa_agent_t ag_list[1] = {agent_info->dev_id};
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
}
}
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
return ptr;
}
// Allocate memory for command buffer.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) {
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP)
? reinterpret_cast<uint8_t*>(
mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0))
: AllocateSysMemory(agent_info, size);
return ptr;
}
// Wait signal
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
const hsa_signal_value_t exp_value = signal_value - 1;
hsa_signal_value_t ret_value = signal_value;
while (1) {
ret_value =
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED);
if (ret_value == exp_value) break;
if (ret_value != signal_value) {
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
<< "), ret_value(" << ret_value << ")" << std::endl << std::flush;
abort();
}
}
return ret_value;
}
// Wait signal with signal value restore
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
SignalWait(signal, signal_value);
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
}
// Copy data from GPU to host memory
bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
if (!cpu_agents_.empty()) {
hsa_signal_t s = {};
status = hsa_api_.hsa_signal_create(1, 0, NULL, &s);
CHECK_STATUS("hsa_signal_create()", status);
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
SignalWait(s, 1);
status = hsa_api_.hsa_signal_destroy(s);
CHECK_STATUS("hsa_signal_destroy()", status);
}
return (status == HSA_STATUS_SUCCESS);
}
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
return Memcpy(agent_info->dev_id, dst, src, size);
}
// Memory free method
bool HsaRsrcFactory::FreeMemory(void* ptr) {
const hsa_status_t status = hsa_memory_free(ptr);
CHECK_STATUS("hsa_memory_free", status);
return (status == HSA_STATUS_SUCCESS);
}
// Loads an Assembled Brig file and Finalizes it into Device Isa
// @param agent_info Gpu device for which to finalize
// @param brig_path File path of the Assembled Brig file
// @param kernel_name Name of the kernel to finalize
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
// @return bool true if successful, false otherwise
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
const char* kernel_name, hsa_executable_t* executable,
hsa_executable_symbol_t* code_desc) {
hsa_status_t status = HSA_STATUS_ERROR;
// Build the code object filename
std::string filename(brig_path);
std::clog << "Code object filename: " << filename << std::endl;
// Open the file containing code object
hsa_file_t file_handle = open(filename.c_str(), O_RDONLY);
if (file_handle == -1) {
std::cerr << "Error: failed to load '" << filename << "'" << std::endl;
assert(false);
return false;
}
// Create code object reader
hsa_code_object_reader_t code_obj_rdr = {0};
status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
if (status != HSA_STATUS_SUCCESS) {
std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl;
return false;
}
// Create executable.
status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
NULL, executable);
CHECK_STATUS("Error in creating executable object", status);
// Load code object.
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr,
NULL, NULL);
CHECK_STATUS("Error in loading executable object", status);
// Freeze executable.
status = hsa_api_.hsa_executable_freeze(*executable, "");
CHECK_STATUS("Error in freezing executable object", status);
// Get symbol handle.
hsa_executable_symbol_t kernelSymbol;
status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
// Update output parameter
*code_desc = kernelSymbol;
return true;
}
// Print the various fields of Hsa Gpu Agents
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
std::cout << std::flush;
std::clog << header << " :" << std::endl;
const AgentInfo* agent_info;
int size = uint32_t(gpu_list_.size());
for (int idx = 0; idx < size; idx++) {
agent_info = gpu_list_[idx];
std::clog << "> agent[" << idx << "] :" << std::endl;
std::clog << ">> Name : " << agent_info->name << std::endl;
std::clog << ">> APU : " << agent_info->is_apu << std::endl;
std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl;
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
std::clog << ">> CU number : " << agent_info->cu_num << std::endl;
std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl;
std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl;
std::clog << ">> SE number : " << agent_info->se_num << std::endl;
std::clog << ">> Shader Arrays per SE : " << agent_info->shader_arrays_per_se << std::endl;
}
return true;
}
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
// adevance command queue
const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue);
hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1);
while ((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) {
sched_yield();
}
uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
uint32_t* queue_slot = reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
// Copy buffered commands into the queue slot.
// Overwrite the AQL invalid header (first dword) last.
// This prevents the slot from being read until it's fully written.
memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
std::atomic<uint32_t>* header_atomic_ptr =
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
// ringdoor bell
hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);
return write_idx;
}
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) {
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
if ((size_bytes & (slot_size_b - 1)) != 0) {
fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes);
abort();
}
const char* begin = reinterpret_cast<const char*>(packet);
const char* end = begin + size_bytes;
uint64_t write_idx = 0;
for (const char* ptr = begin; ptr < end; ptr += slot_size_b) {
write_idx = Submit(queue, ptr);
}
return write_idx;
}
const char* HsaRsrcFactory::GetKernelName(uint64_t addr) {
std::lock_guard<mutex_t> lck(mutex_);
const auto it = symbols_map_->find(addr);
if (it == symbols_map_->end()) {
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
abort();
}
return strdup(it->second);
}
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
std::lock_guard<mutex_t> lck(mutex_);
executable_tracking_on_ = true;
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
}
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
CHECK_STATUS("Error in getting symbol info", status);
if (value == HSA_SYMBOL_KIND_KERNEL) {
uint64_t addr = 0;
uint32_t len = 0;
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
CHECK_STATUS("Error in getting kernel object", status);
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
CHECK_STATUS("Error in getting name len", status);
char *name = new char[len + 1];
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
CHECK_STATUS("Error in getting kernel name", status);
name[len] = 0;
auto ret = symbols_map_->insert({addr, name});
if (ret.second == false) {
delete[] ret.first->second;
ret.first->second = name;
}
}
return HSA_STATUS_SUCCESS;
}
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
std::lock_guard<mutex_t> lck(mutex_);
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
CHECK_STATUS("Error in iterating executable symbols", status);
return hsa_api_.hsa_executable_freeze(executable, options);;
}
void HsaRsrcFactory::DumpHandles(FILE* file) {
auto beg = agent_map_.begin();
auto end = agent_map_.end();
for (auto it = beg; it != end; ++it) {
const AgentInfo* agent_info = it->second;
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
if (agent_info->cpu_pool.handle != 0) fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
if (agent_info->kern_arg_pool.handle != 0) fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
if (agent_info->gpu_pool.handle != 0) fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle);
}
fflush(file);
}
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
bool HsaRsrcFactory::executable_tracking_on_ = false;
HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL;
+516
Wyświetl plik
@@ -0,0 +1,516 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef _HSA_RSRC_FACTORY_H_
#define _HSA_RSRC_FACTORY_H_
#include <hsa.h>
#include <hsa_api_trace.h>
#include <hsa_ext_amd.h>
#include <hsa_ext_finalize.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <hsa_ven_amd_loader.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <atomic>
#include <iostream>
#include <mutex>
#include <map>
#include <string>
#include <vector>
#define HSA_ARGUMENT_ALIGN_BYTES 16
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64
#define CHECK_STATUS(msg, status) do { \
if ((status) != HSA_STATUS_SUCCESS) { \
const char* emsg = 0; \
hsa_status_string(status, &emsg); \
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
abort(); \
} \
} while (0)
#define CHECK_ITER_STATUS(msg, status) do { \
if ((status) != HSA_STATUS_INFO_BREAK) { \
const char* emsg = 0; \
hsa_status_string(status, &emsg); \
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
abort(); \
} \
} while (0)
static const size_t MEM_PAGE_BYTES = 0x1000;
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
struct hsa_pfn_t {
decltype(hsa_init)* hsa_init;
decltype(hsa_shut_down)* hsa_shut_down;
decltype(hsa_agent_get_info)* hsa_agent_get_info;
decltype(hsa_iterate_agents)* hsa_iterate_agents;
decltype(hsa_queue_create)* hsa_queue_create;
decltype(hsa_queue_destroy)* hsa_queue_destroy;
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
decltype(hsa_signal_create)* hsa_signal_create;
decltype(hsa_signal_destroy)* hsa_signal_destroy;
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed;
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed;
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire;
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease;
decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file;
decltype(hsa_executable_create_alt)* hsa_executable_create_alt;
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
decltype(hsa_executable_freeze)* hsa_executable_freeze;
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
decltype(hsa_system_get_info)* hsa_system_get_info;
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools;
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info;
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate;
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy;
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler;
decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled;
decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time;
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time;
};
// Encapsulates information about a Hsa Agent such as its
// handle, name, max queue size, max wavefront size, etc.
struct AgentInfo {
// Handle of Agent
hsa_agent_t dev_id;
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
uint32_t dev_type;
// APU flag
bool is_apu;
// Agent system index
uint32_t dev_index;
// GFXIP name
char gfxip[64];
// Name of Agent whose length is less than 64
char name[64];
// Max size of Wavefront size
uint32_t max_wave_size;
// Max size of Queue buffer
uint32_t max_queue_size;
// Hsail profile supported by agent
hsa_profile_t profile;
// CPU/GPU/kern-arg memory pools
hsa_amd_memory_pool_t cpu_pool;
hsa_amd_memory_pool_t gpu_pool;
hsa_amd_memory_pool_t kern_arg_pool;
// The number of compute unit available in the agent.
uint32_t cu_num;
// Maximum number of waves possible in a Compute Unit.
uint32_t waves_per_cu;
// Number of SIMD's per compute unit CU
uint32_t simds_per_cu;
// Number of Shader Engines (SE) in Gpu
uint32_t se_num;
// Number of Shader Arrays Per Shader Engines in Gpu
uint32_t shader_arrays_per_se;
// SGPR/VGPR block sizes
uint32_t sgpr_block_dflt;
uint32_t sgpr_block_size;
uint32_t vgpr_block_size;
};
// HSA timer class
// Provides current HSA timestampa and system-clock/ns conversion API
class HsaTimer {
public:
typedef uint64_t timestamp_t;
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
typedef long double freq_t;
enum time_id_t {
TIME_ID_CLOCK_REALTIME = 0,
TIME_ID_CLOCK_MONOTONIC = 1,
TIME_ID_NUMBER
};
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
timestamp_t sysclock_hz = 0;
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
}
// Methods for system-clock/ns conversion
timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const {
return timestamp_t((freq_t)sysclock * sysclock_factor_);
}
timestamp_t ns_to_sysclock(const timestamp_t& time) const {
return timestamp_t((freq_t)time / sysclock_factor_);
}
// Method for timespec/ns conversion
static timestamp_t timespec_to_ns(const timespec& time) {
return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec;
}
// Return timestamp in 'ns'
timestamp_t timestamp_ns() const {
timestamp_t sysclock;
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status);
return sysclock_to_ns(sysclock);
}
// Return time in 'ns'
static timestamp_t clocktime_ns(clockid_t clock_id) {
timespec time;
clock_gettime(clock_id, &time);
return timespec_to_ns(time);
}
// Return pair of correlated values of profiling timestamp and time with
// correlation error for a given time ID and number of iterations
void correlated_pair_ns(time_id_t time_id, uint32_t iters,
timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const {
clockid_t clock_id = 0;
switch (clock_id) {
case TIME_ID_CLOCK_REALTIME:
clock_id = CLOCK_REALTIME;
break;
case TIME_ID_CLOCK_MONOTONIC:
clock_id = CLOCK_MONOTONIC;
break;
default:
CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR);
}
std::vector<timestamp_t> ts_vec(iters);
std::vector<timespec> tm_vec(iters);
const uint32_t steps = iters - 1;
for (uint32_t i = 0; i < iters; ++i) {
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]);
clock_gettime(clock_id, &tm_vec[i]);
}
const timestamp_t ts_base = sysclock_to_ns(ts_vec.front());
const timestamp_t tm_base = timespec_to_ns(tm_vec.front());
const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps);
timestamp_t ts_accum = 0;
timestamp_t tm_accum = 0;
for (uint32_t i = 0; i < iters; ++i) {
ts_accum += (ts_vec[i] - ts_base);
tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base);
}
*timestamp_v = (ts_accum / iters) + ts_base + error;
*time_v = (tm_accum / iters) + tm_base;
*error_v = error;
}
private:
// Timestamp frequency factor
freq_t sysclock_factor_;
// HSA API table
const hsa_pfn_t* const hsa_api_;
};
class HsaRsrcFactory {
public:
static const size_t CMD_SLOT_SIZE_B = 0x40;
typedef std::recursive_mutex mutex_t;
typedef HsaTimer::timestamp_t timestamp_t;
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
std::lock_guard<mutex_t> lck(mutex_);
HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed);
if (obj == NULL) {
obj = new HsaRsrcFactory(initialize_hsa);
instance_.store(obj, std::memory_order_release);
}
return obj;
}
static HsaRsrcFactory& Instance() {
HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire);
if (obj == NULL) obj = Create(false);
hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
CHECK_STATUS("HsaRsrcFactory::Instance() failed", status);
return *obj;
}
static void Destroy() {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_) delete instance_.load();
instance_ = NULL;
}
// Return system agent info
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
// Get the count of Hsa Gpu Agents available on the platform
// @return uint32_t Number of Gpu agents on platform
uint32_t GetCountOfGpuAgents();
// Get the count of Hsa Cpu Agents available on the platform
// @return uint32_t Number of Cpu agents on platform
uint32_t GetCountOfCpuAgents();
// Get the AgentInfo handle of a Gpu device
// @param idx Gpu Agent at specified index
// @param agent_info Output parameter updated with AgentInfo
// @return bool true if successful, false otherwise
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Get the AgentInfo handle of a Cpu device
// @param idx Cpu Agent at specified index
// @param agent_info Output parameter updated with AgentInfo
// @return bool true if successful, false otherwise
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
// @param agent_info Gpu Agent on which to create a queue object
// @param num_Pkts Number of packets to be held by queue
// @param queue Output parameter updated with handle of queue object
// @return bool true if successful, false otherwise
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
// Create a Signal object and return its handle.
// @param value Initial value of signal object
// @param signal Output parameter updated with handle of signal object
// @return bool true if successful, false otherwise
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
// Allocate local GPU memory
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory tp pass kernel parameters
// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
// Allocate system memory accessible from both CPU and GPU
// Memory is alocated accessible to all CPU agents and AgentInfo parameter is ignored.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory for command buffer.
// @param agent_info Agent from whose memory region to allocate
// @param size Size of memory in terms of bytes
// @return uint8_t* Pointer to buffer, null if allocation fails.
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
// Wait signal
hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
// Wait signal with signal value restore
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
// Copy data from GPU to host memory
bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
// Memory free method
static bool FreeMemory(void* ptr);
// Loads an Assembled Brig file and Finalizes it into Device Isa
// @param agent_info Gpu device for which to finalize
// @param brig_path File path of the Assembled Brig file
// @param kernel_name Name of the kernel to finalize
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
// @return true if successful, false otherwise
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
// Print the various fields of Hsa Gpu Agents
bool PrintGpuAgents(const std::string& header);
// Submit AQL packet to given queue
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
// Enable executables loading tracking
static bool IsExecutableTracking() { return executable_tracking_on_; }
static void EnableExecutableTracking(HsaApiTable* table);
static const char* GetKernelName(uint64_t addr);
// Initialize HSA API table
void static InitHsaApiTable(HsaApiTable* table);
static const hsa_pfn_t* HsaApi() { return &hsa_api_; }
// Return AqlProfile API table
typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t;
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
// Return Loader API table
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
// Methods for system-clock/ns conversion and timestamp in 'ns'
timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); }
timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); }
timestamp_t TimestampNs() const { return timer_->timestamp_ns(); }
timestamp_t GetSysTimeout() const { return timeout_; }
static timestamp_t GetTimeoutNs() { return timeout_ns_; }
static void SetTimeoutNs(const timestamp_t& time) {
std::lock_guard<mutex_t> lck(mutex_);
timeout_ns_ = time;
if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
}
void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) {
timestamp_t timestamp_v = 0;
timestamp_t time_v = 0;
timestamp_t error_v = 0;
timer_->correlated_pair_ns(time_id, iters, &timestamp_v, &time_v, &error_v);
time_shift_[time_id] = time_v - timestamp_v;
time_error_[time_id] = error_v;
}
hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) {
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
*time = value + time_shift_[time_id];
return HSA_STATUS_SUCCESS;
}
hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) {
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
*timestamp = value - time_shift_[time_id];
return HSA_STATUS_SUCCESS;
}
void DumpHandles(FILE* output_file);
private:
// System agents iterating callback
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
// Callback function to find and bind kernarg region of an agent
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data);
// Load AQL profile HSA extension library directly
static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api);
// Constructor of the class. Will initialize the Hsa Runtime and
// query the system topology to get the list of Cpu and Gpu devices
explicit HsaRsrcFactory(bool initialize_hsa);
// Destructor of the class
~HsaRsrcFactory();
// Add an instance of AgentInfo representing a Hsa Gpu agent
const AgentInfo* AddAgentInfo(const hsa_agent_t agent);
// To mmap command buffer memory
static const bool CMD_MEMORY_MMAP = false;
// HSA was initialized
const bool initialize_hsa_;
static std::atomic<HsaRsrcFactory*> instance_;
static mutex_t mutex_;
// Used to maintain a list of Hsa Gpu Agent Info
std::vector<const AgentInfo*> gpu_list_;
std::vector<hsa_agent_t> gpu_agents_;
// Used to maintain a list of Hsa Cpu Agent Info
std::vector<const AgentInfo*> cpu_list_;
std::vector<hsa_agent_t> cpu_agents_;
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
// Executables loading tracking
typedef std::map<uint64_t, const char*> symbols_map_t;
static symbols_map_t* symbols_map_;
static bool executable_tracking_on_;
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
// HSA runtime API table
static hsa_pfn_t hsa_api_;
// AqlProfile API table
aqlprofile_pfn_t aqlprofile_api_;
// Loader API table
hsa_ven_amd_loader_1_00_pfn_t loader_api_;
// System timeout, ns
static timestamp_t timeout_ns_;
// System timeout, sysclock
timestamp_t timeout_;
// HSA timer
HsaTimer* timer_;
// Time shift array to support time conversion
timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER];
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
// CPU/kern-arg memory pools
hsa_amd_memory_pool_t *cpu_pool_;
hsa_amd_memory_pool_t *kern_arg_pool_;
};
#endif // _HSA_RSRC_FACTORY_H_
+64
Wyświetl plik
@@ -0,0 +1,64 @@
################################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
cmake_minimum_required ( VERSION 2.8.12 )
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
set ( EXE_NAME "ctrl" )
if ( NOT DEFINED TEST_DIR )
set ( TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
project ( ${EXE_NAME} )
## Set build environment
include ( env )
endif ()
if ( NOT DEFINED ROCM_ROOT_DIR )
set ( ROCM_ROOT_DIR "" )
endif ()
if ( NOT DEFINED GPU_TARGETS )
set ( GPU_TARGETS "" )
endif ()
## Util sources
file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
## Test control sources
set ( CTRL_SRC
${TEST_DIR}/app/test.cpp
${TEST_DIR}/ctrl/test_hsa.cpp
)
## Dummy kernel
set ( DUMMY_NAME dummy_kernel )
execute_process ( COMMAND sh -xc "${TEST_DIR}/../script/build_kernel.sh '${TEST_DIR}/${DUMMY_NAME}/${DUMMY_NAME}' '${PROJECT_BINARY_DIR}' '${ROCM_ROOT_DIR}' '${GPU_TARGETS}'" )
## Test kernel
set ( TEST_NAME simple_convolution )
set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
execute_process ( COMMAND sh -xc "${TEST_DIR}/../script/build_kernel.sh '${TEST_DIR}/${TEST_NAME}/${TEST_NAME}' '${PROJECT_BINARY_DIR}' '${ROCM_ROOT_DIR}' '${GPU_TARGETS}'" )
## Building ctrl test executable
add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
+86
Wyświetl plik
@@ -0,0 +1,86 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include <hsa.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <thread>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
#include "dummy_kernel/dummy_kernel.h"
#include "simple_convolution/simple_convolution.h"
void thread_fun(const int kiter, const int diter, const uint32_t agents_number) {
const AgentInfo* agent_info[agents_number];
hsa_queue_t* queue[agents_number];
HsaRsrcFactory* rsrc = &HsaRsrcFactory::Instance();
for (uint32_t n = 0; n < agents_number; ++n) {
uint32_t agent_id = n % rsrc->GetCountOfGpuAgents();
if (rsrc->GetGpuAgentInfo(agent_id, &agent_info[n]) == false) {
fprintf(stderr, "AgentInfo failed\n");
abort();
}
if (rsrc->CreateQueue(agent_info[n], 128, &queue[n]) == false) {
fprintf(stderr, "CreateQueue failed\n");
abort();
}
}
for (int i = 0; i < kiter; ++i) {
for (uint32_t n = 0; n < agents_number; ++n) {
RunKernel<DummyKernel, TestAql>(0, NULL, agent_info[n], queue[n], diter);
RunKernel<SimpleConvolution, TestAql>(0, NULL, agent_info[n], queue[n], diter);
}
}
for (uint32_t n = 0; n < agents_number; ++n) {
hsa_queue_destroy(queue[n]);
}
}
int main(int argc, char** argv) {
const char* kiter_s = getenv("ROCP_KITER");
const char* diter_s = getenv("ROCP_DITER");
const char* agents_s = getenv("ROCP_AGENTS");
const char* thrs_s = getenv("ROCP_THRS");
const int kiter = (kiter_s != NULL) ? atol(kiter_s) : 1;
const int diter = (diter_s != NULL) ? atol(diter_s) : 1;
const uint32_t agents_number = (agents_s != NULL) ? (uint32_t)atol(agents_s) : 1;
const int thrs = (thrs_s != NULL) ? atol(thrs_s) : 1;
TestHsa::HsaInstantiate();
std::vector<std::thread> t(thrs);
for (int n = 0; n < thrs; ++n) {
t[n] = std::thread(thread_fun, kiter, diter, agents_number);
}
for (int n = 0; n < thrs; ++n) {
t[n].join();
}
TestHsa::HsaShutdown();
return 0;
}
+90
Wyświetl plik
@@ -0,0 +1,90 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_CTRL_RUN_KERNEL_H_
#define TEST_CTRL_RUN_KERNEL_H_
#include "ctrl/test_hsa.h"
#include "util/test_assert.h"
template <class Kernel, class Test> bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL, hsa_queue_t* queue = NULL, int count = 1) {
bool ret_val = false;
if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL);
// Create test kernel object
Kernel test_kernel;
TestHsa* test_hsa = new TestHsa(&test_kernel);
test_hsa->SetAgentInfo(agent_info);
test_hsa->SetQueue(queue);
TestAql* test_aql = new Test(test_hsa);
TEST_ASSERT(test_aql != NULL);
if (test_aql == NULL) return 1;
// Initialization of Hsa Runtime
ret_val = test_aql->Initialize(argc, argv);
if (ret_val == false) {
std::cerr << "Error in the test initialization" << std::endl;
// TEST_ASSERT(ret_val);
return false;
}
// Setup Hsa resources needed for execution
ret_val = test_aql->Setup();
if (ret_val == false) {
std::cerr << "Error in creating hsa resources" << std::endl;
TEST_ASSERT(ret_val);
return false;
}
// Kernel dspatch iterations
for (int i = 0; i < count; ++i) {
// Run test kernel
ret_val = test_aql->Run();
if (ret_val == false) {
std::cerr << "Error in running the test kernel" << std::endl;
TEST_ASSERT(ret_val);
return false;
}
// Verify the results of the execution
ret_val = test_aql->VerifyResults();
if (ret_val) {
std::clog << "Test : Passed" << std::endl;
} else {
std::clog << "Test : Failed" << std::endl;
}
}
// Print time taken by sample
test_aql->PrintTime();
test_aql->Cleanup();
delete test_aql;
return ret_val;
}
#endif // TEST_CTRL_RUN_KERNEL_H_
+77
Wyświetl plik
@@ -0,0 +1,77 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_AQL_H_
#define TEST_CTRL_TEST_AQL_H_
#include <hsa.h>
#include <hsa_ven_amd_aqlprofile.h>
#include "util/hsa_rsrc_factory.h"
// Test AQL interface
class TestAql {
public:
explicit TestAql(TestAql* t = 0) : test_(t) {}
virtual ~TestAql() {
if (test_) delete test_;
}
TestAql* Test() { return test_; }
virtual const AgentInfo* GetAgentInfo() { return (test_) ? test_->GetAgentInfo() : 0; }
virtual hsa_queue_t* GetQueue() { return (test_) ? test_->GetQueue() : 0; }
virtual HsaRsrcFactory* GetRsrcFactory() { return (test_) ? test_->GetRsrcFactory() : 0; }
// Initialize application environment including setting
// up of various configuration parameters based on
// command line arguments
// @return bool true on success and false on failure
virtual bool Initialize(int argc, char** argv) {
return (test_) ? test_->Initialize(argc, argv) : true;
}
// Setup application parameters for exectuion
// @return bool true on success and false on failure
virtual bool Setup() { return (test_) ? test_->Setup() : true; }
// Run the kernel
// @return bool true on success and false on failure
virtual bool Run() { return (test_) ? test_->Run() : true; }
// Verify results
// @return bool true on success and false on failure
virtual bool VerifyResults() { return (test_) ? test_->VerifyResults() : true; }
// Print to console the time taken to execute kernel
virtual void PrintTime() {
if (test_) test_->PrintTime();
}
// Release resources e.g. memory allocations
// @return bool true on success and false on failure
virtual bool Cleanup() { return (test_) ? test_->Cleanup() : true; }
private:
TestAql* const test_;
};
#endif // TEST_CTRL_TEST_AQL_H_
+279
Wyświetl plik
@@ -0,0 +1,279 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include "ctrl/test_hsa.h"
#include <atomic>
#include "util/test_assert.h"
#include "util/helper_funcs.h"
#include "util/hsa_rsrc_factory.h"
HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL;
HsaRsrcFactory* TestHsa::HsaInstantiate() {
// Instantiate an instance of Hsa Resources Factory
if (hsa_rsrc_ == NULL) {
hsa_rsrc_ = HsaRsrcFactory::Create();
// Print properties of the agents
hsa_rsrc_->PrintGpuAgents("> GPU agents");
}
return hsa_rsrc_;
}
void TestHsa::HsaShutdown() {
if (hsa_rsrc_) hsa_rsrc_->Destroy();
}
bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
std::clog << "TestHsa::Initialize :" << std::endl;
// Instantiate a Timer object
setup_timer_idx_ = hsa_timer_.CreateTimer();
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
if (hsa_rsrc_ == NULL) {
TEST_ASSERT(false);
return false;
}
// Create an instance of Gpu agent
if (agent_info_ == NULL) {
const uint32_t agent_id = 0;
if (!hsa_rsrc_->GetGpuAgentInfo(agent_id, &agent_info_)) {
agent_info_ = NULL;
std::cerr << "> error: agent[" << agent_id << "] is not found" << std::endl;
return false;
}
}
std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name << std::endl;
// Create an instance of Aql Queue
if (hsa_queue_ == NULL) {
const uint32_t num_pkts = 128;
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
hsa_queue_ = NULL;
TEST_ASSERT(false);
}
my_queue_ = true;
}
// Obtain handle of signal
hsa_rsrc_->CreateSignal(1, &hsa_signal_);
// Obtain the code object file name
std::string agentName(agent_info_->name);
brig_path_obj_.append(agentName);
brig_path_obj_.append("_" + name_ + ".hsaco");
return true;
}
bool TestHsa::Setup() {
std::clog << "TestHsa::setup :" << std::endl;
// Start the timer object
hsa_timer_.StartTimer(setup_timer_idx_);
// Load and Finalize Kernel Code Descriptor
const char* brig_path = brig_path_obj_.c_str();
bool suc = hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, symb_.c_str(), &hsa_exec_,
&kernel_code_desc_);
if (suc == false) {
std::cerr << "Error in loading and finalizing Kernel" << std::endl;
return false;
}
mem_map_t& mem_map = test_->GetMemMap();
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
mem_descr_t& des = it->second;
if (des.size == 0) continue;
switch (des.id) {
case TestKernel::LOCAL_DES_ID:
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
break;
case TestKernel::KERNARG_DES_ID: {
// Check the kernel args size
const size_t kernarg_size = des.size;
size_t size_info = 0;
const hsa_status_t status = hsa_executable_symbol_get_info(
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
size_info = kernarg_size;
const bool kernarg_missmatch = (kernarg_size > size_info);
if (kernarg_missmatch) {
std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info
<< std::flush << std::endl;
TEST_ASSERT(!kernarg_missmatch);
break;
}
// ALlocate kernarg memory
des.size = size_info;
des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, size_info);
if (des.ptr) memset(des.ptr, 0, size_info);
break;
}
case TestKernel::SYS_DES_ID:
des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
if (des.ptr) memset(des.ptr, 0, des.size);
break;
case TestKernel::NULL_DES_ID:
des.ptr = NULL;
break;
default:
break;
}
TEST_ASSERT(des.ptr != NULL);
if (des.ptr == NULL) return false;
}
test_->Init();
// Stop the timer object
hsa_timer_.StopTimer(setup_timer_idx_);
setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_);
total_time_taken_ = setup_time_taken_;
return true;
}
bool TestHsa::Run() {
std::clog << "TestHsa::run :" << std::endl;
const uint32_t work_group_size = 64;
const uint32_t work_grid_size = test_->GetGridSize();
uint32_t group_segment_size = 0;
uint32_t private_segment_size = 0;
uint64_t code_handle = 0;
// Retrieve the amount of group memory needed
hsa_executable_symbol_get_info(
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
// Retrieve the amount of private memory needed
hsa_executable_symbol_get_info(kernel_code_desc_,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
&private_segment_size);
// Retrieve handle of the code block
hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
&code_handle);
// Initialize the dispatch packet.
hsa_kernel_dispatch_packet_t aql;
memset(&aql, 0, sizeof(aql));
// Set the packet's type, barrier bit, acquire and release fences
aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
// Populate Aql packet with default values
aql.setup = 1;
aql.grid_size_x = work_grid_size;
aql.grid_size_y = 1;
aql.grid_size_z = 1;
aql.workgroup_size_x = work_group_size;
aql.workgroup_size_y = 1;
aql.workgroup_size_z = 1;
// Bind the kernel code descriptor and arguments
aql.kernel_object = code_handle;
aql.kernarg_address = test_->GetKernargPtr();
aql.group_segment_size = group_segment_size;
aql.private_segment_size = private_segment_size;
// Initialize Aql packet with handle of signal
hsa_signal_store_relaxed(hsa_signal_, 1);
aql.completion_signal = hsa_signal_;
std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl;
// Start the timer object
hsa_timer_.StartTimer(dispatch_timer_idx_);
// Submit AQL packet to the queue
const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql);
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush;
// Wait on the dispatch signal until the kernel is finished.
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
if (hsa_signal_wait_scacquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED) != 0) {
TEST_ASSERT("signal_wait failed");
}
std::clog << "> DONE, que_idx=" << que_idx << std::endl;
// Stop the timer object
hsa_timer_.StopTimer(dispatch_timer_idx_);
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
total_time_taken_ += dispatch_time_taken_;
return true;
}
bool TestHsa::VerifyResults() {
bool cmp = false;
void* output = NULL;
const uint32_t size = test_->GetOutputSize();
bool suc = false;
if (size == 0) return true;
// Copy local kernel output buffers from local memory into host memory
if (test_->IsOutputLocal()) {
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size);
if (!suc) std::clog << "> VerifyResults: Memcpy failed" << std::endl << std::flush;
} else {
output = test_->GetOutputPtr();
suc = true;
}
if ((output != NULL) && suc) {
// Print the test output
test_->PrintOutput(output);
// Compare the results and see if they match
cmp = (memcmp(output, test_->GetRefOut(), size) == 0);
}
if (test_->IsOutputLocal() && (output != NULL)) hsa_rsrc_->FreeMemory(output);
return cmp;
}
void TestHsa::PrintTime() {
std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_
<< std::endl;
std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_
<< std::endl;
std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_
<< std::endl;
}
bool TestHsa::Cleanup() {
hsa_executable_destroy(hsa_exec_);
hsa_signal_destroy(hsa_signal_);
if (my_queue_) hsa_queue_destroy(hsa_queue_);
hsa_queue_ = NULL;
agent_info_ = NULL;
return true;
}
+129
Wyświetl plik
@@ -0,0 +1,129 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_HSA_H_
#define TEST_CTRL_TEST_HSA_H_
#include "ctrl/test_aql.h"
#include "ctrl/test_kernel.h"
#include "util/hsa_rsrc_factory.h"
#include "util/perf_timer.h"
// Class implements HSA test
class TestHsa : public TestAql {
public:
// Instantiate HSA resources
static HsaRsrcFactory* HsaInstantiate();
static void HsaShutdown();
// Constructor
explicit TestHsa(TestKernel* test) : test_(test), name_(test->Name()), symb_(test->SymbName()) {
total_time_taken_ = 0;
setup_time_taken_ = 0;
dispatch_time_taken_ = 0;
agent_info_ = NULL;
hsa_queue_ = NULL;
my_queue_ = false;
hsa_exec_ = {};
}
// Get methods for Agent Info, HAS queue, HSA Resourcse Manager
HsaRsrcFactory* GetRsrcFactory() { return hsa_rsrc_; }
hsa_agent_t HsaAgent() { return agent_info_->dev_id; }
const AgentInfo* GetAgentInfo() { return agent_info_; }
void SetAgentInfo(const AgentInfo* agent_info) { agent_info_ = agent_info; }
hsa_queue_t* GetQueue() { return hsa_queue_; }
void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; }
// Initialize application environment including setting
// up of various configuration parameters based on
// command line arguments
// @return bool true on success and false on failure
bool Initialize(int argc, char** argv);
// Setup application parameters for exectuion
// @return bool true on success and false on failure
bool Setup();
// Run the BinarySearch kernel
// @return bool true on success and false on failure
bool Run();
// Verify against reference implementation
// @return bool true on success and false on failure
bool VerifyResults();
// Print to console the time taken to execute kernel
void PrintTime();
// Release resources e.g. memory allocations
// @return bool true on success and false on failure
bool Cleanup();
private:
typedef TestKernel::mem_descr_t mem_descr_t;
typedef TestKernel::mem_map_t mem_map_t;
typedef TestKernel::mem_it_t mem_it_t;
// Test object
TestKernel* test_;
// Path of Brig file
std::string brig_path_obj_;
// Used to track time taken to run the sample
double total_time_taken_;
double setup_time_taken_;
double dispatch_time_taken_;
// Handle of signal
hsa_signal_t hsa_signal_;
// Handle of Kernel Code Descriptor
hsa_executable_symbol_t kernel_code_desc_;
// Instance of timer object
uint32_t setup_timer_idx_;
uint32_t dispatch_timer_idx_;
PerfTimer hsa_timer_;
// Instance of Hsa Resources Factory
static HsaRsrcFactory* hsa_rsrc_;
// Handle to an Hsa Gpu Agent
const AgentInfo* agent_info_;
// Handle to an Hsa Queue
hsa_queue_t* hsa_queue_;
bool my_queue_;
// Test kernel name
std::string name_;
// Test kernel name
std::string symb_;
// Kernel executable
hsa_executable_t hsa_exec_;
};
#endif // TEST_CTRL_TEST_HSA_H_
+138
Wyświetl plik
@@ -0,0 +1,138 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_KERNEL_H_
#define TEST_CTRL_TEST_KERNEL_H_
#include <string>
#include <string.h>
#include <stdint.h>
#include <map>
// Class implements kernel test
class TestKernel {
public:
// Exported buffers IDs
enum buf_id_t { KERNARG_EXP_ID, OUTPUT_EXP_ID, REFOUT_EXP_ID };
// Memory descriptors IDs
enum des_id_t { NULL_DES_ID, LOCAL_DES_ID, KERNARG_DES_ID, SYS_DES_ID, REFOUT_DES_ID };
// Memory descriptors vector declaration
struct mem_descr_t {
des_id_t id;
void* ptr;
uint32_t size;
};
// Memory map declaration
typedef std::map<uint32_t, mem_descr_t> mem_map_t;
typedef mem_map_t::iterator mem_it_t;
typedef mem_map_t::const_iterator mem_const_it_t;
virtual ~TestKernel() {}
// Initialize method
virtual void Init() = 0;
// Return kernel memory map
mem_map_t& GetMemMap() { return mem_map_; }
// Return NULL descriptor
static mem_descr_t NullDescriptor() { return {NULL_DES_ID, NULL, 0}; }
// Check if decripter is local
bool IsLocal(const mem_descr_t& descr) const { return (descr.id == LOCAL_DES_ID); }
// Methods to get the kernel attributes
const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; }
const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; }
void* GetKernargPtr() { return GetKernargDescr().ptr; }
uint32_t GetKernargSize() { return GetKernargDescr().size; }
void* GetOutputPtr() { return GetOutputDescr().ptr; }
uint32_t GetOutputSize() { return GetOutputDescr().size; }
bool IsOutputLocal() { return IsLocal(GetOutputDescr()); }
virtual uint32_t GetGridSize() const = 0;
// Return reference output
void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; }
// Print output
virtual void PrintOutput(const void* ptr) const = 0;
// Return name
virtual std::string Name() const = 0;
// Return name
virtual std::string SymbName() { return Name() + ".kd"; }
protected:
// Set buffer descriptor
bool SetInDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
bool suc = SetMemDescr(buf_id, des_id, size);
if (des_id == KERNARG_DES_ID) {
test_map_[KERNARG_EXP_ID] = &mem_map_[buf_id];
}
return suc;
}
// Set results descriptor
bool SetOutDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
bool suc = SetMemDescr(buf_id, des_id, size);
test_map_[OUTPUT_EXP_ID] = &mem_map_[buf_id];
return suc;
}
// Set host descriptor
bool SetHostDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
bool suc = SetMemDescr(buf_id, des_id, size);
if (suc) {
mem_descr_t& descr = mem_map_[buf_id];
descr.ptr = malloc(size);
if (des_id == REFOUT_DES_ID) {
test_map_[REFOUT_EXP_ID] = &descr;
}
if (descr.ptr == NULL) suc = false;
}
return suc;
}
// Get memory descriptor
mem_descr_t GetDescr(const uint32_t& buf_id) const {
mem_const_it_t it = mem_map_.find(buf_id);
return (it != mem_map_.end()) ? it->second : NullDescriptor();
}
private:
// Set memory descriptor
bool SetMemDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
const mem_descr_t des = {des_id, NULL, size};
auto ret = mem_map_.insert(mem_map_t::value_type(buf_id, des));
return ret.second;
}
// Kernel memory map object
mem_map_t mem_map_;
// Test memory map object
std::map<uint32_t, mem_descr_t*> test_map_;
};
#endif // TEST_CTRL_TEST_KERNEL_H_
@@ -0,0 +1,28 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
/**
dummy kernel
*/
__kernel void DummyKernel() {
uint tid = get_global_id(0);
}
@@ -0,0 +1,71 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
#define TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
#include <map>
#include <vector>
#include "ctrl/test_kernel.h"
// Class implements DummyKernel kernel parameters
class DummyKernel : public TestKernel {
public:
// Kernel buffers IDs
enum { KERNARG_BUF_ID, LOCAL_BUF_ID };
// Constructor
DummyKernel() :
width_(64),
height_(64)
{
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0);
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0);
}
// Initialize method
void Init() {}
// Return compute grid size
uint32_t GetGridSize() const { return width_ * height_; }
// Print output
void PrintOutput(const void* ptr) const {}
// Return name
std::string Name() const { return std::string("DummyKernel"); }
private:
// Reference CPU implementation
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
const uint32_t width, const uint32_t height,
const uint32_t maskWidth, const uint32_t maskHeight) { return true; }
// Width of the Input array
const uint32_t width_;
// Height of the Input array
const uint32_t height_;
};
#endif // TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
+45
Wyświetl plik
@@ -0,0 +1,45 @@
#!/bin/sh
################################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# test trace
export ROC_TEST_TRACE=1
# kernels loading iterations
export ROCP_KITER=50
# kernels dispatching iterations per kernel load
# dispatching to the same queue
export ROCP_DITER=50
# GPU agents number
export ROCP_AGENTS=2
# host threads number
# each thread creates a queue pre GPU agent
export ROCP_THRS=3
eval ./test/ctrl
#valgrind --leak-check=full $tbin
#valgrind --tool=massif $tbin
#ms_print massif.out.<N>
exit 0
@@ -0,0 +1,76 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
/**
* SimpleConvolution is where each pixel of the output image
* is the weighted sum of the neighborhood pixels of the input image
* The neighborhood is defined by the dimensions of the mask and
* weight of each neighbor is defined by the mask itself.
* @param output Output matrix after performing convolution
* @param input Input matrix on which convolution is to be performed
* @param mask mask matrix using which convolution was to be performed
* @param inputDimensions dimensions of the input matrix
* @param maskDimensions dimensions of the mask matrix
*/
__kernel void SimpleConvolution(__global uint * output,
__global uint * input,
__global float * mask,
const uint2 inputDimensions,
const uint2 maskDimensions) {
uint tid = get_global_id(0);
uint width = inputDimensions.x;
uint height = inputDimensions.y;
uint x = tid%width;
uint y = tid/width;
uint maskWidth = maskDimensions.x;
uint maskHeight = maskDimensions.y;
uint vstep = (maskWidth -1)/2;
uint hstep = (maskHeight -1)/2;
// find the left, right, top and bottom indices such that
// the indices do not go beyond image boundaires
uint left = (x < vstep) ? 0 : (x - vstep);
uint right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
uint top = (y < hstep) ? 0 : (y - hstep);
uint bottom = ((y + hstep) >= height)? height - 1: (y + hstep);
// initializing wighted sum value
float sumFX = 0;
for(uint i = left; i <= right; ++i) {
for(uint j = top; j <= bottom; ++j) {
// performing wighted sum within the mask boundaries
uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep));
uint index = j * width + i;
sumFX += ((float)input[index] * mask[maskIndex]);
}
}
// To round to the nearest integer
sumFX += 0.5f;
output[tid] = (uint)sumFX;
}
@@ -0,0 +1,388 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include "simple_convolution/simple_convolution.h"
#include <assert.h>
#include <string.h>
#include <iostream>
#include "util/helper_funcs.h"
#include "util/test_assert.h"
const uint32_t SimpleConvolution::input_data_[]{
15, 201, 51, 89, 92, 34, 96, 66, 11, 225, 161, 96, 81, 211, 108, 124, 202, 244, 182,
90, 215, 92, 98, 20, 44, 225, 55, 247, 202, 0, 45, 218, 202, 97, 51, 39, 131, 147,
105, 143, 116, 11, 239, 198, 222, 92, 67, 169, 81, 250, 3, 40, 86, 101, 60, 131, 70,
116, 123, 17, 117, 168, 236, 64, 10, 31, 103, 142, 179, 209, 29, 40, 220, 13, 239, 187,
105, 50, 100, 186, 44, 104, 227, 131, 205, 32, 6, 20, 149, 130, 38, 10, 43, 18, 75,
53, 50, 178, 195, 230, 132, 225, 14, 96, 238, 253, 27, 88, 48, 128, 18, 92, 232, 246,
224, 182, 23, 231, 203, 172, 105, 241, 183, 148, 4, 2, 202, 55, 181, 142, 29, 57, 111,
43, 153, 93, 41, 181, 181, 89, 54, 200, 182, 31, 190, 150, 213, 213, 126, 160, 130, 232,
146, 57, 125, 151, 59, 71, 206, 240, 213, 236, 42, 68, 24, 195, 162, 65, 121, 87, 155,
175, 31, 81, 207, 222, 232, 164, 180, 102, 69, 55, 79, 216, 112, 204, 112, 171, 19, 63,
156, 233, 43, 198, 46, 67, 138, 208, 132, 4, 39, 32, 180, 71, 113, 131, 38, 90, 40,
219, 193, 109, 18, 16, 70, 131, 220, 182, 46, 240, 245, 203, 217, 32, 146, 7, 100, 28,
216, 233, 32, 255, 9, 213, 71, 123, 88, 110, 213, 128, 74, 150, 238, 93, 166, 52, 224,
131, 234, 15, 115, 224, 218, 76, 1, 108, 84, 101, 137, 44, 79, 170, 44, 88, 127, 116,
211, 216, 226, 168, 88, 45, 63, 70, 138, 230, 123, 107, 105, 101, 122, 220, 70, 84, 41,
71, 193, 125, 173, 75, 169, 252, 245, 213, 84, 117, 73, 40, 77, 44, 209, 166, 90, 16,
237, 229, 246, 104, 80, 95, 206, 202, 60, 20, 31, 101, 92, 225, 226, 9, 44, 140, 5,
34, 97, 89, 151, 171, 129, 229, 216, 82, 139, 51, 99, 120, 24, 89, 225, 104, 185, 175,
50, 246, 196, 82, 91, 32, 51, 62, 42, 96, 202, 47, 130, 44, 137, 26, 215, 10, 255,
176, 93, 138, 227, 193, 3, 251, 27, 229, 100, 212, 149, 151, 202, 89, 233, 38, 122, 29,
100, 164, 125, 46, 212, 0, 90, 93, 26, 50, 103, 25, 226, 197, 164, 198, 135, 168, 194,
162, 141, 38, 119, 34, 190, 66, 124, 167, 104, 247, 197, 204, 156, 67, 251, 112, 67, 85,
205, 93, 135, 53, 119, 106, 251, 28, 49, 130, 196, 243, 36, 82, 26, 155, 117, 216, 221,
241, 128, 70, 233, 70, 18, 133, 137, 14, 245, 204, 99, 195, 42, 235, 248, 161, 86, 243,
190, 135, 118, 130, 123, 154, 213, 150, 54, 74, 111, 20, 60, 240, 90, 37, 54, 109, 171,
191, 123, 161, 140, 222, 100, 182, 202, 93, 88, 32, 80, 23, 168, 198, 153, 36, 97, 111,
187, 151, 185, 43, 172, 245, 27, 6, 27, 82, 115, 199, 18, 239, 104, 158, 206, 205, 85,
152, 42, 174, 185, 123, 197, 98, 65, 95, 135, 163, 206, 66, 59, 136, 109, 231, 125, 137,
237, 153, 219, 97, 96, 237, 81, 201, 140, 31, 150, 226, 183, 192, 144, 113, 59, 86, 212,
125, 182, 91, 33, 132, 158, 92, 12, 12, 68, 138, 149, 50, 36, 113, 147, 133, 95, 229,
78, 235, 4, 228, 206, 188, 165, 95, 45, 225, 181, 1, 94, 107, 93, 128, 240, 251, 220,
252, 7, 32, 135, 156, 83, 171, 14, 230, 48, 109, 203, 126, 89, 208, 99, 39, 140, 9,
134, 185, 234, 60, 187, 73, 167, 24, 201, 152, 20, 166, 148, 27, 199, 28, 184, 26, 199,
198, 0, 248, 52, 204, 119, 141, 157, 218, 181, 41, 227, 59, 227, 206, 119, 159, 23, 31,
184, 224, 183, 204, 134, 76, 231, 77, 105, 160, 103, 48, 103, 104, 41, 155, 53, 160, 41,
210, 123, 222, 252, 95, 26, 223, 45, 146, 126, 68, 177, 54, 37, 105, 3, 171, 182, 235,
249, 31, 139, 97, 80, 243, 202, 121, 143, 0, 26, 184, 210, 149, 151, 207, 244, 177, 174,
34, 67, 45, 102, 245, 100, 140, 95, 104, 55, 21, 83, 49, 53, 223, 147, 134, 210, 93,
0, 97, 93, 26, 26, 48, 175, 178, 255, 164, 99, 174, 198, 167, 220, 45, 156, 64, 185,
252, 168, 241, 18, 252, 35, 71, 219, 182, 205, 173, 19, 206, 15, 113, 232, 42, 161, 152,
220, 160, 60, 64, 79, 3, 231, 43, 49, 132, 108, 235, 128, 21, 220, 146, 17, 255, 218,
236, 182, 168, 154, 201, 118, 170, 58, 94, 212, 220, 246, 177, 125, 51, 241, 204, 55, 216,
248, 104, 92, 100, 83, 221, 121, 48, 111, 138, 47, 73, 119, 230, 241, 17, 175, 103, 187,
234, 198, 144, 199, 188, 65, 68, 240, 51, 17, 39, 11, 9, 143, 104, 109, 227, 70, 231,
19, 181, 113, 66, 255, 233, 41, 241, 250, 217, 89, 182, 196, 31, 71, 139, 220, 137, 208,
204, 188, 225, 243, 200, 234, 131, 48, 88, 102, 119, 63, 121, 44, 177, 188, 44, 154, 229,
29, 149, 190, 118, 76, 130, 150, 147, 14, 114, 28, 222, 62, 217, 191, 50, 161, 170, 181,
210, 2, 28, 73, 66, 149, 117, 243, 81, 162, 141, 55, 191, 35, 245, 54, 111, 120, 204,
2, 134, 62, 31, 100, 125, 248, 36, 175, 153, 206, 101, 107, 209, 129, 181, 19, 22, 43,
7, 104, 205, 149, 159, 140, 184, 149, 195, 39, 14, 143, 42, 148, 205, 73, 249, 74, 66,
30, 250, 219, 237, 96, 71, 190, 225, 253, 210, 248, 40, 218, 96, 245, 111, 0, 130, 39,
150, 69, 79, 165, 212, 122, 57, 162, 195, 51, 237, 6, 82, 231, 225, 63, 71, 41, 253,
41, 38, 208, 33, 78, 170, 130, 68, 26, 131, 198, 66, 26, 12, 145, 191, 224, 11, 249,
130, 207, 44, 112, 213, 126, 88, 183, 190, 160, 225, 187, 201, 8, 140, 235, 87, 55, 109,
155, 81, 241, 98, 147, 11, 110, 37, 202, 79, 49, 195, 210, 0, 240, 66, 214, 110, 154,
142, 44, 58, 111, 232, 4, 119, 117, 239, 207, 172, 93, 106, 254, 78, 205, 145, 89, 59,
183, 35, 138, 232, 230, 92, 233, 214, 159, 191, 69, 58, 78, 114, 116, 189, 91, 121, 53,
208, 104, 4, 125, 198, 111, 123, 20, 60, 13, 109, 120, 196, 145, 3, 172, 119, 95, 150,
78, 255, 85, 147, 57, 163, 6, 174, 97, 97, 39, 151, 50, 144, 155, 175, 86, 11, 43,
107, 71, 56, 216, 191, 253, 105, 194, 170, 225, 34, 64, 47, 34, 150, 195, 91, 58, 201,
10, 155, 43, 49, 50, 93, 194, 206, 13, 25, 217, 56, 132, 33, 112, 92, 225, 109, 198,
164, 23, 167, 199, 88, 215, 234, 238, 155, 69, 40, 100, 80, 196, 144, 129, 246, 237, 68,
197, 250, 93, 159, 51, 225, 193, 163, 62, 163, 17, 4, 71, 41, 172, 15, 130, 132, 249,
112, 31, 63, 152, 132, 143, 92, 20, 17, 83, 1, 86, 25, 252, 179, 185, 47, 149, 122,
211, 211, 29, 229, 216, 101, 15, 133, 117, 145, 9, 111, 1, 40, 175, 154, 173, 62, 247,
193, 80, 75, 194, 166, 100, 191, 90, 29, 239, 239, 152, 194, 195, 182, 168, 156, 27, 183,
33, 145, 73, 43, 0, 75, 83, 175, 229, 0, 238, 221, 194, 63, 40, 133, 230, 140, 68,
64, 170, 51, 48, 66, 246, 243, 248, 159, 144, 20, 87, 177, 165, 160, 220, 166, 235, 48,
86, 209, 49, 68, 174, 243, 132, 214, 120, 106, 99, 189, 170, 13, 241, 219, 80, 232, 207,
72, 135, 95, 92, 223, 16, 2, 127, 237, 169, 107, 29, 255, 61, 79, 68, 236, 67, 200,
194, 188, 50, 38, 121, 221, 52, 107, 184, 132, 84, 136, 204, 219, 231, 41, 186, 248, 44,
58, 229, 213, 166, 3, 212, 227, 82, 25, 207, 150, 225, 146, 82, 20, 185, 204, 242, 237,
55, 170, 113, 139, 50, 62, 103, 26, 103, 34, 18, 148, 93, 247, 105, 3, 251, 62, 231,
77, 87, 182, 227, 57, 73, 54, 77, 2, 2, 63, 239, 57, 234, 97, 197, 29, 159, 44,
55, 7, 79, 74, 155, 172, 66, 5, 175, 61, 67, 150, 139, 155, 77, 111, 212, 151, 165,
34, 153, 167, 98, 137, 225, 77, 234, 166, 107, 138, 211, 163, 145, 34, 237, 45, 206, 47,
50, 126, 108, 117, 21, 248, 17, 98, 103, 230, 249, 12, 9, 147, 179, 107, 29, 149, 185,
7, 59, 37, 146, 14, 200, 35, 49, 182, 80, 0, 230, 130, 126, 83, 248, 148, 75, 9,
247, 178, 240, 240, 190, 249, 132, 114, 101, 161, 7, 30, 169, 67, 68, 59, 82, 12, 95,
131, 195, 176, 131, 169, 51, 2, 252, 44, 150, 72, 54, 141, 250, 38, 126, 185, 31, 3,
44, 132, 165, 52, 163, 78, 120, 231, 138, 202, 244, 234, 77, 183, 155, 209, 97, 207, 212,
94, 251, 107, 166, 49, 249, 161, 88, 120, 91, 120, 123, 135, 253, 33, 188, 160, 112, 52,
136, 250, 254, 125, 229, 76, 53, 128, 30, 150, 79, 243, 244, 75, 95, 155, 125, 88, 60,
213, 209, 152, 78, 77, 32, 75, 110, 220, 236, 222, 17, 117, 217, 15, 242, 190, 92, 39,
63, 123, 190, 143, 111, 178, 219, 206, 78, 88, 38, 138, 46, 247, 34, 124, 69, 66, 199,
179, 31, 179, 145, 48, 41, 106, 64, 27, 41, 157, 67, 105, 24, 1, 249, 135, 179, 212,
86, 1, 44, 124, 140, 91, 116, 175, 215, 185, 242, 159, 108, 17, 83, 254, 66, 124, 105,
131, 151, 146, 32, 218, 252, 57, 219, 245, 193, 143, 201, 23, 145, 246, 148, 30, 82, 8,
206, 41, 194, 192, 201, 47, 210, 28, 46, 20, 152, 151, 151, 48, 42, 184, 11, 38, 241,
231, 28, 179, 119, 230, 202, 8, 220, 94, 39, 46, 103, 245, 88, 42, 181, 33, 90, 136,
62, 136, 156, 214, 31, 52, 7, 74, 237, 19, 113, 223, 250, 141, 146, 113, 115, 92, 122,
80, 187, 161, 126, 35, 150, 215, 78, 76, 249, 168, 212, 55, 48, 113, 14, 80, 166, 21,
154, 147, 40, 12, 114, 35, 153, 5, 148, 12, 98, 15, 92, 29, 176, 219, 65, 71, 179,
143, 147, 172, 56, 104, 227, 104, 218, 241, 185, 128, 7, 84, 20, 47, 96, 135, 82, 249,
140, 231, 6, 238, 246, 99, 12, 167, 63, 77, 238, 242, 221, 130, 158, 21, 235, 129, 126,
197, 114, 56, 69, 121, 140, 90, 169, 237, 225, 252, 231, 109, 228, 237, 91, 219, 81, 104,
130, 144, 181, 113, 130, 147, 244, 32, 169, 223, 162, 39, 164, 21, 95, 234, 143, 236, 68,
57, 217, 37, 53, 192, 147, 25, 174, 239, 245, 0, 87, 119, 144, 13, 232, 19, 160, 220,
51, 73, 188, 214, 113, 96, 235, 209, 75, 122, 190, 144, 179, 151, 181, 233, 88, 73, 3,
7, 56, 248, 7, 143, 112, 152, 156, 89, 171, 61, 53, 223, 135, 242, 181, 248, 83, 161,
202, 158, 28, 136, 46, 208, 32, 228, 186, 121, 45, 189, 128, 102, 182, 136, 246, 38, 32,
147, 127, 204, 208, 181, 171, 87, 167, 97, 80, 250, 2, 26, 153, 31, 163, 200, 239, 195,
172, 169, 60, 218, 103, 188, 65, 30, 69, 55, 68, 102, 202, 196, 50, 154, 121, 221, 242,
33, 63, 67, 28, 66, 93, 181, 97, 0, 126, 81, 196, 43, 251, 0, 5, 98, 189, 70,
128, 3, 126, 197, 105, 72, 137, 155, 227, 3, 121, 214, 36, 184, 25, 65, 250, 118, 247,
91, 119, 117, 173, 60, 160, 168, 60, 166, 10, 250, 237, 139, 253, 107, 80, 102, 180, 217,
2, 151, 221, 123, 109, 1, 52, 134, 66, 46, 253, 57, 138, 117, 175, 55, 178, 79, 223,
239, 245, 234, 233, 226, 117, 231, 78, 198, 78, 2, 159, 80, 154, 124, 204, 7, 126, 0,
142, 193, 47, 140, 251, 185, 2, 170, 241, 180, 249, 208, 163, 239, 186, 141, 210, 48, 116,
32, 246, 195, 34, 150, 19, 188, 19, 224, 196, 146, 224, 83, 83, 15, 224, 78, 201, 226,
249, 186, 151, 243, 139, 58, 226, 70, 199, 181, 118, 60, 213, 109, 255, 248, 3, 19, 181,
23, 243, 122, 169, 212, 205, 252, 228, 173, 75, 173, 144, 68, 104, 39, 55, 243, 98, 26,
57, 41, 207, 175, 102, 165, 29, 102, 158, 32, 121, 83, 56, 109, 205, 225, 66, 155, 222,
38, 73, 42, 212, 218, 110, 60, 1, 166, 48, 99, 193, 105, 141, 145, 25, 244, 54, 54,
90, 213, 87, 212, 40, 143, 66, 246, 112, 132, 146, 79, 171, 220, 121, 128, 182, 232, 189,
184, 143, 237, 27, 80, 86, 169, 226, 112, 158, 25, 166, 248, 238, 253, 204, 23, 141, 15,
13, 254, 147, 160, 77, 63, 124, 199, 191, 50, 175, 124, 234, 62, 105, 6, 143, 192, 176,
113, 48, 78, 139, 215, 71, 121, 213, 20, 144, 98, 35, 158, 96, 183, 62, 174, 246, 187,
117, 182, 237, 37, 50, 216, 99, 156, 223, 243, 93, 143, 101, 142, 222, 240, 101, 37, 106,
58, 57, 250, 157, 93, 153, 254, 20, 216, 172, 10, 147, 34, 192, 129, 71, 243, 90, 171,
144, 57, 159, 238, 201, 4, 124, 167, 244, 225, 205, 95, 28, 7, 89, 185, 100, 243, 184,
121, 203, 100, 131, 95, 135, 68, 224, 207, 56, 58, 122, 201, 115, 25, 183, 61, 30, 51,
229, 18, 21, 178, 113, 49, 186, 203, 235, 31, 191, 163, 152, 138, 8, 28, 233, 143, 97,
202, 95, 153, 4, 217, 98, 120, 243, 26, 182, 17, 77, 155, 36, 99, 78, 150, 149, 8,
98, 128, 39, 33, 36, 192, 172, 45, 220, 149, 189, 61, 96, 28, 215, 100, 246, 58, 221,
233, 84, 147, 251, 162, 47, 31, 5, 125, 181, 154, 134, 23, 27, 174, 57, 64, 110, 229,
109, 75, 123, 43, 136, 219, 71, 95, 64, 61, 154, 29, 39, 238, 177, 34, 145, 225, 65,
150, 94, 247, 49, 229, 15, 77, 147, 72, 141, 2, 45, 251, 77, 169, 38, 213, 132, 110,
53, 196, 172, 207, 226, 212, 190, 148, 246, 79, 117, 56, 230, 212, 48, 23, 185, 63, 100,
76, 136, 242, 78, 181, 237, 156, 95, 20, 113, 227, 131, 167, 168, 47, 119, 139, 3, 53,
31, 250, 133, 149, 50, 107, 105, 99, 130, 34, 162, 231, 111, 42, 217, 190, 224, 199, 90,
63, 220, 204, 35, 95, 115, 203, 143, 234, 86, 147, 32, 118, 141, 165, 11, 192, 16, 117,
35, 147, 152, 198, 123, 7, 240, 84, 198, 209, 28, 33, 17, 248, 237, 52, 88, 97, 255,
231, 76, 86, 122, 109, 204, 8, 18, 216, 201, 35, 77, 237, 183, 229, 179, 50, 237, 164,
135, 179, 118, 164, 213, 135, 157, 195, 187, 245, 36, 187, 220, 113, 18, 87, 222, 222, 96,
241, 183, 42, 21, 4, 23, 205, 233, 203, 0, 214, 112, 136, 138, 230, 44, 95, 110, 201,
34, 41, 191, 71, 229, 155, 185, 247, 243, 151, 214, 84, 137, 141, 126, 159, 146, 149, 108,
124, 97, 109, 82, 209, 245, 221, 183, 34, 60, 37, 236, 95, 79, 171, 167, 53, 71, 96,
45, 58, 248, 3, 142, 129, 145, 12, 33, 36, 162, 142, 160, 3, 251, 243, 213, 240, 208,
141, 19, 13, 178, 255, 109, 2, 170, 20, 55, 241, 116, 101, 44, 108, 105, 186, 238, 251,
199, 15, 31, 106, 157, 191, 110, 152, 178, 67, 137, 131, 208, 156, 144, 131, 155, 253, 134,
70, 18, 190, 55, 134, 35, 99, 243, 140, 30, 225, 135, 230, 240, 166, 81, 142, 102, 191,
39, 25, 3, 177, 156, 211, 77, 45, 87, 233, 43, 221, 48, 61, 155, 103, 195, 191, 203,
182, 75, 233, 152, 211, 208, 136, 121, 33, 23, 224, 224, 62, 249, 227, 239, 149, 183, 61,
195, 15, 39, 238, 236, 87, 43, 136, 191, 239, 71, 138, 166, 147, 116, 62, 102, 68, 199,
224, 101, 223, 193, 70, 29, 186, 42, 13, 80, 225, 75, 19, 241, 115, 1, 221, 202, 45,
102, 137, 29, 174, 20, 195, 66, 136, 2, 168, 205, 201, 137, 50, 168, 74, 121, 198, 4,
163, 212, 85, 133, 31, 105, 118, 146, 106, 84, 93, 152, 187, 231, 181, 105, 251, 121, 171,
132, 123, 84, 81, 69, 221, 132, 238, 40, 253, 181, 45, 161, 137, 130, 39, 169, 235, 158,
59, 86, 242, 153, 239, 173, 128, 165, 23, 123, 30, 195, 0, 154, 23, 81, 224, 245, 214,
206, 30, 212, 131, 75, 117, 12, 206, 157, 181, 186, 59, 241, 17, 45, 138, 0, 219, 11,
165, 243, 135, 196, 182, 135, 95, 205, 217, 63, 195, 175, 14, 225, 131, 145, 45, 249, 158,
251, 150, 84, 182, 209, 70, 199, 255, 209, 199, 219, 220, 109, 206, 99, 50, 132, 234, 146,
82, 195, 209, 22, 114, 223, 247, 246, 113, 37, 239, 16, 33, 134, 100, 215, 88, 170, 158,
87, 123, 102, 50, 88, 211, 1, 187, 6, 134, 165, 152, 216, 105, 106, 239, 220, 74, 231,
210, 187, 12, 194, 204, 45, 72, 49, 4, 160, 219, 162, 248, 87, 8, 43, 176, 220, 44,
107, 227, 178, 17, 124, 139, 122, 230, 122, 87, 48, 97, 42, 236, 110, 236, 185, 155, 53,
234, 159, 214, 198, 66, 206, 30, 75, 249, 206, 40, 38, 57, 11, 217, 74, 136, 100, 197,
110, 223, 29, 159, 65, 71, 140, 175, 51, 69, 74, 105, 48, 234, 63, 246, 45, 13, 20,
121, 7, 226, 161, 46, 28, 173, 7, 103, 53, 108, 45, 164, 76, 74, 68, 141, 145, 208,
61, 197, 22, 136, 46, 70, 115, 110, 60, 161, 124, 81, 26, 132, 51, 188, 178, 79, 106,
186, 183, 160, 39, 228, 68, 115, 46, 136, 1, 192, 89, 62, 133, 112, 198, 180, 182, 58,
34, 243, 219, 158, 69, 245, 34, 120, 178, 213, 200, 28, 143, 128, 188, 182, 100, 1, 41,
146, 137, 43, 82, 227, 105, 216, 83, 48, 140, 10, 106, 175, 254, 70, 77, 67, 59, 112,
188, 237, 69, 133, 10, 212, 5, 198, 138, 105, 199, 180, 252, 81, 223, 79, 53, 73, 39,
137, 121, 180, 148, 228, 99, 146, 42, 177, 214, 102, 33, 147, 84, 102, 25, 94, 59, 31,
37, 197, 137, 237, 122, 133, 63, 90, 213, 116, 163, 253, 253, 29, 177, 145, 2, 21, 36,
45, 198, 251, 147, 231, 143, 232, 78, 168, 71, 137, 199, 108, 79, 80, 90, 201, 214, 153,
35, 172, 13, 199, 169, 11, 228, 91, 157, 231, 112, 193, 20, 54, 189, 167, 30, 77, 144,
108, 245, 215, 246, 189, 68, 69, 14, 158, 14, 228, 55, 50, 145, 69, 249, 58, 80, 222,
149, 237, 198, 5, 175, 218, 60, 109, 130, 91, 186, 18, 200, 175, 234, 190, 109, 46, 3,
123, 204, 18, 96, 4, 68, 241, 73, 62, 44, 154, 29, 193, 136, 227, 199, 55, 189, 4,
164, 64, 95, 95, 82, 39, 15, 60, 230, 124, 107, 233, 248, 55, 251, 89, 60, 63, 75,
134, 126, 119, 32, 156, 57, 168, 127, 0, 224, 61, 5, 133, 125, 100, 228, 208, 140, 243,
12, 114, 111, 119, 92, 104, 175, 87, 193, 236, 151, 13, 114, 21, 132, 146, 177, 189, 59,
49, 190, 27, 110, 195, 160, 236, 40, 132, 188, 181, 120, 201, 40, 232, 65, 132, 80, 241,
220, 18, 221, 115, 31, 79, 137, 164, 226, 58, 98, 29, 108, 32, 57, 219, 228, 218, 199,
13, 95, 132, 195, 215, 77, 235, 191, 143, 112, 16, 128, 76, 35, 93, 191, 66, 173, 73,
231, 143, 132, 73, 173, 240, 106, 231, 203, 78, 193, 147, 92, 33, 23, 31, 248, 100, 11,
184, 243, 123, 201, 115, 200, 236, 209, 135, 47, 126, 209, 22, 14, 85, 95, 188, 69, 202,
163, 17, 24, 101, 164, 117, 134, 187, 148, 127, 31, 159, 55, 19, 27, 1, 135, 227, 237,
89, 107, 28, 216, 60, 51, 230, 145, 147, 163, 215, 93, 70, 232, 118, 172, 140, 235, 50,
71, 128, 177, 103, 32, 233, 123, 60, 234, 2, 31, 216, 91, 139, 244, 52, 200, 40, 26,
90, 188, 189, 49, 25, 4, 25, 144, 176, 166, 124, 227, 237, 252, 148, 85, 29, 125, 208,
89, 104, 210, 121, 64, 46, 4, 53, 99, 204, 93, 125, 38, 25, 59, 88, 51, 64, 113,
195, 241, 23, 64, 212, 5, 60, 104, 90, 90, 230, 42, 179, 78, 253, 44, 143, 44, 49,
196, 143, 254, 34, 13, 36, 60, 73, 125, 112, 137, 239, 52, 122, 7, 116, 79, 12, 177,
183, 103, 11, 158, 146, 190, 237, 143, 235, 124, 188, 28, 65, 76, 26, 100, 89, 63, 160,
163, 188, 17, 44, 172, 69, 167, 179, 185, 246, 191, 107, 174, 38, 118, 76, 184, 53, 58,
72, 32, 182, 5, 61, 248, 81, 88, 92, 170, 152, 253, 77, 84, 14, 122, 1, 83, 34,
180, 13, 25, 115, 120, 199, 154, 238, 20, 83, 36, 79, 155, 68, 5, 160, 130, 254, 242,
218, 90, 156, 114, 87, 234, 199, 101, 101, 200, 185, 135, 124, 198, 160, 240, 62, 104, 138,
45, 125, 222, 81, 204, 122, 150, 210, 26, 24, 208, 12, 242, 42, 169, 101, 130, 148, 44,
232, 249, 245, 161, 128, 113, 103, 33, 98, 166, 137, 236, 212, 7, 202, 38, 211, 69, 188,
165, 95, 212, 118, 108, 199, 161, 22, 45, 35, 170, 90, 11, 163, 79, 173, 36, 193, 20,
69, 35, 187, 207, 16, 144, 214, 219, 182, 170, 32, 114, 79, 128, 71, 198, 237, 15, 103,
4, 60, 139, 175, 150, 151, 82, 230, 68, 119, 168, 89, 188, 204, 20, 140, 220, 165, 98,
184, 91, 12, 217, 205, 92, 90, 20, 35, 71, 36, 138, 76, 96, 22, 251, 247, 173, 78,
222, 241, 197, 134, 75, 130, 83, 96, 14, 47, 5, 113, 232, 96, 126, 193, 45, 218, 28,
66, 253, 99, 103, 136, 176, 200, 158, 171, 191, 76, 249, 158, 62, 190, 37, 137, 65, 120,
233, 80, 168, 238, 193, 145, 79, 63, 82, 125, 26, 111, 191, 24, 210, 39, 161, 131, 239,
64, 46, 175, 140, 39, 77, 202, 230, 115, 84, 40, 235, 62, 120, 148, 45, 57, 37, 124,
121, 120, 249, 148, 231, 185, 172, 186, 224, 77, 61, 207, 141, 107, 126, 26, 147, 204, 229,
121, 63, 58, 161, 43, 120, 25, 191, 165, 83, 228, 34, 205, 92, 27, 97, 67, 213, 13,
253, 182, 91, 59, 133, 233, 166, 4, 4, 57, 209, 233, 179, 16, 35, 85, 59, 155, 111,
250, 65, 194, 223, 99, 144, 59, 127, 241, 127, 85, 255, 125, 11, 90, 184, 145, 68, 95,
150, 72, 153, 103, 49, 76, 120, 85, 161, 179, 241, 16, 174, 51, 211, 142, 150, 99, 201,
22, 85, 73, 108, 84, 199, 120, 175, 128, 9, 243, 223, 160, 59, 120, 8, 109, 197, 128,
194, 103, 52, 180, 119, 227, 231, 75, 113, 126, 175, 59, 148, 4, 132, 1, 89, 75, 121,
8, 204, 131, 251, 171, 36, 55, 36, 44, 165, 233, 172, 103, 80, 224, 28, 200, 195, 3,
20, 53, 129, 195, 112, 22, 200, 244, 23, 34, 64, 145, 42, 12, 20, 38, 184, 56, 94,
220, 101, 3, 198, 17, 107, 22, 242, 135, 222, 182, 138, 243, 235, 11, 182, 91, 34, 127,
80, 58, 161, 145, 203, 204, 158, 224, 242, 86, 24, 81, 51, 126, 84, 249, 143, 191, 15,
130, 70, 238, 57, 209, 225, 36, 221, 152, 128, 255, 24, 208, 57, 186, 97, 4, 134, 255,
229, 121, 86, 254, 202, 137, 124, 31, 130, 12, 222, 146, 142, 37, 129, 199, 247, 98, 236,
212, 251, 108, 211, 20, 60, 13, 206, 158, 18, 84};
SimpleConvolution::SimpleConvolution() {
width_ = 64;
height_ = 64;
mask_width_ = 3;
mask_height_ = mask_width_;
randomize_seed_ = 0;
if (!IsPowerOf2(width_)) {
width_ = RoundToPowerOf2(width_);
}
if (!IsPowerOf2(height_)) {
height_ = RoundToPowerOf2(height_);
}
if (!(mask_width_ % 2)) {
mask_width_++;
}
if (!(mask_height_ % 2)) {
mask_height_++;
}
if (width_ * height_ < 256) {
width_ = 64;
height_ = 64;
}
const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t);
const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float);
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, sizeof(kernel_args_t));
SetInDescr(INPUT_BUF_ID, SYS_DES_ID, input_size_bytes);
SetInDescr(MASK_BUF_ID, SYS_DES_ID, mask_size_bytes);
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, input_size_bytes);
SetHostDescr(REFOUT_BUF_ID, REFOUT_DES_ID, input_size_bytes);
if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes);
}
void SimpleConvolution::Init() {
std::clog << "SimpleConvolution::init :" << std::endl;
mem_descr_t kernarg_des = GetDescr(KERNARG_BUF_ID);
mem_descr_t input_des = GetDescr(INPUT_BUF_ID);
mem_descr_t mask_des = GetDescr(MASK_BUF_ID);
mem_descr_t output_des = GetDescr(LOCAL_BUF_ID);
#if 0
printf("kernarg_des %p 0x%x\n", kernarg_des.ptr, kernarg_des.size);
printf("input_des %p 0x%x\n", input_des.ptr, input_des.size);
printf("mask_des %p 0x%x\n", mask_des.ptr, mask_des.size);
printf("output_des %p 0x%x\n", output_des.ptr, output_des.size);
#endif
uint32_t* input = reinterpret_cast<uint32_t*>(input_des.ptr);
uint32_t* output_local = reinterpret_cast<uint32_t*>(output_des.ptr);
float* mask = reinterpret_cast<float*>(mask_des.ptr);
kernel_args_t* kernel_args = reinterpret_cast<kernel_args_t*>(kernarg_des.ptr);
if (randomize_seed_) {
// random initialisation of input
FillRandom<uint32_t>(input, width_, height_, 0, 255, randomize_seed_);
} else {
// initialization with preset values
memcpy(input, input_data_, width_ * height_ * sizeof(uint32_t));
}
// Fill a blurr filter or some other filter of your choice
const float val = 1.0f / (mask_width_ * 2.0f - 1.0f);
for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) {
mask[i] = 0;
}
for (uint32_t i = 0; i < mask_width_; i++) {
uint32_t y = mask_height_ / 2;
mask[y * mask_width_ + i] = val;
}
for (uint32_t i = 0; i < mask_height_; i++) {
uint32_t x = mask_width_ / 2;
mask[i * mask_width_ + x] = val;
}
// Print the INPUT array.
std::clog << std::dec;
PrintArray<uint32_t>("> Input[0]", input, width_, 1);
PrintArray<float>("> Mask", mask, mask_width_, mask_height_);
// Fill the kernel args
kernel_args->arg1 = output_local;
kernel_args->arg2 = input;
kernel_args->arg3 = mask;
kernel_args->arg4 = width_;
kernel_args->arg41 = height_;
kernel_args->arg5 = mask_width_;
kernel_args->arg51 = mask_height_;
// Calculate the reference output
ReferenceImplementation(reinterpret_cast<uint32_t*>(GetRefOut()), input, mask, width_, height_,
mask_width_, mask_height_);
}
void SimpleConvolution::PrintOutput(const void* ptr) const {
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<const uint32_t*>(ptr), width_, 1);
}
bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input,
const float* mask, const uint32_t width,
const uint32_t height, const uint32_t mask_width,
const uint32_t mask_height) {
const uint32_t vstep = (mask_width - 1) / 2;
const uint32_t hstep = (mask_height - 1) / 2;
// for each pixel in the input
for (uint32_t x = 0; x < width; x++) {
for (uint32_t y = 0; y < height; y++) {
// find the left, right, top and bottom indices such that
// the indices do not go beyond image boundaires
const uint32_t left = (x < vstep) ? 0 : (x - vstep);
const uint32_t right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
const uint32_t top = (y < hstep) ? 0 : (y - hstep);
const uint32_t bottom = ((y + hstep) >= height) ? height - 1 : (y + hstep);
// initializing wighted sum value
float sum_fx = 0;
for (uint32_t i = left; i <= right; ++i) {
for (uint32_t j = top; j <= bottom; ++j) {
// performing wighted sum within the mask boundaries
uint32_t mask_idx = (j - (y - hstep)) * mask_width + (i - (x - vstep));
uint32_t index = j * width + i;
// to round to the nearest integer
sum_fx += ((float)input[index] * mask[mask_idx]);
}
}
sum_fx += 0.5f;
output[y * width + x] = uint32_t(sum_fx);
}
}
return true;
}
@@ -0,0 +1,94 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
#define TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
#include <map>
#include <vector>
#include "ctrl/test_kernel.h"
// Class implements SimpleConvolution kernel parameters
class SimpleConvolution : public TestKernel {
public:
// Kernel buffers IDs
enum { INPUT_BUF_ID, LOCAL_BUF_ID, MASK_BUF_ID, KERNARG_BUF_ID, REFOUT_BUF_ID };
// Constructor
SimpleConvolution();
// Initialize method
void Init();
// Return compute grid size
uint32_t GetGridSize() const { return width_ * height_; }
// Print output
void PrintOutput(const void* ptr) const;
// Return name
std::string Name() const { return std::string("SimpleConvolution"); }
private:
// Local kernel arguments declaration
struct kernel_args_t {
void* arg1;
void* arg2;
void* arg3;
uint32_t arg4;
uint32_t arg41;
uint32_t arg5;
uint32_t arg51;
};
// Reference CPU implementation of Simple Convolution
// @param output Output matrix after performing convolution
// @param input Input matrix on which convolution is to be performed
// @param mask mask matrix using which convolution was to be performed
// @param input_dimensions dimensions of the input matrix
// @param mask_dimensions dimensions of the mask matrix
// @return bool true on success and false on failure
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
const uint32_t width, const uint32_t height,
const uint32_t maskWidth, const uint32_t maskHeight);
// Width of the Input array
uint32_t width_;
// Height of the Input array
uint32_t height_;
// Mask dimensions
uint32_t mask_width_;
// Mask dimensions
uint32_t mask_height_;
// Randomize input data
unsigned randomize_seed_;
// Input data
static const uint32_t input_data_[];
};
#endif // TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
+98
Wyświetl plik
@@ -0,0 +1,98 @@
#ifndef EVT_STATS_H_
#define EVT_STATS_H_
#include <stdint.h>
#include <map>
#include <set>
#include <sstream>
#include <utility>
template <class evt_id_t, class evt_weight_t>
class EvtStatsT {
public:
typedef std::mutex mutex_t;
typedef uint64_t evt_count_t;
typedef double evt_avr_t;
struct evt_record_t {
uint64_t count;
evt_avr_t avr;
evt_record_t() : count(0), avr(0) {}
};
typedef typename std::map<evt_id_t, evt_record_t> map_t;
typedef typename std::map<evt_id_t, const char*> labels_t;
// Comparison function
struct cmpfun {
template <typename T> bool operator()(const T& a, const T& b) const {
return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first;
}
};
inline void add_event(evt_id_t id, evt_weight_t weight) {
std::lock_guard<mutex_t> lck(mutex_);
//printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout);
evt_record_t& rec = map_[id];
const evt_count_t prev_count = rec.count;
const evt_count_t new_count = prev_count + 1;
const evt_avr_t prev_avr = rec.avr;
const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count;
rec.count = new_count;
rec.avr = new_avr;
}
void dump() {
std::lock_guard<mutex_t> lck(mutex_);
fprintf(stdout, "Dumping %s\n", path_); fflush(stdout);
typedef typename std::set<std::pair<evt_id_t, evt_record_t>, cmpfun> set_t;
set_t s_(map_.begin(), map_.end());
uint64_t index = 0;
for (auto& e : s_) {
const evt_id_t id = e.first;
const char* label = get_label(id);
std::ostringstream oss;
oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr) << "," << (uint64_t)(e.second.count * e.second.avr);
fprintf(fdes_, "%s\n", oss.str().c_str());
index += 1;
}
fclose(fdes_);
}
const char* get_label(const uint32_t& id) {
auto ret = labels_.insert({id, NULL});
const char* label = ret.first->second;
return label;
}
const char* get_label(const char* id) {
return id;
}
const char* get_label(const std::string& id) {
return id.c_str();
}
void set_label(evt_id_t id, const char* label) {
//printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout);
labels_[id] = label;
}
EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) {
//printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout);
fprintf(fdes_, "Index,Name,Count,Avr,Total\n");
}
private:
mutex_t mutex_;
map_t map_;
labels_t labels_;
FILE* fdes_;
const char* path_;
};
typedef EvtStatsT<uint32_t, uint64_t> EvtStats;
#endif // EVT_STATS_H_
+86
Wyświetl plik
@@ -0,0 +1,86 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_UTIL_HELPER_FUNCS_H_
#define TEST_UTIL_HELPER_FUNCS_H_
#include <time.h>
#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
static inline void Error(std::string error_msg) {
std::cerr << "Error: " << error_msg << std::endl;
}
template <typename T>
void PrintArray(const std::string header, const T* data, const int width, const int height) {
std::clog << header << " :\n";
for (int i = 0; i < height; i++) {
std::clog << "> ";
for (int j = 0; j < width; j++) {
std::clog << data[i * width + j] << " ";
}
std::clog << "\n";
}
}
template <typename T>
bool FillRandom(T* array_ptr, const int width, const int height, const T range_min,
const T range_max, unsigned int seed = 123) {
if (!array_ptr) {
Error("Cannot fill array. NULL pointer.");
return false;
}
if (!seed) seed = (unsigned int)time(NULL);
srand(seed);
double range = double(range_max - range_min) + 1.0;
/* random initialisation of input */
for (int i = 0; i < height; i++)
for (int j = 0; j < width; j++) {
int index = i * width + j;
array_ptr[index] = range_min + T(range * rand() / (RAND_MAX + 1.0));
}
return true;
}
template <typename T> T RoundToPowerOf2(T val) {
int bytes = sizeof(T);
val--;
for (int i = 0; i < bytes; i++) val |= val >> (1 << i);
val++;
return val;
}
template <typename T> bool IsPowerOf2(T val) {
long long long_val = val;
return (((long_val & (-long_val)) - long_val == 0) && (long_val != 0));
}
#endif // TEST_UTIL_HELPER_FUNCS_H_
+1
Wyświetl plik
@@ -0,0 +1 @@
../../src/hsa_rsrc_factory.cpp
+1
Wyświetl plik
@@ -0,0 +1 @@
../../src/hsa_rsrc_factory.h
+179
Wyświetl plik
@@ -0,0 +1,179 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#include "util/perf_timer.h"
PerfTimer::PerfTimer() { freq_in_100mhz_ = MeasureTSCFreqHz(); }
PerfTimer::~PerfTimer() {
while (!timers_.empty()) {
Timer* temp = timers_.back();
timers_.pop_back();
delete temp;
}
}
// New cretaed timer instantance index will be returned
int PerfTimer::CreateTimer() {
Timer* newTimer = new Timer;
newTimer->start = 0;
newTimer->clocks = 0;
#ifdef _WIN32
QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq);
#else
newTimer->freq = (long long)1.0E3;
#endif
/* Push back the address of new Timer instance created */
timers_.push_back(newTimer);
return (int)(timers_.size() - 1);
}
int PerfTimer::StartTimer(int index) {
if (index >= (int)timers_.size()) {
Error("Cannot reset timer. Invalid handle.");
return FAILURE;
}
#ifdef _WIN32
// General Windows timing method
#ifndef _AMD
long long tmpStart;
QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
timers_[index]->start = (double)tmpStart;
#else
// AMD Windows timing method
#endif
#else
// General Linux timing method
#ifndef _AMD
struct timeval s;
gettimeofday(&s, 0);
timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
#else
// AMD timing method
unsigned int unused;
timers_[index]->start = __rdtscp(&unused);
#endif
#endif
return SUCCESS;
}
int PerfTimer::StopTimer(int index) {
double n = 0;
if (index >= (int)timers_.size()) {
Error("Cannot reset timer. Invalid handle.");
return FAILURE;
}
#ifdef _WIN32
#ifndef _AMD
long long n1;
QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
n = (double)n1;
#else
// AMD Window Timing
#endif
#else
// General Linux timing method
#ifndef _AMD
struct timeval s;
gettimeofday(&s, 0);
n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3);
#else
// AMD Linux timing
unsigned int unused;
n = __rdtscp(&unused);
#endif
#endif
n -= timers_[index]->start;
timers_[index]->start = 0;
#ifndef _AMD
timers_[index]->clocks += n;
#else
// timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns
timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms
#endif
return SUCCESS;
}
void PerfTimer::Error(std::string str) { std::cout << str << std::endl; }
double PerfTimer::ReadTimer(int index) {
if (index >= (int)timers_.size()) {
Error("Cannot read timer. Invalid handle.");
return FAILURE;
}
double reading = double(timers_[index]->clocks);
reading = double(reading / timers_[index]->freq);
return reading;
}
uint64_t PerfTimer::CoarseTimestampUs() {
#ifdef _WIN32
uint64_t freqHz, ticks;
QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz);
QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
// Scale numerator and divisor until (ticks * 1000000) fits in uint64_t.
while (ticks > (1ULL << 44)) {
ticks /= 16;
freqHz /= 16;
}
return (ticks * 1000000) / freqHz;
#else
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
#endif
}
uint64_t PerfTimer::MeasureTSCFreqHz() {
// Make a coarse interval measurement of TSC ticks for 1 gigacycles.
unsigned int unused;
uint64_t tscTicksEnd;
uint64_t coarseBeginUs = CoarseTimestampUs();
uint64_t tscTicksBegin = __rdtscp(&unused);
do {
tscTicksEnd = __rdtscp(&unused);
} while (tscTicksEnd - tscTicksBegin < 1000000000);
uint64_t coarseEndUs = CoarseTimestampUs();
// Compute the TSC frequency and round to nearest 100MHz.
uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000;
uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin;
return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs;
}
+83
Wyświetl plik
@@ -0,0 +1,83 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_UTIL_PERF_TIMER_H_
#define TEST_UTIL_PERF_TIMER_H_
// Will use AMD timer or general Linux timer based on compilation flag
// Need to consider platform is Windows or Linux
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(_MSC_VER)
#include <intrin.h>
#include <time.h>
#include <windows.h>
#else
#if defined(__GNUC__)
#include <sys/time.h>
#include <x86intrin.h>
#endif // __GNUC__
#endif // _MSC_VER
#include <iostream>
#include <string>
#include <vector>
class PerfTimer {
public:
enum { SUCCESS = 0, FAILURE = 1 };
PerfTimer();
~PerfTimer();
// General Linux timing method
int CreateTimer();
int StartTimer(int index);
int StopTimer(int index);
// retrieve time
double ReadTimer(int index);
// write into a file
double WriteTimer(int index);
private:
struct Timer {
std::string name; /* name of time object */
long long freq; /* frequency */
double clocks; /* number of ticks at end */
double start; /* start point ticks */
};
std::vector<Timer*> timers_; /* vector to Timer objects */
double freq_in_100mhz_;
// AMD timing method
uint64_t CoarseTimestampUs();
uint64_t MeasureTSCFreqHz();
void Error(std::string str);
};
#endif // TEST_UTIL_PERF_TIMER_H_
+35
Wyświetl plik
@@ -0,0 +1,35 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_UTIL_TEST_ASSERT_H_
#define TEST_UTIL_TEST_ASSERT_H_
#define TEST_ASSERT(cond) \
{ \
if (!(cond)) { \
std::cerr << "Assert failed(" << #cond << ") at " << __FILE__ << ", line " << __LINE__ \
<< std::endl; \
exit(-1); \
} \
}
#endif // TEST_UTIL_TEST_ASSERT_H_
+457
Wyświetl plik
@@ -0,0 +1,457 @@
/******************************************************************************
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/
#ifndef TEST_UTIL_XML_H_
#define TEST_UTIL_XML_H_
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
namespace xml {
class Xml {
public:
typedef std::vector<char> token_t;
struct level_t;
typedef std::vector<level_t*> nodes_t;
typedef std::map<std::string, std::string> opts_t;
struct level_t {
std::string tag;
nodes_t nodes;
opts_t opts;
};
typedef std::vector<level_t*> nodes_vec_t;
typedef std::map<std::string, nodes_vec_t> map_t;
enum { DECL_STATE, BODY_STATE };
static Xml* Create(const std::string& file_name, const Xml* obj = NULL) {
Xml* xml = new Xml(file_name, obj);
if (xml != NULL) {
if (xml->Init() == false) {
delete xml;
xml = NULL;
} else {
const std::size_t pos = file_name.rfind('/');
const std::string path = (pos != std::string::npos) ? file_name.substr(0, pos + 1) : "";
xml->PreProcess();
nodes_t incl_nodes;
for (auto* node : xml->GetNodes("top.include")) {
if (node->opts.find("touch") == node->opts.end()) {
node->opts["touch"] = "";
incl_nodes.push_back(node);
}
}
for (auto* incl : incl_nodes) {
const std::string& incl_name = path + incl->opts["file"];
Xml* ixml = Create(incl_name, xml);
if (ixml == NULL) {
delete xml;
xml = NULL;
break;
} else {
delete ixml;
}
}
if (xml) {
xml->Process();
}
}
}
return xml;
}
static void Destroy(Xml* xml) { delete xml; }
std::string GetName() { return file_name_; }
void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) {
const std::size_t pos = full_tag.rfind('.');
const std::size_t pos1 = (pos == std::string::npos) ? 0 : pos + 1;
const std::string level_tag = full_tag.substr(pos1);
level_t* level = new level_t;
(*map_)[full_tag].push_back(level);
level->tag = level_tag;
level->opts["name"] = name;
level->opts["expr"] = expr;
}
void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) {
std::ostringstream oss;
oss << val;
AddExpr(full_tag, name, oss.str());
}
nodes_t GetNodes(const std::string& global_tag) { return (*map_)[global_tag]; }
template <class F> F ForEach(const F& f_i) {
F f = f_i;
if (map_) {
for (auto& entry : *map_) {
for (auto node : entry.second) {
if (f.fun(entry.first, node) == false) break;
}
}
}
return f;
}
template <class F> F ForEach(const F& f_i) const {
F f = f_i;
if (map_) {
for (auto& entry : *map_) {
for (auto node : entry.second) {
if (f.fun(entry.first, node) == false) break;
}
}
}
return f;
}
struct print_func {
bool fun(const std::string& global_tag, level_t* node) {
for (auto& opt : node->opts) {
std::cout << global_tag << "." << opt.first << " = " << opt.second << std::endl;
}
return true;
}
};
void Print() const {
std::cout << "XML file '" << file_name_ << "':" << std::endl;
ForEach(print_func());
}
private:
Xml(const std::string& file_name, const Xml* obj)
: file_name_(file_name),
file_line_(0),
data_size_(0),
index_(0),
state_(BODY_STATE),
comment_(false),
included_(false),
level_(NULL),
map_(NULL) {
if (obj != NULL) {
map_ = obj->map_;
level_ = obj->level_;
included_ = true;
}
}
struct delete_func {
bool fun(const std::string&, level_t* node) {
delete node;
return true;
}
};
~Xml() {
if (included_ == false) {
ForEach(delete_func());
delete map_;
}
}
bool Init() {
fd_ = open(file_name_.c_str(), O_RDONLY);
if (fd_ == -1) {
// perror((std::string("open XML file ") + file_name_).c_str());
return false;
}
if (map_ == NULL) {
map_ = new map_t;
if (map_ == NULL) return false;
AddLevel("top");
}
return true;
}
void PreProcess() {
uint32_t ind = 0;
char buf[kBufSize];
bool error = false;
while (1) {
const uint32_t pos = lseek(fd_, 0, SEEK_CUR);
uint32_t size = read(fd_, buf, kBufSize);
if (size <= 0) break;
buf[size - 1] = '\0';
if (strncmp(buf, "#include \"", 10) == 0) {
for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {}
if (ind == size) {
fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize);
error = true;
break;
}
buf[ind] = '\0';
size = ind;
lseek(fd_, pos + ind + 1, SEEK_SET);
for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {}
if (ind == size) {
error = true;
break;
}
buf[ind] = '\0';
AddLevel("include");
AddOption("file", &buf[10]);
UpLevel();
}
}
if (error) {
fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf);
exit(1);
}
lseek(fd_, 0, SEEK_SET);
}
void Process() {
token_t remainder;
while (1) {
token_t token = (remainder.size()) ? remainder : NextToken();
remainder.clear();
// token_t token1 = token;
// token1.push_back('\0');
// std::cout << "> " << &token1[0] << std::endl;
// End of file
if (token.size() == 0) break;
switch (state_) {
case BODY_STATE:
if (token[0] == '<') {
bool node_begin = true;
unsigned ind = 1;
if (token[1] == '/') {
node_begin = false;
++ind;
}
unsigned i = ind;
while (i < token.size()) {
if (token[i] == '>') break;
++i;
}
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
if (i == token.size()) {
if (node_begin)
state_ = DECL_STATE;
else
BadFormat(token);
token.push_back('\0');
} else {
token[i] = '\0';
}
const char* tag = &token[ind];
if (node_begin) {
AddLevel(tag);
} else {
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) {
token.back() = '>';
BadFormat(token);
}
UpLevel();
}
} else {
BadFormat(token);
}
break;
case DECL_STATE:
if (token[0] == '>') {
state_ = BODY_STATE;
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
continue;
} else {
token.push_back('\0');
unsigned j = 0;
for (j = 0; j < token.size(); ++j)
if (token[j] == '=') break;
if (j == token.size()) BadFormat(token);
token[j] = '\0';
const char* key = &token[0];
const char* value = &token[j + 1];
AddOption(key, value);
}
break;
default:
std::cout << "XML parser error: wrong state: " << state_ << std::endl;
exit(1);
}
}
}
bool SpaceCheck() const {
bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t'));
return cond;
}
bool LineEndCheck() {
bool found = false;
if (buffer_[index_] == '\n') {
buffer_[index_] = ' ';
++file_line_;
found = true;
comment_ = false;
} else if (comment_ || (buffer_[index_] == '#')) {
found = true;
comment_ = true;
}
return found;
}
token_t NextToken() {
token_t token;
bool in_string = false;
bool special_symb = false;
while (1) {
if (data_size_ == 0) {
data_size_ = read(fd_, buffer_, kBufSize);
if (data_size_ <= 0) break;
}
if (token.empty()) {
while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) {
++index_;
}
}
while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) {
const char symb = buffer_[index_];
bool skip_symb = false;
switch (symb) {
case '\\':
if (special_symb) {
special_symb = false;
} else {
special_symb = true;
skip_symb = true;
}
break;
case '"':
if (special_symb) {
special_symb = false;
} else {
in_string = !in_string;
if (!in_string) {
buffer_[index_] = ' ';
--index_;
}
skip_symb = true;
}
break;
}
if (!skip_symb) token.push_back(symb);
++index_;
}
if (index_ == data_size_) {
index_ = 0;
data_size_ = 0;
} else {
if (special_symb || in_string) BadFormat(token);
break;
}
}
return token;
}
void BadFormat(token_t token) {
token.push_back('\0');
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '"
<< &token[0] << "'" << std::endl;
exit(1);
}
void AddLevel(const std::string& tag) {
level_t* level = new level_t;
level->tag = tag;
if (level_) {
level_->nodes.push_back(level);
stack_.push_back(level_);
}
level_ = level;
std::string global_tag;
for (level_t* level : stack_) {
global_tag += level->tag + ".";
}
global_tag += tag;
(*map_)[global_tag].push_back(level_);
}
void UpLevel() {
level_ = stack_.back();
stack_.pop_back();
}
std::string CurrentLevel() const { return level_->tag; }
void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; }
const std::string file_name_;
unsigned file_line_;
int fd_;
static const size_t kBufSize = 256;
char buffer_[kBufSize];
unsigned data_size_;
unsigned index_;
unsigned state_;
bool comment_;
std::vector<level_t*> stack_;
bool included_;
level_t* level_;
map_t* map_;
};
} // namespace xml
#endif // TEST_UTIL_XML_H_