Replace the HSA standalone test
Change-Id: I89e6826eef412890185ec600cd806c7b516e62a9
This commit is contained in:
+33
-4
@@ -20,8 +20,14 @@
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# Find clang to build the HSA code objects
|
||||
find_package(Clang REQUIRED CONFIG
|
||||
PATHS "/opt/rocm/"
|
||||
PATH_SUFFIXES "llvm/lib/cmake/clang"
|
||||
)
|
||||
|
||||
add_custom_target(mytest) # builds all the tests
|
||||
add_dependencies(mytest roctracer_tool ctrl)
|
||||
add_dependencies(mytest roctracer_tool)
|
||||
|
||||
# Add a 'check' target to run the tests.
|
||||
add_custom_target(check COMMAND ${PROJECT_BINARY_DIR}/run.sh DEPENDS mytest)
|
||||
@@ -67,9 +73,6 @@ foreach(file ${files})
|
||||
configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY)
|
||||
endforeach()
|
||||
|
||||
## Build HSA test
|
||||
add_subdirectory(hsa/test ${PROJECT_BINARY_DIR}/test/hsa)
|
||||
|
||||
## Build hsaco_test.cpp referenc test
|
||||
add_library(hsaco_test SHARED EXCLUDE_FROM_ALL app/hsaco_test.cpp)
|
||||
target_compile_definitions(hsaco_test PRIVATE AMD_INTERNAL_BUILD)
|
||||
@@ -88,6 +91,32 @@ target_include_directories(trace_buffer PRIVATE ${PROJECT_SOURCE_DIR}/src/tracer
|
||||
target_link_libraries(trace_buffer Threads::Threads atomic)
|
||||
add_dependencies(mytest trace_buffer)
|
||||
|
||||
|
||||
## Build the hsa copy test
|
||||
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
|
||||
separate_arguments(CLANG_ARG_LIST UNIX_COMMAND
|
||||
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0
|
||||
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
|
||||
add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
COMMAND clang ${CLANG_ARG_LIST}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
DEPENDS ${INPUT_FILE} clang
|
||||
COMMENT "Building ${OUTPUT_FILE}..."
|
||||
VERBATIM)
|
||||
set(HSACO_TARGET_LIST ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} PARENT_SCOPE)
|
||||
endfunction(generate_hsaco)
|
||||
|
||||
foreach(target_id ${GPU_TARGETS})
|
||||
## generate kernel bitcodes
|
||||
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/hsa/copy.cl ${target_id}_copy.hsaco)
|
||||
endforeach(target_id)
|
||||
add_custom_target(hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
|
||||
|
||||
add_executable(copy EXCLUDE_FROM_ALL hsa/copy.cpp)
|
||||
target_link_libraries(copy hsa-runtime64::hsa-runtime64 Threads::Threads dl)
|
||||
add_dependencies(copy hsaco_targets)
|
||||
add_dependencies(mytest copy)
|
||||
|
||||
## copying run script
|
||||
configure_file(run.sh ${PROJECT_BINARY_DIR} COPYONLY)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink run.sh ${PROJECT_BINARY_DIR}/run_ci.sh)
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
ROCTracer (pid=882619): input from "test/input.xml"
|
||||
0x560905682a90 agent cpu
|
||||
0x560905685470 agent gpu
|
||||
0x5609056bd500 agent gpu
|
||||
538167097046266
|
||||
HSA-trace( hsa_agent_get_info hsa_amd_memory_pool_allocate)
|
||||
HSA-activity-trace()
|
||||
538167097315584:538167097318359 882619:882619 hsa_agent_get_info({handle=94596745407120}, 0, 0x7fffa4fa77a8) = 0
|
||||
538167097322808:538167097323088 882619:882619 hsa_agent_get_info({handle=94596745407120}, 17, 0x7fffa4fa7790) = 0
|
||||
538167097333378:538167097334620 882619:882619 hsa_agent_get_info({handle=94596745417840}, 0, 0x7fffa4fa77a8) = 0
|
||||
538167097335141:538167097335411 882619:882619 hsa_agent_get_info({handle=94596745417840}, 17, 0x7fffa4fa7790) = 0
|
||||
538167097338377:538167097338798 882619:882619 hsa_agent_get_info({handle=94596745647360}, 0, 0x7fffa4fa77a8) = 0
|
||||
538167097339319:538167097339569 882619:882619 hsa_agent_get_info({handle=94596745647360}, 17, 0x7fffa4fa7790) = 0
|
||||
538167119333344:538167119348423 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 72, 0, 0x7f7d8f7f9c30) = 0
|
||||
538167119471866:538167119484039 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 256, 0, 0x7f7d8f7f9c30) = 0
|
||||
538167119592754:538167119603945 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 256, 0, 0x7f7d8f7f9c30) = 0
|
||||
@@ -0,0 +1,69 @@
|
||||
ROCTracer (pid=882613):
|
||||
0x555e40292a90 agent cpu
|
||||
0x555e40295470 agent gpu
|
||||
0x555e402cd500 agent gpu
|
||||
538166319764062
|
||||
HSA-trace()
|
||||
HSA-activity-trace()
|
||||
538166319915287:538166319920086 882613:882613 hsa_amd_profiling_async_copy_enable(1) = 0
|
||||
538166320173874:538166320174335 882613:882613 hsa_agent_get_info({handle=93863291726480}, 0, 0x7ffddcd49dd8) = 0
|
||||
538166320175287:538166320175548 882613:882613 hsa_agent_get_info({handle=93863291726480}, 17, 0x7ffddcd49dc0) = 0
|
||||
538166320177511:538166320178523 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 0, 0x7ffddcd49bdc) = 0
|
||||
538166320179144:538166320179415 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 1, 0x7ffddcd49be0) = 0
|
||||
538166320180337:538166320180587 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 2, 0x7ffddcd49c00) = 0
|
||||
538166320181198:538166320181449 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291726976}, 6, 0x7ffddcd49c08) = 0
|
||||
538166320183613:538166320183873 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 0, 0x7ffddcd49bdc) = 0
|
||||
538166320184484:538166320184745 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 1, 0x7ffddcd49be0) = 0
|
||||
538166320185336:538166320185587 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 2, 0x7ffddcd49c00) = 0
|
||||
538166320186178:538166320186428 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291727280}, 6, 0x7ffddcd49c08) = 0
|
||||
538166320187390:538166320188031 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 0, 0x7ffddcd49bdc) = 0
|
||||
538166320188632:538166320188883 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 1, 0x7ffddcd49be0) = 0
|
||||
538166320189474:538166320189724 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 2, 0x7ffddcd49c00) = 0
|
||||
538166320190205:538166320190456 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291728112}, 6, 0x7ffddcd49c08) = 0
|
||||
538166320190205:538166320191347 882613:882613 hsa_amd_agent_iterate_memory_pools({handle=93863291726480}, 1, 0x7ffddcd49e18) = 0
|
||||
538166320194203:538166320196167 882613:882613 hsa_agent_get_info({handle=93863291737200}, 0, 0x7ffddcd49dd8) = 0
|
||||
538166320196698:538166320196958 882613:882613 hsa_agent_get_info({handle=93863291737200}, 17, 0x7ffddcd49dc0) = 0
|
||||
538166320198170:538166320198421 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 0, 0x7ffddcd49bcc) = 0
|
||||
538166320198902:538166320199162 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 1, 0x7ffddcd49bd0) = 0
|
||||
538166320199643:538166320199894 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 2, 0x7ffddcd49bf0) = 0
|
||||
538166320200364:538166320200615 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291720576}, 6, 0x7ffddcd49bf8) = 0
|
||||
538166320201386:538166320201727 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291721456}, 0, 0x7ffddcd49bcc) = 0
|
||||
538166320201386:538166320202288 882613:882613 hsa_amd_agent_iterate_memory_pools({handle=93863291737200}, 1, 0x7ffddcd49e18) = 0
|
||||
538166320203881:538166320204332 882613:882613 hsa_agent_get_info({handle=93863291966720}, 0, 0x7ffddcd49dd8) = 0
|
||||
538166320204843:538166320205103 882613:882613 hsa_agent_get_info({handle=93863291966720}, 17, 0x7ffddcd49dc0) = 0
|
||||
538166320206366:538166320206606 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 0, 0x7ffddcd49bcc) = 0
|
||||
538166320207087:538166320207348 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 1, 0x7ffddcd49bd0) = 0
|
||||
538166320207818:538166320208069 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 2, 0x7ffddcd49bf0) = 0
|
||||
538166320209782:538166320210043 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291968448}, 6, 0x7ffddcd49bf8) = 0
|
||||
538166320210724:538166320210984 882613:882613 hsa_amd_memory_pool_get_info({handle=93863291969440}, 0, 0x7ffddcd49bcc) = 0
|
||||
538166320210724:538166320211566 882613:882613 hsa_amd_agent_iterate_memory_pools({handle=93863291966720}, 1, 0x7ffddcd49e18) = 0
|
||||
538166320210724:538166320213028 882613:882613 hsa_iterate_agents(1, 0) = 0
|
||||
538166320294151:538166336777646 882613:882616 hsa_queue_create({handle=93863291737200}, 1024, 1, 0, 0, 0, 0, 0x7f91ed49ac88) = 0
|
||||
538166336809796:538166336869439 882613:882616 hsa_code_object_reader_create_from_file(5, 0x7f91ed49acb8) = 0
|
||||
538166336870721:538166336883686 882613:882616 hsa_executable_create_alt(1, 0, 0x0, 0x7f91ed49acc0) = 0
|
||||
538166336884868:538166337268842 882613:882616 hsa_executable_load_agent_code_object({handle=140259270599920}, {handle=93863291737200}, {handle=140259270599248}, 0x0, 0) = 0
|
||||
538166337270696:538166337974663 882613:882616 hsa_executable_freeze({handle=140259270599920}, 0x0) = 0
|
||||
538166337978250:538166337979312 882613:882616 hsa_executable_get_symbol_by_name({handle=140259270599920}, 0x7f90a80029b0, 0x7f91ed49abe8, 0x7f91ed49ac08) = 1013
|
||||
538166337982077:538166337982708 882613:882616 hsa_executable_get_symbol_by_name({handle=140259270599920}, 0x7f90a80038b0, 0x7f91ed49abe8, 0x7f91ed49ac08) = 0
|
||||
538166337984311:538166337984983 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 22, 0x7f91ed49acd0) = 0
|
||||
538166337985904:538166337986295 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 14, 0x7f91ed49acd8) = 0
|
||||
538166337987207:538166337987597 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 13, 0x7f91ed49acdc) = 0
|
||||
538166337988499:538166337988880 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 11, 0x7f91ed49ace0) = 0
|
||||
538166337989792:538166337990182 882613:882616 hsa_executable_symbol_get_info({handle=140259270612544}, 12, 0x7f91ed49ace4) = 0
|
||||
538166337991855:538166338004199 882613:882616 hsa_amd_memory_pool_allocate({handle=93863291727280}, 72, 0, 0x7f91ed49ac30) = 0
|
||||
538166338005371:538166338705110 882613:882616 hsa_amd_agents_allow_access(3, 0x555e40337c20, 0, 0x7f91ecc74000) = 0
|
||||
538166338706453:538166338719107 882613:882616 hsa_amd_memory_pool_allocate({handle=93863291727280}, 256, 0, 0x7f91ed49ac30) = 0
|
||||
538166338720059:538166338850665 882613:882616 hsa_amd_agents_allow_access(3, 0x555e40337c20, 0, 0x7f91ecc72000) = 0
|
||||
538166338851607:538166338864782 882613:882616 hsa_amd_memory_pool_allocate({handle=93863291727280}, 256, 0, 0x7f91ed49ac30) = 0
|
||||
538166338865723:538166338978927 882613:882616 hsa_amd_agents_allow_access(3, 0x555e40337c20, 0, 0x7f91ecc70000) = 0
|
||||
538166338980309:538166338982524 882613:882616 hsa_amd_signal_create(1, 0, 0, 0, 0x7f91ed49ac90) = 0
|
||||
538166338983816:538166338984628 882613:882616 hsa_queue_load_write_index_relaxed(0x7f91edff6000) = 0
|
||||
538166338985549:538166338986160 882613:882616 hsa_queue_load_read_index_relaxed(0x7f91edff6000) = 0
|
||||
538166338987032:538166338987453 882613:882616 hsa_queue_store_write_index_screlease(0x7f91edff6000, 1) = void
|
||||
538166338988515:538166338989397 882613:882616 hsa_signal_store_screlease({handle=140264755457280}, 0) = void
|
||||
538166338990278:538166339006328 882613:882616 hsa_signal_wait_scacquire({handle=140264755456768}, 0, 0, 18446744073709551615, 0) = 0
|
||||
538166339009224:538166339010346 882613:882616 hsa_signal_destroy({handle=140264755456768}) = 0
|
||||
538166339011689:538166339037938 882613:882616 hsa_memory_free(0x7f91ecc72000) = 0
|
||||
538166339038930:538166339052135 882613:882616 hsa_memory_free(0x7f91ecc70000) = 0
|
||||
538166339053307:538166339082843 882613:882616 hsa_executable_destroy({handle=140259270599920}) = 0
|
||||
538166339083895:538166339087181 882613:882616 hsa_code_object_reader_destroy({handle=140259270599248}) = 0
|
||||
Plik diff jest za duży
Load Diff
Plik diff jest za duży
Load Diff
@@ -1,6 +1,6 @@
|
||||
# dummy
|
||||
MatrixTranspose_dryrun_trace --check-none
|
||||
ctrl_dryrun_trace --check-none
|
||||
copy_dryrun_trace --check-none
|
||||
MatrixTranspose_ctest_trace --check-count .*
|
||||
MatrixTranspose_test_trace --check-count .*
|
||||
MatrixTranspose_hipaact_test_trace --check-count .* --ignore-count hipMemcpy|__hipPushCallConfiguration|hipLaunchKernel|__hipPopCallConfiguration
|
||||
@@ -11,8 +11,8 @@ MatrixTranspose_hip_period_trace --check-events .* --ignore-event hipMalloc|hipF
|
||||
MatrixTranspose_hip_flush_trace --check-order .*
|
||||
MatrixTranspose_kfd_trace --check-events .*
|
||||
MatrixTranspose_hip_input_trace --check-events .*
|
||||
ctrl_hsa_trace --check-event .*
|
||||
ctrl_hsa_input_trace --check-event .*
|
||||
copy_hsa_trace --check-events .*
|
||||
copy_hsa_input_trace --check-events .*
|
||||
hsa_co_trace --check-none
|
||||
code_obj_trace --check-none
|
||||
trace_buffer_trace --check-none
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
[MITx11 license]
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,4 +0,0 @@
|
||||
# HSA-class
|
||||
```
|
||||
HSA high level C++ API
|
||||
```
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -18,9 +18,7 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/**
|
||||
dummy kernel
|
||||
*/
|
||||
__kernel void DummyKernel() {
|
||||
uint tid = get_global_id(0);
|
||||
__kernel void copy(__global unsigned int* a, __global unsigned int* b) {
|
||||
uint tid = get_global_id(0);
|
||||
a[tid] = b[tid];
|
||||
}
|
||||
@@ -0,0 +1,454 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_amd.h"
|
||||
#include "hsa_ext_image.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#define CHECK(x) \
|
||||
do { \
|
||||
if ((x) != HSA_STATUS_SUCCESS) { \
|
||||
assert(false); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (false);
|
||||
|
||||
struct Device {
|
||||
struct Memory {
|
||||
hsa_amd_memory_pool_t pool;
|
||||
bool fine;
|
||||
bool kernarg;
|
||||
size_t size;
|
||||
size_t granule;
|
||||
};
|
||||
|
||||
hsa_agent_t agent;
|
||||
char name[64];
|
||||
std::vector<Memory> pools;
|
||||
uint32_t fine;
|
||||
uint32_t coarse;
|
||||
static std::vector<hsa_agent_t> all_devices;
|
||||
};
|
||||
std::vector<hsa_agent_t> Device::all_devices;
|
||||
|
||||
struct Kernel {
|
||||
uint64_t handle;
|
||||
uint32_t scratch;
|
||||
uint32_t group;
|
||||
uint32_t kernarg_size;
|
||||
uint32_t kernarg_align;
|
||||
};
|
||||
|
||||
// Assumes bitfield layout is little endian.
|
||||
// Assumes std::atomic<uint16_t> is binary compatible with uint16_t and uses HW atomics.
|
||||
union AqlHeader {
|
||||
struct {
|
||||
uint16_t type : 8;
|
||||
uint16_t barrier : 1;
|
||||
uint16_t acquire : 2;
|
||||
uint16_t release : 2;
|
||||
uint16_t reserved : 3;
|
||||
};
|
||||
uint16_t raw;
|
||||
};
|
||||
|
||||
struct BarrierValue {
|
||||
AqlHeader header;
|
||||
uint8_t AmdFormat;
|
||||
uint8_t reserved;
|
||||
uint32_t reserved1;
|
||||
hsa_signal_t signal;
|
||||
hsa_signal_value_t value;
|
||||
hsa_signal_value_t mask;
|
||||
uint32_t cond;
|
||||
uint32_t reserved2;
|
||||
uint64_t reserved3;
|
||||
uint64_t reserved4;
|
||||
hsa_signal_t completion_signal;
|
||||
};
|
||||
|
||||
union Aql {
|
||||
AqlHeader header;
|
||||
hsa_kernel_dispatch_packet_t dispatch;
|
||||
hsa_barrier_and_packet_t barrier_and;
|
||||
hsa_barrier_or_packet_t barrier_or;
|
||||
BarrierValue barrier_value;
|
||||
};
|
||||
|
||||
struct OCLHiddenArgs {
|
||||
uint64_t offset_x;
|
||||
uint64_t offset_y;
|
||||
uint64_t offset_z;
|
||||
void* printf_buffer;
|
||||
void* enqueue;
|
||||
void* enqueue2;
|
||||
void* multi_grid;
|
||||
};
|
||||
|
||||
struct hip_hiddens {
|
||||
uint64_t offset_x;
|
||||
uint64_t offset_y;
|
||||
uint64_t offset_z;
|
||||
uint64_t _;
|
||||
uint64_t _2;
|
||||
uint64_t _3;
|
||||
uint64_t multi_grid_sync;
|
||||
};
|
||||
|
||||
|
||||
std::vector<Device> cpu, gpu;
|
||||
Device::Memory kernarg;
|
||||
|
||||
struct CodeObject {
|
||||
hsa_file_t file;
|
||||
hsa_code_object_reader_t code_obj_rdr;
|
||||
hsa_executable_t executable;
|
||||
};
|
||||
|
||||
bool DeviceDiscovery() {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void*) {
|
||||
hsa_status_t err;
|
||||
|
||||
Device dev;
|
||||
dev.agent = agent;
|
||||
|
||||
dev.fine = -1u;
|
||||
dev.coarse = -1u;
|
||||
|
||||
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, dev.name);
|
||||
CHECK(err);
|
||||
|
||||
hsa_device_type_t type;
|
||||
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(
|
||||
agent,
|
||||
[](hsa_amd_memory_pool_t pool, void* data) {
|
||||
std::vector<Device::Memory>& pools =
|
||||
*reinterpret_cast<std::vector<Device::Memory>*>(data);
|
||||
hsa_status_t err;
|
||||
|
||||
hsa_amd_segment_t segment;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK(err);
|
||||
|
||||
if (segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;
|
||||
|
||||
uint32_t flags;
|
||||
err =
|
||||
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
|
||||
CHECK(err);
|
||||
|
||||
Device::Memory mem;
|
||||
mem.pool = pool;
|
||||
mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
|
||||
mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule);
|
||||
CHECK(err);
|
||||
|
||||
pools.push_back(mem);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
(void*)&dev.pools);
|
||||
|
||||
if (!dev.pools.empty()) {
|
||||
for (size_t i = 0; i < dev.pools.size(); i++) {
|
||||
if (dev.pools[i].fine && dev.pools[i].kernarg && dev.fine == -1u) dev.fine = i;
|
||||
if (dev.pools[i].fine && !dev.pools[i].kernarg) dev.fine = i;
|
||||
if (!dev.pools[i].fine) dev.coarse = i;
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_CPU)
|
||||
cpu.push_back(dev);
|
||||
else
|
||||
gpu.push_back(dev);
|
||||
|
||||
Device::all_devices.push_back(dev.agent);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr);
|
||||
|
||||
[]() {
|
||||
for (auto& dev : cpu) {
|
||||
for (auto& mem : dev.pools) {
|
||||
if (mem.fine && mem.kernarg) {
|
||||
kernarg = mem;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}();
|
||||
CHECK(err);
|
||||
|
||||
if (cpu.empty() || gpu.empty() || kernarg.pool.handle == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LoadCodeObject(std::string filename, hsa_agent_t agent, CodeObject& code_object) {
|
||||
hsa_status_t err;
|
||||
|
||||
code_object.file = open(filename.c_str(), O_RDONLY);
|
||||
if (code_object.file == -1) return false;
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr, &code_object.executable);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_executable_load_agent_code_object(code_object.executable, agent,
|
||||
code_object.code_obj_rdr, nullptr, nullptr);
|
||||
if (err != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
err = hsa_executable_freeze(code_object.executable, nullptr);
|
||||
CHECK(err);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GetKernel(const CodeObject& code_object, std::string kernel, hsa_agent_t agent, Kernel& kern) {
|
||||
hsa_executable_symbol_t symbol;
|
||||
hsa_status_t err =
|
||||
hsa_executable_get_symbol_by_name(code_object.executable, kernel.c_str(), &agent, &symbol);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
err = hsa_executable_get_symbol_by_name(code_object.executable, (kernel + ".kd").c_str(),
|
||||
&agent, &symbol);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&kern.handle);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kern.scratch);
|
||||
CHECK(err);
|
||||
// printf("Scratch: %d\n", kern.scratch);
|
||||
|
||||
err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
||||
&kern.group);
|
||||
CHECK(err);
|
||||
// printf("LDS: %d\n", kern.group);
|
||||
|
||||
// Remaining needs code object v2 or comgr.
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kern.kernarg_size);
|
||||
CHECK(err);
|
||||
// printf("Kernarg Size: %d\n", kern.kernarg_size);
|
||||
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kern.kernarg_align);
|
||||
CHECK(err);
|
||||
// printf("Kernarg Align: %d\n", kern.kernarg_align);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Not for parallel insertion.
|
||||
bool SubmitPacket(hsa_queue_t* queue, Aql& pkt) {
|
||||
size_t mask = queue->size - 1;
|
||||
Aql* ring = (Aql*)queue->base_address;
|
||||
|
||||
uint64_t write = hsa_queue_load_write_index_relaxed(queue);
|
||||
uint64_t read = hsa_queue_load_read_index_relaxed(queue);
|
||||
if (write - read + 1 > queue->size) return false;
|
||||
|
||||
Aql& dst = ring[write & mask];
|
||||
|
||||
uint16_t header = pkt.header.raw;
|
||||
pkt.header.raw = dst.header.raw;
|
||||
dst = pkt;
|
||||
__atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE);
|
||||
pkt.header.raw = header;
|
||||
|
||||
hsa_queue_store_write_index_release(queue, write + 1);
|
||||
hsa_signal_store_screlease(queue->doorbell_signal, write);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void* hsaMalloc(size_t size, const Device::Memory& mem) {
|
||||
void* ret;
|
||||
hsa_status_t err = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &ret);
|
||||
CHECK(err);
|
||||
err = hsa_amd_agents_allow_access(Device::all_devices.size(), &Device::all_devices[0], nullptr,
|
||||
ret);
|
||||
CHECK(err);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* hsaMalloc(size_t size, const Device& dev, bool fine) {
|
||||
uint32_t index = fine ? dev.fine : dev.coarse;
|
||||
assert(index != -1u && "Memory type unavailable.");
|
||||
return hsaMalloc(size, dev.pools[index]);
|
||||
}
|
||||
|
||||
void test_func(int kiter, int diter, int agents) {
|
||||
for (int device_index = 0; device_index < agents; ++device_index) {
|
||||
hsa_status_t err;
|
||||
|
||||
hsa_queue_t* queue;
|
||||
err = hsa_queue_create(gpu[device_index].agent, 1024, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr,
|
||||
0, 0, &queue);
|
||||
CHECK(err);
|
||||
|
||||
CodeObject code_object;
|
||||
if (!LoadCodeObject(std::string(gpu[device_index].name) + "_copy.hsaco",
|
||||
gpu[device_index].agent, code_object)) {
|
||||
printf("Kernel file not found or not usable with given agent.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
Kernel copy;
|
||||
if (!GetKernel(code_object, "copy", gpu[device_index].agent, copy)) {
|
||||
printf("Test kernel not found.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
for (int i = 0; i < kiter; ++i) {
|
||||
struct args_t {
|
||||
uint32_t* a;
|
||||
uint32_t* b;
|
||||
OCLHiddenArgs hidden;
|
||||
};
|
||||
|
||||
args_t* args;
|
||||
args = (args_t*)hsaMalloc(sizeof(args_t), kernarg);
|
||||
memset(args, 0, sizeof(args_t));
|
||||
|
||||
uint32_t* a = (uint32_t*)hsaMalloc(64 * sizeof(uint32_t), kernarg);
|
||||
uint32_t* b = (uint32_t*)hsaMalloc(64 * sizeof(uint32_t), kernarg);
|
||||
|
||||
memset(a, 0, 64 * sizeof(uint32_t));
|
||||
memset(b, 1, 64 * sizeof(uint32_t));
|
||||
|
||||
hsa_signal_t signal;
|
||||
// Use interrupts.
|
||||
err = hsa_amd_signal_create(1, 0, nullptr, 0, &signal);
|
||||
CHECK(err);
|
||||
|
||||
for (int j = 1; j <= diter; ++j) {
|
||||
Aql packet = {0};
|
||||
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.dispatch.setup = 1;
|
||||
packet.dispatch.workgroup_size_x = 64;
|
||||
packet.dispatch.workgroup_size_y = 1;
|
||||
packet.dispatch.workgroup_size_z = 1;
|
||||
packet.dispatch.grid_size_x = 64;
|
||||
packet.dispatch.grid_size_y = 1;
|
||||
packet.dispatch.grid_size_z = 1;
|
||||
|
||||
packet.dispatch.group_segment_size = copy.group;
|
||||
packet.dispatch.private_segment_size = copy.scratch;
|
||||
packet.dispatch.kernel_object = copy.handle;
|
||||
|
||||
packet.dispatch.kernarg_address = args;
|
||||
if (j == diter) packet.dispatch.completion_signal = signal;
|
||||
|
||||
args->a = a;
|
||||
args->b = b;
|
||||
SubmitPacket(queue, packet);
|
||||
}
|
||||
hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, -1, HSA_WAIT_STATE_BLOCKED);
|
||||
err = hsa_signal_destroy(signal);
|
||||
CHECK(err);
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
if (a[i] != b[i]) {
|
||||
printf("error at %d: expected %d, got %d\n", i, b[i], a[i]);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
err = hsa_memory_free(a);
|
||||
CHECK(err);
|
||||
err = hsa_memory_free(b);
|
||||
CHECK(err);
|
||||
}
|
||||
|
||||
err = hsa_executable_destroy(code_object.executable);
|
||||
CHECK(err);
|
||||
err = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
|
||||
CHECK(err);
|
||||
close(code_object.file);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
const char* kiter_s = getenv("ROCP_KITER");
|
||||
const char* diter_s = getenv("ROCP_DITER");
|
||||
const char* agents_s = getenv("ROCP_AGENTS");
|
||||
const char* threads_s = getenv("ROCP_THRS");
|
||||
|
||||
int kiter = (kiter_s != nullptr) ? atoi(kiter_s) : 1;
|
||||
int diter = (diter_s != nullptr) ? atoi(diter_s) : 1;
|
||||
int agents = (agents_s != nullptr) ? atoi(agents_s) : 1;
|
||||
int threads = (threads_s != nullptr) ? atoi(threads_s) : 1;
|
||||
|
||||
hsa_status_t err;
|
||||
err = hsa_init();
|
||||
CHECK(err);
|
||||
|
||||
if (!DeviceDiscovery()) {
|
||||
printf("Usable devices not found.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<std::thread> t(threads);
|
||||
for (int n = 0; n < threads; ++n)
|
||||
t[n] = std::thread(test_func, kiter, diter, std::min(agents, (int)gpu.size()));
|
||||
for (int n = 0; n < threads; ++n) t[n].join();
|
||||
|
||||
err = hsa_shut_down();
|
||||
CHECK(err);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,80 +0,0 @@
|
||||
#!/bin/sh -x
|
||||
SO_EXT="hsaco"
|
||||
|
||||
TEST_NAME=$1
|
||||
DST_DIR=$2
|
||||
ROCM_DIR=$3
|
||||
TGT_LIST=$4
|
||||
|
||||
if [ -z "$TEST_NAME" ] ; then
|
||||
echo "Usage: $0 <test name> <dst dir>"
|
||||
echo " Will look for <test name>.cl and will build <test name>.$SO_EXT dynamic code object library"
|
||||
exit 1
|
||||
fi
|
||||
OBJ_NAME=$(echo "_$(basename $TEST_NAME)" | sed -e 's/_./\U&\E/g' -e 's/_//g')
|
||||
|
||||
if [ -z "$DST_DIR" ] ; then
|
||||
DST_DIR=$(dirname TEST_NAME)
|
||||
fi
|
||||
|
||||
if [ -z "$ROCM_DIR" ] ; then
|
||||
ROCM_DIR=/opt/rocm
|
||||
fi
|
||||
|
||||
if [ -z "$TGT_LIST" ] ; then
|
||||
TGT_LIST=`$ROCM_DIR/bin/rocminfo | grep "amdgcn-amd-amdhsa--" | head -n 1 | sed -n "s/^.*amdgcn-amd-amdhsa--\(\w*\).*$/\1/p"`
|
||||
fi
|
||||
|
||||
if [ -z "$TGT_LIST" ] ; then
|
||||
echo "Error: GPU targets not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
OCL_VER="2.0"
|
||||
|
||||
if [ -e $ROCM_DIR/llvm ] ; then
|
||||
LLVM_DIR=$ROCM_DIR/llvm
|
||||
LIB_DIR=$ROCM_DIR/lib
|
||||
else
|
||||
LLVM_DIR=$ROCM_DIR/hcc
|
||||
LIB_DIR=$LLVM_DIR/lib
|
||||
fi
|
||||
|
||||
# Determine whether using new or old device-libs layout
|
||||
if [ -e $LIB_DIR/bitcode/opencl.amdgcn.bc ]; then
|
||||
BC_DIR=$LIB_DIR/bitcode
|
||||
elif [ -e $LIB_DIR/opencl.amdgcn.bc ]; then
|
||||
BC_DIR=$LIB_DIR
|
||||
elif [ -e $ROCM_DIR/amdgcn/bitcode/opencl.bc ]; then
|
||||
BC_DIR=$ROCM_DIR/amdgcn/bitcode
|
||||
else
|
||||
echo "Error: Cannot find amdgcn bitcode directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CLANG_ROOT=$LLVM_DIR/lib/clang
|
||||
CLANG_DIR=`ls -d $CLANG_ROOT/* | head -n 1`
|
||||
if [ "$CLANG_DIR" = "" ] ; then
|
||||
echo "Error: LLVM clang library was not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BIN_DIR=$LLVM_DIR/bin
|
||||
INC_DIR=$CLANG_DIR/include
|
||||
if [ -e $BC_DIR/opencl.amdgcn.bc ]; then
|
||||
BITCODE_OPTS="-nogpulib \
|
||||
-Xclang -mlink-bitcode-file -Xclang $BC_DIR/opencl.amdgcn.bc \
|
||||
-Xclang -mlink-bitcode-file -Xclang $BC_DIR/ockl.amdgcn.bc \
|
||||
-Xclang -mlink-bitcode-file -Xclang $BC_DIR/ocml.amdgcn.bc"
|
||||
else
|
||||
BITCODE_OPTS="--hip-device-lib-path=$BC_DIR"
|
||||
fi
|
||||
|
||||
for GFXIP in $TGT_LIST ; do
|
||||
OBJ_PREF=$GFXIP
|
||||
OBJ_FILE="${OBJ_PREF}_${OBJ_NAME}.$SO_EXT"
|
||||
$BIN_DIR/clang -cl-std=CL$OCL_VER -include $INC_DIR/opencl-c.h $BITCODE_OPTS -target amdgcn-amd-amdhsa -mcpu=$GFXIP $TEST_NAME.cl -o $DST_DIR/$OBJ_FILE
|
||||
echo "'$OBJ_FILE' generated"
|
||||
done
|
||||
|
||||
exit 0
|
||||
@@ -1,799 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Callback function to get available in the system agents
|
||||
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
HsaRsrcFactory* hsa_rsrc = reinterpret_cast<HsaRsrcFactory*>(data);
|
||||
const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent);
|
||||
if (agent_info != NULL) status = HSA_STATUS_SUCCESS;
|
||||
return status;
|
||||
}
|
||||
|
||||
// This function checks to see if the provided
|
||||
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
|
||||
// the function adds an additional requirement that the pool have the
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
|
||||
// pools must NOT have this property.
|
||||
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
|
||||
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
|
||||
// no pool was found meeting the requirements. If an error is encountered, we
|
||||
// return that error.
|
||||
static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
|
||||
hsa_status_t err;
|
||||
hsa_amd_segment_t segment;
|
||||
uint32_t flag;
|
||||
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
|
||||
if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, false);
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, true);
|
||||
}
|
||||
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) {
|
||||
hsa_status_t status;
|
||||
|
||||
cpu_pool_ = NULL;
|
||||
kern_arg_pool_ = NULL;
|
||||
|
||||
InitHsaApiTable(NULL);
|
||||
|
||||
// Initialize the Hsa Runtime
|
||||
if (initialize_hsa_) {
|
||||
status = hsa_api_.hsa_init();
|
||||
CHECK_STATUS("Error in hsa_init", status);
|
||||
}
|
||||
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR);
|
||||
if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR);
|
||||
|
||||
// Get AqlProfile API table
|
||||
aqlprofile_api_ = {0};
|
||||
#ifdef ROCP_LD_AQLPROFILE
|
||||
status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
#else
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||
hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||
sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
#endif
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||
sizeof(loader_api_), &loader_api_);
|
||||
CHECK_STATUS("loader API table query failed", status);
|
||||
|
||||
// Instantiate HSA timer
|
||||
timer_ = new HsaTimer(&hsa_api_);
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// Time correlation
|
||||
const uint32_t corr_iters = 1000;
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters);
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
|
||||
|
||||
// System timeout
|
||||
timeout_ =
|
||||
(timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
delete timer_;
|
||||
for (auto p : cpu_list_) delete p;
|
||||
for (auto p : gpu_list_) delete p;
|
||||
if (initialize_hsa_) {
|
||||
hsa_status_t status = hsa_api_.hsa_shut_down();
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
}
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
if (hsa_api_.hsa_init == NULL) {
|
||||
if (table != NULL) {
|
||||
hsa_api_.hsa_init = table->core_->hsa_init_fn;
|
||||
hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn;
|
||||
hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn;
|
||||
hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn;
|
||||
|
||||
hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
|
||||
hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed =
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed =
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
|
||||
hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
|
||||
hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn;
|
||||
hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn;
|
||||
hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn;
|
||||
hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;
|
||||
|
||||
hsa_api_.hsa_code_object_reader_create_from_file =
|
||||
table->core_->hsa_code_object_reader_create_from_file_fn;
|
||||
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object =
|
||||
table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
|
||||
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
|
||||
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
|
||||
|
||||
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table =
|
||||
table->core_->hsa_system_get_major_extension_table_fn;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools =
|
||||
table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn;
|
||||
hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn;
|
||||
hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
|
||||
hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled =
|
||||
table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time =
|
||||
table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time =
|
||||
table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
|
||||
} else {
|
||||
hsa_api_.hsa_init = hsa_init;
|
||||
hsa_api_.hsa_shut_down = hsa_shut_down;
|
||||
hsa_api_.hsa_agent_get_info = hsa_agent_get_info;
|
||||
hsa_api_.hsa_iterate_agents = hsa_iterate_agents;
|
||||
|
||||
hsa_api_.hsa_queue_create = hsa_queue_create;
|
||||
hsa_api_.hsa_queue_destroy = hsa_queue_destroy;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed;
|
||||
|
||||
hsa_api_.hsa_signal_create = hsa_signal_create;
|
||||
hsa_api_.hsa_signal_destroy = hsa_signal_destroy;
|
||||
hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed;
|
||||
hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed;
|
||||
hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire;
|
||||
hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease;
|
||||
|
||||
hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file;
|
||||
hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
|
||||
hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
|
||||
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
|
||||
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
|
||||
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
|
||||
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
|
||||
|
||||
hsa_api_.hsa_system_get_info = hsa_system_get_info;
|
||||
hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate;
|
||||
hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access;
|
||||
hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy;
|
||||
|
||||
hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
|
||||
void* handle = dlopen(kAqlProfileLib, RTLD_NOW);
|
||||
if (handle == NULL) {
|
||||
fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror());
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
dlerror(); /* Clear any existing error */
|
||||
|
||||
api->hsa_ven_amd_aqlprofile_error_string =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_error_string");
|
||||
api->hsa_ven_amd_aqlprofile_validate_event =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_validate_event");
|
||||
api->hsa_ven_amd_aqlprofile_start =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start");
|
||||
api->hsa_ven_amd_aqlprofile_stop =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop");
|
||||
#ifdef AQLPROF_NEW_API
|
||||
api->hsa_ven_amd_aqlprofile_read =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read");
|
||||
#endif
|
||||
api->hsa_ven_amd_aqlprofile_legacy_get_pm4 =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_get_info");
|
||||
api->hsa_ven_amd_aqlprofile_iterate_data =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_iterate_data");
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Add system agent info
|
||||
const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
// Determine if device is a Gpu agent
|
||||
hsa_status_t status;
|
||||
AgentInfo* agent_info = NULL;
|
||||
|
||||
hsa_device_type_t type;
|
||||
status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_CPU) {
|
||||
agent_info = new AgentInfo{};
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
|
||||
agent_info->dev_index = cpu_list_.size();
|
||||
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool,
|
||||
&agent_info->kern_arg_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL))
|
||||
kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
agent_info->gpu_pool = {};
|
||||
|
||||
cpu_list_.push_back(agent_info);
|
||||
cpu_agents_.push_back(agent);
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_GPU) {
|
||||
agent_info = new AgentInfo{};
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
strncpy(agent_info->gfxip, agent_info->name, 4);
|
||||
agent_info->gfxip[4] = '\0';
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
|
||||
agent_info->cpu_pool = {};
|
||||
agent_info->kern_arg_pool = {};
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
|
||||
|
||||
// GFX8 and GFX9 SGPR/VGPR block sizes
|
||||
agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2;
|
||||
agent_info->sgpr_block_size = 8;
|
||||
agent_info->vgpr_block_size = 4;
|
||||
|
||||
// Set GPU index
|
||||
agent_info->dev_index = gpu_list_.size();
|
||||
gpu_list_.push_back(agent_info);
|
||||
gpu_agents_.push_back(agent);
|
||||
}
|
||||
|
||||
if (agent_info) agent_map_[agent.handle] = agent_info;
|
||||
|
||||
return agent_info;
|
||||
}
|
||||
|
||||
// Return systen agent info
|
||||
const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
|
||||
const AgentInfo* agent_info = NULL;
|
||||
auto it = agent_map_.find(agent.handle);
|
||||
if (it != agent_map_.end()) {
|
||||
agent_info = it->second;
|
||||
}
|
||||
return agent_info;
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(gpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = gpu_list_[idx];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(cpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = cpu_list_[idx];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
|
||||
hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
// @param value Initial value of signal object
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
// @return bool true if successful, false otherwise
|
||||
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
|
||||
hsa_status_t status;
|
||||
status = hsa_api_.hsa_signal_create(value, 0, NULL, signal);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate memory to pass kernel parameters.
|
||||
// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
if (!cpu_agents_.empty()) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the kernel arguments
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
|
||||
}
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate system memory accessible by both CPU and GPU
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
if (!cpu_agents_.empty()) {
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer);
|
||||
}
|
||||
}
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Allocate memory for command buffer.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP)
|
||||
? reinterpret_cast<uint8_t*>(
|
||||
mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0))
|
||||
: AllocateSysMemory(agent_info, size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
const hsa_signal_value_t exp_value = signal_value - 1;
|
||||
hsa_signal_value_t ret_value = signal_value;
|
||||
while (1) {
|
||||
ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value,
|
||||
timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (ret_value == exp_value) break;
|
||||
if (ret_value != signal_value) {
|
||||
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl
|
||||
<< std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
SignalWait(signal, signal_value);
|
||||
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
|
||||
// Copy data from GPU to host memory
|
||||
bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
if (!cpu_agents_.empty()) {
|
||||
hsa_signal_t s = {};
|
||||
status = hsa_api_.hsa_signal_create(1, 0, NULL, &s);
|
||||
CHECK_STATUS("hsa_signal_create()", status);
|
||||
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
|
||||
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
|
||||
SignalWait(s, 1);
|
||||
status = hsa_api_.hsa_signal_destroy(s);
|
||||
CHECK_STATUS("hsa_signal_destroy()", status);
|
||||
}
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) {
|
||||
return Memcpy(agent_info->dev_id, dst, src, size);
|
||||
}
|
||||
|
||||
// Memory free method
|
||||
bool HsaRsrcFactory::FreeMemory(void* ptr) {
|
||||
const hsa_status_t status = hsa_memory_free(ptr);
|
||||
CHECK_STATUS("hsa_memory_free", status);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
// @return bool true if successful, false otherwise
|
||||
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
|
||||
const char* kernel_name, hsa_executable_t* executable,
|
||||
hsa_executable_symbol_t* code_desc) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Build the code object filename
|
||||
std::string filename(brig_path);
|
||||
std::clog << "Code object filename: " << filename << std::endl;
|
||||
|
||||
// Open the file containing code object
|
||||
hsa_file_t file_handle = open(filename.c_str(), O_RDONLY);
|
||||
if (file_handle == -1) {
|
||||
std::cerr << "Error: failed to load '" << filename << "'" << std::endl;
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create code object reader
|
||||
hsa_code_object_reader_t code_obj_rdr = {0};
|
||||
status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
status = hsa_api_.hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
status = hsa_api_.hsa_executable_freeze(*executable, "");
|
||||
CHECK_STATUS("Error in freezing executable object", status);
|
||||
|
||||
// Get symbol handle.
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
std::cout << std::flush;
|
||||
std::clog << header << " :" << std::endl;
|
||||
|
||||
const AgentInfo* agent_info;
|
||||
int size = uint32_t(gpu_list_.size());
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
agent_info = gpu_list_[idx];
|
||||
|
||||
std::clog << "> agent[" << idx << "] :" << std::endl;
|
||||
std::clog << ">> Name : " << agent_info->name << std::endl;
|
||||
std::clog << ">> APU : " << agent_info->is_apu << std::endl;
|
||||
std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl;
|
||||
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
|
||||
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
|
||||
std::clog << ">> CU number : " << agent_info->cu_num << std::endl;
|
||||
std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl;
|
||||
std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl;
|
||||
std::clog << ">> SE number : " << agent_info->se_num << std::endl;
|
||||
std::clog << ">> Shader Arrays per SE : " << agent_info->shader_arrays_per_se << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
|
||||
// adevance command queue
|
||||
const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue);
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1);
|
||||
while ((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) {
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
uint32_t* queue_slot =
|
||||
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
|
||||
// Copy buffered commands into the queue slot.
|
||||
// Overwrite the AQL invalid header (first dword) last.
|
||||
// This prevents the slot from being read until it's fully written.
|
||||
memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t));
|
||||
std::atomic<uint32_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<uint32_t>*>(&queue_slot[0]);
|
||||
header_atomic_ptr->store(slot_data[0], std::memory_order_release);
|
||||
|
||||
// ringdoor bell
|
||||
hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx);
|
||||
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) {
|
||||
const uint32_t slot_size_b = CMD_SLOT_SIZE_B;
|
||||
if ((size_bytes & (slot_size_b - 1)) != 0) {
|
||||
fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes);
|
||||
abort();
|
||||
}
|
||||
|
||||
const char* begin = reinterpret_cast<const char*>(packet);
|
||||
const char* end = begin + size_bytes;
|
||||
uint64_t write_idx = 0;
|
||||
for (const char* ptr = begin; ptr < end; ptr += slot_size_b) {
|
||||
write_idx = Submit(queue, ptr);
|
||||
}
|
||||
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
const char* HsaRsrcFactory::GetKernelName(uint64_t addr) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
return strdup(it->second);
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
executable_tracking_on_ = true;
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol, void* data) {
|
||||
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
|
||||
hsa_status_t status =
|
||||
hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
CHECK_STATUS("Error in getting symbol info", status);
|
||||
if (value == HSA_SYMBOL_KIND_KERNEL) {
|
||||
uint64_t addr = 0;
|
||||
uint32_t len = 0;
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
CHECK_STATUS("Error in getting kernel object", status);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH,
|
||||
&len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char* name = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
name[len] = 0;
|
||||
auto ret = symbols_map_->insert({addr, name});
|
||||
if (ret.second == false) {
|
||||
delete[] ret.first->second;
|
||||
ret.first->second = name;
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
|
||||
hsa_status_t status =
|
||||
hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
CHECK_STATUS("Error in iterating executable symbols", status);
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);
|
||||
;
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::DumpHandles(FILE* file) {
|
||||
auto beg = agent_map_.begin();
|
||||
auto end = agent_map_.end();
|
||||
for (auto it = beg; it != end; ++it) {
|
||||
const AgentInfo* agent_info = it->second;
|
||||
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle,
|
||||
(agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
|
||||
if (agent_info->cpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
|
||||
if (agent_info->kern_arg_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
|
||||
if (agent_info->gpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle);
|
||||
}
|
||||
fflush(file);
|
||||
}
|
||||
|
||||
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
|
||||
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
|
||||
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
|
||||
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
|
||||
bool HsaRsrcFactory::executable_tracking_on_ = false;
|
||||
HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL;
|
||||
@@ -1,516 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _HSA_RSRC_FACTORY_H_
|
||||
#define _HSA_RSRC_FACTORY_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <hsa_ven_amd_loader.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_ITER_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_INFO_BREAK) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static const size_t MEM_PAGE_BYTES = 0x1000;
|
||||
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
|
||||
|
||||
struct hsa_pfn_t {
|
||||
decltype(::hsa_init)* hsa_init;
|
||||
decltype(::hsa_shut_down)* hsa_shut_down;
|
||||
decltype(::hsa_agent_get_info)* hsa_agent_get_info;
|
||||
decltype(::hsa_iterate_agents)* hsa_iterate_agents;
|
||||
|
||||
decltype(::hsa_queue_create)* hsa_queue_create;
|
||||
decltype(::hsa_queue_destroy)* hsa_queue_destroy;
|
||||
decltype(::hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
|
||||
decltype(::hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
|
||||
decltype(::hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
|
||||
|
||||
decltype(::hsa_signal_create)* hsa_signal_create;
|
||||
decltype(::hsa_signal_destroy)* hsa_signal_destroy;
|
||||
decltype(::hsa_signal_load_relaxed)* hsa_signal_load_relaxed;
|
||||
decltype(::hsa_signal_store_relaxed)* hsa_signal_store_relaxed;
|
||||
decltype(::hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire;
|
||||
decltype(::hsa_signal_store_screlease)* hsa_signal_store_screlease;
|
||||
|
||||
decltype(::hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file;
|
||||
decltype(::hsa_executable_create_alt)* hsa_executable_create_alt;
|
||||
decltype(::hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(::hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(::hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
decltype(::hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
|
||||
decltype(::hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
|
||||
|
||||
decltype(::hsa_system_get_info)* hsa_system_get_info;
|
||||
decltype(::hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
|
||||
|
||||
decltype(::hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools;
|
||||
decltype(::hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info;
|
||||
decltype(::hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate;
|
||||
decltype(::hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access;
|
||||
decltype(::hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy;
|
||||
|
||||
decltype(::hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler;
|
||||
decltype(::hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled;
|
||||
decltype(::hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time;
|
||||
decltype(::hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time;
|
||||
};
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
struct AgentInfo {
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
|
||||
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint32_t dev_type;
|
||||
|
||||
// APU flag
|
||||
bool is_apu;
|
||||
|
||||
// Agent system index
|
||||
uint32_t dev_index;
|
||||
|
||||
// GFXIP name
|
||||
char gfxip[64];
|
||||
|
||||
// Name of Agent whose length is less than 64
|
||||
char name[64];
|
||||
|
||||
// Max size of Wavefront size
|
||||
uint32_t max_wave_size;
|
||||
|
||||
// Max size of Queue buffer
|
||||
uint32_t max_queue_size;
|
||||
|
||||
// Hsail profile supported by agent
|
||||
hsa_profile_t profile;
|
||||
|
||||
// CPU/GPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
hsa_amd_memory_pool_t kern_arg_pool;
|
||||
|
||||
// The number of compute unit available in the agent.
|
||||
uint32_t cu_num;
|
||||
|
||||
// Maximum number of waves possible in a Compute Unit.
|
||||
uint32_t waves_per_cu;
|
||||
|
||||
// Number of SIMD's per compute unit CU
|
||||
uint32_t simds_per_cu;
|
||||
|
||||
// Number of Shader Engines (SE) in Gpu
|
||||
uint32_t se_num;
|
||||
|
||||
// Number of Shader Arrays Per Shader Engines in Gpu
|
||||
uint32_t shader_arrays_per_se;
|
||||
|
||||
// SGPR/VGPR block sizes
|
||||
uint32_t sgpr_block_dflt;
|
||||
uint32_t sgpr_block_size;
|
||||
uint32_t vgpr_block_size;
|
||||
};
|
||||
|
||||
// HSA timer class
|
||||
// Provides current HSA timestampa and system-clock/ns conversion API
|
||||
class HsaTimer {
|
||||
public:
|
||||
typedef uint64_t timestamp_t;
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER };
|
||||
|
||||
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status =
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
|
||||
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
|
||||
}
|
||||
|
||||
// Methods for system-clock/ns conversion
|
||||
timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const {
|
||||
return timestamp_t((freq_t)sysclock * sysclock_factor_);
|
||||
}
|
||||
timestamp_t ns_to_sysclock(const timestamp_t& time) const {
|
||||
return timestamp_t((freq_t)time / sysclock_factor_);
|
||||
}
|
||||
|
||||
// Method for timespec/ns conversion
|
||||
static timestamp_t timespec_to_ns(const timespec& time) {
|
||||
return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec;
|
||||
}
|
||||
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const {
|
||||
timestamp_t sysclock;
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status);
|
||||
return sysclock_to_ns(sysclock);
|
||||
}
|
||||
|
||||
// Return time in 'ns'
|
||||
static timestamp_t clocktime_ns(clockid_t clock_id) {
|
||||
timespec time;
|
||||
clock_gettime(clock_id, &time);
|
||||
return timespec_to_ns(time);
|
||||
}
|
||||
|
||||
// Return pair of correlated values of profiling timestamp and time with
|
||||
// correlation error for a given time ID and number of iterations
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v,
|
||||
timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
clockid_t clock_id = 0;
|
||||
switch (clock_id) {
|
||||
case TIME_ID_CLOCK_REALTIME:
|
||||
clock_id = CLOCK_REALTIME;
|
||||
break;
|
||||
case TIME_ID_CLOCK_MONOTONIC:
|
||||
clock_id = CLOCK_MONOTONIC;
|
||||
break;
|
||||
default:
|
||||
CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
std::vector<timestamp_t> ts_vec(iters);
|
||||
std::vector<timespec> tm_vec(iters);
|
||||
const uint32_t steps = iters - 1;
|
||||
|
||||
for (uint32_t i = 0; i < iters; ++i) {
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]);
|
||||
clock_gettime(clock_id, &tm_vec[i]);
|
||||
}
|
||||
|
||||
const timestamp_t ts_base = sysclock_to_ns(ts_vec.front());
|
||||
const timestamp_t tm_base = timespec_to_ns(tm_vec.front());
|
||||
const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps);
|
||||
|
||||
timestamp_t ts_accum = 0;
|
||||
timestamp_t tm_accum = 0;
|
||||
for (uint32_t i = 0; i < iters; ++i) {
|
||||
ts_accum += (ts_vec[i] - ts_base);
|
||||
tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base);
|
||||
}
|
||||
|
||||
*timestamp_v = (ts_accum / iters) + ts_base + error;
|
||||
*time_v = (tm_accum / iters) + tm_base;
|
||||
*error_v = error;
|
||||
}
|
||||
|
||||
private:
|
||||
// Timestamp frequency factor
|
||||
freq_t sysclock_factor_;
|
||||
// HSA API table
|
||||
const hsa_pfn_t* const hsa_api_;
|
||||
};
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
static const size_t CMD_SLOT_SIZE_B = 0x40;
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef HsaTimer::timestamp_t timestamp_t;
|
||||
|
||||
static HsaRsrcFactory* Create(bool initialize_hsa = true) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed);
|
||||
if (obj == NULL) {
|
||||
obj = new HsaRsrcFactory(initialize_hsa);
|
||||
instance_.store(obj, std::memory_order_release);
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
static HsaRsrcFactory& Instance() {
|
||||
HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire);
|
||||
if (obj == NULL) obj = Create(false);
|
||||
hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
CHECK_STATUS("HsaRsrcFactory::Instance() failed", status);
|
||||
return *obj;
|
||||
}
|
||||
|
||||
static void Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_) delete instance_.load();
|
||||
instance_ = NULL;
|
||||
}
|
||||
|
||||
// Return system agent info
|
||||
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
uint32_t GetCountOfGpuAgents();
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
uint32_t GetCountOfCpuAgents();
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
// @param idx Gpu Agent at specified index
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
// @return bool true if successful, false otherwise
|
||||
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
// @param idx Cpu Agent at specified index
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
// @return bool true if successful, false otherwise
|
||||
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
// @return bool true if successful, false otherwise
|
||||
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
// @param value Initial value of signal object
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
// @return bool true if successful, false otherwise
|
||||
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
|
||||
|
||||
// Allocate local GPU memory
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory tp pass kernel parameters
|
||||
// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate system memory accessible from both CPU and GPU
|
||||
// Memory is alocated accessible to all CPU agents and AgentInfo parameter is ignored.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory for command buffer.
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
// @param size Size of memory in terms of bytes
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Copy data from GPU to host memory
|
||||
bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size);
|
||||
bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size);
|
||||
|
||||
// Memory free method
|
||||
static bool FreeMemory(void* ptr);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
// @return true if successful, false otherwise
|
||||
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name,
|
||||
hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc);
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Submit AQL packet to given queue
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
|
||||
|
||||
// Enable executables loading tracking
|
||||
static bool IsExecutableTracking() { return executable_tracking_on_; }
|
||||
static void EnableExecutableTracking(HsaApiTable* table);
|
||||
static const char* GetKernelName(uint64_t addr);
|
||||
|
||||
// Initialize HSA API table
|
||||
void static InitHsaApiTable(HsaApiTable* table);
|
||||
static const hsa_pfn_t* HsaApi() { return &hsa_api_; }
|
||||
|
||||
// Return AqlProfile API table
|
||||
typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t;
|
||||
const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
|
||||
// Return Loader API table
|
||||
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
|
||||
|
||||
// Methods for system-clock/ns conversion and timestamp in 'ns'
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const {
|
||||
return timer_->sysclock_to_ns(sysclock);
|
||||
}
|
||||
timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); }
|
||||
timestamp_t TimestampNs() const { return timer_->timestamp_ns(); }
|
||||
|
||||
timestamp_t GetSysTimeout() const { return timeout_; }
|
||||
static timestamp_t GetTimeoutNs() { return timeout_ns_; }
|
||||
static void SetTimeoutNs(const timestamp_t& time) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
timeout_ns_ = time;
|
||||
if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
|
||||
}
|
||||
|
||||
void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) {
|
||||
timestamp_t timestamp_v = 0;
|
||||
timestamp_t time_v = 0;
|
||||
timestamp_t error_v = 0;
|
||||
timer_->correlated_pair_ns(time_id, iters, ×tamp_v, &time_v, &error_v);
|
||||
time_shift_[time_id] = time_v - timestamp_v;
|
||||
time_error_[time_id] = error_v;
|
||||
}
|
||||
|
||||
hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) {
|
||||
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
|
||||
*time = value + time_shift_[time_id];
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) {
|
||||
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
|
||||
*timestamp = value - time_shift_[time_id];
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void DumpHandles(FILE* output_file);
|
||||
|
||||
private:
|
||||
// System agents iterating callback
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
|
||||
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data);
|
||||
|
||||
// Load AQL profile HSA extension library directly
|
||||
static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api);
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
explicit HsaRsrcFactory(bool initialize_hsa);
|
||||
|
||||
// Destructor of the class
|
||||
~HsaRsrcFactory();
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
const AgentInfo* AddAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// To mmap command buffer memory
|
||||
static const bool CMD_MEMORY_MMAP = false;
|
||||
|
||||
// HSA was initialized
|
||||
const bool initialize_hsa_;
|
||||
|
||||
static std::atomic<HsaRsrcFactory*> instance_;
|
||||
static mutex_t mutex_;
|
||||
|
||||
// Used to maintain a list of Hsa Gpu Agent Info
|
||||
std::vector<const AgentInfo*> gpu_list_;
|
||||
std::vector<hsa_agent_t> gpu_agents_;
|
||||
|
||||
// Used to maintain a list of Hsa Cpu Agent Info
|
||||
std::vector<const AgentInfo*> cpu_list_;
|
||||
std::vector<hsa_agent_t> cpu_agents_;
|
||||
|
||||
// System agents map
|
||||
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
|
||||
|
||||
// Executables loading tracking
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol,
|
||||
void* data);
|
||||
|
||||
// HSA runtime API table
|
||||
static hsa_pfn_t hsa_api_;
|
||||
|
||||
// AqlProfile API table
|
||||
aqlprofile_pfn_t aqlprofile_api_;
|
||||
|
||||
// Loader API table
|
||||
hsa_ven_amd_loader_1_00_pfn_t loader_api_;
|
||||
|
||||
// System timeout, ns
|
||||
static timestamp_t timeout_ns_;
|
||||
// System timeout, sysclock
|
||||
timestamp_t timeout_;
|
||||
|
||||
// HSA timer
|
||||
HsaTimer* timer_;
|
||||
|
||||
// Time shift array to support time conversion
|
||||
timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER];
|
||||
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t* cpu_pool_;
|
||||
hsa_amd_memory_pool_t* kern_arg_pool_;
|
||||
};
|
||||
|
||||
#endif // _HSA_RSRC_FACTORY_H_
|
||||
@@ -1,79 +0,0 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
find_package(Clang REQUIRED CONFIG
|
||||
PATHS
|
||||
"/opt/rocm/"
|
||||
PATH_SUFFIXES
|
||||
"llvm/lib/cmake/clang")
|
||||
|
||||
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
|
||||
separate_arguments(CLANG_ARG_LIST UNIX_COMMAND
|
||||
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0
|
||||
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
|
||||
|
||||
## Add custom command to produce a code object file.
|
||||
## This depends on the kernel source file & compiler.
|
||||
## It does not pickup devicelib changes. It is not clear
|
||||
## how to do that after conversion to --rocm-path is done.
|
||||
add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
COMMAND clang ${CLANG_ARG_LIST}
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
DEPENDS ${INPUT_FILE} clang
|
||||
COMMENT "Building ${OUTPUT_FILE}..."
|
||||
VERBATIM)
|
||||
|
||||
set(HSACO_TARGET_LIST ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} PARENT_SCOPE)
|
||||
endfunction(generate_hsaco)
|
||||
|
||||
function(build_kernel_for_devices KERNEL_FILE KERNEL_NAME)
|
||||
foreach(target_id ${GPU_TARGETS})
|
||||
## generate kernel bitcodes
|
||||
generate_hsaco(${target_id} ${KERNEL_FILE} ${target_id}_${KERNEL_NAME}.hsaco)
|
||||
endforeach(target_id)
|
||||
set(HSACO_TARGET_LIST ${HSACO_TARGET_LIST} PARENT_SCOPE)
|
||||
endfunction(build_kernel_for_devices)
|
||||
|
||||
set(HSACO_TARGET_LIST "")
|
||||
build_kernel_for_devices("${CMAKE_CURRENT_SOURCE_DIR}/dummy_kernel/dummy_kernel.cl" "DummyKernel")
|
||||
build_kernel_for_devices("${CMAKE_CURRENT_SOURCE_DIR}/simple_convolution/simple_convolution.cl" "SimpleConvolution")
|
||||
add_custom_target(hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
|
||||
|
||||
## Test kernel
|
||||
set(TEST_NAME simple_convolution)
|
||||
set(KERN_SRC ${TEST_NAME}/${TEST_NAME}.cpp)
|
||||
|
||||
## Util sources
|
||||
file(GLOB UTIL_SRC "util/*.cpp")
|
||||
|
||||
## Test control sources
|
||||
set(CTRL_SRC
|
||||
app/test.cpp
|
||||
ctrl/test_hsa.cpp)
|
||||
|
||||
## Building ctrl test executable
|
||||
add_executable(ctrl EXCLUDE_FROM_ALL ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC})
|
||||
target_compile_definitions(ctrl PRIVATE AMD_INTERNAL_BUILD)
|
||||
target_include_directories(ctrl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR})
|
||||
target_link_libraries(ctrl hsa-runtime64::hsa-runtime64 Threads::Threads dl)
|
||||
add_dependencies(ctrl hsaco_targets)
|
||||
add_dependencies(mytest ctrl)
|
||||
@@ -1,84 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hsa.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "dummy_kernel/dummy_kernel.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
|
||||
void thread_fun(const int kiter, const int diter, const uint32_t agents_number) {
|
||||
const AgentInfo* agent_info[agents_number];
|
||||
hsa_queue_t* queue[agents_number];
|
||||
HsaRsrcFactory* rsrc = &HsaRsrcFactory::Instance();
|
||||
|
||||
for (uint32_t n = 0; n < agents_number; ++n) {
|
||||
uint32_t agent_id = n % rsrc->GetCountOfGpuAgents();
|
||||
if (rsrc->GetGpuAgentInfo(agent_id, &agent_info[n]) == false) {
|
||||
fprintf(stderr, "AgentInfo failed\n");
|
||||
abort();
|
||||
}
|
||||
if (rsrc->CreateQueue(agent_info[n], 128, &queue[n]) == false) {
|
||||
fprintf(stderr, "CreateQueue failed\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < kiter; ++i) {
|
||||
for (uint32_t n = 0; n < agents_number; ++n) {
|
||||
RunKernel<DummyKernel, TestAql>(0, NULL, agent_info[n], queue[n], diter);
|
||||
RunKernel<SimpleConvolution, TestAql>(0, NULL, agent_info[n], queue[n], diter);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t n = 0; n < agents_number; ++n) {
|
||||
hsa_queue_destroy(queue[n]);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
const char* kiter_s = getenv("ROCP_KITER");
|
||||
const char* diter_s = getenv("ROCP_DITER");
|
||||
const char* agents_s = getenv("ROCP_AGENTS");
|
||||
const char* thrs_s = getenv("ROCP_THRS");
|
||||
|
||||
const int kiter = (kiter_s != NULL) ? atol(kiter_s) : 1;
|
||||
const int diter = (diter_s != NULL) ? atol(diter_s) : 1;
|
||||
const uint32_t agents_number = (agents_s != NULL) ? (uint32_t)atol(agents_s) : 1;
|
||||
const int thrs = (thrs_s != NULL) ? atol(thrs_s) : 1;
|
||||
|
||||
TestHsa::HsaInstantiate();
|
||||
|
||||
std::vector<std::thread> t(thrs);
|
||||
for (int n = 0; n < thrs; ++n) {
|
||||
t[n] = std::thread(thread_fun, kiter, diter, agents_number);
|
||||
}
|
||||
for (int n = 0; n < thrs; ++n) {
|
||||
t[n].join();
|
||||
}
|
||||
|
||||
TestHsa::HsaShutdown();
|
||||
return 0;
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_CTRL_RUN_KERNEL_H_
|
||||
#define TEST_CTRL_RUN_KERNEL_H_
|
||||
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
template <class Kernel, class Test>
|
||||
bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL,
|
||||
hsa_queue_t* queue = NULL, int count = 1) {
|
||||
bool ret_val = false;
|
||||
|
||||
if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL);
|
||||
|
||||
|
||||
// Create test kernel object
|
||||
Kernel test_kernel;
|
||||
|
||||
TestHsa* test_hsa = new TestHsa(&test_kernel);
|
||||
test_hsa->SetAgentInfo(agent_info);
|
||||
test_hsa->SetQueue(queue);
|
||||
|
||||
TestAql* test_aql = new Test(test_hsa);
|
||||
TEST_ASSERT(test_aql != NULL);
|
||||
if (test_aql == NULL) return 1;
|
||||
|
||||
// Initialization of Hsa Runtime
|
||||
ret_val = test_aql->Initialize(argc, argv);
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in the test initialization" << std::endl;
|
||||
// TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Setup Hsa resources needed for execution
|
||||
ret_val = test_aql->Setup();
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in creating hsa resources" << std::endl;
|
||||
TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Kernel dspatch iterations
|
||||
for (int i = 0; i < count; ++i) {
|
||||
// Run test kernel
|
||||
ret_val = test_aql->Run();
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in running the test kernel" << std::endl;
|
||||
TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify the results of the execution
|
||||
ret_val = test_aql->VerifyResults();
|
||||
if (ret_val) {
|
||||
std::clog << "Test : Passed" << std::endl;
|
||||
} else {
|
||||
std::clog << "Test : Failed" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Print time taken by sample
|
||||
test_aql->PrintTime();
|
||||
|
||||
test_aql->Cleanup();
|
||||
delete test_aql;
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
#endif // TEST_CTRL_RUN_KERNEL_H_
|
||||
@@ -1,75 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_CTRL_TEST_AQL_H_
|
||||
#define TEST_CTRL_TEST_AQL_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
// Test AQL interface
|
||||
class TestAql {
|
||||
public:
|
||||
explicit TestAql(TestAql* t = 0) : test_(t) {}
|
||||
virtual ~TestAql() {
|
||||
if (test_) delete test_;
|
||||
}
|
||||
|
||||
TestAql* Test() { return test_; }
|
||||
virtual const AgentInfo* GetAgentInfo() { return (test_) ? test_->GetAgentInfo() : 0; }
|
||||
virtual hsa_queue_t* GetQueue() { return (test_) ? test_->GetQueue() : 0; }
|
||||
virtual HsaRsrcFactory* GetRsrcFactory() { return (test_) ? test_->GetRsrcFactory() : 0; }
|
||||
|
||||
// Initialize application environment including setting
|
||||
// up of various configuration parameters based on
|
||||
// command line arguments
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Initialize(int argc, char** argv) {
|
||||
return (test_) ? test_->Initialize(argc, argv) : true;
|
||||
}
|
||||
|
||||
// Setup application parameters for exectuion
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Setup() { return (test_) ? test_->Setup() : true; }
|
||||
|
||||
// Run the kernel
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Run() { return (test_) ? test_->Run() : true; }
|
||||
|
||||
// Verify results
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool VerifyResults() { return (test_) ? test_->VerifyResults() : true; }
|
||||
|
||||
// Print to console the time taken to execute kernel
|
||||
virtual void PrintTime() {
|
||||
if (test_) test_->PrintTime();
|
||||
}
|
||||
|
||||
// Release resources e.g. memory allocations
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Cleanup() { return (test_) ? test_->Cleanup() : true; }
|
||||
|
||||
private:
|
||||
TestAql* const test_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_AQL_H_
|
||||
@@ -1,279 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "ctrl/test_hsa.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "util/test_assert.h"
|
||||
#include "util/helper_funcs.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL;
|
||||
|
||||
HsaRsrcFactory* TestHsa::HsaInstantiate() {
|
||||
// Instantiate an instance of Hsa Resources Factory
|
||||
if (hsa_rsrc_ == NULL) {
|
||||
hsa_rsrc_ = HsaRsrcFactory::Create();
|
||||
// Print properties of the agents
|
||||
hsa_rsrc_->PrintGpuAgents("> GPU agents");
|
||||
}
|
||||
return hsa_rsrc_;
|
||||
}
|
||||
|
||||
void TestHsa::HsaShutdown() {
|
||||
if (hsa_rsrc_) hsa_rsrc_->Destroy();
|
||||
}
|
||||
|
||||
bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
|
||||
std::clog << "TestHsa::Initialize :" << std::endl;
|
||||
|
||||
// Instantiate a Timer object
|
||||
setup_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
|
||||
if (hsa_rsrc_ == NULL) {
|
||||
TEST_ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create an instance of Gpu agent
|
||||
if (agent_info_ == NULL) {
|
||||
const uint32_t agent_id = 0;
|
||||
if (!hsa_rsrc_->GetGpuAgentInfo(agent_id, &agent_info_)) {
|
||||
agent_info_ = NULL;
|
||||
std::cerr << "> error: agent[" << agent_id << "] is not found" << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name
|
||||
<< std::endl;
|
||||
|
||||
// Create an instance of Aql Queue
|
||||
if (hsa_queue_ == NULL) {
|
||||
const uint32_t num_pkts = 128;
|
||||
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
|
||||
hsa_queue_ = NULL;
|
||||
TEST_ASSERT(false);
|
||||
}
|
||||
my_queue_ = true;
|
||||
}
|
||||
|
||||
// Obtain handle of signal
|
||||
hsa_rsrc_->CreateSignal(1, &hsa_signal_);
|
||||
|
||||
// Obtain the code object file name
|
||||
std::string agentName(agent_info_->name);
|
||||
brig_path_obj_.append(agentName);
|
||||
brig_path_obj_.append("_" + name_ + ".hsaco");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::Setup() {
|
||||
std::clog << "TestHsa::setup :" << std::endl;
|
||||
|
||||
// Start the timer object
|
||||
hsa_timer_.StartTimer(setup_timer_idx_);
|
||||
|
||||
// Load and Finalize Kernel Code Descriptor
|
||||
const char* brig_path = brig_path_obj_.c_str();
|
||||
bool suc = hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, symb_.c_str(), &hsa_exec_,
|
||||
&kernel_code_desc_);
|
||||
if (suc == false) {
|
||||
std::cerr << "Error in loading and finalizing Kernel" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
mem_map_t& mem_map = test_->GetMemMap();
|
||||
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
|
||||
mem_descr_t& des = it->second;
|
||||
if (des.size == 0) continue;
|
||||
|
||||
switch (des.id) {
|
||||
case TestKernel::LOCAL_DES_ID:
|
||||
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
|
||||
break;
|
||||
case TestKernel::KERNARG_DES_ID: {
|
||||
// Check the kernel args size
|
||||
const size_t kernarg_size = des.size;
|
||||
size_t size_info = 0;
|
||||
const hsa_status_t status = hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
size_info = kernarg_size;
|
||||
const bool kernarg_missmatch = (kernarg_size > size_info);
|
||||
if (kernarg_missmatch) {
|
||||
std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info
|
||||
<< std::flush << std::endl;
|
||||
TEST_ASSERT(!kernarg_missmatch);
|
||||
break;
|
||||
}
|
||||
// ALlocate kernarg memory
|
||||
des.size = size_info;
|
||||
des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, size_info);
|
||||
if (des.ptr) memset(des.ptr, 0, size_info);
|
||||
break;
|
||||
}
|
||||
case TestKernel::SYS_DES_ID:
|
||||
des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
|
||||
if (des.ptr) memset(des.ptr, 0, des.size);
|
||||
break;
|
||||
case TestKernel::NULL_DES_ID:
|
||||
des.ptr = NULL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
TEST_ASSERT(des.ptr != NULL);
|
||||
if (des.ptr == NULL) return false;
|
||||
}
|
||||
test_->Init();
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(setup_timer_idx_);
|
||||
setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_);
|
||||
total_time_taken_ = setup_time_taken_;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::Run() {
|
||||
std::clog << "TestHsa::run :" << std::endl;
|
||||
|
||||
const uint32_t work_group_size = 64;
|
||||
const uint32_t work_grid_size = test_->GetGridSize();
|
||||
uint32_t group_segment_size = 0;
|
||||
uint32_t private_segment_size = 0;
|
||||
uint64_t code_handle = 0;
|
||||
|
||||
// Retrieve the amount of group memory needed
|
||||
hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
|
||||
|
||||
// Retrieve the amount of private memory needed
|
||||
hsa_executable_symbol_get_info(kernel_code_desc_,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&private_segment_size);
|
||||
|
||||
|
||||
// Retrieve handle of the code block
|
||||
hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&code_handle);
|
||||
|
||||
// Initialize the dispatch packet.
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
// Set the packet's type, barrier bit, acquire and release fences
|
||||
aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
|
||||
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
|
||||
// Populate Aql packet with default values
|
||||
aql.setup = 1;
|
||||
aql.grid_size_x = work_grid_size;
|
||||
aql.grid_size_y = 1;
|
||||
aql.grid_size_z = 1;
|
||||
aql.workgroup_size_x = work_group_size;
|
||||
aql.workgroup_size_y = 1;
|
||||
aql.workgroup_size_z = 1;
|
||||
// Bind the kernel code descriptor and arguments
|
||||
aql.kernel_object = code_handle;
|
||||
aql.kernarg_address = test_->GetKernargPtr();
|
||||
aql.group_segment_size = group_segment_size;
|
||||
aql.private_segment_size = private_segment_size;
|
||||
// Initialize Aql packet with handle of signal
|
||||
hsa_signal_store_relaxed(hsa_signal_, 1);
|
||||
aql.completion_signal = hsa_signal_;
|
||||
|
||||
std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl;
|
||||
|
||||
// Start the timer object
|
||||
hsa_timer_.StartTimer(dispatch_timer_idx_);
|
||||
|
||||
// Submit AQL packet to the queue
|
||||
const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql);
|
||||
|
||||
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl
|
||||
<< std::flush;
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
|
||||
if (hsa_signal_wait_scacquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX,
|
||||
HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
TEST_ASSERT("signal_wait failed");
|
||||
}
|
||||
|
||||
std::clog << "> DONE, que_idx=" << que_idx << std::endl;
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(dispatch_timer_idx_);
|
||||
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
|
||||
total_time_taken_ += dispatch_time_taken_;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::VerifyResults() {
|
||||
bool cmp = false;
|
||||
void* output = NULL;
|
||||
const uint32_t size = test_->GetOutputSize();
|
||||
bool suc = false;
|
||||
|
||||
if (size == 0) return true;
|
||||
|
||||
// Copy local kernel output buffers from local memory into host memory
|
||||
if (test_->IsOutputLocal()) {
|
||||
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
|
||||
suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size);
|
||||
if (!suc) std::clog << "> VerifyResults: Memcpy failed" << std::endl << std::flush;
|
||||
} else {
|
||||
output = test_->GetOutputPtr();
|
||||
suc = true;
|
||||
}
|
||||
|
||||
if ((output != NULL) && suc) {
|
||||
// Print the test output
|
||||
test_->PrintOutput(output);
|
||||
// Compare the results and see if they match
|
||||
cmp = (memcmp(output, test_->GetRefOut(), size) == 0);
|
||||
}
|
||||
|
||||
if (test_->IsOutputLocal() && (output != NULL)) hsa_rsrc_->FreeMemory(output);
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
void TestHsa::PrintTime() {
|
||||
std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_
|
||||
<< std::endl;
|
||||
std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_
|
||||
<< std::endl;
|
||||
std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
bool TestHsa::Cleanup() {
|
||||
hsa_executable_destroy(hsa_exec_);
|
||||
hsa_signal_destroy(hsa_signal_);
|
||||
if (my_queue_) hsa_queue_destroy(hsa_queue_);
|
||||
hsa_queue_ = NULL;
|
||||
agent_info_ = NULL;
|
||||
return true;
|
||||
}
|
||||
@@ -1,127 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_CTRL_TEST_HSA_H_
|
||||
#define TEST_CTRL_TEST_HSA_H_
|
||||
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "ctrl/test_kernel.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/perf_timer.h"
|
||||
|
||||
// Class implements HSA test
|
||||
class TestHsa : public TestAql {
|
||||
public:
|
||||
// Instantiate HSA resources
|
||||
static HsaRsrcFactory* HsaInstantiate();
|
||||
static void HsaShutdown();
|
||||
|
||||
// Constructor
|
||||
explicit TestHsa(TestKernel* test) : test_(test), name_(test->Name()), symb_(test->SymbName()) {
|
||||
total_time_taken_ = 0;
|
||||
setup_time_taken_ = 0;
|
||||
dispatch_time_taken_ = 0;
|
||||
agent_info_ = NULL;
|
||||
hsa_queue_ = NULL;
|
||||
my_queue_ = false;
|
||||
hsa_exec_ = {};
|
||||
}
|
||||
|
||||
// Get methods for Agent Info, HAS queue, HSA Resourcse Manager
|
||||
HsaRsrcFactory* GetRsrcFactory() { return hsa_rsrc_; }
|
||||
hsa_agent_t HsaAgent() { return agent_info_->dev_id; }
|
||||
const AgentInfo* GetAgentInfo() { return agent_info_; }
|
||||
void SetAgentInfo(const AgentInfo* agent_info) { agent_info_ = agent_info; }
|
||||
hsa_queue_t* GetQueue() { return hsa_queue_; }
|
||||
void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; }
|
||||
|
||||
// Initialize application environment including setting
|
||||
// up of various configuration parameters based on
|
||||
// command line arguments
|
||||
// @return bool true on success and false on failure
|
||||
bool Initialize(int argc, char** argv);
|
||||
|
||||
// Setup application parameters for exectuion
|
||||
// @return bool true on success and false on failure
|
||||
bool Setup();
|
||||
|
||||
// Run the BinarySearch kernel
|
||||
// @return bool true on success and false on failure
|
||||
bool Run();
|
||||
|
||||
// Verify against reference implementation
|
||||
// @return bool true on success and false on failure
|
||||
bool VerifyResults();
|
||||
|
||||
// Print to console the time taken to execute kernel
|
||||
void PrintTime();
|
||||
|
||||
// Release resources e.g. memory allocations
|
||||
// @return bool true on success and false on failure
|
||||
bool Cleanup();
|
||||
|
||||
private:
|
||||
typedef TestKernel::mem_descr_t mem_descr_t;
|
||||
typedef TestKernel::mem_map_t mem_map_t;
|
||||
typedef TestKernel::mem_it_t mem_it_t;
|
||||
|
||||
// Test object
|
||||
TestKernel* test_;
|
||||
|
||||
// Path of Brig file
|
||||
std::string brig_path_obj_;
|
||||
|
||||
// Used to track time taken to run the sample
|
||||
double total_time_taken_;
|
||||
double setup_time_taken_;
|
||||
double dispatch_time_taken_;
|
||||
|
||||
// Handle of signal
|
||||
hsa_signal_t hsa_signal_;
|
||||
|
||||
// Handle of Kernel Code Descriptor
|
||||
hsa_executable_symbol_t kernel_code_desc_;
|
||||
|
||||
// Instance of timer object
|
||||
uint32_t setup_timer_idx_;
|
||||
uint32_t dispatch_timer_idx_;
|
||||
PerfTimer hsa_timer_;
|
||||
|
||||
// Instance of Hsa Resources Factory
|
||||
static HsaRsrcFactory* hsa_rsrc_;
|
||||
|
||||
// Handle to an Hsa Gpu Agent
|
||||
const AgentInfo* agent_info_;
|
||||
|
||||
// Handle to an Hsa Queue
|
||||
hsa_queue_t* hsa_queue_;
|
||||
bool my_queue_;
|
||||
|
||||
// Test kernel name
|
||||
std::string name_;
|
||||
|
||||
// Test kernel name
|
||||
std::string symb_;
|
||||
|
||||
// Kernel executable
|
||||
hsa_executable_t hsa_exec_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_HSA_H_
|
||||
@@ -1,136 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_CTRL_TEST_KERNEL_H_
|
||||
#define TEST_CTRL_TEST_KERNEL_H_
|
||||
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <map>
|
||||
|
||||
// Class implements kernel test
|
||||
class TestKernel {
|
||||
public:
|
||||
// Exported buffers IDs
|
||||
enum buf_id_t { KERNARG_EXP_ID, OUTPUT_EXP_ID, REFOUT_EXP_ID };
|
||||
// Memory descriptors IDs
|
||||
enum des_id_t { NULL_DES_ID, LOCAL_DES_ID, KERNARG_DES_ID, SYS_DES_ID, REFOUT_DES_ID };
|
||||
|
||||
// Memory descriptors vector declaration
|
||||
struct mem_descr_t {
|
||||
des_id_t id;
|
||||
void* ptr;
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
// Memory map declaration
|
||||
typedef std::map<uint32_t, mem_descr_t> mem_map_t;
|
||||
typedef mem_map_t::iterator mem_it_t;
|
||||
typedef mem_map_t::const_iterator mem_const_it_t;
|
||||
|
||||
virtual ~TestKernel() {}
|
||||
|
||||
// Initialize method
|
||||
virtual void Init() = 0;
|
||||
|
||||
// Return kernel memory map
|
||||
mem_map_t& GetMemMap() { return mem_map_; }
|
||||
|
||||
// Return NULL descriptor
|
||||
static mem_descr_t NullDescriptor() { return {NULL_DES_ID, NULL, 0}; }
|
||||
|
||||
// Check if decripter is local
|
||||
bool IsLocal(const mem_descr_t& descr) const { return (descr.id == LOCAL_DES_ID); }
|
||||
|
||||
// Methods to get the kernel attributes
|
||||
const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; }
|
||||
const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; }
|
||||
void* GetKernargPtr() { return GetKernargDescr().ptr; }
|
||||
uint32_t GetKernargSize() { return GetKernargDescr().size; }
|
||||
void* GetOutputPtr() { return GetOutputDescr().ptr; }
|
||||
uint32_t GetOutputSize() { return GetOutputDescr().size; }
|
||||
bool IsOutputLocal() { return IsLocal(GetOutputDescr()); }
|
||||
virtual uint32_t GetGridSize() const = 0;
|
||||
|
||||
// Return reference output
|
||||
void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; }
|
||||
|
||||
// Print output
|
||||
virtual void PrintOutput(const void* ptr) const = 0;
|
||||
|
||||
// Return name
|
||||
virtual std::string Name() const = 0;
|
||||
|
||||
// Return name
|
||||
virtual std::string SymbName() { return Name() + ".kd"; }
|
||||
|
||||
protected:
|
||||
// Set buffer descriptor
|
||||
bool SetInDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
bool suc = SetMemDescr(buf_id, des_id, size);
|
||||
if (des_id == KERNARG_DES_ID) {
|
||||
test_map_[KERNARG_EXP_ID] = &mem_map_[buf_id];
|
||||
}
|
||||
return suc;
|
||||
}
|
||||
|
||||
// Set results descriptor
|
||||
bool SetOutDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
bool suc = SetMemDescr(buf_id, des_id, size);
|
||||
test_map_[OUTPUT_EXP_ID] = &mem_map_[buf_id];
|
||||
return suc;
|
||||
}
|
||||
|
||||
// Set host descriptor
|
||||
bool SetHostDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
bool suc = SetMemDescr(buf_id, des_id, size);
|
||||
if (suc) {
|
||||
mem_descr_t& descr = mem_map_[buf_id];
|
||||
descr.ptr = malloc(size);
|
||||
if (des_id == REFOUT_DES_ID) {
|
||||
test_map_[REFOUT_EXP_ID] = &descr;
|
||||
}
|
||||
if (descr.ptr == NULL) suc = false;
|
||||
}
|
||||
return suc;
|
||||
}
|
||||
|
||||
// Get memory descriptor
|
||||
mem_descr_t GetDescr(const uint32_t& buf_id) const {
|
||||
mem_const_it_t it = mem_map_.find(buf_id);
|
||||
return (it != mem_map_.end()) ? it->second : NullDescriptor();
|
||||
}
|
||||
|
||||
private:
|
||||
// Set memory descriptor
|
||||
bool SetMemDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) {
|
||||
const mem_descr_t des = {des_id, NULL, size};
|
||||
auto ret = mem_map_.insert(mem_map_t::value_type(buf_id, des));
|
||||
return ret.second;
|
||||
}
|
||||
|
||||
// Kernel memory map object
|
||||
mem_map_t mem_map_;
|
||||
// Test memory map object
|
||||
std::map<uint32_t, mem_descr_t*> test_map_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_KERNEL_H_
|
||||
@@ -1,68 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
|
||||
#define TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/test_kernel.h"
|
||||
|
||||
// Class implements DummyKernel kernel parameters
|
||||
class DummyKernel : public TestKernel {
|
||||
public:
|
||||
// Kernel buffers IDs
|
||||
enum { KERNARG_BUF_ID, LOCAL_BUF_ID };
|
||||
|
||||
// Constructor
|
||||
DummyKernel() : width_(64), height_(64) {
|
||||
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0);
|
||||
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0);
|
||||
}
|
||||
|
||||
// Initialize method
|
||||
void Init() {}
|
||||
|
||||
// Return compute grid size
|
||||
uint32_t GetGridSize() const { return width_ * height_; }
|
||||
|
||||
// Print output
|
||||
void PrintOutput(const void* ptr) const {}
|
||||
|
||||
// Return name
|
||||
std::string Name() const { return std::string("DummyKernel"); }
|
||||
|
||||
private:
|
||||
// Reference CPU implementation
|
||||
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
|
||||
const uint32_t width, const uint32_t height,
|
||||
const uint32_t maskWidth, const uint32_t maskHeight) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Width of the Input array
|
||||
const uint32_t width_;
|
||||
|
||||
// Height of the Input array
|
||||
const uint32_t height_;
|
||||
};
|
||||
|
||||
#endif // TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# test trace
|
||||
export ROC_TEST_TRACE=1
|
||||
|
||||
# kernels loading iterations
|
||||
export ROCP_KITER=50
|
||||
# kernels dispatching iterations per kernel load
|
||||
# dispatching to the same queue
|
||||
export ROCP_DITER=50
|
||||
# GPU agents number
|
||||
export ROCP_AGENTS=2
|
||||
# host threads number
|
||||
# each thread creates a queue pre GPU agent
|
||||
export ROCP_THRS=3
|
||||
|
||||
eval ./test/ctrl
|
||||
|
||||
#valgrind --leak-check=full $tbin
|
||||
#valgrind --tool=massif $tbin
|
||||
#ms_print massif.out.<N>
|
||||
|
||||
exit 0
|
||||
@@ -1,74 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/**
|
||||
* SimpleConvolution is where each pixel of the output image
|
||||
* is the weighted sum of the neighborhood pixels of the input image
|
||||
* The neighborhood is defined by the dimensions of the mask and
|
||||
* weight of each neighbor is defined by the mask itself.
|
||||
* @param output Output matrix after performing convolution
|
||||
* @param input Input matrix on which convolution is to be performed
|
||||
* @param mask mask matrix using which convolution was to be performed
|
||||
* @param inputDimensions dimensions of the input matrix
|
||||
* @param maskDimensions dimensions of the mask matrix
|
||||
*/
|
||||
__kernel void SimpleConvolution(__global uint * output,
|
||||
__global uint * input,
|
||||
__global float * mask,
|
||||
const uint2 inputDimensions,
|
||||
const uint2 maskDimensions) {
|
||||
|
||||
uint tid = get_global_id(0);
|
||||
|
||||
uint width = inputDimensions.x;
|
||||
uint height = inputDimensions.y;
|
||||
|
||||
uint x = tid%width;
|
||||
uint y = tid/width;
|
||||
|
||||
uint maskWidth = maskDimensions.x;
|
||||
uint maskHeight = maskDimensions.y;
|
||||
|
||||
uint vstep = (maskWidth -1)/2;
|
||||
uint hstep = (maskHeight -1)/2;
|
||||
|
||||
// find the left, right, top and bottom indices such that
|
||||
// the indices do not go beyond image boundaires
|
||||
uint left = (x < vstep) ? 0 : (x - vstep);
|
||||
uint right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
|
||||
uint top = (y < hstep) ? 0 : (y - hstep);
|
||||
uint bottom = ((y + hstep) >= height)? height - 1: (y + hstep);
|
||||
|
||||
// initializing wighted sum value
|
||||
float sumFX = 0;
|
||||
|
||||
for(uint i = left; i <= right; ++i) {
|
||||
for(uint j = top; j <= bottom; ++j) {
|
||||
// performing wighted sum within the mask boundaries
|
||||
uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep));
|
||||
uint index = j * width + i;
|
||||
sumFX += ((float)input[index] * mask[maskIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
// To round to the nearest integer
|
||||
sumFX += 0.5f;
|
||||
output[tid] = (uint)sumFX;
|
||||
}
|
||||
@@ -1,386 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "util/helper_funcs.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
const uint32_t SimpleConvolution::input_data_[]{
|
||||
15, 201, 51, 89, 92, 34, 96, 66, 11, 225, 161, 96, 81, 211, 108, 124, 202, 244, 182,
|
||||
90, 215, 92, 98, 20, 44, 225, 55, 247, 202, 0, 45, 218, 202, 97, 51, 39, 131, 147,
|
||||
105, 143, 116, 11, 239, 198, 222, 92, 67, 169, 81, 250, 3, 40, 86, 101, 60, 131, 70,
|
||||
116, 123, 17, 117, 168, 236, 64, 10, 31, 103, 142, 179, 209, 29, 40, 220, 13, 239, 187,
|
||||
105, 50, 100, 186, 44, 104, 227, 131, 205, 32, 6, 20, 149, 130, 38, 10, 43, 18, 75,
|
||||
53, 50, 178, 195, 230, 132, 225, 14, 96, 238, 253, 27, 88, 48, 128, 18, 92, 232, 246,
|
||||
224, 182, 23, 231, 203, 172, 105, 241, 183, 148, 4, 2, 202, 55, 181, 142, 29, 57, 111,
|
||||
43, 153, 93, 41, 181, 181, 89, 54, 200, 182, 31, 190, 150, 213, 213, 126, 160, 130, 232,
|
||||
146, 57, 125, 151, 59, 71, 206, 240, 213, 236, 42, 68, 24, 195, 162, 65, 121, 87, 155,
|
||||
175, 31, 81, 207, 222, 232, 164, 180, 102, 69, 55, 79, 216, 112, 204, 112, 171, 19, 63,
|
||||
156, 233, 43, 198, 46, 67, 138, 208, 132, 4, 39, 32, 180, 71, 113, 131, 38, 90, 40,
|
||||
219, 193, 109, 18, 16, 70, 131, 220, 182, 46, 240, 245, 203, 217, 32, 146, 7, 100, 28,
|
||||
216, 233, 32, 255, 9, 213, 71, 123, 88, 110, 213, 128, 74, 150, 238, 93, 166, 52, 224,
|
||||
131, 234, 15, 115, 224, 218, 76, 1, 108, 84, 101, 137, 44, 79, 170, 44, 88, 127, 116,
|
||||
211, 216, 226, 168, 88, 45, 63, 70, 138, 230, 123, 107, 105, 101, 122, 220, 70, 84, 41,
|
||||
71, 193, 125, 173, 75, 169, 252, 245, 213, 84, 117, 73, 40, 77, 44, 209, 166, 90, 16,
|
||||
237, 229, 246, 104, 80, 95, 206, 202, 60, 20, 31, 101, 92, 225, 226, 9, 44, 140, 5,
|
||||
34, 97, 89, 151, 171, 129, 229, 216, 82, 139, 51, 99, 120, 24, 89, 225, 104, 185, 175,
|
||||
50, 246, 196, 82, 91, 32, 51, 62, 42, 96, 202, 47, 130, 44, 137, 26, 215, 10, 255,
|
||||
176, 93, 138, 227, 193, 3, 251, 27, 229, 100, 212, 149, 151, 202, 89, 233, 38, 122, 29,
|
||||
100, 164, 125, 46, 212, 0, 90, 93, 26, 50, 103, 25, 226, 197, 164, 198, 135, 168, 194,
|
||||
162, 141, 38, 119, 34, 190, 66, 124, 167, 104, 247, 197, 204, 156, 67, 251, 112, 67, 85,
|
||||
205, 93, 135, 53, 119, 106, 251, 28, 49, 130, 196, 243, 36, 82, 26, 155, 117, 216, 221,
|
||||
241, 128, 70, 233, 70, 18, 133, 137, 14, 245, 204, 99, 195, 42, 235, 248, 161, 86, 243,
|
||||
190, 135, 118, 130, 123, 154, 213, 150, 54, 74, 111, 20, 60, 240, 90, 37, 54, 109, 171,
|
||||
191, 123, 161, 140, 222, 100, 182, 202, 93, 88, 32, 80, 23, 168, 198, 153, 36, 97, 111,
|
||||
187, 151, 185, 43, 172, 245, 27, 6, 27, 82, 115, 199, 18, 239, 104, 158, 206, 205, 85,
|
||||
152, 42, 174, 185, 123, 197, 98, 65, 95, 135, 163, 206, 66, 59, 136, 109, 231, 125, 137,
|
||||
237, 153, 219, 97, 96, 237, 81, 201, 140, 31, 150, 226, 183, 192, 144, 113, 59, 86, 212,
|
||||
125, 182, 91, 33, 132, 158, 92, 12, 12, 68, 138, 149, 50, 36, 113, 147, 133, 95, 229,
|
||||
78, 235, 4, 228, 206, 188, 165, 95, 45, 225, 181, 1, 94, 107, 93, 128, 240, 251, 220,
|
||||
252, 7, 32, 135, 156, 83, 171, 14, 230, 48, 109, 203, 126, 89, 208, 99, 39, 140, 9,
|
||||
134, 185, 234, 60, 187, 73, 167, 24, 201, 152, 20, 166, 148, 27, 199, 28, 184, 26, 199,
|
||||
198, 0, 248, 52, 204, 119, 141, 157, 218, 181, 41, 227, 59, 227, 206, 119, 159, 23, 31,
|
||||
184, 224, 183, 204, 134, 76, 231, 77, 105, 160, 103, 48, 103, 104, 41, 155, 53, 160, 41,
|
||||
210, 123, 222, 252, 95, 26, 223, 45, 146, 126, 68, 177, 54, 37, 105, 3, 171, 182, 235,
|
||||
249, 31, 139, 97, 80, 243, 202, 121, 143, 0, 26, 184, 210, 149, 151, 207, 244, 177, 174,
|
||||
34, 67, 45, 102, 245, 100, 140, 95, 104, 55, 21, 83, 49, 53, 223, 147, 134, 210, 93,
|
||||
0, 97, 93, 26, 26, 48, 175, 178, 255, 164, 99, 174, 198, 167, 220, 45, 156, 64, 185,
|
||||
252, 168, 241, 18, 252, 35, 71, 219, 182, 205, 173, 19, 206, 15, 113, 232, 42, 161, 152,
|
||||
220, 160, 60, 64, 79, 3, 231, 43, 49, 132, 108, 235, 128, 21, 220, 146, 17, 255, 218,
|
||||
236, 182, 168, 154, 201, 118, 170, 58, 94, 212, 220, 246, 177, 125, 51, 241, 204, 55, 216,
|
||||
248, 104, 92, 100, 83, 221, 121, 48, 111, 138, 47, 73, 119, 230, 241, 17, 175, 103, 187,
|
||||
234, 198, 144, 199, 188, 65, 68, 240, 51, 17, 39, 11, 9, 143, 104, 109, 227, 70, 231,
|
||||
19, 181, 113, 66, 255, 233, 41, 241, 250, 217, 89, 182, 196, 31, 71, 139, 220, 137, 208,
|
||||
204, 188, 225, 243, 200, 234, 131, 48, 88, 102, 119, 63, 121, 44, 177, 188, 44, 154, 229,
|
||||
29, 149, 190, 118, 76, 130, 150, 147, 14, 114, 28, 222, 62, 217, 191, 50, 161, 170, 181,
|
||||
210, 2, 28, 73, 66, 149, 117, 243, 81, 162, 141, 55, 191, 35, 245, 54, 111, 120, 204,
|
||||
2, 134, 62, 31, 100, 125, 248, 36, 175, 153, 206, 101, 107, 209, 129, 181, 19, 22, 43,
|
||||
7, 104, 205, 149, 159, 140, 184, 149, 195, 39, 14, 143, 42, 148, 205, 73, 249, 74, 66,
|
||||
30, 250, 219, 237, 96, 71, 190, 225, 253, 210, 248, 40, 218, 96, 245, 111, 0, 130, 39,
|
||||
150, 69, 79, 165, 212, 122, 57, 162, 195, 51, 237, 6, 82, 231, 225, 63, 71, 41, 253,
|
||||
41, 38, 208, 33, 78, 170, 130, 68, 26, 131, 198, 66, 26, 12, 145, 191, 224, 11, 249,
|
||||
130, 207, 44, 112, 213, 126, 88, 183, 190, 160, 225, 187, 201, 8, 140, 235, 87, 55, 109,
|
||||
155, 81, 241, 98, 147, 11, 110, 37, 202, 79, 49, 195, 210, 0, 240, 66, 214, 110, 154,
|
||||
142, 44, 58, 111, 232, 4, 119, 117, 239, 207, 172, 93, 106, 254, 78, 205, 145, 89, 59,
|
||||
183, 35, 138, 232, 230, 92, 233, 214, 159, 191, 69, 58, 78, 114, 116, 189, 91, 121, 53,
|
||||
208, 104, 4, 125, 198, 111, 123, 20, 60, 13, 109, 120, 196, 145, 3, 172, 119, 95, 150,
|
||||
78, 255, 85, 147, 57, 163, 6, 174, 97, 97, 39, 151, 50, 144, 155, 175, 86, 11, 43,
|
||||
107, 71, 56, 216, 191, 253, 105, 194, 170, 225, 34, 64, 47, 34, 150, 195, 91, 58, 201,
|
||||
10, 155, 43, 49, 50, 93, 194, 206, 13, 25, 217, 56, 132, 33, 112, 92, 225, 109, 198,
|
||||
164, 23, 167, 199, 88, 215, 234, 238, 155, 69, 40, 100, 80, 196, 144, 129, 246, 237, 68,
|
||||
197, 250, 93, 159, 51, 225, 193, 163, 62, 163, 17, 4, 71, 41, 172, 15, 130, 132, 249,
|
||||
112, 31, 63, 152, 132, 143, 92, 20, 17, 83, 1, 86, 25, 252, 179, 185, 47, 149, 122,
|
||||
211, 211, 29, 229, 216, 101, 15, 133, 117, 145, 9, 111, 1, 40, 175, 154, 173, 62, 247,
|
||||
193, 80, 75, 194, 166, 100, 191, 90, 29, 239, 239, 152, 194, 195, 182, 168, 156, 27, 183,
|
||||
33, 145, 73, 43, 0, 75, 83, 175, 229, 0, 238, 221, 194, 63, 40, 133, 230, 140, 68,
|
||||
64, 170, 51, 48, 66, 246, 243, 248, 159, 144, 20, 87, 177, 165, 160, 220, 166, 235, 48,
|
||||
86, 209, 49, 68, 174, 243, 132, 214, 120, 106, 99, 189, 170, 13, 241, 219, 80, 232, 207,
|
||||
72, 135, 95, 92, 223, 16, 2, 127, 237, 169, 107, 29, 255, 61, 79, 68, 236, 67, 200,
|
||||
194, 188, 50, 38, 121, 221, 52, 107, 184, 132, 84, 136, 204, 219, 231, 41, 186, 248, 44,
|
||||
58, 229, 213, 166, 3, 212, 227, 82, 25, 207, 150, 225, 146, 82, 20, 185, 204, 242, 237,
|
||||
55, 170, 113, 139, 50, 62, 103, 26, 103, 34, 18, 148, 93, 247, 105, 3, 251, 62, 231,
|
||||
77, 87, 182, 227, 57, 73, 54, 77, 2, 2, 63, 239, 57, 234, 97, 197, 29, 159, 44,
|
||||
55, 7, 79, 74, 155, 172, 66, 5, 175, 61, 67, 150, 139, 155, 77, 111, 212, 151, 165,
|
||||
34, 153, 167, 98, 137, 225, 77, 234, 166, 107, 138, 211, 163, 145, 34, 237, 45, 206, 47,
|
||||
50, 126, 108, 117, 21, 248, 17, 98, 103, 230, 249, 12, 9, 147, 179, 107, 29, 149, 185,
|
||||
7, 59, 37, 146, 14, 200, 35, 49, 182, 80, 0, 230, 130, 126, 83, 248, 148, 75, 9,
|
||||
247, 178, 240, 240, 190, 249, 132, 114, 101, 161, 7, 30, 169, 67, 68, 59, 82, 12, 95,
|
||||
131, 195, 176, 131, 169, 51, 2, 252, 44, 150, 72, 54, 141, 250, 38, 126, 185, 31, 3,
|
||||
44, 132, 165, 52, 163, 78, 120, 231, 138, 202, 244, 234, 77, 183, 155, 209, 97, 207, 212,
|
||||
94, 251, 107, 166, 49, 249, 161, 88, 120, 91, 120, 123, 135, 253, 33, 188, 160, 112, 52,
|
||||
136, 250, 254, 125, 229, 76, 53, 128, 30, 150, 79, 243, 244, 75, 95, 155, 125, 88, 60,
|
||||
213, 209, 152, 78, 77, 32, 75, 110, 220, 236, 222, 17, 117, 217, 15, 242, 190, 92, 39,
|
||||
63, 123, 190, 143, 111, 178, 219, 206, 78, 88, 38, 138, 46, 247, 34, 124, 69, 66, 199,
|
||||
179, 31, 179, 145, 48, 41, 106, 64, 27, 41, 157, 67, 105, 24, 1, 249, 135, 179, 212,
|
||||
86, 1, 44, 124, 140, 91, 116, 175, 215, 185, 242, 159, 108, 17, 83, 254, 66, 124, 105,
|
||||
131, 151, 146, 32, 218, 252, 57, 219, 245, 193, 143, 201, 23, 145, 246, 148, 30, 82, 8,
|
||||
206, 41, 194, 192, 201, 47, 210, 28, 46, 20, 152, 151, 151, 48, 42, 184, 11, 38, 241,
|
||||
231, 28, 179, 119, 230, 202, 8, 220, 94, 39, 46, 103, 245, 88, 42, 181, 33, 90, 136,
|
||||
62, 136, 156, 214, 31, 52, 7, 74, 237, 19, 113, 223, 250, 141, 146, 113, 115, 92, 122,
|
||||
80, 187, 161, 126, 35, 150, 215, 78, 76, 249, 168, 212, 55, 48, 113, 14, 80, 166, 21,
|
||||
154, 147, 40, 12, 114, 35, 153, 5, 148, 12, 98, 15, 92, 29, 176, 219, 65, 71, 179,
|
||||
143, 147, 172, 56, 104, 227, 104, 218, 241, 185, 128, 7, 84, 20, 47, 96, 135, 82, 249,
|
||||
140, 231, 6, 238, 246, 99, 12, 167, 63, 77, 238, 242, 221, 130, 158, 21, 235, 129, 126,
|
||||
197, 114, 56, 69, 121, 140, 90, 169, 237, 225, 252, 231, 109, 228, 237, 91, 219, 81, 104,
|
||||
130, 144, 181, 113, 130, 147, 244, 32, 169, 223, 162, 39, 164, 21, 95, 234, 143, 236, 68,
|
||||
57, 217, 37, 53, 192, 147, 25, 174, 239, 245, 0, 87, 119, 144, 13, 232, 19, 160, 220,
|
||||
51, 73, 188, 214, 113, 96, 235, 209, 75, 122, 190, 144, 179, 151, 181, 233, 88, 73, 3,
|
||||
7, 56, 248, 7, 143, 112, 152, 156, 89, 171, 61, 53, 223, 135, 242, 181, 248, 83, 161,
|
||||
202, 158, 28, 136, 46, 208, 32, 228, 186, 121, 45, 189, 128, 102, 182, 136, 246, 38, 32,
|
||||
147, 127, 204, 208, 181, 171, 87, 167, 97, 80, 250, 2, 26, 153, 31, 163, 200, 239, 195,
|
||||
172, 169, 60, 218, 103, 188, 65, 30, 69, 55, 68, 102, 202, 196, 50, 154, 121, 221, 242,
|
||||
33, 63, 67, 28, 66, 93, 181, 97, 0, 126, 81, 196, 43, 251, 0, 5, 98, 189, 70,
|
||||
128, 3, 126, 197, 105, 72, 137, 155, 227, 3, 121, 214, 36, 184, 25, 65, 250, 118, 247,
|
||||
91, 119, 117, 173, 60, 160, 168, 60, 166, 10, 250, 237, 139, 253, 107, 80, 102, 180, 217,
|
||||
2, 151, 221, 123, 109, 1, 52, 134, 66, 46, 253, 57, 138, 117, 175, 55, 178, 79, 223,
|
||||
239, 245, 234, 233, 226, 117, 231, 78, 198, 78, 2, 159, 80, 154, 124, 204, 7, 126, 0,
|
||||
142, 193, 47, 140, 251, 185, 2, 170, 241, 180, 249, 208, 163, 239, 186, 141, 210, 48, 116,
|
||||
32, 246, 195, 34, 150, 19, 188, 19, 224, 196, 146, 224, 83, 83, 15, 224, 78, 201, 226,
|
||||
249, 186, 151, 243, 139, 58, 226, 70, 199, 181, 118, 60, 213, 109, 255, 248, 3, 19, 181,
|
||||
23, 243, 122, 169, 212, 205, 252, 228, 173, 75, 173, 144, 68, 104, 39, 55, 243, 98, 26,
|
||||
57, 41, 207, 175, 102, 165, 29, 102, 158, 32, 121, 83, 56, 109, 205, 225, 66, 155, 222,
|
||||
38, 73, 42, 212, 218, 110, 60, 1, 166, 48, 99, 193, 105, 141, 145, 25, 244, 54, 54,
|
||||
90, 213, 87, 212, 40, 143, 66, 246, 112, 132, 146, 79, 171, 220, 121, 128, 182, 232, 189,
|
||||
184, 143, 237, 27, 80, 86, 169, 226, 112, 158, 25, 166, 248, 238, 253, 204, 23, 141, 15,
|
||||
13, 254, 147, 160, 77, 63, 124, 199, 191, 50, 175, 124, 234, 62, 105, 6, 143, 192, 176,
|
||||
113, 48, 78, 139, 215, 71, 121, 213, 20, 144, 98, 35, 158, 96, 183, 62, 174, 246, 187,
|
||||
117, 182, 237, 37, 50, 216, 99, 156, 223, 243, 93, 143, 101, 142, 222, 240, 101, 37, 106,
|
||||
58, 57, 250, 157, 93, 153, 254, 20, 216, 172, 10, 147, 34, 192, 129, 71, 243, 90, 171,
|
||||
144, 57, 159, 238, 201, 4, 124, 167, 244, 225, 205, 95, 28, 7, 89, 185, 100, 243, 184,
|
||||
121, 203, 100, 131, 95, 135, 68, 224, 207, 56, 58, 122, 201, 115, 25, 183, 61, 30, 51,
|
||||
229, 18, 21, 178, 113, 49, 186, 203, 235, 31, 191, 163, 152, 138, 8, 28, 233, 143, 97,
|
||||
202, 95, 153, 4, 217, 98, 120, 243, 26, 182, 17, 77, 155, 36, 99, 78, 150, 149, 8,
|
||||
98, 128, 39, 33, 36, 192, 172, 45, 220, 149, 189, 61, 96, 28, 215, 100, 246, 58, 221,
|
||||
233, 84, 147, 251, 162, 47, 31, 5, 125, 181, 154, 134, 23, 27, 174, 57, 64, 110, 229,
|
||||
109, 75, 123, 43, 136, 219, 71, 95, 64, 61, 154, 29, 39, 238, 177, 34, 145, 225, 65,
|
||||
150, 94, 247, 49, 229, 15, 77, 147, 72, 141, 2, 45, 251, 77, 169, 38, 213, 132, 110,
|
||||
53, 196, 172, 207, 226, 212, 190, 148, 246, 79, 117, 56, 230, 212, 48, 23, 185, 63, 100,
|
||||
76, 136, 242, 78, 181, 237, 156, 95, 20, 113, 227, 131, 167, 168, 47, 119, 139, 3, 53,
|
||||
31, 250, 133, 149, 50, 107, 105, 99, 130, 34, 162, 231, 111, 42, 217, 190, 224, 199, 90,
|
||||
63, 220, 204, 35, 95, 115, 203, 143, 234, 86, 147, 32, 118, 141, 165, 11, 192, 16, 117,
|
||||
35, 147, 152, 198, 123, 7, 240, 84, 198, 209, 28, 33, 17, 248, 237, 52, 88, 97, 255,
|
||||
231, 76, 86, 122, 109, 204, 8, 18, 216, 201, 35, 77, 237, 183, 229, 179, 50, 237, 164,
|
||||
135, 179, 118, 164, 213, 135, 157, 195, 187, 245, 36, 187, 220, 113, 18, 87, 222, 222, 96,
|
||||
241, 183, 42, 21, 4, 23, 205, 233, 203, 0, 214, 112, 136, 138, 230, 44, 95, 110, 201,
|
||||
34, 41, 191, 71, 229, 155, 185, 247, 243, 151, 214, 84, 137, 141, 126, 159, 146, 149, 108,
|
||||
124, 97, 109, 82, 209, 245, 221, 183, 34, 60, 37, 236, 95, 79, 171, 167, 53, 71, 96,
|
||||
45, 58, 248, 3, 142, 129, 145, 12, 33, 36, 162, 142, 160, 3, 251, 243, 213, 240, 208,
|
||||
141, 19, 13, 178, 255, 109, 2, 170, 20, 55, 241, 116, 101, 44, 108, 105, 186, 238, 251,
|
||||
199, 15, 31, 106, 157, 191, 110, 152, 178, 67, 137, 131, 208, 156, 144, 131, 155, 253, 134,
|
||||
70, 18, 190, 55, 134, 35, 99, 243, 140, 30, 225, 135, 230, 240, 166, 81, 142, 102, 191,
|
||||
39, 25, 3, 177, 156, 211, 77, 45, 87, 233, 43, 221, 48, 61, 155, 103, 195, 191, 203,
|
||||
182, 75, 233, 152, 211, 208, 136, 121, 33, 23, 224, 224, 62, 249, 227, 239, 149, 183, 61,
|
||||
195, 15, 39, 238, 236, 87, 43, 136, 191, 239, 71, 138, 166, 147, 116, 62, 102, 68, 199,
|
||||
224, 101, 223, 193, 70, 29, 186, 42, 13, 80, 225, 75, 19, 241, 115, 1, 221, 202, 45,
|
||||
102, 137, 29, 174, 20, 195, 66, 136, 2, 168, 205, 201, 137, 50, 168, 74, 121, 198, 4,
|
||||
163, 212, 85, 133, 31, 105, 118, 146, 106, 84, 93, 152, 187, 231, 181, 105, 251, 121, 171,
|
||||
132, 123, 84, 81, 69, 221, 132, 238, 40, 253, 181, 45, 161, 137, 130, 39, 169, 235, 158,
|
||||
59, 86, 242, 153, 239, 173, 128, 165, 23, 123, 30, 195, 0, 154, 23, 81, 224, 245, 214,
|
||||
206, 30, 212, 131, 75, 117, 12, 206, 157, 181, 186, 59, 241, 17, 45, 138, 0, 219, 11,
|
||||
165, 243, 135, 196, 182, 135, 95, 205, 217, 63, 195, 175, 14, 225, 131, 145, 45, 249, 158,
|
||||
251, 150, 84, 182, 209, 70, 199, 255, 209, 199, 219, 220, 109, 206, 99, 50, 132, 234, 146,
|
||||
82, 195, 209, 22, 114, 223, 247, 246, 113, 37, 239, 16, 33, 134, 100, 215, 88, 170, 158,
|
||||
87, 123, 102, 50, 88, 211, 1, 187, 6, 134, 165, 152, 216, 105, 106, 239, 220, 74, 231,
|
||||
210, 187, 12, 194, 204, 45, 72, 49, 4, 160, 219, 162, 248, 87, 8, 43, 176, 220, 44,
|
||||
107, 227, 178, 17, 124, 139, 122, 230, 122, 87, 48, 97, 42, 236, 110, 236, 185, 155, 53,
|
||||
234, 159, 214, 198, 66, 206, 30, 75, 249, 206, 40, 38, 57, 11, 217, 74, 136, 100, 197,
|
||||
110, 223, 29, 159, 65, 71, 140, 175, 51, 69, 74, 105, 48, 234, 63, 246, 45, 13, 20,
|
||||
121, 7, 226, 161, 46, 28, 173, 7, 103, 53, 108, 45, 164, 76, 74, 68, 141, 145, 208,
|
||||
61, 197, 22, 136, 46, 70, 115, 110, 60, 161, 124, 81, 26, 132, 51, 188, 178, 79, 106,
|
||||
186, 183, 160, 39, 228, 68, 115, 46, 136, 1, 192, 89, 62, 133, 112, 198, 180, 182, 58,
|
||||
34, 243, 219, 158, 69, 245, 34, 120, 178, 213, 200, 28, 143, 128, 188, 182, 100, 1, 41,
|
||||
146, 137, 43, 82, 227, 105, 216, 83, 48, 140, 10, 106, 175, 254, 70, 77, 67, 59, 112,
|
||||
188, 237, 69, 133, 10, 212, 5, 198, 138, 105, 199, 180, 252, 81, 223, 79, 53, 73, 39,
|
||||
137, 121, 180, 148, 228, 99, 146, 42, 177, 214, 102, 33, 147, 84, 102, 25, 94, 59, 31,
|
||||
37, 197, 137, 237, 122, 133, 63, 90, 213, 116, 163, 253, 253, 29, 177, 145, 2, 21, 36,
|
||||
45, 198, 251, 147, 231, 143, 232, 78, 168, 71, 137, 199, 108, 79, 80, 90, 201, 214, 153,
|
||||
35, 172, 13, 199, 169, 11, 228, 91, 157, 231, 112, 193, 20, 54, 189, 167, 30, 77, 144,
|
||||
108, 245, 215, 246, 189, 68, 69, 14, 158, 14, 228, 55, 50, 145, 69, 249, 58, 80, 222,
|
||||
149, 237, 198, 5, 175, 218, 60, 109, 130, 91, 186, 18, 200, 175, 234, 190, 109, 46, 3,
|
||||
123, 204, 18, 96, 4, 68, 241, 73, 62, 44, 154, 29, 193, 136, 227, 199, 55, 189, 4,
|
||||
164, 64, 95, 95, 82, 39, 15, 60, 230, 124, 107, 233, 248, 55, 251, 89, 60, 63, 75,
|
||||
134, 126, 119, 32, 156, 57, 168, 127, 0, 224, 61, 5, 133, 125, 100, 228, 208, 140, 243,
|
||||
12, 114, 111, 119, 92, 104, 175, 87, 193, 236, 151, 13, 114, 21, 132, 146, 177, 189, 59,
|
||||
49, 190, 27, 110, 195, 160, 236, 40, 132, 188, 181, 120, 201, 40, 232, 65, 132, 80, 241,
|
||||
220, 18, 221, 115, 31, 79, 137, 164, 226, 58, 98, 29, 108, 32, 57, 219, 228, 218, 199,
|
||||
13, 95, 132, 195, 215, 77, 235, 191, 143, 112, 16, 128, 76, 35, 93, 191, 66, 173, 73,
|
||||
231, 143, 132, 73, 173, 240, 106, 231, 203, 78, 193, 147, 92, 33, 23, 31, 248, 100, 11,
|
||||
184, 243, 123, 201, 115, 200, 236, 209, 135, 47, 126, 209, 22, 14, 85, 95, 188, 69, 202,
|
||||
163, 17, 24, 101, 164, 117, 134, 187, 148, 127, 31, 159, 55, 19, 27, 1, 135, 227, 237,
|
||||
89, 107, 28, 216, 60, 51, 230, 145, 147, 163, 215, 93, 70, 232, 118, 172, 140, 235, 50,
|
||||
71, 128, 177, 103, 32, 233, 123, 60, 234, 2, 31, 216, 91, 139, 244, 52, 200, 40, 26,
|
||||
90, 188, 189, 49, 25, 4, 25, 144, 176, 166, 124, 227, 237, 252, 148, 85, 29, 125, 208,
|
||||
89, 104, 210, 121, 64, 46, 4, 53, 99, 204, 93, 125, 38, 25, 59, 88, 51, 64, 113,
|
||||
195, 241, 23, 64, 212, 5, 60, 104, 90, 90, 230, 42, 179, 78, 253, 44, 143, 44, 49,
|
||||
196, 143, 254, 34, 13, 36, 60, 73, 125, 112, 137, 239, 52, 122, 7, 116, 79, 12, 177,
|
||||
183, 103, 11, 158, 146, 190, 237, 143, 235, 124, 188, 28, 65, 76, 26, 100, 89, 63, 160,
|
||||
163, 188, 17, 44, 172, 69, 167, 179, 185, 246, 191, 107, 174, 38, 118, 76, 184, 53, 58,
|
||||
72, 32, 182, 5, 61, 248, 81, 88, 92, 170, 152, 253, 77, 84, 14, 122, 1, 83, 34,
|
||||
180, 13, 25, 115, 120, 199, 154, 238, 20, 83, 36, 79, 155, 68, 5, 160, 130, 254, 242,
|
||||
218, 90, 156, 114, 87, 234, 199, 101, 101, 200, 185, 135, 124, 198, 160, 240, 62, 104, 138,
|
||||
45, 125, 222, 81, 204, 122, 150, 210, 26, 24, 208, 12, 242, 42, 169, 101, 130, 148, 44,
|
||||
232, 249, 245, 161, 128, 113, 103, 33, 98, 166, 137, 236, 212, 7, 202, 38, 211, 69, 188,
|
||||
165, 95, 212, 118, 108, 199, 161, 22, 45, 35, 170, 90, 11, 163, 79, 173, 36, 193, 20,
|
||||
69, 35, 187, 207, 16, 144, 214, 219, 182, 170, 32, 114, 79, 128, 71, 198, 237, 15, 103,
|
||||
4, 60, 139, 175, 150, 151, 82, 230, 68, 119, 168, 89, 188, 204, 20, 140, 220, 165, 98,
|
||||
184, 91, 12, 217, 205, 92, 90, 20, 35, 71, 36, 138, 76, 96, 22, 251, 247, 173, 78,
|
||||
222, 241, 197, 134, 75, 130, 83, 96, 14, 47, 5, 113, 232, 96, 126, 193, 45, 218, 28,
|
||||
66, 253, 99, 103, 136, 176, 200, 158, 171, 191, 76, 249, 158, 62, 190, 37, 137, 65, 120,
|
||||
233, 80, 168, 238, 193, 145, 79, 63, 82, 125, 26, 111, 191, 24, 210, 39, 161, 131, 239,
|
||||
64, 46, 175, 140, 39, 77, 202, 230, 115, 84, 40, 235, 62, 120, 148, 45, 57, 37, 124,
|
||||
121, 120, 249, 148, 231, 185, 172, 186, 224, 77, 61, 207, 141, 107, 126, 26, 147, 204, 229,
|
||||
121, 63, 58, 161, 43, 120, 25, 191, 165, 83, 228, 34, 205, 92, 27, 97, 67, 213, 13,
|
||||
253, 182, 91, 59, 133, 233, 166, 4, 4, 57, 209, 233, 179, 16, 35, 85, 59, 155, 111,
|
||||
250, 65, 194, 223, 99, 144, 59, 127, 241, 127, 85, 255, 125, 11, 90, 184, 145, 68, 95,
|
||||
150, 72, 153, 103, 49, 76, 120, 85, 161, 179, 241, 16, 174, 51, 211, 142, 150, 99, 201,
|
||||
22, 85, 73, 108, 84, 199, 120, 175, 128, 9, 243, 223, 160, 59, 120, 8, 109, 197, 128,
|
||||
194, 103, 52, 180, 119, 227, 231, 75, 113, 126, 175, 59, 148, 4, 132, 1, 89, 75, 121,
|
||||
8, 204, 131, 251, 171, 36, 55, 36, 44, 165, 233, 172, 103, 80, 224, 28, 200, 195, 3,
|
||||
20, 53, 129, 195, 112, 22, 200, 244, 23, 34, 64, 145, 42, 12, 20, 38, 184, 56, 94,
|
||||
220, 101, 3, 198, 17, 107, 22, 242, 135, 222, 182, 138, 243, 235, 11, 182, 91, 34, 127,
|
||||
80, 58, 161, 145, 203, 204, 158, 224, 242, 86, 24, 81, 51, 126, 84, 249, 143, 191, 15,
|
||||
130, 70, 238, 57, 209, 225, 36, 221, 152, 128, 255, 24, 208, 57, 186, 97, 4, 134, 255,
|
||||
229, 121, 86, 254, 202, 137, 124, 31, 130, 12, 222, 146, 142, 37, 129, 199, 247, 98, 236,
|
||||
212, 251, 108, 211, 20, 60, 13, 206, 158, 18, 84};
|
||||
|
||||
SimpleConvolution::SimpleConvolution() {
|
||||
width_ = 64;
|
||||
height_ = 64;
|
||||
mask_width_ = 3;
|
||||
mask_height_ = mask_width_;
|
||||
randomize_seed_ = 0;
|
||||
|
||||
if (!IsPowerOf2(width_)) {
|
||||
width_ = RoundToPowerOf2(width_);
|
||||
}
|
||||
|
||||
if (!IsPowerOf2(height_)) {
|
||||
height_ = RoundToPowerOf2(height_);
|
||||
}
|
||||
|
||||
if (!(mask_width_ % 2)) {
|
||||
mask_width_++;
|
||||
}
|
||||
|
||||
if (!(mask_height_ % 2)) {
|
||||
mask_height_++;
|
||||
}
|
||||
|
||||
if (width_ * height_ < 256) {
|
||||
width_ = 64;
|
||||
height_ = 64;
|
||||
}
|
||||
|
||||
const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t);
|
||||
const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float);
|
||||
|
||||
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, sizeof(kernel_args_t));
|
||||
SetInDescr(INPUT_BUF_ID, SYS_DES_ID, input_size_bytes);
|
||||
SetInDescr(MASK_BUF_ID, SYS_DES_ID, mask_size_bytes);
|
||||
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, input_size_bytes);
|
||||
SetHostDescr(REFOUT_BUF_ID, REFOUT_DES_ID, input_size_bytes);
|
||||
|
||||
if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes);
|
||||
}
|
||||
|
||||
void SimpleConvolution::Init() {
|
||||
std::clog << "SimpleConvolution::init :" << std::endl;
|
||||
|
||||
mem_descr_t kernarg_des = GetDescr(KERNARG_BUF_ID);
|
||||
mem_descr_t input_des = GetDescr(INPUT_BUF_ID);
|
||||
mem_descr_t mask_des = GetDescr(MASK_BUF_ID);
|
||||
mem_descr_t output_des = GetDescr(LOCAL_BUF_ID);
|
||||
#if 0
|
||||
printf("kernarg_des %p 0x%x\n", kernarg_des.ptr, kernarg_des.size);
|
||||
printf("input_des %p 0x%x\n", input_des.ptr, input_des.size);
|
||||
printf("mask_des %p 0x%x\n", mask_des.ptr, mask_des.size);
|
||||
printf("output_des %p 0x%x\n", output_des.ptr, output_des.size);
|
||||
#endif
|
||||
uint32_t* input = reinterpret_cast<uint32_t*>(input_des.ptr);
|
||||
uint32_t* output_local = reinterpret_cast<uint32_t*>(output_des.ptr);
|
||||
float* mask = reinterpret_cast<float*>(mask_des.ptr);
|
||||
kernel_args_t* kernel_args = reinterpret_cast<kernel_args_t*>(kernarg_des.ptr);
|
||||
|
||||
if (randomize_seed_) {
|
||||
// random initialisation of input
|
||||
FillRandom<uint32_t>(input, width_, height_, 0, 255, randomize_seed_);
|
||||
} else {
|
||||
// initialization with preset values
|
||||
memcpy(input, input_data_, width_ * height_ * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
// Fill a blurr filter or some other filter of your choice
|
||||
const float val = 1.0f / (mask_width_ * 2.0f - 1.0f);
|
||||
for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) {
|
||||
mask[i] = 0;
|
||||
}
|
||||
for (uint32_t i = 0; i < mask_width_; i++) {
|
||||
uint32_t y = mask_height_ / 2;
|
||||
mask[y * mask_width_ + i] = val;
|
||||
}
|
||||
for (uint32_t i = 0; i < mask_height_; i++) {
|
||||
uint32_t x = mask_width_ / 2;
|
||||
mask[i * mask_width_ + x] = val;
|
||||
}
|
||||
|
||||
// Print the INPUT array.
|
||||
std::clog << std::dec;
|
||||
PrintArray<uint32_t>("> Input[0]", input, width_, 1);
|
||||
PrintArray<float>("> Mask", mask, mask_width_, mask_height_);
|
||||
|
||||
// Fill the kernel args
|
||||
kernel_args->arg1 = output_local;
|
||||
kernel_args->arg2 = input;
|
||||
kernel_args->arg3 = mask;
|
||||
kernel_args->arg4 = width_;
|
||||
kernel_args->arg41 = height_;
|
||||
kernel_args->arg5 = mask_width_;
|
||||
kernel_args->arg51 = mask_height_;
|
||||
|
||||
// Calculate the reference output
|
||||
ReferenceImplementation(reinterpret_cast<uint32_t*>(GetRefOut()), input, mask, width_, height_,
|
||||
mask_width_, mask_height_);
|
||||
}
|
||||
|
||||
void SimpleConvolution::PrintOutput(const void* ptr) const {
|
||||
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<const uint32_t*>(ptr), width_, 1);
|
||||
}
|
||||
|
||||
bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input,
|
||||
const float* mask, const uint32_t width,
|
||||
const uint32_t height, const uint32_t mask_width,
|
||||
const uint32_t mask_height) {
|
||||
const uint32_t vstep = (mask_width - 1) / 2;
|
||||
const uint32_t hstep = (mask_height - 1) / 2;
|
||||
|
||||
// for each pixel in the input
|
||||
for (uint32_t x = 0; x < width; x++) {
|
||||
for (uint32_t y = 0; y < height; y++) {
|
||||
// find the left, right, top and bottom indices such that
|
||||
// the indices do not go beyond image boundaires
|
||||
const uint32_t left = (x < vstep) ? 0 : (x - vstep);
|
||||
const uint32_t right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
|
||||
const uint32_t top = (y < hstep) ? 0 : (y - hstep);
|
||||
const uint32_t bottom = ((y + hstep) >= height) ? height - 1 : (y + hstep);
|
||||
|
||||
// initializing wighted sum value
|
||||
float sum_fx = 0;
|
||||
for (uint32_t i = left; i <= right; ++i) {
|
||||
for (uint32_t j = top; j <= bottom; ++j) {
|
||||
// performing wighted sum within the mask boundaries
|
||||
uint32_t mask_idx = (j - (y - hstep)) * mask_width + (i - (x - vstep));
|
||||
uint32_t index = j * width + i;
|
||||
|
||||
// to round to the nearest integer
|
||||
sum_fx += ((float)input[index] * mask[mask_idx]);
|
||||
}
|
||||
}
|
||||
sum_fx += 0.5f;
|
||||
output[y * width + x] = uint32_t(sum_fx);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1,92 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
|
||||
#define TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/test_kernel.h"
|
||||
|
||||
// Class implements SimpleConvolution kernel parameters
|
||||
class SimpleConvolution : public TestKernel {
|
||||
public:
|
||||
// Kernel buffers IDs
|
||||
enum { INPUT_BUF_ID, LOCAL_BUF_ID, MASK_BUF_ID, KERNARG_BUF_ID, REFOUT_BUF_ID };
|
||||
|
||||
// Constructor
|
||||
SimpleConvolution();
|
||||
|
||||
// Initialize method
|
||||
void Init();
|
||||
|
||||
// Return compute grid size
|
||||
uint32_t GetGridSize() const { return width_ * height_; }
|
||||
|
||||
// Print output
|
||||
void PrintOutput(const void* ptr) const;
|
||||
|
||||
// Return name
|
||||
std::string Name() const { return std::string("SimpleConvolution"); }
|
||||
|
||||
private:
|
||||
// Local kernel arguments declaration
|
||||
struct kernel_args_t {
|
||||
void* arg1;
|
||||
void* arg2;
|
||||
void* arg3;
|
||||
uint32_t arg4;
|
||||
uint32_t arg41;
|
||||
uint32_t arg5;
|
||||
uint32_t arg51;
|
||||
};
|
||||
|
||||
// Reference CPU implementation of Simple Convolution
|
||||
// @param output Output matrix after performing convolution
|
||||
// @param input Input matrix on which convolution is to be performed
|
||||
// @param mask mask matrix using which convolution was to be performed
|
||||
// @param input_dimensions dimensions of the input matrix
|
||||
// @param mask_dimensions dimensions of the mask matrix
|
||||
// @return bool true on success and false on failure
|
||||
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
|
||||
const uint32_t width, const uint32_t height,
|
||||
const uint32_t maskWidth, const uint32_t maskHeight);
|
||||
|
||||
// Width of the Input array
|
||||
uint32_t width_;
|
||||
|
||||
// Height of the Input array
|
||||
uint32_t height_;
|
||||
|
||||
// Mask dimensions
|
||||
uint32_t mask_width_;
|
||||
|
||||
// Mask dimensions
|
||||
uint32_t mask_height_;
|
||||
|
||||
// Randomize input data
|
||||
unsigned randomize_seed_;
|
||||
|
||||
// Input data
|
||||
static const uint32_t input_data_[];
|
||||
};
|
||||
|
||||
#endif // TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
|
||||
@@ -1,115 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef EVT_STATS_H_
|
||||
#define EVT_STATS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
template <class evt_id_t, class evt_weight_t> class EvtStatsT {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef uint64_t evt_count_t;
|
||||
typedef double evt_avr_t;
|
||||
struct evt_record_t {
|
||||
uint64_t count;
|
||||
evt_avr_t avr;
|
||||
evt_record_t() : count(0), avr(0) {}
|
||||
};
|
||||
typedef typename std::map<evt_id_t, evt_record_t> map_t;
|
||||
typedef typename std::map<evt_id_t, const char*> labels_t;
|
||||
|
||||
// Comparison function
|
||||
struct cmpfun {
|
||||
template <typename T> bool operator()(const T& a, const T& b) const {
|
||||
return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first;
|
||||
}
|
||||
};
|
||||
|
||||
inline void add_event(evt_id_t id, evt_weight_t weight) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
// printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout);
|
||||
|
||||
evt_record_t& rec = map_[id];
|
||||
const evt_count_t prev_count = rec.count;
|
||||
const evt_count_t new_count = prev_count + 1;
|
||||
const evt_avr_t prev_avr = rec.avr;
|
||||
const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count;
|
||||
|
||||
rec.count = new_count;
|
||||
rec.avr = new_avr;
|
||||
}
|
||||
|
||||
void dump() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
fprintf(stdout, "Dumping %s\n", path_);
|
||||
fflush(stdout);
|
||||
|
||||
typedef typename std::set<std::pair<evt_id_t, evt_record_t>, cmpfun> set_t;
|
||||
set_t s_(map_.begin(), map_.end());
|
||||
|
||||
uint64_t index = 0;
|
||||
for (auto& e : s_) {
|
||||
const evt_id_t id = e.first;
|
||||
const char* label = get_label(id);
|
||||
std::ostringstream oss;
|
||||
oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr)
|
||||
<< "," << (uint64_t)(e.second.count * e.second.avr);
|
||||
fprintf(fdes_, "%s\n", oss.str().c_str());
|
||||
index += 1;
|
||||
}
|
||||
|
||||
fclose(fdes_);
|
||||
}
|
||||
|
||||
const char* get_label(const uint32_t& id) {
|
||||
auto ret = labels_.insert({id, NULL});
|
||||
const char* label = ret.first->second;
|
||||
return label;
|
||||
}
|
||||
const char* get_label(const char* id) { return id; }
|
||||
const char* get_label(const std::string& id) { return id.c_str(); }
|
||||
|
||||
void set_label(evt_id_t id, const char* label) {
|
||||
// printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout);
|
||||
labels_[id] = label;
|
||||
}
|
||||
|
||||
EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) {
|
||||
// printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout);
|
||||
fprintf(fdes_, "Index,Name,Count,Avr,Total\n");
|
||||
}
|
||||
|
||||
private:
|
||||
mutex_t mutex_;
|
||||
map_t map_;
|
||||
labels_t labels_;
|
||||
FILE* fdes_;
|
||||
const char* path_;
|
||||
};
|
||||
|
||||
typedef EvtStatsT<uint32_t, uint64_t> EvtStats;
|
||||
|
||||
#endif // EVT_STATS_H_
|
||||
@@ -1,84 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_UTIL_HELPER_FUNCS_H_
|
||||
#define TEST_UTIL_HELPER_FUNCS_H_
|
||||
|
||||
#include <time.h>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
static inline void Error(std::string error_msg) {
|
||||
std::cerr << "Error: " << error_msg << std::endl;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void PrintArray(const std::string header, const T* data, const int width, const int height) {
|
||||
std::clog << header << " :\n";
|
||||
for (int i = 0; i < height; i++) {
|
||||
std::clog << "> ";
|
||||
for (int j = 0; j < width; j++) {
|
||||
std::clog << data[i * width + j] << " ";
|
||||
}
|
||||
std::clog << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool FillRandom(T* array_ptr, const int width, const int height, const T range_min,
|
||||
const T range_max, unsigned int seed = 123) {
|
||||
if (!array_ptr) {
|
||||
Error("Cannot fill array. NULL pointer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!seed) seed = (unsigned int)time(NULL);
|
||||
|
||||
srand(seed);
|
||||
double range = double(range_max - range_min) + 1.0;
|
||||
|
||||
/* random initialisation of input */
|
||||
for (int i = 0; i < height; i++)
|
||||
for (int j = 0; j < width; j++) {
|
||||
int index = i * width + j;
|
||||
array_ptr[index] = range_min + T(range * rand() / (RAND_MAX + 1.0));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T> T RoundToPowerOf2(T val) {
|
||||
int bytes = sizeof(T);
|
||||
|
||||
val--;
|
||||
for (int i = 0; i < bytes; i++) val |= val >> (1 << i);
|
||||
val++;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename T> bool IsPowerOf2(T val) {
|
||||
long long long_val = val;
|
||||
return (((long_val & (-long_val)) - long_val == 0) && (long_val != 0));
|
||||
}
|
||||
|
||||
#endif // TEST_UTIL_HELPER_FUNCS_H_
|
||||
@@ -1 +0,0 @@
|
||||
../../src/hsa_rsrc_factory.cpp
|
||||
@@ -1 +0,0 @@
|
||||
../../src/hsa_rsrc_factory.h
|
||||
@@ -1,177 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "util/perf_timer.h"
|
||||
|
||||
PerfTimer::PerfTimer() { freq_in_100mhz_ = MeasureTSCFreqHz(); }
|
||||
|
||||
PerfTimer::~PerfTimer() {
|
||||
while (!timers_.empty()) {
|
||||
Timer* temp = timers_.back();
|
||||
timers_.pop_back();
|
||||
delete temp;
|
||||
}
|
||||
}
|
||||
|
||||
// New cretaed timer instantance index will be returned
|
||||
int PerfTimer::CreateTimer() {
|
||||
Timer* newTimer = new Timer;
|
||||
newTimer->start = 0;
|
||||
newTimer->clocks = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq);
|
||||
#else
|
||||
newTimer->freq = (long long)1.0E3;
|
||||
#endif
|
||||
|
||||
/* Push back the address of new Timer instance created */
|
||||
timers_.push_back(newTimer);
|
||||
return (int)(timers_.size() - 1);
|
||||
}
|
||||
|
||||
int PerfTimer::StartTimer(int index) {
|
||||
if (index >= (int)timers_.size()) {
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// General Windows timing method
|
||||
#ifndef _AMD
|
||||
long long tmpStart;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
|
||||
timers_[index]->start = (double)tmpStart;
|
||||
#else
|
||||
// AMD Windows timing method
|
||||
#endif
|
||||
#else
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
|
||||
#else
|
||||
// AMD timing method
|
||||
unsigned int unused;
|
||||
timers_[index]->start = __rdtscp(&unused);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int PerfTimer::StopTimer(int index) {
|
||||
double n = 0;
|
||||
if (index >= (int)timers_.size()) {
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return FAILURE;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
#ifndef _AMD
|
||||
long long n1;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
|
||||
n = (double)n1;
|
||||
#else
|
||||
// AMD Window Timing
|
||||
#endif
|
||||
|
||||
#else
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3);
|
||||
#else
|
||||
// AMD Linux timing
|
||||
unsigned int unused;
|
||||
n = __rdtscp(&unused);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
n -= timers_[index]->start;
|
||||
timers_[index]->start = 0;
|
||||
|
||||
#ifndef _AMD
|
||||
timers_[index]->clocks += n;
|
||||
#else
|
||||
// timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns
|
||||
timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms
|
||||
#endif
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
void PerfTimer::Error(std::string str) { std::cout << str << std::endl; }
|
||||
|
||||
|
||||
double PerfTimer::ReadTimer(int index) {
|
||||
if (index >= (int)timers_.size()) {
|
||||
Error("Cannot read timer. Invalid handle.");
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
double reading = double(timers_[index]->clocks);
|
||||
|
||||
reading = double(reading / timers_[index]->freq);
|
||||
|
||||
return reading;
|
||||
}
|
||||
|
||||
|
||||
uint64_t PerfTimer::CoarseTimestampUs() {
|
||||
#ifdef _WIN32
|
||||
uint64_t freqHz, ticks;
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz);
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
|
||||
|
||||
// Scale numerator and divisor until (ticks * 1000000) fits in uint64_t.
|
||||
while (ticks > (1ULL << 44)) {
|
||||
ticks /= 16;
|
||||
freqHz /= 16;
|
||||
}
|
||||
|
||||
return (ticks * 1000000) / freqHz;
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
|
||||
#endif
|
||||
}
|
||||
|
||||
uint64_t PerfTimer::MeasureTSCFreqHz() {
|
||||
// Make a coarse interval measurement of TSC ticks for 1 gigacycles.
|
||||
unsigned int unused;
|
||||
uint64_t tscTicksEnd;
|
||||
|
||||
uint64_t coarseBeginUs = CoarseTimestampUs();
|
||||
uint64_t tscTicksBegin = __rdtscp(&unused);
|
||||
do {
|
||||
tscTicksEnd = __rdtscp(&unused);
|
||||
} while (tscTicksEnd - tscTicksBegin < 1000000000);
|
||||
|
||||
uint64_t coarseEndUs = CoarseTimestampUs();
|
||||
|
||||
// Compute the TSC frequency and round to nearest 100MHz.
|
||||
uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000;
|
||||
uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin;
|
||||
return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs;
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_UTIL_PERF_TIMER_H_
|
||||
#define TEST_UTIL_PERF_TIMER_H_
|
||||
|
||||
// Will use AMD timer or general Linux timer based on compilation flag
|
||||
// Need to consider platform is Windows or Linux
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#include <time.h>
|
||||
#include <windows.h>
|
||||
#else
|
||||
#if defined(__GNUC__)
|
||||
#include <sys/time.h>
|
||||
#include <x86intrin.h>
|
||||
#endif // __GNUC__
|
||||
#endif // _MSC_VER
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class PerfTimer {
|
||||
public:
|
||||
enum { SUCCESS = 0, FAILURE = 1 };
|
||||
|
||||
PerfTimer();
|
||||
~PerfTimer();
|
||||
|
||||
// General Linux timing method
|
||||
int CreateTimer();
|
||||
int StartTimer(int index);
|
||||
int StopTimer(int index);
|
||||
|
||||
// retrieve time
|
||||
double ReadTimer(int index);
|
||||
// write into a file
|
||||
double WriteTimer(int index);
|
||||
|
||||
private:
|
||||
struct Timer {
|
||||
std::string name; /* name of time object */
|
||||
long long freq; /* frequency */
|
||||
double clocks; /* number of ticks at end */
|
||||
double start; /* start point ticks */
|
||||
};
|
||||
|
||||
std::vector<Timer*> timers_; /* vector to Timer objects */
|
||||
double freq_in_100mhz_;
|
||||
|
||||
// AMD timing method
|
||||
uint64_t CoarseTimestampUs();
|
||||
uint64_t MeasureTSCFreqHz();
|
||||
|
||||
void Error(std::string str);
|
||||
};
|
||||
|
||||
#endif // TEST_UTIL_PERF_TIMER_H_
|
||||
@@ -1,33 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_UTIL_TEST_ASSERT_H_
|
||||
#define TEST_UTIL_TEST_ASSERT_H_
|
||||
|
||||
#define TEST_ASSERT(cond) \
|
||||
{ \
|
||||
if (!(cond)) { \
|
||||
std::cerr << "Assert failed(" << #cond << ") at " << __FILE__ << ", line " << __LINE__ \
|
||||
<< std::endl; \
|
||||
exit(-1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif // TEST_UTIL_TEST_ASSERT_H_
|
||||
@@ -1,457 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_UTIL_XML_H_
|
||||
#define TEST_UTIL_XML_H_
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace xml {
|
||||
|
||||
class Xml {
|
||||
public:
|
||||
typedef std::vector<char> token_t;
|
||||
|
||||
struct level_t;
|
||||
typedef std::vector<level_t*> nodes_t;
|
||||
typedef std::map<std::string, std::string> opts_t;
|
||||
struct level_t {
|
||||
std::string tag;
|
||||
nodes_t nodes;
|
||||
opts_t opts;
|
||||
};
|
||||
typedef std::vector<level_t*> nodes_vec_t;
|
||||
typedef std::map<std::string, nodes_vec_t> map_t;
|
||||
|
||||
enum { DECL_STATE, BODY_STATE };
|
||||
|
||||
static Xml* Create(const std::string& file_name, const Xml* obj = NULL) {
|
||||
Xml* xml = new Xml(file_name, obj);
|
||||
if (xml != NULL) {
|
||||
if (xml->Init() == false) {
|
||||
delete xml;
|
||||
xml = NULL;
|
||||
} else {
|
||||
const std::size_t pos = file_name.rfind('/');
|
||||
const std::string path = (pos != std::string::npos) ? file_name.substr(0, pos + 1) : "";
|
||||
|
||||
xml->PreProcess();
|
||||
nodes_t incl_nodes;
|
||||
for (auto* node : xml->GetNodes("top.include")) {
|
||||
if (node->opts.find("touch") == node->opts.end()) {
|
||||
node->opts["touch"] = "";
|
||||
incl_nodes.push_back(node);
|
||||
}
|
||||
}
|
||||
for (auto* incl : incl_nodes) {
|
||||
const std::string& incl_name = path + incl->opts["file"];
|
||||
Xml* ixml = Create(incl_name, xml);
|
||||
if (ixml == NULL) {
|
||||
delete xml;
|
||||
xml = NULL;
|
||||
break;
|
||||
} else {
|
||||
delete ixml;
|
||||
}
|
||||
}
|
||||
if (xml) {
|
||||
xml->Process();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return xml;
|
||||
}
|
||||
|
||||
static void Destroy(Xml* xml) { delete xml; }
|
||||
|
||||
std::string GetName() { return file_name_; }
|
||||
|
||||
void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) {
|
||||
const std::size_t pos = full_tag.rfind('.');
|
||||
const std::size_t pos1 = (pos == std::string::npos) ? 0 : pos + 1;
|
||||
const std::string level_tag = full_tag.substr(pos1);
|
||||
level_t* level = new level_t;
|
||||
(*map_)[full_tag].push_back(level);
|
||||
level->tag = level_tag;
|
||||
level->opts["name"] = name;
|
||||
level->opts["expr"] = expr;
|
||||
}
|
||||
|
||||
void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) {
|
||||
std::ostringstream oss;
|
||||
oss << val;
|
||||
AddExpr(full_tag, name, oss.str());
|
||||
}
|
||||
|
||||
nodes_t GetNodes(const std::string& global_tag) { return (*map_)[global_tag]; }
|
||||
|
||||
template <class F> F ForEach(const F& f_i) {
|
||||
F f = f_i;
|
||||
if (map_) {
|
||||
for (auto& entry : *map_) {
|
||||
for (auto node : entry.second) {
|
||||
if (f.fun(entry.first, node) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
template <class F> F ForEach(const F& f_i) const {
|
||||
F f = f_i;
|
||||
if (map_) {
|
||||
for (auto& entry : *map_) {
|
||||
for (auto node : entry.second) {
|
||||
if (f.fun(entry.first, node) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
struct print_func {
|
||||
bool fun(const std::string& global_tag, level_t* node) {
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << global_tag << "." << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
void Print() const {
|
||||
std::cout << "XML file '" << file_name_ << "':" << std::endl;
|
||||
ForEach(print_func());
|
||||
}
|
||||
|
||||
private:
|
||||
Xml(const std::string& file_name, const Xml* obj)
|
||||
: file_name_(file_name),
|
||||
file_line_(0),
|
||||
data_size_(0),
|
||||
index_(0),
|
||||
state_(BODY_STATE),
|
||||
comment_(false),
|
||||
included_(false),
|
||||
level_(NULL),
|
||||
map_(NULL) {
|
||||
if (obj != NULL) {
|
||||
map_ = obj->map_;
|
||||
level_ = obj->level_;
|
||||
included_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
struct delete_func {
|
||||
bool fun(const std::string&, level_t* node) {
|
||||
delete node;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
~Xml() {
|
||||
if (included_ == false) {
|
||||
ForEach(delete_func());
|
||||
delete map_;
|
||||
}
|
||||
}
|
||||
|
||||
bool Init() {
|
||||
fd_ = open(file_name_.c_str(), O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
// perror((std::string("open XML file ") + file_name_).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (map_ == NULL) {
|
||||
map_ = new map_t;
|
||||
if (map_ == NULL) return false;
|
||||
AddLevel("top");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PreProcess() {
|
||||
uint32_t ind = 0;
|
||||
char buf[kBufSize];
|
||||
bool error = false;
|
||||
|
||||
while (1) {
|
||||
const uint32_t pos = lseek(fd_, 0, SEEK_CUR);
|
||||
uint32_t size = read(fd_, buf, kBufSize);
|
||||
if (size <= 0) break;
|
||||
buf[size - 1] = '\0';
|
||||
|
||||
if (strncmp(buf, "#include \"", 10) == 0) {
|
||||
for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize);
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
buf[ind] = '\0';
|
||||
size = ind;
|
||||
lseek(fd_, pos + ind + 1, SEEK_SET);
|
||||
|
||||
for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
buf[ind] = '\0';
|
||||
|
||||
AddLevel("include");
|
||||
AddOption("file", &buf[10]);
|
||||
UpLevel();
|
||||
}
|
||||
}
|
||||
|
||||
if (error) {
|
||||
fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
lseek(fd_, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
void Process() {
|
||||
token_t remainder;
|
||||
|
||||
while (1) {
|
||||
token_t token = (remainder.size()) ? remainder : NextToken();
|
||||
remainder.clear();
|
||||
|
||||
// token_t token1 = token;
|
||||
// token1.push_back('\0');
|
||||
// std::cout << "> " << &token1[0] << std::endl;
|
||||
|
||||
// End of file
|
||||
if (token.size() == 0) break;
|
||||
|
||||
switch (state_) {
|
||||
case BODY_STATE:
|
||||
if (token[0] == '<') {
|
||||
bool node_begin = true;
|
||||
unsigned ind = 1;
|
||||
if (token[1] == '/') {
|
||||
node_begin = false;
|
||||
++ind;
|
||||
}
|
||||
|
||||
unsigned i = ind;
|
||||
while (i < token.size()) {
|
||||
if (token[i] == '>') break;
|
||||
++i;
|
||||
}
|
||||
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
|
||||
if (i == token.size()) {
|
||||
if (node_begin)
|
||||
state_ = DECL_STATE;
|
||||
else
|
||||
BadFormat(token);
|
||||
token.push_back('\0');
|
||||
} else {
|
||||
token[i] = '\0';
|
||||
}
|
||||
|
||||
const char* tag = &token[ind];
|
||||
if (node_begin) {
|
||||
AddLevel(tag);
|
||||
} else {
|
||||
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) {
|
||||
token.back() = '>';
|
||||
BadFormat(token);
|
||||
}
|
||||
UpLevel();
|
||||
}
|
||||
} else {
|
||||
BadFormat(token);
|
||||
}
|
||||
break;
|
||||
case DECL_STATE:
|
||||
if (token[0] == '>') {
|
||||
state_ = BODY_STATE;
|
||||
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
continue;
|
||||
} else {
|
||||
token.push_back('\0');
|
||||
unsigned j = 0;
|
||||
for (j = 0; j < token.size(); ++j)
|
||||
if (token[j] == '=') break;
|
||||
if (j == token.size()) BadFormat(token);
|
||||
token[j] = '\0';
|
||||
const char* key = &token[0];
|
||||
const char* value = &token[j + 1];
|
||||
AddOption(key, value);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout << "XML parser error: wrong state: " << state_ << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SpaceCheck() const {
|
||||
bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t'));
|
||||
return cond;
|
||||
}
|
||||
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
buffer_[index_] = ' ';
|
||||
++file_line_;
|
||||
found = true;
|
||||
comment_ = false;
|
||||
} else if (comment_ || (buffer_[index_] == '#')) {
|
||||
found = true;
|
||||
comment_ = true;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
token_t NextToken() {
|
||||
token_t token;
|
||||
bool in_string = false;
|
||||
bool special_symb = false;
|
||||
|
||||
while (1) {
|
||||
if (data_size_ == 0) {
|
||||
data_size_ = read(fd_, buffer_, kBufSize);
|
||||
if (data_size_ <= 0) break;
|
||||
}
|
||||
|
||||
if (token.empty()) {
|
||||
while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) {
|
||||
++index_;
|
||||
}
|
||||
}
|
||||
while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) {
|
||||
const char symb = buffer_[index_];
|
||||
bool skip_symb = false;
|
||||
|
||||
switch (symb) {
|
||||
case '\\':
|
||||
if (special_symb) {
|
||||
special_symb = false;
|
||||
} else {
|
||||
special_symb = true;
|
||||
skip_symb = true;
|
||||
}
|
||||
break;
|
||||
case '"':
|
||||
if (special_symb) {
|
||||
special_symb = false;
|
||||
} else {
|
||||
in_string = !in_string;
|
||||
if (!in_string) {
|
||||
buffer_[index_] = ' ';
|
||||
--index_;
|
||||
}
|
||||
skip_symb = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!skip_symb) token.push_back(symb);
|
||||
++index_;
|
||||
}
|
||||
|
||||
if (index_ == data_size_) {
|
||||
index_ = 0;
|
||||
data_size_ = 0;
|
||||
} else {
|
||||
if (special_symb || in_string) BadFormat(token);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
void BadFormat(token_t token) {
|
||||
token.push_back('\0');
|
||||
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '"
|
||||
<< &token[0] << "'" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void AddLevel(const std::string& tag) {
|
||||
level_t* level = new level_t;
|
||||
level->tag = tag;
|
||||
if (level_) {
|
||||
level_->nodes.push_back(level);
|
||||
stack_.push_back(level_);
|
||||
}
|
||||
level_ = level;
|
||||
|
||||
std::string global_tag;
|
||||
for (level_t* level : stack_) {
|
||||
global_tag += level->tag + ".";
|
||||
}
|
||||
global_tag += tag;
|
||||
(*map_)[global_tag].push_back(level_);
|
||||
}
|
||||
|
||||
void UpLevel() {
|
||||
level_ = stack_.back();
|
||||
stack_.pop_back();
|
||||
}
|
||||
|
||||
std::string CurrentLevel() const { return level_->tag; }
|
||||
|
||||
void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; }
|
||||
|
||||
const std::string file_name_;
|
||||
unsigned file_line_;
|
||||
int fd_;
|
||||
|
||||
static const size_t kBufSize = 256;
|
||||
char buffer_[kBufSize];
|
||||
|
||||
unsigned data_size_;
|
||||
unsigned index_;
|
||||
unsigned state_;
|
||||
bool comment_;
|
||||
std::vector<level_t*> stack_;
|
||||
bool included_;
|
||||
level_t* level_;
|
||||
map_t* map_;
|
||||
};
|
||||
|
||||
} // namespace xml
|
||||
|
||||
#endif // TEST_UTIL_XML_H_
|
||||
+3
-3
@@ -114,7 +114,7 @@ eval_test() {
|
||||
|
||||
# Tests dry run
|
||||
eval_test "MatrixTranspose dry run" ./test/MatrixTranspose MatrixTranspose_dryrun_trace
|
||||
eval_test "ctrl dry run" ./test/hsa/ctrl ctrl_dryrun_trace
|
||||
eval_test "copy dry run" ./test/copy copy_dryrun_trace
|
||||
|
||||
# Standalone test
|
||||
# rocTrecer is used explicitely by test
|
||||
@@ -158,11 +158,11 @@ export ROCP_AGENTS=1
|
||||
# each thread creates a queue pre GPU agent
|
||||
export ROCP_THRS=1
|
||||
|
||||
eval_test "tool HSA test" ./test/hsa/ctrl ctrl_hsa_trace
|
||||
eval_test "tool HSA test" ./test/copy copy_hsa_trace
|
||||
|
||||
echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_pool_allocate\"></parameters></trace>" > test/input.xml
|
||||
export ROCP_INPUT=test/input.xml
|
||||
eval_test "tool HSA test input" ./test/hsa/ctrl ctrl_hsa_input_trace
|
||||
eval_test "tool HSA test input" ./test/copy copy_hsa_input_trace
|
||||
unset ROCP_INPUT
|
||||
|
||||
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so ./test/libhsaco_test.so"
|
||||
|
||||
Reference in New Issue
Block a user