Add default values for kernel struct (#798)
* Add default values for kernel struct
* Update hsa-queue-dependency app
  - default initializers
  - check HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED for memory pools
  - clang-tidy fixes (member -> static, etc.)
* Update run-ci.py
  - add --progress --output-on-failure -V if no other options regarding verbosity are passed
  - improve the ability to control the stages

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
This commit is contained in:
+44
-14
@@ -195,6 +195,8 @@ def generate_dashboard_script(args):
|
||||
MEMCHECK = 0
|
||||
|
||||
_script = f"""
|
||||
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
|
||||
|
||||
macro(dashboard_submit)
|
||||
if("{SUBMIT}" GREATER 0)
|
||||
ctest_submit({ARGN})
|
||||
@@ -214,7 +216,10 @@ def generate_dashboard_script(args):
|
||||
endmacro()
|
||||
"""
|
||||
|
||||
STAGES = ";".join([itr.upper() for itr in args.stages])
|
||||
|
||||
_script += f"""
|
||||
set(STAGES "{STAGES}")
|
||||
ctest_start({DASHBOARD_MODE})
|
||||
ctest_update(SOURCE "{SOURCE_DIR}" RETURN_VALUE _update_ret
|
||||
CAPTURE_CMAKE_ERROR _update_err)
|
||||
@@ -227,20 +232,24 @@ def generate_dashboard_script(args):
|
||||
|
||||
handle_error("Configure" _configure_ret)
|
||||
|
||||
ctest_build(BUILD "{BINARY_DIR}" RETURN_VALUE _build_ret)
|
||||
dashboard_submit(PARTS Build RETURN_VALUE _submit_ret)
|
||||
if("BUILD" IN_LIST STAGES)
|
||||
ctest_build(BUILD "{BINARY_DIR}" RETURN_VALUE _build_ret)
|
||||
dashboard_submit(PARTS Build RETURN_VALUE _submit_ret)
|
||||
|
||||
handle_error("Build" _build_ret)
|
||||
|
||||
if("{MEMCHECK}" GREATER 0)
|
||||
ctest_memcheck(BUILD "{BINARY_DIR}" RETURN_VALUE _test_ret)
|
||||
dashboard_submit(PARTS Test RETURN_VALUE _submit_ret)
|
||||
else()
|
||||
ctest_test(BUILD "{BINARY_DIR}" RETURN_VALUE _test_ret)
|
||||
dashboard_submit(PARTS Test RETURN_VALUE _submit_ret)
|
||||
handle_error("Build" _build_ret)
|
||||
endif()
|
||||
|
||||
if("{CODECOV}" GREATER 0)
|
||||
if("TEST" IN_LIST STAGES)
|
||||
if("{MEMCHECK}" GREATER 0)
|
||||
ctest_memcheck(BUILD "{BINARY_DIR}" RETURN_VALUE _test_ret)
|
||||
dashboard_submit(PARTS Test RETURN_VALUE _submit_ret)
|
||||
else()
|
||||
ctest_test(BUILD "{BINARY_DIR}" RETURN_VALUE _test_ret)
|
||||
dashboard_submit(PARTS Test RETURN_VALUE _submit_ret)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if("{CODECOV}" GREATER 0 AND "COVERAGE" IN_LIST STAGES)
|
||||
ctest_coverage(
|
||||
BUILD "{BINARY_DIR}"
|
||||
RETURN_VALUE _coverage_ret
|
||||
@@ -493,20 +502,39 @@ if __name__ == "__main__":
|
||||
dashboard_args.append(f"{args.mode}{itr}")
|
||||
|
||||
try:
|
||||
verbose_options = (
|
||||
"--progress",
|
||||
"-V",
|
||||
"-VV",
|
||||
"--debug",
|
||||
"--output-on-failure",
|
||||
"-Q",
|
||||
"--quiet",
|
||||
)
|
||||
if not args.quiet and len(ctest_args) == 0:
|
||||
ctest_args = ["--output-on-failure", "-V"]
|
||||
elif not args.quiet:
|
||||
opts_union = [x for x in ctest_args if x in verbose_options]
|
||||
if len(opts_union) == 0:
|
||||
ctest_args += ["--progress", "--output-on-failure", "-V"]
|
||||
|
||||
# always fail if no tests exist
|
||||
ctest_args += ["--no-tests=error"]
|
||||
|
||||
run(
|
||||
run_args = (
|
||||
[CTEST_CMD]
|
||||
+ dashboard_args
|
||||
+ [
|
||||
"-S",
|
||||
os.path.join(args.binary_dir, "dashboard.cmake"),
|
||||
]
|
||||
+ ctest_args,
|
||||
+ ctest_args
|
||||
)
|
||||
|
||||
print("CTest command: {}".format(" ".join(run_args)))
|
||||
|
||||
run(
|
||||
run_args,
|
||||
check=True,
|
||||
)
|
||||
finally:
|
||||
@@ -522,7 +550,9 @@ if __name__ == "__main__":
|
||||
):
|
||||
if not os.path.isfile(file):
|
||||
continue
|
||||
if "CoverageLog-" in os.path.basename(file):
|
||||
elif "CoverageLog-" in os.path.basename(file):
|
||||
continue
|
||||
elif "Test.xml" in os.path.basename(file):
|
||||
continue
|
||||
print(f"\n\n###### Reading {file}... ######\n\n")
|
||||
with open(file, "r") as inpf:
|
||||
|
||||
@@ -43,7 +43,7 @@ foreach(target_id ${GPU_TARGETS})
|
||||
# generate kernel bitcodes
|
||||
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/copy.cl
|
||||
${target_id}_copy.hsaco)
|
||||
endforeach(target_id)
|
||||
endforeach()
|
||||
|
||||
add_custom_target(generate_hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
|
||||
|
||||
@@ -71,8 +71,6 @@ find_package(rocprofiler-sdk REQUIRED)
|
||||
target_link_libraries(multiqueue_testapp PRIVATE rocprofiler::rocprofiler
|
||||
rocprofiler::tests-common-library)
|
||||
|
||||
target_compile_definitions(multiqueue_testapp PUBLIC AMD_INTERNAL_BUILD=1)
|
||||
|
||||
find_package(
|
||||
hsa-runtime64
|
||||
REQUIRED
|
||||
|
||||
@@ -64,12 +64,14 @@ main()
|
||||
printf("Test kernel A not found.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
MQDependencyTest::Kernel copyB;
|
||||
if(!obj.get_kernel(code_object, "copyB", obj.gpu[0].agent, copyB))
|
||||
{
|
||||
printf("Test kernel B not found.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
MQDependencyTest::Kernel copyC;
|
||||
if(!obj.get_kernel(code_object, "copyC", obj.gpu[0].agent, copyC))
|
||||
{
|
||||
@@ -79,14 +81,13 @@ main()
|
||||
|
||||
struct args_t
|
||||
{
|
||||
uint32_t* a;
|
||||
uint32_t* b;
|
||||
MQDependencyTest::OCLHiddenArgs hidden;
|
||||
uint32_t* a = nullptr;
|
||||
uint32_t* b = nullptr;
|
||||
MQDependencyTest::OCLHiddenArgs hidden = {};
|
||||
};
|
||||
|
||||
args_t* args;
|
||||
args = static_cast<args_t*>(obj.hsa_malloc(sizeof(args_t), obj.kernarg));
|
||||
memset(args, 0, sizeof(args_t));
|
||||
args_t* args = static_cast<args_t*>(obj.hsa_malloc(sizeof(args_t), obj.kernarg));
|
||||
*args = {};
|
||||
|
||||
uint32_t* a = static_cast<uint32_t*>(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg));
|
||||
uint32_t* b = static_cast<uint32_t*>(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg));
|
||||
@@ -95,15 +96,21 @@ main()
|
||||
memset(b, 1, 64 * sizeof(uint32_t));
|
||||
|
||||
// Create queue in gpu agent and prepare a kernel dispatch packet
|
||||
hsa_queue_t* queue1;
|
||||
status = hsa_queue_create(
|
||||
obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue1);
|
||||
hsa_queue_t* queue1 = nullptr;
|
||||
status = hsa_queue_create(obj.gpu[0].agent,
|
||||
1024,
|
||||
HSA_QUEUE_TYPE_SINGLE,
|
||||
nullptr,
|
||||
nullptr,
|
||||
UINT32_MAX,
|
||||
UINT32_MAX,
|
||||
&queue1);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Create a signal with a value of 1 and attach it to the first kernel
|
||||
// dispatch packet
|
||||
hsa_signal_t completion_signal_1;
|
||||
status = hsa_signal_create(1, 0, NULL, &completion_signal_1);
|
||||
hsa_signal_t completion_signal_1 = {};
|
||||
status = hsa_signal_create(1, 0, nullptr, &completion_signal_1);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// First dispath packet on queue 1, Kernel A
|
||||
@@ -137,12 +144,12 @@ main()
|
||||
|
||||
// Create a signal with a value of 1 and attach it to the second kernel
|
||||
// dispatch packet
|
||||
hsa_signal_t completion_signal_2;
|
||||
status = hsa_signal_create(1, 0, NULL, &completion_signal_2);
|
||||
hsa_signal_t completion_signal_2 = {};
|
||||
status = hsa_signal_create(1, 0, nullptr, &completion_signal_2);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
hsa_signal_t completion_signal_3;
|
||||
status = hsa_signal_create(1, 0, NULL, &completion_signal_3);
|
||||
hsa_signal_t completion_signal_3 = {};
|
||||
status = hsa_signal_create(1, 0, nullptr, &completion_signal_3);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Create barrier-AND packet that is enqueued in queue 1
|
||||
@@ -186,9 +193,15 @@ main()
|
||||
}
|
||||
|
||||
// Create queue 2
|
||||
hsa_queue_t* queue2;
|
||||
status = hsa_queue_create(
|
||||
obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue2);
|
||||
hsa_queue_t* queue2 = nullptr;
|
||||
status = hsa_queue_create(obj.gpu[0].agent,
|
||||
1024,
|
||||
HSA_QUEUE_TYPE_SINGLE,
|
||||
nullptr,
|
||||
nullptr,
|
||||
UINT32_MAX,
|
||||
UINT32_MAX,
|
||||
&queue2);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Create barrier-AND packet that is enqueued in queue 2
|
||||
@@ -277,6 +290,7 @@ main()
|
||||
|
||||
status = hsa_memory_free(a);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
status = hsa_memory_free(b);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
@@ -285,5 +299,6 @@ main()
|
||||
|
||||
status = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
close(code_object.file);
|
||||
}
|
||||
|
||||
@@ -88,18 +88,18 @@ public:
|
||||
|
||||
struct CodeObject
|
||||
{
|
||||
hsa_file_t file;
|
||||
hsa_code_object_reader_t code_obj_rdr;
|
||||
hsa_executable_t executable;
|
||||
hsa_file_t file = 0;
|
||||
hsa_code_object_reader_t code_obj_rdr = {};
|
||||
hsa_executable_t executable = {};
|
||||
};
|
||||
|
||||
struct Kernel
|
||||
{
|
||||
uint64_t handle;
|
||||
uint32_t scratch;
|
||||
uint32_t group;
|
||||
uint32_t kernarg_size;
|
||||
uint32_t kernarg_align;
|
||||
uint64_t handle = 0;
|
||||
uint32_t scratch = 0;
|
||||
uint32_t group = 0;
|
||||
uint32_t kernarg_size = 0;
|
||||
uint32_t kernarg_align = 0;
|
||||
};
|
||||
|
||||
union AqlHeader
|
||||
@@ -112,23 +112,23 @@ public:
|
||||
uint16_t release : 2;
|
||||
uint16_t reserved : 3;
|
||||
};
|
||||
uint16_t raw;
|
||||
uint16_t raw = 0;
|
||||
};
|
||||
|
||||
struct BarrierValue
|
||||
{
|
||||
AqlHeader header;
|
||||
uint8_t AmdFormat;
|
||||
uint8_t reserved;
|
||||
uint32_t reserved1;
|
||||
hsa_signal_t signal;
|
||||
hsa_signal_value_t value;
|
||||
hsa_signal_value_t mask;
|
||||
uint32_t cond;
|
||||
uint32_t reserved2;
|
||||
uint64_t reserved3;
|
||||
uint64_t reserved4;
|
||||
hsa_signal_t completion_signal;
|
||||
AqlHeader header = {};
|
||||
uint8_t AmdFormat = 0;
|
||||
uint8_t reserved = 0;
|
||||
uint32_t reserved1 = 0;
|
||||
hsa_signal_t signal = {};
|
||||
hsa_signal_value_t value = 0;
|
||||
hsa_signal_value_t mask = 0;
|
||||
uint32_t cond = 0;
|
||||
uint32_t reserved2 = 0;
|
||||
uint64_t reserved3 = 0;
|
||||
uint64_t reserved4 = 0;
|
||||
hsa_signal_t completion_signal = {};
|
||||
};
|
||||
|
||||
union Aql
|
||||
@@ -137,21 +137,23 @@ public:
|
||||
hsa_kernel_dispatch_packet_t dispatch;
|
||||
hsa_barrier_and_packet_t barrier_and;
|
||||
hsa_barrier_or_packet_t barrier_or;
|
||||
BarrierValue barrier_value;
|
||||
BarrierValue barrier_value = {};
|
||||
};
|
||||
|
||||
struct OCLHiddenArgs
|
||||
{
|
||||
uint64_t offset_x;
|
||||
uint64_t offset_y;
|
||||
uint64_t offset_z;
|
||||
void* printf_buffer;
|
||||
void* enqueue;
|
||||
void* enqueue2;
|
||||
void* multi_grid;
|
||||
uint64_t offset_x = 0;
|
||||
uint64_t offset_y = 0;
|
||||
uint64_t offset_z = 0;
|
||||
void* printf_buffer = nullptr;
|
||||
void* enqueue = nullptr;
|
||||
void* enqueue2 = nullptr;
|
||||
void* multi_grid = nullptr;
|
||||
};
|
||||
|
||||
bool load_code_object(std::string filename, hsa_agent_t agent, CodeObject& code_object)
|
||||
static bool load_code_object(const std::string& filename,
|
||||
hsa_agent_t agent,
|
||||
CodeObject& code_object)
|
||||
{
|
||||
hsa_status_t err;
|
||||
code_object.file = open(filename.c_str(), O_RDONLY);
|
||||
@@ -181,10 +183,10 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool get_kernel(const CodeObject& code_object,
|
||||
std::string kernel,
|
||||
hsa_agent_t agent,
|
||||
Kernel& kern)
|
||||
static bool get_kernel(const CodeObject& code_object,
|
||||
const std::string& kernel,
|
||||
hsa_agent_t agent,
|
||||
Kernel& kern)
|
||||
{
|
||||
hsa_executable_symbol_t symbol;
|
||||
hsa_status_t err = hsa_executable_get_symbol_by_name(
|
||||
@@ -207,7 +209,7 @@ public:
|
||||
}
|
||||
|
||||
// Not for parallel insertion.
|
||||
bool submit_packet(hsa_queue_t* queue, Aql& pkt)
|
||||
static bool submit_packet(hsa_queue_t* queue, Aql& pkt)
|
||||
{
|
||||
size_t mask = queue->size - 1;
|
||||
Aql* ring = static_cast<Aql*>(queue->base_address);
|
||||
@@ -230,26 +232,26 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
void* hsa_malloc(size_t size, const Device::Memory& mem)
|
||||
static void* hsa_malloc(size_t size, const Device::Memory& mem)
|
||||
{
|
||||
void* ret;
|
||||
hsa_status_t err = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &ret);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_amd_agents_allow_access(
|
||||
Device::all_devices.size(), &Device::all_devices[0], nullptr, ret);
|
||||
Device::all_devices.size(), Device::all_devices.data(), nullptr, ret);
|
||||
RET_IF_HSA_ERR(err);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* hsa_malloc(size_t size, const Device& dev, bool fine)
|
||||
static void* hsa_malloc(size_t size, const Device& dev, bool fine)
|
||||
{
|
||||
uint32_t index = fine ? dev.fine : dev.coarse;
|
||||
assert(index != -1u && "Memory type unavailable.");
|
||||
return hsa_malloc(size, dev.pools[index]);
|
||||
}
|
||||
|
||||
bool device_discovery()
|
||||
static bool device_discovery()
|
||||
{
|
||||
hsa_status_t err;
|
||||
|
||||
@@ -273,10 +275,15 @@ public:
|
||||
error = hsa_amd_agent_iterate_memory_pools(
|
||||
agent,
|
||||
[](hsa_amd_memory_pool_t pool, void* data) {
|
||||
std::vector<Device::Memory>& pools =
|
||||
*reinterpret_cast<std::vector<Device::Memory>*>(data);
|
||||
auto& pools = *reinterpret_cast<std::vector<Device::Memory>*>(data);
|
||||
hsa_status_t status;
|
||||
|
||||
bool allowed = false;
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &allowed);
|
||||
|
||||
if(!allowed) return HSA_STATUS_SUCCESS;
|
||||
|
||||
hsa_amd_segment_t segment;
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
@@ -290,9 +297,10 @@ public:
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
Device::Memory mem;
|
||||
mem.pool = pool;
|
||||
mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
|
||||
mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);
|
||||
mem.pool = pool;
|
||||
mem.fine = ((flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) != 0u);
|
||||
mem.kernarg =
|
||||
((flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) != 0u);
|
||||
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
|
||||
|
||||
Reference in New Issue
Block a user