Files
gobhardw a67f4fdd93 SWDEV-465520: RHEL9 toolchain doesnt seem to support experimental filesystem
Change-Id: I6b1e7f42c49b1c7af412c0b68851724861c9970a


[ROCm/rocprofiler commit: 1e69b3e2f6]
2024-06-05 12:50:48 -04:00

344 lines
11 KiB
C++

/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/** \mainpage ROC Profiler Multi Queue Dependency Test
*
* \section introduction Introduction
*
* The goal of this test is to ensure ROC profiler does not go to deadlock
* when multiple queue are created and they are dependent on each other
*
*/
#include "multiqueue_testapp.h"
#include "src/utils/filesystem.hpp"
#include "src/utils/exception.h"
namespace fs = rocprofiler::common::filesystem;
std::vector<hsa_agent_t> Device::all_devices;
std::string GetRunningPath(std::string string_to_erase);
static void init_test_path();
std::string test_app_path;
std::string hasco_path;
int main() {
hsa_status_t status;
MQDependencyTest obj;
// Get Agent info
obj.DeviceDiscovery();
char agent_name[64];
status = hsa_agent_get_info(gpu[0].agent, HSA_AGENT_INFO_NAME, agent_name);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// set global test path for this test
init_test_path();
// Getting Current Path
std::string app_path = GetRunningPath(test_app_path + "multiqueue_testapp");
// Getting hasco Path
std::string ko_path = app_path + hasco_path + std::string(agent_name) + "_copy.hsaco";
MQDependencyTest::CodeObject code_object;
if (!obj.LoadCodeObject(ko_path, gpu[0].agent, code_object)) {
printf("Kernel file not found or not usable with given agent.\n");
abort();
}
MQDependencyTest::Kernel copyA;
if (!obj.GetKernel(code_object, "copyA", gpu[0].agent, copyA)) {
printf("Test kernel A not found.\n");
abort();
}
MQDependencyTest::Kernel copyB;
if (!obj.GetKernel(code_object, "copyB", gpu[0].agent, copyB)) {
printf("Test kernel B not found.\n");
abort();
}
MQDependencyTest::Kernel copyC;
if (!obj.GetKernel(code_object, "copyC", gpu[0].agent, copyC)) {
printf("Test kernel C not found.\n");
abort();
}
struct args_t {
uint32_t* a;
uint32_t* b;
MQDependencyTest::OCLHiddenArgs hidden;
};
args_t* args;
args = static_cast<args_t*>(obj.hsaMalloc(sizeof(args_t), kernarg));
memset(args, 0, sizeof(args_t));
uint32_t* a = static_cast<uint32_t*>(obj.hsaMalloc(64 * sizeof(uint32_t), kernarg));
uint32_t* b = static_cast<uint32_t*>(obj.hsaMalloc(64 * sizeof(uint32_t), kernarg));
memset(a, 0, 64 * sizeof(uint32_t));
memset(b, 1, 64 * sizeof(uint32_t));
// Create queue in gpu agent and prepare a kernel dispatch packet
hsa_queue_t* queue1;
status = hsa_queue_create(gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
UINT32_MAX, &queue1);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Create a signal with a value of 1 and attach it to the first kernel
// dispatch packet
hsa_signal_t completion_signal_1;
status = hsa_signal_create(1, 0, NULL, &completion_signal_1);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// First dispath packet on queue 1, Kernel A
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.dispatch.setup = 1;
packet.dispatch.workgroup_size_x = 64;
packet.dispatch.workgroup_size_y = 1;
packet.dispatch.workgroup_size_z = 1;
packet.dispatch.grid_size_x = 64;
packet.dispatch.grid_size_y = 1;
packet.dispatch.grid_size_z = 1;
packet.dispatch.group_segment_size = copyA.group;
packet.dispatch.private_segment_size = copyA.scratch;
packet.dispatch.kernel_object = copyA.handle;
packet.dispatch.kernarg_address = args;
packet.dispatch.completion_signal = completion_signal_1;
args->a = a;
args->b = b;
// Tell packet processor of A to launch the first kernel dispatch packet
obj.SubmitPacket(queue1, packet);
}
// Create a signal with a value of 1 and attach it to the second kernel
// dispatch packet
hsa_signal_t completion_signal_2;
status = hsa_signal_create(1, 0, NULL, &completion_signal_2);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
hsa_signal_t completion_signal_3;
status = hsa_signal_create(1, 0, NULL, &completion_signal_3);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Create barrier-AND packet that is enqueued in queue 1
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_BARRIER_AND;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.barrier_and.dep_signal[0] = completion_signal_2;
obj.SubmitPacket(queue1, packet);
}
// Second dispath packet on queue 1, Kernel C
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.dispatch.setup = 1;
packet.dispatch.workgroup_size_x = 64;
packet.dispatch.workgroup_size_y = 1;
packet.dispatch.workgroup_size_z = 1;
packet.dispatch.grid_size_x = 64;
packet.dispatch.grid_size_y = 1;
packet.dispatch.grid_size_z = 1;
packet.dispatch.group_segment_size = copyC.group;
packet.dispatch.private_segment_size = copyC.scratch;
packet.dispatch.kernel_object = copyC.handle;
packet.dispatch.completion_signal = completion_signal_3;
packet.dispatch.kernarg_address = args;
args->a = a;
args->b = b;
// Tell packet processor to launch the second kernel dispatch packet
obj.SubmitPacket(queue1, packet);
}
// Create queue 2
hsa_queue_t* queue2;
status = hsa_queue_create(gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
UINT32_MAX, &queue2);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Create barrier-AND packet that is enqueued in queue 2
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_BARRIER_AND;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.barrier_and.dep_signal[0] = completion_signal_1;
obj.SubmitPacket(queue2, packet);
}
// Third dispath packet on queue 2, Kernel B
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.dispatch.setup = 1;
packet.dispatch.workgroup_size_x = 64;
packet.dispatch.workgroup_size_y = 1;
packet.dispatch.workgroup_size_z = 1;
packet.dispatch.grid_size_x = 64;
packet.dispatch.grid_size_y = 1;
packet.dispatch.grid_size_z = 1;
packet.dispatch.group_segment_size = copyB.group;
packet.dispatch.private_segment_size = copyB.scratch;
packet.dispatch.kernel_object = copyB.handle;
packet.dispatch.kernarg_address = args;
packet.dispatch.completion_signal = completion_signal_2;
args->a = a;
args->b = b;
// Tell packet processor to launch the third kernel dispatch packet
obj.SubmitPacket(queue2, packet);
}
// Wait on the completion signal
hsa_signal_wait_relaxed(completion_signal_1, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED);
// Wait on the completion signal
hsa_signal_wait_relaxed(completion_signal_2, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED);
// Wait on the completion signal
hsa_signal_wait_relaxed(completion_signal_3, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED);
for (int i = 0; i < 64; i++) {
if (a[i] != b[i]) {
printf("error at %d: expected %d, got %d\n", i, b[i], a[i]);
abort();
}
}
// Clearing data structures and memory
status = hsa_signal_destroy(completion_signal_1);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
status = hsa_signal_destroy(completion_signal_2);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
status = hsa_signal_destroy(completion_signal_3);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
if (queue1 != nullptr) {
status = hsa_queue_destroy(queue1);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
}
if (queue2 != nullptr) {
status = hsa_queue_destroy(queue2);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
}
status = hsa_memory_free(a);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
status = hsa_memory_free(b);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
status = hsa_executable_destroy(code_object.executable);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
status = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
close(code_object.file);
}
// This function returns the running path of executable
std::string GetRunningPath(std::string string_to_erase) {
std::string path;
char* real_path;
Dl_info dl_info;
if (0 != dladdr(reinterpret_cast<void*>(main), &dl_info)) {
std::string to_erase = string_to_erase;
path = dl_info.dli_fname;
real_path = realpath(path.c_str(), NULL);
if (real_path == nullptr) {
throw(std::string("Error! in extracting real path"));
}
path.clear(); // reset path
path.append(real_path);
size_t pos = path.find(to_erase);
if (pos != std::string::npos) path.erase(pos, to_erase.length());
} else {
throw(std::string("Error! in extracting real path"));
}
return path;
}
bool is_installed_path() {
std::string path;
char* real_path;
Dl_info dl_info;
if (0 != dladdr(reinterpret_cast<void*>(main), &dl_info)) {
path = dl_info.dli_fname;
real_path = realpath(path.c_str(), NULL);
if (real_path == nullptr) {
throw(std::string("Error! in extracting real path"));
}
path.clear(); // reset path
path.append(real_path);
if (path.find("/opt") != std::string::npos) {
return true;
}
}
return false;
}
static void init_test_path() {
if (is_installed_path()) {
test_app_path = "share/rocprofiler/tests/featuretests/profiler/apps/";
hasco_path = "share/rocprofiler/tests/";
} else {
test_app_path = "tests-v2/featuretests/profiler/apps/";
hasco_path = "tests-v2/featuretests/profiler/";
}
}