Files
Evgeny 7c4369bde4 Initial Commit
Contributors:
Ammar ELWazir <aelwazir@amd.com>
AravindanC <aravindan.cheruvally@amd.com>
Benjamin Welton <bewelton@amd.com>
Ma, Bing <Bing.Ma@amd.com>
Chun Yang <chun.yang@amd.com>
Cole Nelson <cole.nelson@amd.com>
Ethan Stewart <ethan.stewart@amd.com>
Evgeny <evgeny.shcherbakov@amd.com>
Freddy Paul <Freddy.paul@amd.com>
Giovanni Baraldi <gbaraldi@amd.com>
Gopesh Bhardwaj <Gopesh.Bhardwaj@amd.com>
Icarus Sparry <icarus.sparry@amd.com>
itrowbri <Ian.Trowbridge@amd.com>
James Edwards <JamesAdrian.Edwards@amd.com>
jatang <jatang@amd.com>
Jeremy Newton <Jeremy.Newton@amd.com>
Jonathan Kim <jonathan.kim@amd.com>
Kent Russell <kent.russell@amd.com>
Kiumars Sabeti <kiumars.sabeti@amd.com>
Lang Yu <lang.yu@amd.com>
Laurent Morichetti <laurent.morichetti@amd.com>
Mallya, Ameya Keshava <AmeyaKeshava.Mallya@amd.com>
Manjunath Jakaraddi <manjunath.jakaraddi@amd.com>
Mark Laws <markdavid.laws@amd.com>
Mohan Kumar Mithur <Mohan.KumarMithur@amd.com>
Nicholas Curtis <nicurtis@amd.com>
Nirmal Unnikrishnan <Nirmal.Unnikrishnan@amd.com>
Parag Bhandari <parag.bhandari@amd.com>
Ranjith Ramakrishnan <Ranjith.Ramakrishnan@amd.com>
Robert Gregory <Robert.Gregory@amd.com>
Saravanan Solaiyappan <saravanan.solaiyappan@amd.com>
Saurabh Verma <saurabh.verma@amd.com>
Srihari Uttanur <srihari.u@amd.com>
Srinivasan Subramanian <srinivasan.subramanian@amd.com>
Sriraksha Nagaraj <Sriraksha.Nagaraj@amd.com>
Sushma Vaddireddy <svaddire@amd.com>
Xianwei Zhang <Xianwei.Zhang@amd.com>


[ROCm/aqlprofile commit: 1ed169e30c]
2025-05-28 10:10:47 -05:00

365 γραμμές
12 KiB
C++

// MIT License
//
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "ctrl/test_hsa.h"
#include <atomic>
#include <cassert>
#include "util/helper_funcs.h"
#include "util/hsa_rsrc_factory.h"
#include "util/test_assert.h"
HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL;
const AgentInfo* TestHsa::agent_info_ = NULL;
hsa_queue_t* TestHsa::hsa_queue_ = NULL;
uint32_t TestHsa::agent_id_ = 0;
HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
// Instantiate an instance of Hsa Resources Factory
if (hsa_rsrc_ == NULL) {
agent_id_ = agent_ind;
hsa_rsrc_ = HsaRsrcFactory::Create();
// Print properties of the agents
hsa_rsrc_->PrintGpuAgents("> GPU agents");
// Create an instance of Gpu agent
if (!hsa_rsrc_->GetGpuAgentInfo(agent_ind, &agent_info_)) {
agent_info_ = NULL;
std::cerr << "> error: agent[" << agent_ind << "] is not found" << std::endl;
return NULL;
}
std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl;
// Create an instance of Aql Queue
if (hsa_queue_ == NULL) {
uint32_t num_pkts = 1024;
if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
hsa_queue_ = NULL;
TEST_ASSERT(false);
}
}
}
return hsa_rsrc_;
}
void TestHsa::HsaShutdown() {
if (hsa_queue_ != NULL) {
hsa_queue_destroy(hsa_queue_);
hsa_queue_ = NULL;
}
if (hsa_rsrc_) hsa_rsrc_->Destroy();
}
bool TestHsa::Initialize(int arg_cnt, char** arg_list) {
std::clog << "TestHsa::Initialize :" << std::endl;
// Instantiate a Timer object
setup_timer_idx_ = hsa_timer_.CreateTimer();
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
if (HsaInstantiate(agent_id_) == NULL) {
TEST_ASSERT(false);
return false;
}
// Obtain handle of signal
hsa_rsrc_->CreateSignal(1, &hsa_signal_);
// Obtain the code object file name
std::string agentName(agent_info_->name);
if (agentName.find(":") != std::string::npos) {
agentName = agentName.substr(0, agentName.find(":"));
}
brig_path_obj_.append(agentName + "_" + name_ + ".hsaco");
return true;
}
bool TestHsa::Setup() {
std::clog << "TestHsa::setup :" << std::endl;
// Start the timer object
hsa_timer_.StartTimer(setup_timer_idx_);
// Load and Finalize Kernel Code Descriptor
const char* brig_path = brig_path_obj_.c_str();
bool suc = hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, symb_.c_str(), &hsa_exec_,
&kernel_code_desc_);
if (suc == false) {
std::cerr << "Error in loading and finalizing Kernel" << std::endl;
return false;
}
mem_map_t& mem_map = test_->GetMemMap();
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
mem_descr_t& des = it->second;
switch (des.id) {
case TestKernel::LOCAL_DES_ID:
des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size);
break;
case TestKernel::KERNARG_DES_ID: {
// Check the kernel args size
const size_t kernarg_size = des.size;
size_t size_info = 0;
const hsa_status_t status = hsa_executable_symbol_get_info(
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
size_info = kernarg_size;
const bool kernarg_missmatch = (kernarg_size > size_info);
if (kernarg_missmatch) {
std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info
<< std::flush << std::endl;
TEST_ASSERT(!kernarg_missmatch);
break;
}
// ALlocate kernarg memory
des.size = size_info;
des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, size_info);
if (des.ptr) memset(des.ptr, 0, size_info);
break;
}
case TestKernel::SYS_DES_ID:
des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
if (des.ptr) memset(des.ptr, 0, des.size);
break;
case TestKernel::NULL_DES_ID:
des.ptr = NULL;
break;
default:
break;
}
TEST_ASSERT(des.ptr != NULL);
if (des.ptr == NULL) return false;
}
test_->Init();
// Stop the timer object
hsa_timer_.StopTimer(setup_timer_idx_);
setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_);
total_time_taken_ = setup_time_taken_;
return true;
}
bool TestHsa::Run() {
std::clog << "TestHsa::run :" << std::endl;
const uint32_t work_group_size = 64;
const uint32_t work_grid_size = test_->GetGridSize();
uint32_t group_segment_size = 0;
uint32_t private_segment_size = 0;
uint64_t code_handle = 0;
// Retrieve the amount of group memory needed
hsa_executable_symbol_get_info(
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
// Retrieve the amount of private memory needed
hsa_executable_symbol_get_info(kernel_code_desc_,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
&private_segment_size);
// Retrieve handle of the code block
hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
&code_handle);
// Initialize the dispatch packet.
hsa_kernel_dispatch_packet_t aql;
memset(&aql, 0, sizeof(aql));
// Set the packet's type, barrier bit, acquire and release fences
aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
// Populate Aql packet with default values
aql.setup = 1;
aql.grid_size_x = work_grid_size;
aql.grid_size_y = 1;
aql.grid_size_z = 1;
aql.workgroup_size_x = work_group_size;
aql.workgroup_size_y = 1;
aql.workgroup_size_z = 1;
// Bind the kernel code descriptor and arguments
aql.kernel_object = code_handle;
aql.kernarg_address = test_->GetKernargPtr();
aql.group_segment_size = group_segment_size;
aql.private_segment_size = private_segment_size;
// Initialize Aql packet with handle of signal
hsa_signal_store_relaxed(hsa_signal_, 1);
aql.completion_signal = hsa_signal_;
std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl;
// Start the timer object
hsa_timer_.StartTimer(dispatch_timer_idx_);
// Submit AQL packet to the queue
const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql);
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl;
// Wait on the dispatch signal until the kernel is finished.
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
if (hsa_signal_wait_scacquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX,
HSA_WAIT_STATE_BLOCKED) != 0) {
TEST_ASSERT(false);
}
std::clog << "> DONE, que_idx=" << que_idx << std::endl;
// Stop the timer object
hsa_timer_.StopTimer(dispatch_timer_idx_);
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
total_time_taken_ += dispatch_time_taken_;
return true;
}
bool TestHsa::VerifyResults() {
bool cmp = false;
void* output = NULL;
const uint32_t size = test_->GetOutputSize();
bool suc = false;
// Copy local kernel output buffers from local memory into host memory
if (test_->IsOutputLocal()) {
output = hsa_rsrc_->AllocateSysMemory(agent_info_, size);
suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size);
if (!suc) std::clog << "> VerifyResults: Memcpy failed" << std::endl << std::flush;
} else {
output = test_->GetOutputPtr();
suc = true;
}
if ((output != NULL) && suc) {
// Print the test output
test_->PrintOutput(output);
// Compare the results and see if they match
cmp = (memcmp(output, test_->GetRefOut(), size) >= 0);
}
if (test_->IsOutputLocal() && (output != NULL)) hsa_rsrc_->FreeMemory(output);
return cmp;
}
void TestHsa::PrintTime() {
std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_
<< std::endl;
std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_
<< std::endl;
std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_
<< std::endl;
}
bool TestHsa::Cleanup() {
hsa_executable_destroy(hsa_exec_);
hsa_signal_destroy(hsa_signal_);
return true;
}
bool TestHsa::RunSdma(size_t sdma_size) {
std::cout << "Run SDMA test ..." << std::endl;
const AgentInfo* cpu_agent{nullptr};
hsa_rsrc_->GetCpuAgentInfo(0, &cpu_agent);
const AgentInfo* gpu_agent{nullptr};
hsa_rsrc_->GetGpuAgentInfo(0, &gpu_agent);
// allocate SDMA buffers: src_buf, dest_buf and gpu_buf.
void* src_buf = hsa_rsrc_->AllocateSysMemory(gpu_agent, sdma_size);
assert(src_buf != nullptr);
void* dest_buf = hsa_rsrc_->AllocateSysMemory(gpu_agent, sdma_size);
assert(dest_buf != nullptr);
void* gpu_buf = hsa_rsrc_->AllocateLocalMemory(gpu_agent, sdma_size);
assert(gpu_buf != nullptr);
for (size_t i = 0; i < sdma_size; ++i) {
((char*)src_buf)[i] = i;
((char*)dest_buf)[sdma_size - 1 - i] = i & 0xFF;
}
for (size_t i = 0; i < 10; ++i)
std::cout << i << ": src_buf = " << (unsigned)(((char*)src_buf)[i] & 0xFF)
<< ", dest_buf = " << (unsigned)(((char*)dest_buf)[i] & 0xFF) << std::endl;
hsa_status_t status;
hsa_signal_t completion_signal;
status = hsa_signal_create(1, 0, NULL, &completion_signal);
CHECK_STATUS("hsa_signal_create", status);
// SDMA src_buf -> gpu_buf
status = hsa_amd_memory_async_copy(gpu_buf, gpu_agent->dev_id, src_buf, cpu_agent->dev_id,
sdma_size, 0, nullptr, completion_signal);
CHECK_STATUS("hsa_amd_memory_async_copy(...): src_buf -> gpu_buf", status);
while (1) {
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(
completion_signal, HSA_SIGNAL_CONDITION_LT, 1, 5000000, HSA_WAIT_STATE_BLOCKED);
if (signal_value == 0) {
break;
} else {
CHECK_STATUS("hsa_signal_wait_scacquire(): src_buf -> gpu_buf", HSA_STATUS_ERROR);
}
}
status = hsa_signal_destroy(completion_signal);
CHECK_STATUS("hsa_signal_destroy()", status);
// SDMA gpu_buf -> dest_buf
hsa_signal_t completion_signal1;
status = hsa_signal_create(1, 0, NULL, &completion_signal1);
CHECK_STATUS("hsa_signal_create", status);
status = hsa_amd_memory_async_copy(dest_buf, cpu_agent->dev_id, gpu_buf, gpu_agent->dev_id,
sdma_size, 0, nullptr, completion_signal1);
CHECK_STATUS("hsa_amd_memory_async_copy(...): gpu_buf -> dest_buf", status);
while (1) {
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(
completion_signal1, HSA_SIGNAL_CONDITION_LT, 1, 500000, HSA_WAIT_STATE_BLOCKED);
if (signal_value == 0) {
break;
} else {
CHECK_STATUS("hsa_signal_wait_scacquire(): gpu_buf -> dest_buf", HSA_STATUS_ERROR);
}
}
status = hsa_signal_destroy(completion_signal1);
CHECK_STATUS("hsa_signal_destroy()", status);
// check copy results
for (size_t i = 0; i < sdma_size; ++i) {
assert(((char*)src_buf)[i] == ((char*)dest_buf)[i]);
}
std::cout << std::endl;
// print out some dma data.
for (size_t i = 0; i < 10; ++i)
std::cout << i << ": src_buf = " << (int)((char*)src_buf)[i]
<< ", dest_buf = " << (int)((char*)dest_buf)[i] << std::endl;
return true;
}