Change-Id: Id22dcafbf0ea0b346f3a03d4acef27350b706f36


[ROCm/ROCR-Runtime commit: a632bfddb2]
Этот коммит содержится в:
srinivas Charupally
2018-05-31 15:29:37 +05:30
родитель 87a76fb0ec
Коммит 15a708e717
5 изменённых файлов: 784 добавлений и 0 удалений
+600
Просмотреть файл
@@ -0,0 +1,600 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <algorithm>
#include <iostream>
#include <vector>
#include "suites/functional/aql_barrier_bit.h"
#include "common/base_rocr_utils.h"
#include "common/concurrent_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
static const int NUM_WAIT_KERNELS = 8;
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
hsa_kernel_dispatch_packet_t* queue_packet) {
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
header | (setup << 16), __ATOMIC_RELEASE);
}
AqlBarrierBitTest::AqlBarrierBitTest(bool set, bool notSet) : TestBase() {
set_num_iteration(10); // Number of iterations to execute of the main test;
// This is a default value which can be overridden
// on the command line.
if (set) {
set_title("RocR Aql Barrier Bit Set Test");
set_description("This test checks the barrier bit functionality, set");
} else if (notSet) {
set_title("RocR Concurrent Shutdown Test");
set_description("This test checks the barrier bit functionality, un set");
}
}
AqlBarrierBitTest::~AqlBarrierBitTest(void) {
}
void AqlBarrierBitTest::SetUp(void) {
hsa_status_t err;
TestBase::SetUp();
err = rocrtst::SetDefaultAgents(this);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}
void AqlBarrierBitTest::Run(void) {
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::Run();
}
void AqlBarrierBitTest::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
void AqlBarrierBitTest::DisplayResults(void) const {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
return;
}
void AqlBarrierBitTest::Close() {
// This will close handles opened within rocrtst utility calls and call
// hsa_shut_down(), so it should be done after other hsa cleanup
}
void AqlBarrierBitTest::BarrierBitSet(void) {
hsa_status_t status;
// The kernarg data structure
typedef struct __attribute__ ((aligned(16))) signal_args_s {
void *signal_values;
} signal_args_t;
signal_args_t signal_args;
// Get the GPU agents into a vector
std::vector<hsa_agent_t> agent_list;
status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &agent_list);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Get CPU agent to get the kern_arg pool
std::vector<hsa_agent_t> cpu_agent;
status = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpu_agent);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Repeat the test for each agent
unsigned int ii;
for (ii = 0; ii < agent_list.size(); ++ii) {
// Check if the queue supports dispatch
uint32_t features = 0;
status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_FEATURE, &features);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
continue;
}
// Find a memory pool that supports fine grained memory
hsa_amd_memory_pool_t global_pool;
global_pool.handle = (uint64_t)-1;
status = hsa_amd_agent_iterate_memory_pools(agent_list[ii], rocrtst::GetGlobalMemoryPool, &global_pool);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Obtain the agent's machine model
hsa_machine_model_t machine_model;
status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Obtain the agent's profile
hsa_profile_t profile;
status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_PROFILE, &profile);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Find a memory pool that supports kernel arguments
hsa_amd_memory_pool_t kernarg_pool;
kernarg_pool.handle = (uint64_t)-1;
status = hsa_amd_agent_iterate_memory_pools(cpu_agent[0], rocrtst::GetKernArgMemoryPool, &kernarg_pool);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Create a queue
hsa_queue_t* queue;
status = hsa_queue_create(agent_list[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
set_kernel_file_name("signal_operations_kernels.hsaco");
set_kernel_name("signal_wait_kernel");
status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Allocate the kernel argument buffer from the correct pool
signal_args_t* kernarg_buffer = NULL;
status = hsa_amd_memory_pool_allocate(kernarg_pool,
sizeof(signal_args_t), 0,
reinterpret_cast<void**>(&kernarg_buffer));
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_agents_allow_access(1, &agent_list[ii], NULL, kernarg_buffer);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Create the completion signal
hsa_signal_t completion_signal;
status = hsa_signal_create(1, 0, NULL, &completion_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
hsa_amd_memory_pool_access_t access;
status = hsa_amd_agent_memory_pool_get_info(cpu_agent[0],
global_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
hsa_signal_t* kernel_signal;
hsa_signal_value_t* set_value;
hsa_signal_t s;
status = hsa_signal_create(1, 0, NULL, &s);
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
// Create the kernel signal
status = hsa_amd_memory_pool_allocate(global_pool,
sizeof(hsa_signal_t), 0, reinterpret_cast<void**>(&kernel_signal));
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_signal_create(1, 0, NULL, kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_memory_pool_allocate(global_pool,
sizeof(hsa_signal_value_t), 0, reinterpret_cast<void**>(&set_value));
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, set_value);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
memset(set_value, 0, sizeof(hsa_signal_value_t));
// Set the signal_args with kernel_signal, will be accessed from Kernel side
signal_args.signal_values = reinterpret_cast<void*>(kernel_signal);
}
memcpy(kernarg_buffer, &signal_args, sizeof(signal_args_t));
// Create the set kernel completion signal
hsa_signal_t set_kernel_completion_signal;
status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &set_kernel_completion_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Create the wait kernel completion signals
hsa_signal_t wait_kernel_completion_signal[NUM_WAIT_KERNELS];
int jj;
for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &wait_kernel_completion_signal[jj]);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
// Setup the dispatch packet
hsa_kernel_dispatch_packet_t dispatch_packet;
memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
dispatch_packet.workgroup_size_x = 1;
dispatch_packet.workgroup_size_y = 1;
dispatch_packet.workgroup_size_z = 1;
dispatch_packet.grid_size_x = 1;
dispatch_packet.grid_size_y = 1;
dispatch_packet.grid_size_z = 1;
dispatch_packet.kernel_object = kernel_object();
dispatch_packet.group_segment_size = group_segment_size();
dispatch_packet.private_segment_size = private_segment_size();
dispatch_packet.kernarg_address = kernarg_buffer;
for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
// Set the appropriate completion signal
dispatch_packet.completion_signal = wait_kernel_completion_signal[jj];
// Dispatch the kernel
// const uint32_t queue_size = queue->size;
const uint32_t queue_mask = queue->size - 1;
// write to command queue
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(queue->base_address)[index & queue_mask] = dispatch_packet;
hsa_queue_store_write_index_relaxed(queue, index + 1);
dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
dispatch_packet.header |= 0 << HSA_PACKET_HEADER_BARRIER;
dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
void* q_base = queue->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(dispatch_packet.header, dispatch_packet.setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, index);
}
// Dispatch the set kernel, setting the barrier bit to 1
dispatch_packet.header |= 1 == HSA_PACKET_HEADER_BARRIER;
set_kernel_file_name("signal_operations_kernels.hsaco");
set_kernel_name("signal_st_rlx_kernel");
status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Set the appropriate completion signal and code descriptor values
dispatch_packet.kernel_object = kernel_object();
dispatch_packet.group_segment_size = group_segment_size();
dispatch_packet.private_segment_size = private_segment_size();
dispatch_packet.kernarg_address = kernarg_buffer;
// Dispatch the kernel
// const uint32_t queue_size = queue->size;
const uint32_t queue_mask = queue->size - 1;
// write to command queue
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(queue->base_address)[index & queue_mask] = dispatch_packet;
hsa_queue_store_write_index_relaxed(queue, index + 1);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, index);
// Query the systems timestamp frequency for wait timeout
uint16_t freq;
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, reinterpret_cast<void*>(&freq));
// Wait on the completion signal of the set kernel, but
// timeout after 1 second
uint64_t wait_time = (uint64_t) freq;
hsa_signal_value_t signal_value;
signal_value = hsa_signal_wait_relaxed(set_kernel_completion_signal,
HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
ASSERT_EQ(1, signal_value);
// Wait on the completion signals of each of the wait kernels, again timing out after 1 second
for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
signal_value = hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj],
HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
ASSERT_EQ(1, signal_value);
}
// destroy the signal created for async copy
status = hsa_signal_destroy(completion_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
status = hsa_amd_memory_pool_free(kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_memory_pool_free(set_value);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
} else {
status = hsa_amd_memory_unlock(kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_memory_unlock(set_value);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
// Destroy the queue
status = hsa_queue_destroy(queue);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
}
void AqlBarrierBitTest::BarrierBitNotSet(void) {
hsa_status_t status;
// The kernarg data structure
typedef struct __attribute__ ((aligned(16))) signal_args_s {
void *signal_values;
} signal_args_t;
signal_args_t signal_args;
// Get the GPU agents into a vector
std::vector<hsa_agent_t> agent_list;
status = hsa_iterate_agents(rocrtst::IterateGPUAgents, &agent_list);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Get CPU agent to get the kern_arg pool
std::vector<hsa_agent_t> cpu_agent;
status = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpu_agent);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Repeat the test for each agent
unsigned int ii;
for (ii = 0; ii < agent_list.size(); ++ii) {
// Check if the queue supports dispatch
uint32_t features = 0;
status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_FEATURE, &features);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
if (0 == (features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) {
continue;
}
// Find a memory pool that supports fine grained memory
hsa_amd_memory_pool_t global_pool;
global_pool.handle = (uint64_t)-1;
status = hsa_amd_agent_iterate_memory_pools(agent_list[ii], rocrtst::GetGlobalMemoryPool, &global_pool);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Obtain the agent's machine model
hsa_machine_model_t machine_model;
status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_MACHINE_MODEL, &machine_model);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Obtain the agent's profile
hsa_profile_t profile;
status = hsa_agent_get_info(agent_list[ii], HSA_AGENT_INFO_PROFILE, &profile);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Find a memory pool that supports kernel arguments
hsa_amd_memory_pool_t kernarg_pool;
kernarg_pool.handle = (uint64_t)-1;
status = hsa_amd_agent_iterate_memory_pools(cpu_agent[0], rocrtst::GetKernArgMemoryPool, &kernarg_pool);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Create a queue
hsa_queue_t* queue;
status = hsa_queue_create(agent_list[ii], 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
set_kernel_file_name("signal_operations_kernels.hsaco");
set_kernel_name("signal_wait_kernel");
status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Allocate the kernel argument buffer from the correct pool
signal_args_t* kernarg_buffer = NULL;
status = hsa_amd_memory_pool_allocate(kernarg_pool,
sizeof(signal_args_t), 0,
reinterpret_cast<void**>(&kernarg_buffer));
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_agents_allow_access(1, &agent_list[ii], NULL, kernarg_buffer);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Create the completion signal
hsa_signal_t completion_signal;
status = hsa_signal_create(1, 0, NULL, &completion_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
hsa_amd_memory_pool_access_t access;
status = hsa_amd_agent_memory_pool_get_info(cpu_agent[0],
global_pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
hsa_signal_t* kernel_signal;
hsa_signal_value_t* set_value;
hsa_signal_t s;
status = hsa_signal_create(1, 0, NULL, &s);
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
// Create the kernel signal
status = hsa_amd_memory_pool_allocate(global_pool,
sizeof(hsa_signal_t), 0, reinterpret_cast<void**>(&kernel_signal));
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_signal_create(1, 0, NULL, kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_memory_pool_allocate(global_pool,
sizeof(hsa_signal_value_t), 0, reinterpret_cast<void**>(&set_value));
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_agents_allow_access(1, &cpu_agent[0], NULL, set_value);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
memset(set_value, 0, sizeof(hsa_signal_value_t));
// Set the signal_args with kernel_signal, will be accessed from Kernel side
signal_args.signal_values = reinterpret_cast<void*>(kernel_signal);
}
memcpy(kernarg_buffer, &signal_args, sizeof(signal_args_t));
// Create the set kernel completion signal
hsa_signal_t set_kernel_completion_signal;
status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &set_kernel_completion_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
// Create the wait kernel completion signals
hsa_signal_t wait_kernel_completion_signal[NUM_WAIT_KERNELS];
int jj;
for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
status = hsa_signal_create((hsa_signal_value_t)1, 0, NULL, &wait_kernel_completion_signal[jj]);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
// Setup the dispatch packet
hsa_kernel_dispatch_packet_t dispatch_packet;
memset(&dispatch_packet, 0, sizeof(hsa_kernel_dispatch_packet_t));
dispatch_packet.workgroup_size_x = 1;
dispatch_packet.workgroup_size_y = 1;
dispatch_packet.workgroup_size_z = 1;
dispatch_packet.grid_size_x = 1;
dispatch_packet.grid_size_y = 1;
dispatch_packet.grid_size_z = 1;
dispatch_packet.kernel_object = kernel_object();
dispatch_packet.group_segment_size = group_segment_size();
dispatch_packet.private_segment_size = private_segment_size();
dispatch_packet.kernarg_address = kernarg_buffer;
for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
// Set the appropriate completion signal
dispatch_packet.completion_signal = wait_kernel_completion_signal[jj];
// Dispatch the kernel
// const uint32_t queue_size = queue->size;
const uint32_t queue_mask = queue->size - 1;
// write to command queue
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(queue->base_address)[index & queue_mask] = dispatch_packet;
hsa_queue_store_write_index_relaxed(queue, index + 1);
dispatch_packet.header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
dispatch_packet.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
dispatch_packet.header |= 0 << HSA_PACKET_HEADER_BARRIER;
dispatch_packet.setup |= 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
void* q_base = queue->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(dispatch_packet.header, dispatch_packet.setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, index);
}
// Dispatch the set kernel, NOT setting the barrier bit
set_kernel_file_name("signal_operations_kernels.hsaco");
set_kernel_name("signal_st_rlx_kernel");
status = rocrtst::LoadKernelFromObjFile(this, &agent_list[ii]);
ASSERT_EQ(status, HSA_STATUS_SUCCESS);
// Set the appropriate completion signal and code descriptor values
dispatch_packet.kernel_object = kernel_object();
dispatch_packet.group_segment_size = group_segment_size();
dispatch_packet.private_segment_size = private_segment_size();
dispatch_packet.kernarg_address = kernarg_buffer;
// Set the appropriate completion signal
dispatch_packet.completion_signal = set_kernel_completion_signal;
// Dispatch the kernel
// const uint32_t queue_size = queue->size;
const uint32_t queue_mask = queue->size - 1;
// write to command queue
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(queue->base_address)[index & queue_mask] = dispatch_packet;
hsa_queue_store_write_index_relaxed(queue, index + 1);
// ringdoor bell
hsa_signal_store_relaxed(queue->doorbell_signal, index);
// Query the systems timestamp frequency for wait timeout
uint16_t freq;
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, reinterpret_cast<void*>(&freq));
// Wait on the completion signal of the set kernel, but
// timeout after 1 second
uint64_t wait_time = (uint64_t) freq;
hsa_signal_value_t signal_value;
signal_value = hsa_signal_wait_relaxed(set_kernel_completion_signal,
HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
ASSERT_EQ(1, signal_value);
// Wait on the completion signals of each of the wait kernels, again timing out after 1 second
for (jj = 0; jj < NUM_WAIT_KERNELS; ++jj) {
signal_value = hsa_signal_wait_relaxed(wait_kernel_completion_signal[jj],
HSA_SIGNAL_CONDITION_EQ, 0, wait_time, HSA_WAIT_STATE_ACTIVE);
ASSERT_EQ(1, signal_value);
}
// Check kernel signal
std::cout << "Kernel_signal Value after package execustion(should be 0) = " << (kernel_signal->handle) << std::endl;
// destroy the signal created for async copy
status = hsa_signal_destroy(s);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_signal_destroy(completion_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
status = hsa_amd_memory_pool_free(kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_memory_pool_free(set_value);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
} else {
status = hsa_amd_memory_unlock(kernel_signal);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
status = hsa_amd_memory_unlock(set_value);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
// Destroy the queue
status = hsa_queue_destroy(queue);
ASSERT_EQ(HSA_STATUS_SUCCESS, status);
}
}
+82
Просмотреть файл
@@ -0,0 +1,82 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_SUITES_FUNCTIONAL_AQL_BARRIER_BIT_H_
#define ROCRTST_SUITES_FUNCTIONAL_AQL_BARRIER_BIT_H_
#include <pthread.h>
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"
class AqlBarrierBitTest : public TestBase {
public:
AqlBarrierBitTest(bool, bool);
// @Brief: Destructor for the AqlBarrierBitTest class
virtual ~AqlBarrierBitTest();
// @Brief: Setup the environment for measurement
virtual void SetUp();
// @Brief: Core measurement execution
virtual void Run();
// @Brief: Clean up and retrive the resource
virtual void Close();
// @Brief: Display results
virtual void DisplayResults() const;
// @Brief: Display information about what this test does
virtual void DisplayTestInfo(void);
// @Brief: Runtime will be initialized Num_Times
void BarrierBitSet(void);
void BarrierBitNotSet(void);
};
#endif // ROCRTST_SUITES_FUNCTIONAL_AQL_BARRIER_BIT_H_
+4
Просмотреть файл
@@ -332,6 +332,10 @@ set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/atomicOperations_kernels.cl")
build_sample_for_devices("atomicOperations")
# Signal Operations
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/signal_operations.cl")
build_sample_for_devices("signal_operations")
# Build rules
add_executable(${ROCRTST} ${performanceSources} ${functionalSources} ${negativeSources} ${stressSources}
+83
Просмотреть файл
@@ -0,0 +1,83 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
__kernel void signal_st_rlx_kernel(__global void* dumy_signal)
{
int tid = get_global_id(0);
volatile __global long* p = (volatile __global long* )(dumy_signal);
atom_xchg(p,0);
}
__kernel void signal_st_rlx_kernel_multi(__global void* dumy_signal)
{
int tid = get_global_id(0);
int offset = 8*tid; // handle is of long unsigned int, having size of 8bytes
volatile __global long* p = (volatile __global long* )(dumy_signal+ offset);
atom_xchg(p,0);
}
__kernel void signal_wait_kernel(__global void* dumy_signal)
{
int tid = get_global_id(0);
volatile __global long* p = (volatile __global long* )(dumy_signal);
while(!(*p == 0)) { } // Will be using the volatile type as we dont have atom_cmp() function from Khronos spec
}
__kernel void signal_wait_kernel_multi(__global void* dumy_signal)
{
int tid = get_global_id(0);
int offset = 8*tid; // handle is of long unsigned int, having size of 8bytes
volatile __global long* p = (volatile __global long* )(dumy_signal + offset);
while(!(*p == 0)) { } // Will be using the volatile type as we dont have atom_cmp() function from Khronos spec
}
+15
Просмотреть файл
@@ -69,6 +69,7 @@
#include "suites/functional/concurrent_shutdown.h"
#include "suites/functional/reference_count.h"
#include "suites/functional/signal_concurrent.h"
#include "suites/functional/aql_barrier_bit.h"
#include "rocm_smi/rocm_smi.h"
static RocrTstGlobals *sRocrtstGlvalues = nullptr;
@@ -191,6 +192,20 @@ TEST(rocrtstFunc, Signal_Create_Concurrently) {
RunCustomTestEpilog(&sd);
}
TEST(rocrtstFunc, Aql_Barrier_Bit_Set) {
AqlBarrierBitTest ab(true, false);
RunCustomTestProlog(&ab);
ab.BarrierBitSet();
RunCustomTestEpilog(&ab);
}
TEST(rocrtstFunc, Aql_Barrier_Bit_Not_Set) {
AqlBarrierBitTest ab(false, true);
RunCustomTestProlog(&ab);
ab.BarrierBitNotSet();
RunCustomTestEpilog(&ab);
}
TEST(rocrtstFunc, Memory_Max_Mem) {
MemoryTest mt;