Queue validation tests and memory alignment tests

Change-Id: I96d8c2898795240288517bdcbc2b48ff2cc04f66


[ROCm/ROCR-Runtime commit: 08a253684b]
Этот коммит содержится в:
rohit pathania
2018-05-15 12:27:05 +05:30
родитель c35946cb79
Коммит d77f587d15
5 изменённых файлов: 1269 добавлений и 1 удалений
+270
Просмотреть файл
@@ -0,0 +1,270 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include "suites/functional/memory_alignment.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "common/concurrent_utils.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
static const uint32_t kNumThreads = 4096;
typedef struct control_block {
hsa_amd_memory_pool_t* pool;
} cb_t;
// Callback function which will call upon when need
// to allocate memory from the pool in the thread.
static void CallbackVerifyPoolAlignmendFunc(void *data) {
hsa_status_t err;
cb_t *cb = reinterpret_cast<cb_t*>(data);
rocrtst::pool_info_t info;
memset(&info, 0, sizeof(rocrtst::pool_info_t));
err = rocrtst::AcquirePoolInfo(*(cb->pool), &info);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
if (info.alloc_allowed) {
// Get the allocated alignment size
size_t alignment_size = info.alloc_alignment;
EXPECT_TRUE(alignment_size);
// Verifies the alignment attribute is a power of 2
if (info.size != 0) {
EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1)))));
}
}
return;
}
MemoryAlignmentTest::MemoryAlignmentTest(void) :
TestBase() {
set_num_iteration(10); // Number of iterations to execute of the main test;
// This is a default value which can be overridden
// on the command line.
set_title("RocR Memory Alignment Test");
set_description(" This test verifies that each memory pool of the agent that"
" has HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED alloc memory, It is "
" aligned as specified by the HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT"
" and has the alignment attribute is a power of 2.");
}
MemoryAlignmentTest::~MemoryAlignmentTest(void) {
}
// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void MemoryAlignmentTest::SetUp(void) {
hsa_status_t err;
TestBase::SetUp();
err = rocrtst::SetDefaultAgents(this);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}
void MemoryAlignmentTest::Run(void) {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::Run();
}
void MemoryAlignmentTest::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
void MemoryAlignmentTest::DisplayResults(void) const {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
return;
}
void MemoryAlignmentTest::Close() {
// This will close handles opened within rocrtst utility calls and call
// hsa_shut_down(), so it should be done after other hsa cleanup
TestBase::Close();
}
static const char kSubTestSeparator[] = " **************************";
static void PrintMemorySubtestHeader(const char *header) {
std::cout << " *** Memory Functional Subtest: " << header << " ***" << std::endl;
}
static void PrintAgentNameAndType(hsa_agent_t agent) {
hsa_status_t err;
char ag_name[64];
hsa_device_type_t ag_type;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, ag_name);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &ag_type);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
std::cout << " Agent: " << ag_name << " (";
switch (ag_type) {
case HSA_DEVICE_TYPE_CPU:
std::cout << "CPU)";
break;
case HSA_DEVICE_TYPE_GPU:
std::cout << "GPU)";
break;
case HSA_DEVICE_TYPE_DSP:
std::cout << "DSP)";
break;
}
std::cout << std::endl;
return;
}
void MemoryAlignmentTest::MemoryPoolAlignment(hsa_agent_t agent,
hsa_amd_memory_pool_t pool) {
hsa_status_t err;
rocrtst::pool_info_t pool_i;
err = rocrtst::AcquirePoolInfo(pool, &pool_i);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
if (verbosity() > 0) {
PrintAgentNameAndType(agent);
}
if (pool_i.alloc_allowed) {
// Get the allocated alignment size
size_t alignment_size = pool_i.alloc_alignment;
EXPECT_TRUE(alignment_size);
// Verifies the alignment attribute is a power of 2
if (pool_i.size != 0) {
EXPECT_TRUE((alignment_size&&(!(alignment_size&(alignment_size-1)))));
}
// verifies that alignment attribute is a power of 2 in different threads
rocrtst::test_group* tg_concurrent = rocrtst::TestGroupCreate(kNumThreads);
// The control blocks are used to pass data to the threads
uint32_t kk;
cb_t cb[kNumThreads];
for (kk = 0; kk < kNumThreads; kk++) {
cb[kk].pool = &pool;
rocrtst::TestGroupAdd(tg_concurrent, &CallbackVerifyPoolAlignmendFunc, &cb[kk], 1);
}
// Create threads for each test
rocrtst::TestGroupThreadCreate(tg_concurrent);
// Start to run tests
rocrtst::TestGroupStart(tg_concurrent);
// Wait all tests finish
rocrtst::TestGroupWait(tg_concurrent);
// Exit all tests
rocrtst::TestGroupExit(tg_concurrent);
// Destroy thread group and cleanup resources
rocrtst::TestGroupDestroy(tg_concurrent);
}
return;
}
void MemoryAlignmentTest::MemoryPoolAlignment(void) {
hsa_status_t err;
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
if (verbosity() > 0) {
PrintMemorySubtestHeader("MemoryPoolAlignment in Basic func & Stress Test");
}
err = rocrtst::GetAgentPools(&agent_pools);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
auto pool_idx = 0;
for (auto a : agent_pools) {
for (auto p : a->pools) {
if (verbosity() > 0) {
std::cout << " Pool " << pool_idx++ << ":" << std::endl;
}
MemoryPoolAlignment(a->agent, p);
}
}
if (verbosity() > 0) {
std::cout << "subtest Passed" << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
}
+84
Просмотреть файл
@@ -0,0 +1,84 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"
class MemoryAlignmentTest : public TestBase {
public:
MemoryAlignmentTest();
// @Brief: Destructor for test case of MemoryTest
virtual ~MemoryAlignmentTest();
// @Brief: Setup the environment for measurement
virtual void SetUp();
// @Brief: Core measurement execution
virtual void Run();
// @Brief: Clean up and retrive the resource
virtual void Close();
// @Brief: Display results
virtual void DisplayResults() const;
// @Brief: Display information about what this test does
virtual void DisplayTestInfo(void);
void MemoryPoolAlignment(void);
private:
void MemoryPoolAlignment(hsa_agent_t agent,
hsa_amd_memory_pool_t pool);
};
#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ALIGNMENT_H_
+766
Просмотреть файл
@@ -0,0 +1,766 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <fcntl.h>
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include "suites/negative/queue_validation.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
static const uint32_t kMaxQueueSizeForAgent = 1024;
static const uint32_t kMaxQueue = 64;
typedef struct test_validation_data_t {
bool cb_triggered;
hsa_queue_t** queue_pointer;
hsa_status_t expected_status;
} test_validation_data;
static void CallbackQueueErrorHandling(hsa_status_t status, hsa_queue_t *source, void *data);
// This wrapper atomically writes the provided header and setup to the
// provided AQL packet. The provided AQL packet address should be in the
// queue memory space.
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
hsa_kernel_dispatch_packet_t* queue_packet) {
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
header | (setup << 16), __ATOMIC_RELEASE);
}
QueueValidation::QueueValidation(bool launch_InvalidDimension,
bool launch_InvalidGroupMemory,
bool launch_InvalidKernelObject,
bool launch_InvalidPacket,
bool launch_InvalidWorkGroupSize) :TestBase() {
set_num_iteration(10); // Number of iterations to execute of the main test;
// This is a default value which can be overridden
// on the command line.
std::string name;
std::string desc;
name = "RocR Queue Validation";
desc = "This series of tests submit different negative aql packet into the queue"
" and verifies that queue error handling callback called with proper exception.";
if (launch_InvalidDimension) {
name += " For InvalidDimension";
desc += " This test verifies that if an aql packet specifies a dimension "
" value above 3, the queue's error handling callback will trigger";
} else if (launch_InvalidGroupMemory) {
name += " For InvalidGroupMemory";
desc += " This test verifies that if an aql packet specifies an invalid group"
" memory size, the queue's error handling.";
} else if (launch_InvalidKernelObject) {
name += " ForInvalidKernelObject";
desc += " This test verifies that if an aql packet specifies an invalid"
" kernel object, the queue's error handling callback will trigger.";
} else if (launch_InvalidPacket) {
name += " For InvalidPacket";
desc += " This test verifies that if an aql packet is invalid (bad packet type),"
" the queue's error handling callback will trigger.";
} else if (launch_InvalidWorkGroupSize) {
name += " For InvalidWorkGroupSize";
desc += " This test verifies that if an aql packet specifies an invalid"
" workgroup size, the queue's error handling callback will trigger.";
}
set_title(name);
set_description(desc);
memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
set_kernel_file_name("dispatch_time_kernels.hsaco");
set_kernel_name("empty_kernel");
}
QueueValidation::~QueueValidation(void) {
}
// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void QueueValidation::SetUp(void) {
hsa_status_t err;
TestBase::SetUp();
err = rocrtst::SetDefaultAgents(this);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
return;
}
void QueueValidation::Run(void) {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::Run();
}
void QueueValidation::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
void QueueValidation::DisplayResults(void) const {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
return;
}
void QueueValidation::Close() {
// This will close handles opened within rocrtst utility calls and call
// hsa_shut_down(), so it should be done after other hsa cleanup
TestBase::Close();
}
static const char kSubTestSeparator[] = " **************************";
static void PrintDebugSubtestHeader(const char *header) {
std::cout << " *** QueueValidation Subtest: " << header << " ***" << std::endl;
}
void QueueValidation::WriteAQLPktToQueue(hsa_queue_t* q) {
void* queue_base = q->base_address;
const uint32_t queue_mask = q->size - 1;
uint64_t index = hsa_queue_add_write_index_relaxed(q, 1);
reinterpret_cast<hsa_kernel_dispatch_packet_t *>(
queue_base)[index & queue_mask] = aql();
}
void QueueValidation::QueueValidationForInvalidDimension(hsa_agent_t cpuAgent,
hsa_agent_t gpuAgent) {
hsa_status_t err;
// get queue size
uint32_t queue_max = 0;
err = hsa_agent_get_info(gpuAgent,
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Adjust the size to the max of 1024
queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;
hsa_queue_t *queue[kMaxQueue]; // command queue
uint32_t ii;
for (ii = 0; ii < kMaxQueue; ++ii) {
test_validation_data user_data;
// set callback flag to false if callback called then it will change to true
user_data.cb_triggered = false;
// set the queue pointer
user_data.queue_pointer = &queue[ii];
// set the expected status in queue error calback handling
user_data.expected_status = HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS;
// create queue
err = hsa_queue_create(gpuAgent,
queue_max, HSA_QUEUE_TYPE_SINGLE,
CallbackQueueErrorHandling, &user_data, 0, 0, &queue[ii]);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create the executable, get symbol by name and load the code object
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// setting the dimesion more than 3
aql().setup = 4;
aql().kernel_object = kernel_object();
const uint32_t queue_mask = queue[ii]->size - 1;
// Load index for writing header later to command queue at same index
uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
WriteAQLPktToQueue(queue[ii]);
aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
void* q_base = queue[ii]->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(aql().header, aql().setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);
// wait for the signal long enough for the queue error handling callback to happen
hsa_signal_value_t completion;
completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
0xffffff, HSA_WAIT_STATE_ACTIVE);
// completion signal should not be changed.
ASSERT_EQ(completion, 1);
// queue error handling callback should be triggered
ASSERT_EQ(user_data.cb_triggered, true);
hsa_signal_store_relaxed(aql().completion_signal, 1);
}
for (ii = 0; ii < kMaxQueue; ++ii) {
if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
}
}
void QueueValidation::QueueValidationInvalidGroupMemory(hsa_agent_t cpuAgent,
hsa_agent_t gpuAgent) {
hsa_status_t err;
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
// get queue size
uint32_t queue_max = 0;
err = hsa_agent_get_info(gpuAgent,
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Adjust the size to the max of 1024
queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;
hsa_queue_t *queue[kMaxQueue]; // command queue
uint32_t ii;
for (ii = 0; ii < kMaxQueue; ++ii) {
test_validation_data user_data;
// set callback flag to false if callback called then it will change to true
user_data.cb_triggered = false;
// set the queue pointer
user_data.queue_pointer = &queue[ii];
// set the expected status in queue error calback handling
user_data.expected_status = HSA_STATUS_ERROR_INVALID_ALLOCATION;
// create queue
err = hsa_queue_create(gpuAgent,
queue_max, HSA_QUEUE_TYPE_SINGLE,
CallbackQueueErrorHandling, &user_data, 0, 0, &queue[ii]);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create the executable, get symbol by name and load the code object
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
aql().kernel_object = kernel_object();
// Request a large group memory segment size
aql().group_segment_size = (uint32_t)-1;
const uint32_t queue_mask = queue[ii]->size - 1;
// Load index for writing header later to command queue at same index
uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
WriteAQLPktToQueue(queue[ii]);
aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
void* q_base = queue[ii]->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(aql().header, aql().setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);
// wait for the signal long enough for the queue error handling callback to happen
hsa_signal_value_t completion;
completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
0xffffff, HSA_WAIT_STATE_ACTIVE);
// completion signal should not be changed.
ASSERT_EQ(completion, 1);
// queue error handling callback should be triggered
ASSERT_EQ(user_data.cb_triggered, true);
hsa_signal_store_relaxed(aql().completion_signal, 1);
}
for (ii = 0; ii < kMaxQueue; ++ii) {
if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
}
}
void QueueValidation::QueueValidationForInvalidKernelObject(hsa_agent_t cpuAgent,
hsa_agent_t gpuAgent) {
hsa_status_t err;
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
// get queue size
uint32_t queue_max = 0;
err = hsa_agent_get_info(gpuAgent,
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Adjust the size to the max of 1024
queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;
hsa_queue_t *queue[kMaxQueue]; // command queue
uint32_t ii;
for (ii = 0; ii < kMaxQueue; ++ii) {
test_validation_data user_data;
// set callback flag to false if callback called then it will change to true
user_data.cb_triggered = false;
// set the queue pointer
user_data.queue_pointer = &queue[ii];
// set the expected status in queue error calback handling
user_data.expected_status = HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
// create queue
err = hsa_queue_create(gpuAgent,
kMaxQueueSizeForAgent, HSA_QUEUE_TYPE_SINGLE,
CallbackQueueErrorHandling, &user_data, 0, 0, &queue[ii]);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create the executable, get symbol by name and load the code object
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// setting the null code object
aql().kernel_object = 0;
const uint32_t queue_mask = queue[ii]->size - 1;
// Load index for writing header later to command queue at same index
uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
WriteAQLPktToQueue(queue[ii]);
aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
void* q_base = queue[ii]->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(aql().header, aql().setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);
// wait for the signal long enough for the queue error handling callback to happen
hsa_signal_value_t completion;
completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
0xffffff, HSA_WAIT_STATE_ACTIVE);
// completion signal should not be changed.
ASSERT_EQ(completion, 1);
// queue error handling callback should be triggered
ASSERT_EQ(user_data.cb_triggered, true);
hsa_signal_store_relaxed(aql().completion_signal, 1);
}
for (ii = 0; ii < kMaxQueue; ++ii) {
if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
}
}
void QueueValidation::QueueValidationForInvalidPacket(hsa_agent_t cpuAgent,
hsa_agent_t gpuAgent) {
hsa_status_t err;
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
// get queue size
uint32_t queue_max = 0;
err = hsa_agent_get_info(gpuAgent,
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Adjust the size to the max of 1024
queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;
hsa_queue_t *queue[kMaxQueue]; // command queue
uint32_t ii;
for (ii = 0; ii < kMaxQueue; ++ii) {
test_validation_data user_data;
// set callback flag to false if callback called then it will change to true
user_data.cb_triggered = false;
// set the queue pointer
user_data.queue_pointer = &queue[ii];
// set the expected status in queue error calback handling
user_data.expected_status = HSA_STATUS_ERROR_INVALID_PACKET_FORMAT;
// create queue
err = hsa_queue_create(gpuAgent,
queue_max, HSA_QUEUE_TYPE_SINGLE,
CallbackQueueErrorHandling, &user_data, 0, 0, &queue[ii]);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create the executable, get symbol by name and load the code object
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
const uint32_t queue_mask = queue[ii]->size - 1;
// Load index for writing header later to command queue at same index
uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
WriteAQLPktToQueue(queue[ii]);
// setting the invalid packet type
aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql().header |= -1 << HSA_PACKET_HEADER_TYPE;
aql().kernel_object = kernel_object();
void* q_base = queue[ii]->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(aql().header, aql().setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);
// wait for the signal long enough for the queue error handling callback to happen
hsa_signal_value_t completion;
completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
0xffffff, HSA_WAIT_STATE_ACTIVE);
// completion signal should not be changed.
ASSERT_EQ(completion, 1);
// queue error handling callback should be triggered
ASSERT_EQ(user_data.cb_triggered, true);
hsa_signal_store_relaxed(aql().completion_signal, 1);
}
for (ii = 0; ii < kMaxQueue; ++ii) {
if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
}
}
void QueueValidation::QueueValidationForInvalidWorkGroupSize(hsa_agent_t cpuAgent,
hsa_agent_t gpuAgent) {
hsa_status_t err;
// Fill up the kernel packet except header
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
// get queue size
uint32_t queue_max = 0;
err = hsa_agent_get_info(gpuAgent,
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_max);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Adjust the size to the max of 1024
queue_max = (queue_max < kMaxQueueSizeForAgent) ? queue_max: kMaxQueueSizeForAgent;
hsa_queue_t *queue[kMaxQueue]; // command queue
uint32_t ii;
for (ii = 0; ii < kMaxQueue; ++ii) {
uint32_t jj;
for (jj = 1; jj <= 3; ++jj) {
test_validation_data user_data;
// set callback flag to false if callback called then it will change to true
user_data.cb_triggered = false;
// set the queue pointer
user_data.queue_pointer = &queue[ii];
// set the expected status in queue error calback handling
user_data.expected_status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
// create queue
err = hsa_queue_create(gpuAgent,
kMaxQueueSizeForAgent, HSA_QUEUE_TYPE_SINGLE,
CallbackQueueErrorHandling, &user_data, 0, 0, &queue[ii]);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create the executable, get symbol by name and load the code object
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
aql().setup |= jj << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
aql().workgroup_size_x = (jj == 1) ? (uint16_t)-1 : 1;
aql().workgroup_size_y = (jj == 2) ? (uint16_t)-1 : 1;
aql().workgroup_size_z = (jj == 3) ? (uint16_t)-1 : 1;
aql().kernel_object = kernel_object();
const uint32_t queue_mask = queue[ii]->size - 1;
// Load index for writing header later to command queue at same index
uint64_t index = hsa_queue_load_write_index_relaxed(queue[ii]);
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
WriteAQLPktToQueue(queue[ii]);
aql().header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql().header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
void* q_base = queue[ii]->base_address;
// Set the Aql packet header
AtomicSetPacketHeader(aql().header, aql().setup,
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
(q_base))[index & queue_mask]);
// ringdoor bell
hsa_signal_store_relaxed(queue[ii]->doorbell_signal, index);
// wait for the signal long enough for the queue error handling callback to happen
hsa_signal_value_t completion;
completion = hsa_signal_wait_scacquire(aql().completion_signal, HSA_SIGNAL_CONDITION_LT, 1,
0xffffff, HSA_WAIT_STATE_ACTIVE);
// completion signal should not be changed.
ASSERT_EQ(completion, 1);
// queue error handling callback should be triggered
ASSERT_EQ(user_data.cb_triggered, true);
hsa_signal_store_relaxed(aql().completion_signal, 1);
if (queue[ii]) { hsa_queue_destroy(queue[ii]); }
}
}
}
void QueueValidation::QueueValidationForInvalidDimension(void) {
hsa_status_t err;
if (verbosity() > 0) {
PrintDebugSubtestHeader("InvalidDimensionTest");
}
// find all cpu agents
std::vector<hsa_agent_t> cpus;
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// find all gpu agents
std::vector<hsa_agent_t> gpus;
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
QueueValidationForInvalidDimension(cpus[0], gpus[i]);
}
if (verbosity() > 0) {
std::cout << "subtest Passed" << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
}
void QueueValidation::QueueValidationInvalidGroupMemory(void) {
hsa_status_t err;
if (verbosity() > 0) {
PrintDebugSubtestHeader("InvalidGroupMemory");
}
// find all cpu agents
std::vector<hsa_agent_t> cpus;
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// find all gpu agents
std::vector<hsa_agent_t> gpus;
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
QueueValidationInvalidGroupMemory(cpus[0], gpus[i]);
}
if (verbosity() > 0) {
std::cout << "subtest Passed" << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
}
void QueueValidation::QueueValidationForInvalidKernelObject(void) {
hsa_status_t err;
if (verbosity() > 0) {
PrintDebugSubtestHeader("InvalidKernelObject");
}
// find all cpu agents
std::vector<hsa_agent_t> cpus;
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// find all gpu agents
std::vector<hsa_agent_t> gpus;
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
QueueValidationForInvalidKernelObject(cpus[0], gpus[i]);
}
if (verbosity() > 0) {
std::cout << "subtest Passed" << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
}
void QueueValidation::QueueValidationForInvalidPacket(void) {
hsa_status_t err;
if (verbosity() > 0) {
PrintDebugSubtestHeader("InvalidPacket");
}
// find all cpu agents
std::vector<hsa_agent_t> cpus;
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// find all gpu agents
std::vector<hsa_agent_t> gpus;
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
QueueValidationForInvalidPacket(cpus[0], gpus[i]);
}
if (verbosity() > 0) {
std::cout << "subtest Passed" << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
}
void QueueValidation::QueueValidationForInvalidWorkGroupSize(void) {
hsa_status_t err;
if (verbosity() > 0) {
PrintDebugSubtestHeader("InvalidWorkGroupSize");
}
// find all cpu agents
std::vector<hsa_agent_t> cpus;
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// find all gpu agents
std::vector<hsa_agent_t> gpus;
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
QueueValidationForInvalidWorkGroupSize(cpus[0], gpus[i]);
}
if (verbosity() > 0) {
std::cout << "subtest Passed" << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
}
void CallbackQueueErrorHandling(hsa_status_t status, hsa_queue_t* source, void* data) {
ASSERT_NE(source, nullptr);
ASSERT_NE(data, nullptr);
test_validation_data *debug_data = reinterpret_cast<test_validation_data*>(data);
hsa_queue_t * queue = *(debug_data->queue_pointer);
debug_data->cb_triggered = true;
// check the status
ASSERT_EQ(status, debug_data->expected_status);
// check the queue id and user data
ASSERT_EQ(source->id, queue->id);
return;
}
+104
Просмотреть файл
@@ -0,0 +1,104 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_SUITES_NEGATIVE_QUEUE_VALIDATION_H_
#define ROCRTST_SUITES_NEGATIVE_QUEUE_VALIDATION_H_
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"
class QueueValidation : public TestBase {
public:
QueueValidation(bool launch_InvalidDimension,
bool launch_InvalidGroupMemory,
bool launch_InvalidKernelObject,
bool launch_InvalidPacket,
bool launch_InvalidWorkGroupSize);
// @Brief: Destructor for test case of MemoryTest
virtual ~QueueValidation();
// @Brief: Setup the environment for measurement
virtual void SetUp();
// @Brief: Core measurement execution
virtual void Run();
// @Brief: Clean up and retrive the resource
virtual void Close();
// @Brief: Display results
virtual void DisplayResults() const;
// @Brief: Display information about what this test does
virtual void DisplayTestInfo(void);
// @Brief: Verifies that if an aql packet specifies a dimension
// value above 3, the queue's error handling callback will trigger
void QueueValidationForInvalidDimension(void);
// @Brief: Verifies that if an aql packet specifies an invalid group
// memory size, the queue's error handling
void QueueValidationInvalidGroupMemory(void);
// @Brief: Verifies that if an aql packet specifies an invalid
// kernel object, the queue's error handling callback will trigger.
void QueueValidationForInvalidKernelObject(void);
// @Brief: Verifies that if an aql packet is invalid (bad packet type),
// the queue's error handling callback will trigger
void QueueValidationForInvalidPacket(void);
// @Brief: Verifies that if an aql packet specifies an invalid
// workgroup size, the queue's error handling callback will trigger.
void QueueValidationForInvalidWorkGroupSize(void);
private:
void QueueValidationForInvalidDimension(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
void QueueValidationInvalidGroupMemory(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
void QueueValidationForInvalidKernelObject(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
void QueueValidationForInvalidPacket(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
void QueueValidationForInvalidWorkGroupSize(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
void WriteAQLPktToQueue(hsa_queue_t* q);
};
#endif // ROCRTST_SUITES_NEGATIVE_QUEUE_VALIDATION_H_
+45 -1
Просмотреть файл
@@ -52,11 +52,13 @@
#include "suites/functional/memory_basic.h"
#include "suites/functional/memory_access.h"
#include "suites/functional/ipc.h"
#include "suites/functional/memory_alignment.h"
#include "suites/performance/dispatch_time.h"
#include "suites/performance/memory_async_copy.h"
#include "suites/performance/memory_async_copy_numa.h"
#include "suites/performance/enqueueLatency.h"
#include "suites/negative/memory_allocate_negative_tests.h"
#include "suites/negative/queue_validation.h"
#include "suites/stress/memory_concurrent_tests.h"
#include "suites/test_common/test_case_template.h"
#include "suites/test_common/main.h"
@@ -181,6 +183,13 @@ TEST(rocrtstFunc, DebugBasicTests) {
RunCustomTestEpilog(&mt);
}
TEST(rocrtstFunc, Memory_Alignment_Test) {
MemoryAlignmentTest ma;
RunCustomTestProlog(&ma);
ma.MemoryPoolAlignment();
RunCustomTestEpilog(&ma);
}
TEST(rocrtstNeg, Memory_Negative_Tests) {
MemoryAllocateNegativeTest mt;
RunCustomTestProlog(&mt);
@@ -189,6 +198,41 @@ TEST(rocrtstNeg, Memory_Negative_Tests) {
RunCustomTestEpilog(&mt);
}
TEST(rocrtstNeg, Queue_Validation_InvalidDimension) {
QueueValidation qv(true, false, false, false, false);
RunCustomTestProlog(&qv);
qv.QueueValidationForInvalidDimension();
RunCustomTestEpilog(&qv);
}
TEST(rocrtstNeg, Queue_Validation_InvalidGroupMemory) {
QueueValidation qv(false, true, false, false, false);
RunCustomTestProlog(&qv);
qv.QueueValidationInvalidGroupMemory();
RunCustomTestEpilog(&qv);
}
TEST(rocrtstNeg, Queue_Validation_InvalidKernelObject) {
QueueValidation qv(false, false, true, false, false);
RunCustomTestProlog(&qv);
qv.QueueValidationForInvalidKernelObject();
RunCustomTestEpilog(&qv);
}
TEST(rocrtstNeg, Queue_Validation_InvalidPacket) {
QueueValidation qv(false, false, false, true, false);
RunCustomTestProlog(&qv);
qv.QueueValidationForInvalidPacket();
RunCustomTestEpilog(&qv);
}
TEST(rocrtstNeg, Queue_Validation_InvalidWorkGroupSize) {
QueueValidation qv(false, false, false, false, true);
RunCustomTestProlog(&qv);
qv.QueueValidationForInvalidWorkGroupSize();
RunCustomTestEpilog(&qv);
}
TEST(rocrtstStress, Memory_Concurrent_Allocate_Test) {
MemoryConcurrentTest mt(true, false, false);
RunCustomTestProlog(&mt);
@@ -203,7 +247,7 @@ TEST(rocrtstStress, Memory_Concurrent_Free_Test) {
RunCustomTestEpilog(&mt);
}
TEST(rocrtstStress, Memory_Concurrent_Pool_Info_Test) {
TEST(rocrtstStress, DISABLED_Memory_Concurrent_Pool_Info_Test) {
MemoryConcurrentTest mt(false, false, true);
RunCustomTestProlog(&mt);
mt.MemoryConcurrentPoolGetInfo();