Added CPU to GPU and GPU to CPU MemoryAccess Tests, Added enqueue latency Tests
Change-Id: I18643d283101b792fa25705c8149ddc5a9eefe73
This commit is contained in:
کامیت شده توسط
Rohit Pathania
والد
563581223c
کامیت
ee917eca68
Executable
+502
@@ -0,0 +1,502 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "suites/functional/memory_access.h"
|
||||
#include "common/base_rocr_utils.h"
|
||||
#include "common/common.h"
|
||||
#include "common/helper_funcs.h"
|
||||
#include "common/hsatimer.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_finalize.h"
|
||||
|
||||
static const uint32_t kNumBufferElements = 256;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define RET_IF_HSA_ERR(err) { \
|
||||
if ((err) != HSA_STATUS_SUCCESS) { \
|
||||
const char* msg = 0; \
|
||||
hsa_status_string(err, &msg); \
|
||||
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
|
||||
__FILE__ << ". Call returned " << err << std::endl; \
|
||||
std::cout << msg << std::endl; \
|
||||
return (err); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
|
||||
MemoryAccessTest::MemoryAccessTest(void) :
|
||||
TestBase() {
|
||||
set_num_iteration(10); // Number of iterations to execute of the main test;
|
||||
// This is a default value which can be overridden
|
||||
// on the command line.
|
||||
|
||||
set_title("RocR Memory Access Tests");
|
||||
set_description("This series of tests check memory allocation"
|
||||
"on GPU and CPU, i.e. GPU access to system memory "
|
||||
"and CPU access to GPU memory.");
|
||||
}
|
||||
|
||||
MemoryAccessTest::~MemoryAccessTest(void) {
|
||||
}
|
||||
|
||||
// Any 1-time setup involving member variables used in the rest of the test
|
||||
// should be done here.
|
||||
void MemoryAccessTest::SetUp(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
TestBase::SetUp();
|
||||
|
||||
err = rocrtst::SetDefaultAgents(this);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
err = rocrtst::SetPoolsTypical(this);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
return;
|
||||
}
|
||||
|
||||
void MemoryAccessTest::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
}
|
||||
|
||||
void MemoryAccessTest::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void MemoryAccessTest::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void MemoryAccessTest::Close() {
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
typedef struct __attribute__ ((aligned(16))) args_t {
|
||||
int *a;
|
||||
int *b;
|
||||
int *c;
|
||||
} args;
|
||||
|
||||
args *kernArgs = NULL;
|
||||
|
||||
static const char kSubTestSeparator[] = " **************************";
|
||||
|
||||
static void PrintMemorySubtestHeader(const char *header) {
|
||||
std::cout << " *** Memory Subtest: " << header << " ***" << std::endl;
|
||||
}
|
||||
|
||||
static const int kMemoryAllocSize = 1024;
|
||||
// Test to check GPU can read & write to system memory
|
||||
void MemoryAccessTest::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent,
|
||||
hsa_agent_t gpuAgent) {
|
||||
hsa_status_t err;
|
||||
|
||||
// Get Global Memory Pool on the gpuAgent to allocate gpu buffers
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(gpuAgent,
|
||||
rocrtst::GetGlobalMemoryPool,
|
||||
&gpu_pool);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
hsa_amd_memory_pool_access_t access;
|
||||
hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
||||
&access);
|
||||
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
||||
// hsa objects
|
||||
hsa_queue_t *queue = NULL; // command queue
|
||||
hsa_signal_t signal = {0}; // completion signal
|
||||
|
||||
|
||||
// get queue size
|
||||
uint32_t queue_size = 0;
|
||||
err = hsa_agent_get_info(gpuAgent,
|
||||
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// create queue
|
||||
err = hsa_queue_create(gpuAgent,
|
||||
queue_size, HSA_QUEUE_TYPE_MULTI,
|
||||
NULL, NULL, 0, 0, &queue);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Get System Memory Pool on the cpuAgent to allocate host side buffers
|
||||
hsa_amd_memory_pool_t global_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
|
||||
rocrtst::GetGlobalMemoryPool,
|
||||
&global_pool);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
|
||||
|
||||
// Find a memory pool that supports kernel arguments.
|
||||
hsa_amd_memory_pool_t kernarg_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
|
||||
rocrtst::GetKernArgMemoryPool,
|
||||
&kernarg_pool);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Allocate the host side buffers
|
||||
// (sys_data,dup_sys_data,cpuResult,kernArg) on system memory
|
||||
int *sys_data = NULL;
|
||||
int *dup_sys_data = NULL;
|
||||
int *cpuResult = NULL;
|
||||
int *gpuResult = NULL;
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(global_pool,
|
||||
kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&cpuResult));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(global_pool,
|
||||
kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&sys_data));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(global_pool,
|
||||
kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&dup_sys_data));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
|
||||
// Allocate the kernel argument buffer from the kernarg_pool.
|
||||
err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
|
||||
reinterpret_cast<void **>(&kernArgs));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// initialize the host buffers
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
unsigned int seed = time(NULL);
|
||||
sys_data[i] = 1 + rand_r(&seed) % 1;
|
||||
dup_sys_data[i] = sys_data[i];
|
||||
}
|
||||
|
||||
memset(cpuResult, 0, kMemoryAllocSize * sizeof(int));
|
||||
|
||||
// for the dGPU, we have coarse grained local memory,
|
||||
// so allocate memory for it on the GPU's GLOBAL segment .
|
||||
|
||||
// Get local memory of GPU to allocate device side buffers
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&gpuResult));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
|
||||
// Allow cpuAgent access to all allocated GPU memory.
|
||||
err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuResult);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
memset(gpuResult, 0, kMemoryAllocSize * sizeof(int));
|
||||
|
||||
// Allow gpuAgent access to all allocated system memory.
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, cpuResult);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, dup_sys_data);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgs);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
kernArgs->a = sys_data;
|
||||
kernArgs->b = cpuResult; // system memory passed to gpu for write
|
||||
kernArgs->c = gpuResult; // gpu memory to verify that gpu read system data
|
||||
|
||||
|
||||
// Create the executable, get symbol by name and load the code object
|
||||
set_kernel_file_name("gpuReadWrite_kernels.hsaco");
|
||||
set_kernel_name("gpuReadWrite");
|
||||
err = rocrtst::LoadKernelFromObjFile(this, &gpuAgent);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
|
||||
|
||||
// Fill the dispatch packet with
|
||||
// workgroup_size, grid_size, kernelArgs and completion signal
|
||||
// Put it on the queue and launch the kernel by ringing the doorbell
|
||||
|
||||
// create completion signal
|
||||
err = hsa_signal_create(1, 0, NULL, &signal);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// create aql packet
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
|
||||
// initialize aql packet
|
||||
aql.header =
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
aql.setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
aql.workgroup_size_x = 256;
|
||||
aql.workgroup_size_y = 1;
|
||||
aql.workgroup_size_z = 1;
|
||||
aql.grid_size_x = kMemoryAllocSize;
|
||||
aql.grid_size_y = 1;
|
||||
aql.grid_size_z = 1;
|
||||
aql.private_segment_size = 0;
|
||||
aql.group_segment_size = 0;
|
||||
aql.kernel_object = kernel_object(); // kernel_code;
|
||||
aql.kernarg_address = kernArgs;
|
||||
aql.completion_signal = signal;
|
||||
|
||||
// const uint32_t queue_size = queue->size;
|
||||
const uint32_t queue_mask = queue->size - 1;
|
||||
|
||||
// write to command queue
|
||||
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>
|
||||
(queue->base_address)[index & queue_mask] = aql;
|
||||
hsa_queue_store_write_index_relaxed(queue, index + 1);
|
||||
|
||||
// ringdoor bell
|
||||
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
||||
// wait for the signal and reset it for future use
|
||||
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
(uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { }
|
||||
hsa_signal_store_relaxed(signal, 1);
|
||||
|
||||
// compare device and host side results
|
||||
if (verbosity() > 0) {
|
||||
std::cout<< "check gpu has read the system memory"<< std::endl;
|
||||
}
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
ASSERT_EQ(gpuResult[i], dup_sys_data[i]);
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout<< "gpu has read the system memory successfully"<< std::endl;
|
||||
std::cout<< "check gpu has written to system memory"<< std::endl;
|
||||
}
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
ASSERT_EQ(cpuResult[i], i);
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout<< "gpu has written to system memory successfully"<< std::endl;
|
||||
}
|
||||
|
||||
if (sys_data) { hsa_memory_free(sys_data); }
|
||||
if (dup_sys_data) { hsa_memory_free(dup_sys_data); }
|
||||
if (cpuResult) {hsa_memory_free(cpuResult); }
|
||||
if (gpuResult) {hsa_memory_free(gpuResult); }
|
||||
if (kernArgs) { hsa_memory_free(kernArgs); }
|
||||
if (signal.handle) { hsa_signal_destroy(signal); }
|
||||
if (queue) { hsa_queue_destroy(queue); }
|
||||
} else {
|
||||
if (verbosity() > 0) {
|
||||
std::cout<< "Test not applicable as system is not large bar."
|
||||
"Skipping."<< std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Test to check cpu can read & write to GPU memory
|
||||
void MemoryAccessTest::CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent,
|
||||
hsa_agent_t gpuAgent,
|
||||
hsa_amd_memory_pool_t pool) {
|
||||
hsa_status_t err;
|
||||
|
||||
rocrtst::pool_info_t pool_i;
|
||||
err = rocrtst::AcquirePoolInfo(pool, &pool_i);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
if (pool_i.segment == HSA_AMD_SEGMENT_GLOBAL &&
|
||||
pool_i.global_flag == HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
hsa_amd_memory_pool_access_t access;
|
||||
hsa_amd_agent_memory_pool_get_info(cpuAgent, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
||||
&access);
|
||||
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
||||
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 ||
|
||||
pool_i.alloc_alignment == 0) {
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Test not applicable. Skipping." << std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
auto gran_sz = pool_i.alloc_granule;
|
||||
auto pool_sz = pool_i.size / gran_sz;
|
||||
auto max_alloc_size = pool_sz/2;
|
||||
unsigned int max_element = max_alloc_size/sizeof(unsigned int);
|
||||
unsigned int *gpu_data;
|
||||
unsigned int *sys_data;
|
||||
sys_data = (unsigned int*)malloc(max_alloc_size);
|
||||
memset(sys_data, 0, max_alloc_size);
|
||||
for (unsigned int i = 1; i <= max_element; ++i) {
|
||||
sys_data[i] = i;
|
||||
}
|
||||
// err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data);
|
||||
// EXPECT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
err = hsa_amd_memory_pool_allocate(pool, max_alloc_size, 0,
|
||||
reinterpret_cast<void**>(&gpu_data));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
/*
|
||||
if (err == HSA_STATUS_ERROR) {
|
||||
err = hsa_amd_memory_pool_free(gpu_data);
|
||||
}*/
|
||||
|
||||
err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpu_data);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
// EXPECT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
memset(gpu_data, 0, max_alloc_size);
|
||||
|
||||
// Verify CPU can read & write to GPU memory
|
||||
std::cout<< "Verify CPU can read & write to GPU memory"<< std::endl;
|
||||
for (unsigned int i = 1; i <= max_element; ++i) {
|
||||
gpu_data[i] = i; // Write to gpu memory directly
|
||||
}
|
||||
|
||||
for (unsigned int i = 1; i <= max_element; ++i) {
|
||||
if (sys_data[i] != gpu_data[i]) { // Reading GPU memory
|
||||
fprintf(stdout, "Values not mathing !! sys_data[%d]:%d ,"
|
||||
"gpu_data[%d]\n", sys_data[i], i, gpu_data[i]);
|
||||
}
|
||||
}
|
||||
std::cout<< "CPU have read & write to GPU memory successfully"<< std::endl;
|
||||
err = hsa_amd_memory_pool_free(gpu_data);
|
||||
free(sys_data);
|
||||
} else {
|
||||
if (verbosity() > 0) {
|
||||
std::cout<< "Test not applicable as system is not large bar."
|
||||
"Skipping."<< std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MemoryAccessTest::CPUAccessToGPUMemoryTest(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
PrintMemorySubtestHeader("CPUAccessToGPUMemoryTest in Memory Pools");
|
||||
// find all cpu agents
|
||||
std::vector<hsa_agent_t> cpus;
|
||||
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
// find all gpu agents
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(gpus[i],
|
||||
rocrtst::GetGlobalMemoryPool,
|
||||
&gpu_pool);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
CPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
|
||||
}
|
||||
if (verbosity() > 0) {
|
||||
std::cout << "subtest Passed" << std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryAccessTest::GPUAccessToCPUMemoryTest(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
PrintMemorySubtestHeader("GPUAccessToCPUMemoryTest in Memory Pools");
|
||||
// find all cpu agents
|
||||
std::vector<hsa_agent_t> cpus;
|
||||
err = hsa_iterate_agents(rocrtst::IterateCPUAgents, &cpus);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// find all gpu agents
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
|
||||
GPUAccessToCPUMemoryTest(cpus[0], gpus[i]);
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout << "subtest Passed" << std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
#undef RET_IF_HSA_ERR
|
||||
Executable
+90
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_ACCESS_H_
|
||||
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_ACCESS_H_
|
||||
|
||||
|
||||
#include "common/base_rocr.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "suites/test_common/test_base.h"
|
||||
|
||||
class MemoryAccessTest : public TestBase {
|
||||
public:
|
||||
MemoryAccessTest();
|
||||
|
||||
// @Brief: Destructor for test case of MemoryTest
|
||||
virtual ~MemoryAccessTest();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual void SetUp();
|
||||
|
||||
// @Brief: Core measurement execution
|
||||
virtual void Run();
|
||||
|
||||
// @Brief: Clean up and retrive the resource
|
||||
virtual void Close();
|
||||
|
||||
// @Brief: Display results
|
||||
virtual void DisplayResults() const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
|
||||
// @Brief: This test verify that CPU is able to Read & write GPU memory
|
||||
void CPUAccessToGPUMemoryTest(void);
|
||||
|
||||
// @Brief: This test verify that GPU is able to Read & write CPU memory
|
||||
void GPUAccessToCPUMemoryTest(void);
|
||||
|
||||
|
||||
private:
|
||||
void CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent,
|
||||
hsa_agent_t gpuAgent,
|
||||
hsa_amd_memory_pool_t pool);
|
||||
void GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
|
||||
};
|
||||
|
||||
#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_ACCESS_H_
|
||||
Executable
+350
@@ -0,0 +1,350 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <fcntl.h>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#include "suites/performance/enqueueLatency.h"
|
||||
#include "common/base_rocr_utils.h"
|
||||
#include "common/common.h"
|
||||
#include "common/os.h"
|
||||
#include "common/helper_funcs.h"
|
||||
#include "common/hsatimer.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_finalize.h"
|
||||
|
||||
#define RET_IF_HSA_ERR(err) { \
|
||||
if ((err) != HSA_STATUS_SUCCESS) { \
|
||||
const char* msg = 0; \
|
||||
hsa_status_string(err, &msg); \
|
||||
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
|
||||
__FILE__ << ". Call returned " << err << std::endl; \
|
||||
std::cout << msg << std::endl; \
|
||||
return (err); \
|
||||
} \
|
||||
}
|
||||
|
||||
EnqueueLatency::
|
||||
EnqueueLatency(bool enqueueSinglePacket) : TestBase(),
|
||||
enqueue_single_(enqueueSinglePacket) {
|
||||
queue_size_ = 0;
|
||||
num_of_pkts_ = 100000;
|
||||
memset(&aql(), 0, sizeof(hsa_kernel_dispatch_packet_t));
|
||||
enqueue_time_mean_ = 0.0;
|
||||
set_num_iteration(100);
|
||||
|
||||
std::string name;
|
||||
std::string desc;
|
||||
|
||||
name = "Average Enqueue Time";
|
||||
desc = "This test measures the time when the packet enqueue to the"
|
||||
" queue and before the door bell is ring to notify the command processor "
|
||||
"to execute the packet";
|
||||
|
||||
|
||||
|
||||
if (enqueueSinglePacket) {
|
||||
name += ", Single Packet";
|
||||
desc += " One Packet at a time in queue.";
|
||||
} else {
|
||||
name += ", Multiple Packets";
|
||||
desc += " Multiple i.e. maximum Packets equeued to queue at one time";
|
||||
}
|
||||
|
||||
set_title(name);
|
||||
set_description(desc);
|
||||
}
|
||||
|
||||
EnqueueLatency::~EnqueueLatency() {
|
||||
}
|
||||
|
||||
void EnqueueLatency::SetUp() {
|
||||
hsa_status_t err;
|
||||
TestBase::SetUp();
|
||||
// If it indicates to use default signal, set env var properly
|
||||
|
||||
err = SetDefaultAgents(this);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
}
|
||||
|
||||
void EnqueueLatency::Run() {
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
hsa_status_t err;
|
||||
TestBase::Run();
|
||||
|
||||
// find all gpu agents
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
err = hsa_iterate_agents(rocrtst::IterateGPUAgents, &gpus);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
|
||||
hsa_agent_t* gpu_dev = &gpus[i];
|
||||
char agent_name[64];
|
||||
err = hsa_agent_get_info(*gpu_dev, HSA_AGENT_INFO_NAME, agent_name);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
set_agent_name(agent_name);
|
||||
|
||||
// Create a queue
|
||||
hsa_queue_t* q = nullptr;
|
||||
rocrtst::CreateQueue(*gpu_dev, &q);
|
||||
ASSERT_NE(q, nullptr);
|
||||
set_main_queue(q);
|
||||
|
||||
set_kernel_file_name("dispatch_time_kernels.hsaco");
|
||||
set_kernel_name("empty_kernel");
|
||||
err = rocrtst::LoadKernelFromObjFile(this, gpu_dev);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Fill up the kernel packet except header
|
||||
err = rocrtst::InitializeAQLPacket(this, &aql());
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
aql().workgroup_size_x = 1;
|
||||
aql().grid_size_x = 1;
|
||||
|
||||
// Here, modify the batch size if it is larger than the queue size
|
||||
if (enqueue_single_) {
|
||||
EnqueueSinglePacket();
|
||||
} else {
|
||||
hsa_status_t err;
|
||||
uint32_t size = 0;
|
||||
err = hsa_agent_get_info(*gpu_dev, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &size);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
num_of_pkts_ = num_of_pkts_ > size ? size : num_of_pkts_;
|
||||
EnqueueMultiPackets();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t EnqueueLatency::RealIterationNum() {
|
||||
return num_iteration() * 1.2 + 1;
|
||||
}
|
||||
|
||||
void EnqueueLatency::EnqueueSinglePacket() {
|
||||
std::vector<double> timer;
|
||||
|
||||
int it = RealIterationNum();
|
||||
const uint32_t queue_mask = main_queue()->size - 1;
|
||||
|
||||
// queue should be empty
|
||||
ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
|
||||
hsa_queue_load_write_index_scacquire(main_queue()));
|
||||
|
||||
void *q_base_addr = main_queue()->base_address;
|
||||
rocrtst::PerfTimer p_timer;
|
||||
for (int i = 0; i < it; i++) {
|
||||
// Get timing stamp and ring the doorbell to dispatch the kernel.
|
||||
int id = p_timer.CreateTimer();
|
||||
p_timer.StartTimer(id);
|
||||
// Obtain the current queue write index.
|
||||
uint64_t index = hsa_queue_add_write_index_relaxed(main_queue(), 1);
|
||||
|
||||
ASSERT_LT(index, main_queue()->size + index);
|
||||
|
||||
// Write the aql packet at the calculated queue index address.
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t *>(
|
||||
q_base_addr)[index & queue_mask] = aql();
|
||||
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t *>(
|
||||
q_base_addr)[index & queue_mask].header |=
|
||||
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
|
||||
|
||||
p_timer.StopTimer(id);
|
||||
|
||||
timer.push_back(p_timer.ReadTimer(id));
|
||||
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
while (hsa_signal_wait_scacquire(aql().completion_signal,
|
||||
HSA_SIGNAL_CONDITION_LT, 1, (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE)) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
hsa_signal_store_screlease(aql().completion_signal, 1);
|
||||
|
||||
if (verbosity() >= VERBOSE_PROGRESS) {
|
||||
std::cout << ".";
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity() >= VERBOSE_PROGRESS) {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Abandon the first result and after sort, delete the last 2% value
|
||||
timer.erase(timer.begin());
|
||||
std::sort(timer.begin(), timer.end());
|
||||
|
||||
timer.erase(timer.begin() + num_iteration(), timer.end());
|
||||
|
||||
enqueue_time_mean_ = rocrtst::CalcMean(timer);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void EnqueueLatency::EnqueueMultiPackets() {
|
||||
std::vector<double> timer;
|
||||
int it = RealIterationNum();
|
||||
const uint32_t queue_mask = main_queue()->size - 1;
|
||||
|
||||
// queue should be empty
|
||||
ASSERT_EQ(hsa_queue_load_read_index_scacquire(main_queue()),
|
||||
hsa_queue_load_write_index_scacquire(main_queue()));
|
||||
|
||||
rocrtst::PerfTimer p_timer;
|
||||
|
||||
for (int i = 0; i < it; i++) {
|
||||
// Get timing stamp and ring the doorbell to dispatch the kernel.
|
||||
int id = p_timer.CreateTimer();
|
||||
p_timer.StartTimer(id);
|
||||
uint64_t* index =
|
||||
reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * num_of_pkts_));
|
||||
|
||||
hsa_signal_store_screlease(aql().completion_signal, num_of_pkts_);
|
||||
|
||||
for (uint32_t j = 0; j < num_of_pkts_; j++) {
|
||||
// index[j] = hsa_queue_add_write_index_scacq_screl(main_queue(), 1);
|
||||
index[j] = hsa_queue_add_write_index_relaxed(main_queue(), 1);
|
||||
|
||||
// Write the aql packet at the calculated queue index address.
|
||||
(reinterpret_cast<hsa_kernel_dispatch_packet_t*>((
|
||||
main_queue()->base_address)))[index[j] & queue_mask] = aql();
|
||||
|
||||
if (j == num_of_pkts_ - 1) {
|
||||
(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
|
||||
main_queue()->base_address))[index[j] & queue_mask].header |=
|
||||
1 << HSA_PACKET_HEADER_BARRIER;
|
||||
}
|
||||
}
|
||||
|
||||
// Set packet header reversly; set all headers except the very first
|
||||
// one, for now.
|
||||
for (uint32_t j = num_of_pkts_ - 1; j > 0; j--) {
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
|
||||
(main_queue()->base_address))[index[j] & queue_mask].header |=
|
||||
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
|
||||
}
|
||||
|
||||
|
||||
// Set the very first header...
|
||||
(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
|
||||
main_queue()->base_address))[index[0] & queue_mask].header |=
|
||||
HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
|
||||
|
||||
p_timer.StopTimer(id);
|
||||
|
||||
timer.push_back(p_timer.ReadTimer(id));
|
||||
|
||||
for (uint32_t j = 0; j < num_of_pkts_; j++) {
|
||||
hsa_signal_store_screlease(main_queue()->doorbell_signal, index[j]);
|
||||
}
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
while (hsa_signal_wait_scacquire(aql().completion_signal,
|
||||
HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) {
|
||||
}
|
||||
|
||||
|
||||
hsa_signal_store_screlease(aql().completion_signal, 1);
|
||||
|
||||
free(index);
|
||||
|
||||
if (verbosity() >= VERBOSE_PROGRESS) {
|
||||
std::cout << ".";
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
// Abandon the first result and after sort, delete the last 2% value
|
||||
timer.erase(timer.begin());
|
||||
std::sort(timer.begin(), timer.end());
|
||||
|
||||
timer.erase(timer.begin() + num_iteration(), timer.end());
|
||||
|
||||
enqueue_time_mean_ = rocrtst::CalcMean(timer);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void EnqueueLatency::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void EnqueueLatency::DisplayResults(void) const {
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::DisplayResults();
|
||||
|
||||
std::cout << "Average Time to Completion: ";
|
||||
if (enqueue_single_) {
|
||||
std::cout << enqueue_time_mean_ * 1e6;
|
||||
} else {
|
||||
std::cout << enqueue_time_mean_ * 1e6 / num_of_pkts_;
|
||||
}
|
||||
|
||||
std::cout << " uS" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
void EnqueueLatency::Close() {
|
||||
TestBase::Close();
|
||||
return;
|
||||
}
|
||||
Executable
+109
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ROCRTST_SUITES_PERFORMANCE_ENQUEUELATENCY_H_
|
||||
#define ROCRTST_SUITES_PERFORMANCE_ENQUEUELATENCY_H_
|
||||
#include <vector>
|
||||
|
||||
#include "suites/test_common/test_base.h"
|
||||
#include "common/base_rocr.h"
|
||||
#include "common/common.h"
|
||||
#include "hsa/hsa.h"
|
||||
|
||||
// @Brief: This class is defined to measure the mean latency of enqueuing
|
||||
// the packets to an empty kernel
|
||||
|
||||
class EnqueueLatency : public TestBase {
|
||||
public:
|
||||
// @Brief: Constructor
|
||||
explicit EnqueueLatency(bool launchSingleKernel);
|
||||
|
||||
// @Brief: Destructor
|
||||
virtual ~EnqueueLatency(void);
|
||||
|
||||
// @Brief: Set up the environment for the test
|
||||
virtual void SetUp(void);
|
||||
|
||||
// @Brief: Run the test case
|
||||
virtual void Run(void);
|
||||
|
||||
// @Brief: Display results we got
|
||||
virtual void DisplayResults(void) const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
// @Brief: Clean up and close the runtime
|
||||
virtual void Close(void);
|
||||
|
||||
// @Brief: Create the executable, get symbol by name and load the code object
|
||||
// virtual void LoadCodeObject(hsa_agent_t gpuAgent,uint64_t &kernel_code);
|
||||
|
||||
private:
|
||||
// @Brief: Get actual iteration number
|
||||
virtual size_t RealIterationNum(void);
|
||||
|
||||
// @Brief: Launch single packet each time
|
||||
virtual void EnqueueSinglePacket(void);
|
||||
|
||||
// @Brief: Launch multiple packets each time
|
||||
virtual void EnqueueMultiPackets(void);
|
||||
|
||||
|
||||
// @Brief: Indicate if we enqueued single pkt or not
|
||||
bool enqueue_single_;
|
||||
|
||||
// @Brief: Store the size of queue
|
||||
uint32_t queue_size_;
|
||||
|
||||
// @Brief: Number of packets in a batch
|
||||
uint32_t num_of_pkts_;
|
||||
|
||||
// @Brief: Ave. dispatch time
|
||||
double enqueue_time_mean_;
|
||||
};
|
||||
|
||||
#endif // ROCRTST_SUITES_PERFORMANCE_ENQUEUELATENCY_H_
|
||||
|
||||
@@ -304,11 +304,16 @@ build_sample_for_devices("test_case_template")
|
||||
#set(CL_FILE_LIST "${KERNELS_DIR}/p2p_mem_access_kernels.cl")
|
||||
#build_sample_for_devices("p2p_mem_access")
|
||||
|
||||
# Dispatch Time
|
||||
# Dispatch Time
|
||||
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
|
||||
set(CL_FILE_LIST "${KERNELS_DIR}/dispatch_time_kernels.cl")
|
||||
build_sample_for_devices("dispatch_time")
|
||||
|
||||
# gpuReadWrite
|
||||
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
|
||||
set(CL_FILE_LIST "${KERNELS_DIR}/gpuReadWrite_kernels.cl")
|
||||
build_sample_for_devices("gpuReadWrite")
|
||||
|
||||
# Build rules
|
||||
add_executable(${ROCRTST} ${performanceSources} ${functionalSources}
|
||||
${common_srcs} ${testCommonSources})
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void gpuReadWrite(__global const int * a,
|
||||
__global int * b, __global int * c) {
|
||||
int i = get_global_id(0);
|
||||
// Reading the system memory and writing to gpu memory
|
||||
c[i] = a[i]; // a[i] point to system memory while c[i] to gpu memory.
|
||||
//writing to system memory
|
||||
b[i] = i;
|
||||
}
|
||||
@@ -49,10 +49,12 @@
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "suites/functional/memory_basic.h"
|
||||
#include "suites/functional/memory_access.h"
|
||||
#include "suites/functional/ipc.h"
|
||||
#include "suites/performance/dispatch_time.h"
|
||||
#include "suites/performance/memory_async_copy.h"
|
||||
#include "suites/performance/memory_async_copy_numa.h"
|
||||
#include "suites/performance/enqueueLatency.h"
|
||||
#include "suites/test_common/test_case_template.h"
|
||||
#include "suites/test_common/main.h"
|
||||
#include "suites/test_common/test_common.h"
|
||||
@@ -136,6 +138,13 @@ TEST(rocrtstFunc, IPC) {
|
||||
RunGenericTest(&ipc);
|
||||
}
|
||||
|
||||
TEST(rocrtstFunc, MemoryAccessTests) {
|
||||
MemoryAccessTest mt;
|
||||
RunCustomTestProlog(&mt);
|
||||
mt.CPUAccessToGPUMemoryTest();
|
||||
mt.GPUAccessToCPUMemoryTest();
|
||||
RunCustomTestEpilog(&mt);
|
||||
}
|
||||
// Temporarily disable this test until hsa_shut_down() is (probably not the
|
||||
// same as with the IPC test above) is addressed. To override the disable,
|
||||
// run with --gtest-also_run_disabled_tests flag.
|
||||
@@ -147,6 +156,13 @@ TEST(rocrtstFunc, DISABLED_Memory_Max_Mem) {
|
||||
RunCustomTestEpilog(&mt);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, ENQUEUE_LATENCY) {
|
||||
EnqueueLatency singlePacketequeue(true);
|
||||
EnqueueLatency multiPacketequeue(false);
|
||||
RunGenericTest(&singlePacketequeue);
|
||||
RunGenericTest(&multiPacketequeue);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, Memory_Async_Copy) {
|
||||
MemoryAsyncCopy mac;
|
||||
// To do full test, uncomment this:
|
||||
|
||||
مرجع در شماره جدید
Block a user