Added max. single mem. allocation test.
Change-Id: Ie81c6af0502fde56225b1e197801cf04b474feb2
[ROCm/ROCR-Runtime commit: cf24f7bb78]
This commit is contained in:
@@ -8,5 +8,4 @@
|
||||
*.bk
|
||||
*.old
|
||||
*.cmake
|
||||
build/*
|
||||
|
||||
build
|
||||
|
||||
@@ -49,6 +49,8 @@
|
||||
#include <assert.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace rocrtst {
|
||||
|
||||
|
||||
@@ -169,20 +171,52 @@ hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
POOL_PROP_OFF, POOL_PROP_DONT_CARE);
|
||||
}
|
||||
|
||||
static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool,
|
||||
std::string* out_str) {
|
||||
// Populate the vector with handles to all agents and pools
|
||||
hsa_status_t
|
||||
GetAgentPools(std::vector<std::shared_ptr<agent_pools_t>> *agent_pools) {
|
||||
hsa_status_t err;
|
||||
|
||||
uint32_t global_flag = 0;
|
||||
assert(agent_pools != nullptr);
|
||||
|
||||
auto save_agent = [](hsa_agent_t a, void *data)->hsa_status_t {
|
||||
std::vector<std::shared_ptr<agent_pools_t>> *ag_vec;
|
||||
hsa_status_t err;
|
||||
assert(data != nullptr);
|
||||
ag_vec =
|
||||
reinterpret_cast<std::vector<std::shared_ptr<agent_pools_t>> *>(data);
|
||||
std::shared_ptr<agent_pools_t> ag(new agent_pools_t);
|
||||
ag->agent = a;
|
||||
|
||||
|
||||
auto save_pool = [](hsa_amd_memory_pool_t p, void *data)->hsa_status_t {
|
||||
assert(data != nullptr);
|
||||
std::vector<hsa_amd_memory_pool_t> *p_list =
|
||||
reinterpret_cast<std::vector<hsa_amd_memory_pool_t> *>(data);
|
||||
p_list->push_back(p);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(a, save_pool,
|
||||
reinterpret_cast<void *>(&ag->pools));
|
||||
assert(err == HSA_STATUS_SUCCESS);
|
||||
|
||||
ag_vec->push_back(ag);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
err = hsa_iterate_agents(save_agent, reinterpret_cast<void *>(agent_pools));
|
||||
return err;
|
||||
}
|
||||
|
||||
static hsa_status_t MakeGlobalFlagsString(const pool_info_t *pool_i,
|
||||
std::string* out_str) {
|
||||
uint32_t global_flag = pool_i->global_flag;
|
||||
|
||||
assert(out_str != nullptr);
|
||||
|
||||
*out_str = "";
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
std::vector < std::string > flags;
|
||||
|
||||
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) {
|
||||
@@ -207,22 +241,17 @@ static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool,
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool,
|
||||
static hsa_status_t DumpSegment(const pool_info_t *pool_i,
|
||||
std::string const *ind_lvl) {
|
||||
uint32_t segment;
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&segment);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:");
|
||||
std::string seg_str = "";
|
||||
std::string tmp_str;
|
||||
|
||||
switch (segment) {
|
||||
switch (pool_i->segment) {
|
||||
case HSA_AMD_SEGMENT_GLOBAL:
|
||||
err = MakeGlobalFlagsString(pool, &tmp_str);
|
||||
err = MakeGlobalFlagsString(pool_i, &tmp_str);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
seg_str += "GLOBAL; FLAGS: " + tmp_str;
|
||||
@@ -250,57 +279,71 @@ static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool,
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
|
||||
uint32_t indent) {
|
||||
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
|
||||
pool_info_t *pool_i) {
|
||||
hsa_status_t err;
|
||||
std::string ind_lvl(indent, ' ');
|
||||
|
||||
DumpSegment(pool, &ind_lvl);
|
||||
|
||||
// Get the size of the POOL
|
||||
size_t pool_size = 0;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
&pool_size);
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
std::string sz_str = std::to_string(pool_size / 1024) + "KB";
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&pool_i->segment);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
// Get the size of the POOL
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
&pool_i->pool_size);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
|
||||
&pool_i->alloc_allowed);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
||||
&pool_i->alloc_granule);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
|
||||
&pool_i->pool_alloc_alignment);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
|
||||
&pool_i->pl_access);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
|
||||
uint32_t indent) {
|
||||
std::string ind_lvl(indent, ' ');
|
||||
|
||||
DumpSegment(pool_i, &ind_lvl);
|
||||
|
||||
std::string sz_str = std::to_string(pool_i->pool_size / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:",
|
||||
sz_str.c_str());
|
||||
|
||||
bool alloc_allowed = false;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:",
|
||||
(alloc_allowed ? "TRUE" : "FALSE"));
|
||||
(pool_i->alloc_allowed ? "TRUE" : "FALSE"));
|
||||
|
||||
size_t alloc_granule = 0;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &alloc_granule);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
std::string gr_str = std::to_string(alloc_granule / 1024) + "KB";
|
||||
std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
|
||||
gr_str.c_str());
|
||||
|
||||
size_t pool_alloc_alignment = 0;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
|
||||
&pool_alloc_alignment);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
std::string al_str = std::to_string(pool_alloc_alignment / 1024) + "KB";
|
||||
std::string al_str =
|
||||
std::to_string(pool_i->pool_alloc_alignment / 1024) + "KB";
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
|
||||
al_str.c_str());
|
||||
|
||||
bool pl_access = 0;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pl_access);
|
||||
RET_IF_HSA_COMMON_ERR(err);
|
||||
|
||||
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:",
|
||||
(pl_access ? "TRUE" : "FALSE"));
|
||||
(pool_i->pl_access ? "TRUE" : "FALSE"));
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -55,6 +55,8 @@
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include "hsa/hsa_ext_finalize.h"
|
||||
@@ -75,6 +77,31 @@ namespace rocrtst {
|
||||
// refers to it has been corrected
|
||||
// #define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
|
||||
// This structure holds memory pool information acquired through hsa info
|
||||
// related calls, and is later used for reference when displaying the
|
||||
// information.
|
||||
typedef struct {
|
||||
uint32_t segment;
|
||||
size_t pool_size;
|
||||
bool alloc_allowed;
|
||||
size_t alloc_granule;
|
||||
size_t pool_alloc_alignment;
|
||||
bool pl_access;
|
||||
uint32_t global_flag;
|
||||
} pool_info_t;
|
||||
|
||||
|
||||
struct agent_pools_t{
|
||||
hsa_agent_t agent;
|
||||
std::vector<hsa_amd_memory_pool_t> pools;
|
||||
};
|
||||
|
||||
/// Fill in the pool_info_t structure for the provided pool.
|
||||
/// \param[in] pool Pool for which information will be retrieved
|
||||
/// \param[out] pool_i Pointer to structure where pool info will be stored
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
|
||||
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i);
|
||||
|
||||
/// If the provided agent is associated with a GPU, return that agent through
|
||||
/// output parameter. This function is meant to be the call-back function used
|
||||
/// with hsa_iterate_agents to find GPU agents.
|
||||
@@ -132,7 +159,7 @@ hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
/// \param[in] pool Pool to gather and dump information for
|
||||
/// \param[in] indent Number of spaces to indent output.
|
||||
/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors
|
||||
hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
|
||||
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
|
||||
uint32_t indent = 0);
|
||||
|
||||
/// Dump information about a provided pointer to STDOUT.
|
||||
@@ -140,5 +167,8 @@ hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
|
||||
/// \returns HSA_STATUS_SUCCESS if there are no errors
|
||||
hsa_status_t DumpPointerInfo(void* ptr);
|
||||
|
||||
hsa_status_t GetAgentPools(
|
||||
std::vector<std::shared_ptr<agent_pools_t>> *agent_pools);
|
||||
|
||||
} // namespace rocrtst
|
||||
#endif // ROCRTST_COMMON_COMMON_H_
|
||||
|
||||
@@ -0,0 +1,262 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "suites/functional/memory_basic.h"
|
||||
#include "common/base_rocr_utils.h"
|
||||
#include "common/common.h"
|
||||
#include "common/helper_funcs.h"
|
||||
#include "common/hsatimer.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_finalize.h"
|
||||
|
||||
static const uint32_t kNumBufferElements = 256;
|
||||
|
||||
#define RET_IF_HSA_ERR(err) { \
|
||||
if ((err) != HSA_STATUS_SUCCESS) { \
|
||||
const char* msg = 0; \
|
||||
hsa_status_string(err, &msg); \
|
||||
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
|
||||
__FILE__ << ". Call returned " << err << std::endl; \
|
||||
std::cout << msg << std::endl; \
|
||||
return (err); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
MemoryTest::MemoryTest(void) :
|
||||
TestBase() {
|
||||
set_num_iteration(10); // Number of iterations to execute of the main test;
|
||||
// This is a default value which can be overridden
|
||||
// on the command line.
|
||||
set_title("RocR Memory Tests");
|
||||
set_description("This series of tests check memory allocation limits, extent"
|
||||
" of GPU access to system memory and other memory related functionality.");
|
||||
}
|
||||
|
||||
MemoryTest::~MemoryTest(void) {
|
||||
}
|
||||
|
||||
// Any 1-time setup involving member variables used in the rest of the test
|
||||
// should be done here.
|
||||
void MemoryTest::SetUp(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
TestBase::SetUp();
|
||||
|
||||
err = rocrtst::SetDefaultAgents(this);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
err = rocrtst::SetPoolsTypical(this);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void MemoryTest::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
}
|
||||
|
||||
void MemoryTest::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void MemoryTest::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void MemoryTest::Close() {
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
hsa_status_t MemoryTest::TestAllocate(hsa_amd_memory_pool_t pool, size_t sz) {
|
||||
void *ptr;
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(pool, sz, 0, &ptr);
|
||||
|
||||
if (err == HSA_STATUS_SUCCESS) {
|
||||
err = hsa_memory_free(ptr);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char kSubTestSeparator[] = " **************************";
|
||||
|
||||
static void PrintMemorySubtestHeader(const char *header) {
|
||||
std::cout << " *** Memory Subtest: " << header << " ***" << std::endl;
|
||||
}
|
||||
|
||||
// Test Fixtures
|
||||
void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
|
||||
hsa_amd_memory_pool_t pool) {
|
||||
hsa_status_t err;
|
||||
|
||||
rocrtst::pool_info_t pool_i;
|
||||
char ag_name[64];
|
||||
hsa_device_type_t ag_type;
|
||||
|
||||
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Agent: " << ag_name << " (";
|
||||
switch (ag_type) {
|
||||
case HSA_DEVICE_TYPE_CPU:
|
||||
std::cout << "CPU)";
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_GPU:
|
||||
std::cout << "GPU)";
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_DSP:
|
||||
std::cout << "DSP)";
|
||||
break;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
err = rocrtst::AcquirePoolInfo(pool, &pool_i);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
if (verbosity() > 0) {
|
||||
rocrtst::DumpMemoryPoolInfo(&pool_i, 2);
|
||||
}
|
||||
|
||||
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 ||
|
||||
pool_i.pool_alloc_alignment == 0) {
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Test not applicable. Skipping." << std::endl;
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Do everything in "granule" units
|
||||
auto gran_sz = pool_i.alloc_granule;
|
||||
auto pool_sz = pool_i.pool_size / gran_sz;
|
||||
|
||||
// Neg. test: Try to allocate more than the pool size
|
||||
err = TestAllocate(pool, pool_sz*gran_sz + gran_sz);
|
||||
EXPECT_EQ(HSA_STATUS_ERROR_INVALID_ALLOCATION, err);
|
||||
|
||||
auto max_alloc_size = pool_sz/2;
|
||||
uint64_t upper_bound = pool_sz;
|
||||
uint64_t lower_bound = 0;
|
||||
|
||||
while (true) {
|
||||
err = TestAllocate(pool, max_alloc_size * gran_sz);
|
||||
ASSERT_TRUE(err == HSA_STATUS_SUCCESS ||
|
||||
err == HSA_STATUS_ERROR_OUT_OF_RESOURCES);
|
||||
if (err == HSA_STATUS_SUCCESS) {
|
||||
lower_bound = max_alloc_size;
|
||||
max_alloc_size += (upper_bound - lower_bound)/2;
|
||||
} else if (err == HSA_STATUS_ERROR_OUT_OF_RESOURCES) {
|
||||
upper_bound = max_alloc_size;
|
||||
max_alloc_size -= (upper_bound - lower_bound)/2;
|
||||
}
|
||||
|
||||
if ((upper_bound - lower_bound) < 2) {
|
||||
break;
|
||||
}
|
||||
ASSERT_GT(upper_bound, lower_bound);
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::cout << " Biggest single allocation size for this pool is " <<
|
||||
(max_alloc_size * gran_sz)/1024 << "KB." << std::endl;
|
||||
std::cout << " This is " <<
|
||||
static_cast<float>(max_alloc_size)/pool_sz*100 <<
|
||||
"% of the total." << std::endl;
|
||||
}
|
||||
|
||||
if (ag_type == HSA_DEVICE_TYPE_GPU) {
|
||||
EXPECT_GE((float)max_alloc_size/pool_sz, (float)15/16);
|
||||
}
|
||||
if (verbosity() > 0) {
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryTest::MaxSingleAllocationTest(void) {
|
||||
hsa_status_t err;
|
||||
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
|
||||
|
||||
PrintMemorySubtestHeader("Maximum Single Allocation in Memory Pools");
|
||||
|
||||
err = rocrtst::GetAgentPools(&agent_pools);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
auto pool_idx = 0;
|
||||
for (auto a : agent_pools) {
|
||||
for (auto p : a->pools) {
|
||||
std::cout << " Pool " << pool_idx++ << ":" << std::endl;
|
||||
MaxSingleAllocationTest(a->agent, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef RET_IF_HSA_ERR
|
||||
+12
-12
@@ -42,20 +42,20 @@
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
|
||||
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
|
||||
|
||||
#ifndef ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_
|
||||
#define ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_
|
||||
|
||||
#include "common/base_rocr.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "suites/test_common/test_base.h"
|
||||
|
||||
class TestExample : public TestBase {
|
||||
class MemoryTest : public TestBase {
|
||||
public:
|
||||
TestExample();
|
||||
MemoryTest();
|
||||
|
||||
// @Brief: Destructor for test case of TestExample
|
||||
virtual ~TestExample();
|
||||
// @Brief: Destructor for test case of MemoryTest
|
||||
virtual ~MemoryTest();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual void SetUp();
|
||||
@@ -72,12 +72,12 @@ class TestExample : public TestBase {
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
private:
|
||||
uint32_t RealIterationNum(void);
|
||||
void MaxSingleAllocationTest(void);
|
||||
|
||||
double time_mean_;
|
||||
void *src_buffer_;
|
||||
void *dst_buffer_;
|
||||
hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz);
|
||||
|
||||
private:
|
||||
void MaxSingleAllocationTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool);
|
||||
};
|
||||
|
||||
#endif // ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_
|
||||
#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
|
||||
@@ -1,295 +0,0 @@
|
||||
#
|
||||
# Minimum version of cmake required
|
||||
#
|
||||
cmake_minimum_required(VERSION 2.8.0)
|
||||
|
||||
#
|
||||
# GCC 4.8 or higher compiler required.
|
||||
#
|
||||
# Setup build environment
|
||||
#
|
||||
# 1) Set env. variable specifying the location of ROCR header files
|
||||
#
|
||||
# export ROCR_DIR="Root for RocR install"
|
||||
#
|
||||
# 2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release".
|
||||
# If not set, the default value is "Debug" is bound.
|
||||
#
|
||||
# export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
|
||||
#
|
||||
# 3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64"
|
||||
# If not set, the default value of "64" is bound.
|
||||
#
|
||||
# export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
|
||||
#
|
||||
# 4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803,
|
||||
# gfx900, ...)
|
||||
#
|
||||
# Building rocrtst Suite
|
||||
#
|
||||
# 1) Create build folder e.g. "rocrtst/build" - any name will do
|
||||
# 2) Cd into build folder
|
||||
# 3) Run "cmake .."
|
||||
# 4) Run "make"
|
||||
#
|
||||
|
||||
#
|
||||
# Currently support for Windows platform is not present
|
||||
#
|
||||
if(WIN32)
|
||||
MESSAGE("rocrtst Suite is not supported on Windows platform")
|
||||
RETURN()
|
||||
endif()
|
||||
|
||||
#
|
||||
# Process environment variables relating to Build type, size and RT version
|
||||
#
|
||||
string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp)
|
||||
if("${tmp}" STREQUAL debug)
|
||||
set(BUILD_TYPE "Debug")
|
||||
set(ISDEBUG 1)
|
||||
else()
|
||||
set(BUILD_TYPE "Release")
|
||||
set(ISDEBUG 0)
|
||||
endif()
|
||||
|
||||
if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32)
|
||||
set (ONLY64STR "")
|
||||
set (IS64BIT 0)
|
||||
else()
|
||||
set (ONLY64STR "64")
|
||||
set (IS64BIT 1)
|
||||
endif()
|
||||
|
||||
set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include)
|
||||
set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib)
|
||||
|
||||
#
|
||||
# Determine ROCR Header files are present
|
||||
#
|
||||
if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h)
|
||||
MESSAGE("ERROR: ${ROCR_INC_DIR}/hsa/hsa.h does not exist. Check ROCR_DIR env. variable.")
|
||||
RETURN()
|
||||
endif()
|
||||
|
||||
|
||||
# Determine ROCR Library files are present
|
||||
#
|
||||
if (${IS64BIT} EQUAL 0)
|
||||
if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime.so)
|
||||
MESSAGE("ERROR: Environment variable ROCR_LIB_DIR pointing to ROCR libraries is not set")
|
||||
RETURN()
|
||||
endif()
|
||||
else()
|
||||
if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime64.so)
|
||||
MESSAGE("ERROR: Environment variable ROCR_LIB_DIR pointing to ROCR libraries is not set")
|
||||
RETURN()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (DEFINED ENV{OPENCL_DIR})
|
||||
set(CLANG $ENV{OPENCL_DIR}/bin/x86_64/clang)
|
||||
set(OPENCL_DIR $ENV{OPENCL_DIR})
|
||||
if (NOT EXISTS ${CLANG})
|
||||
message("ERROR: path to clang (${CLANG}) is not valid. Is env. variable OPENCL_DIR correct?")
|
||||
return()
|
||||
endif()
|
||||
|
||||
if (DEFINED ENV{OPENCL_VER})
|
||||
set(OPENCL_VER $ENV{OPENCL_VER})
|
||||
else()
|
||||
message("OPENCL_VER environment variable is not set. Using default")
|
||||
set(OPENCL_VER "2.0")
|
||||
endif()
|
||||
else()
|
||||
message("WARNING: OPENCL_DIR environment variable is not set. Kernels will not be built.")
|
||||
endif()
|
||||
|
||||
if (DEFINED ENV{TARGET_DEVICE})
|
||||
set(TARGET_DEVICE $ENV{TARGET_DEVICE})
|
||||
else()
|
||||
message("ERROR: TARGET_DEVICE environment variable is not defined.")
|
||||
message("Please define a valid clang target (e.g., gfx803, gfx900,...).")
|
||||
return()
|
||||
endif()
|
||||
|
||||
#
|
||||
# Set Name for rocrtst Suite Project
|
||||
#
|
||||
set(ROCRTST_SUITE_NAME "rocrtst${ONLY64STR}")
|
||||
project (${ROCRTST_SUITE_NAME})
|
||||
|
||||
#
|
||||
# Print out the build configuration being used:
|
||||
#
|
||||
# Build Src directory
|
||||
# Build Binary directory
|
||||
# Build Type: Debug Vs Release, 32 Vs 64
|
||||
# Compiler Version, etc
|
||||
#
|
||||
message("")
|
||||
message("Build Configuration:")
|
||||
message("-------------IS64BIT: " ${IS64BIT})
|
||||
message("-----------BuildType: " ${BUILD_TYPE})
|
||||
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
|
||||
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
|
||||
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
|
||||
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
|
||||
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
|
||||
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
|
||||
message("-------Target Device: " ${TARGET_DEVICE})
|
||||
message("----------Clang path: " ${CLANG})
|
||||
message("-------OpenCL version " ${OPENCL_VER})
|
||||
message("")
|
||||
|
||||
set(KERNELS_DIR ${PROJECT_SOURCE_DIR}/kernels)
|
||||
#
|
||||
# Set the build type based on user input
|
||||
#
|
||||
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
|
||||
#
|
||||
# Flag to enable / disable verbose output.
|
||||
#
|
||||
SET( CMAKE_VERBOSE_MAKEFILE on )
|
||||
#
|
||||
# Compiler pre-processor definitions.
|
||||
#
|
||||
# Define MACRO "DEBUG" if build type is "Debug"
|
||||
if(${BUILD_TYPE} STREQUAL "Debug")
|
||||
add_definitions(-DDEBUG)
|
||||
endif()
|
||||
|
||||
add_definitions(-D__linux__)
|
||||
add_definitions(-DLITTLEENDIAN_CPU=1)
|
||||
|
||||
#
|
||||
# Linux Compiler options
|
||||
#
|
||||
set(CMAKE_CXX_FLAGS "-std=c++11 ")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
|
||||
|
||||
|
||||
#
|
||||
# Extend the compiler flags for 64-bit builds
|
||||
#
|
||||
if (IS64BIT)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
|
||||
endif()
|
||||
|
||||
#
|
||||
# Add compiler flags to include symbol information for debug builds
|
||||
#
|
||||
if(ISDEBUG)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0")
|
||||
endif()
|
||||
MESSAGE("ISDEBUG STEP:Done")
|
||||
|
||||
|
||||
set(ROCRTST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
|
||||
|
||||
# Set Name for Google Test Framework and build it as a
|
||||
# static library to be linked with user test programs
|
||||
#
|
||||
set(GOOGLE_TEST_FRWK_NAME "google-test-frwk${ONLY64STR}")
|
||||
add_subdirectory(${ROCRTST_ROOT}/gtest "${PROJECT_BINARY_DIR}/gtest")
|
||||
set (ROCRTST_LIBS ${ROCRTST_LIBS} ${GOOGLE_TEST_FRWK_NAME}
|
||||
hsa-runtime-tools${ONLY64STR})
|
||||
MESSAGE("ROCRTST_LIBS SET STEP:Done")
|
||||
#
|
||||
#
|
||||
# Other source directories
|
||||
aux_source_directory(${ROCRTST_ROOT}/common common_srcs)
|
||||
aux_source_directory(${ROCRTST_ROOT}/common/rocm_smi common_smi_srcs)
|
||||
|
||||
#
|
||||
# Specify the directory containing various libraries of ROCR
|
||||
# to be linked against for building ROC Perf applications
|
||||
#
|
||||
LINK_DIRECTORIES(${ROCR_LIB_DIR})
|
||||
|
||||
#
|
||||
# Extend the list of libraries to be used for linking ROC Perf Apps
|
||||
#
|
||||
set(ROCRTST_LIBS ${ROCRTST_LIBS} hsa-runtime${ONLY64STR})
|
||||
|
||||
|
||||
# Set Name for rocrtst
|
||||
MESSAGE(${ROCRTST_LIBS})
|
||||
set(ROCRTST "rocrtst${ONLY64STR}")
|
||||
|
||||
#
|
||||
# Source files for building rocrtst
|
||||
#
|
||||
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} performanceSources)
|
||||
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/test_common testCommonSources)
|
||||
aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources)
|
||||
|
||||
# Header file include path
|
||||
|
||||
include_directories(${ROCR_INC_DIR})
|
||||
include_directories(${ROCRTST_ROOT})
|
||||
include_directories(${ROCRTST_ROOT}/gtest/include)
|
||||
|
||||
# Use this function to build any samples that have kernels to be built
|
||||
function(build_kernel S_NAME)
|
||||
set(SNAME_KERNEL "${S_NAME}_kernels.hsaco")
|
||||
set(TARG_NAME "${S_NAME}_hsaco")
|
||||
set(HSACO_TARG_LIST ${HSACO_TARG_LIST} ${TARG_NAME} PARENT_SCOPE)
|
||||
separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEVICE} -include ${OPENCL_DIR}/include/opencl-c.h ${BITCODE_LIBS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${PROJECT_BINARY_DIR}/${SNAME_KERNEL}")
|
||||
add_custom_target(${TARG_NAME} ${CLANG} ${CLANG_ARG_LIST}
|
||||
COMMENT "BUILDING KERNEL..."
|
||||
VERBATIM)
|
||||
endfunction(build_kernel)
|
||||
|
||||
######################
|
||||
# Kernel Build Section
|
||||
######################
|
||||
set(KERN_SUFFIX "kernels.hsaco")
|
||||
set(BITCODE_PREF "-Xclang -mlink-bitcode-file -Xclang")
|
||||
set(BITCODE_PREF "${BITCODE_PREF} ${OPENCL_DIR}/lib/x86_64/bitcode")
|
||||
|
||||
set(COMMON_BITCODE_LIBS "${BITCODE_PREF}/opencl.amdgcn.bc")
|
||||
set(COMMON_BITCODE_LIBS "${COMMON_BITCODE_LIBS} ${BITCODE_PREF}/ockl.amdgcn.bc")
|
||||
|
||||
# To build kernels, repeat the pattern used below for the P2P kernel; this
|
||||
# pattern sets the bitcode libraries required by the kernel which will be
|
||||
# used in the build_kernel() call, which builds the kernel.
|
||||
|
||||
# Test Case Template example
|
||||
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
|
||||
set(CL_FILE_LIST "${KERNELS_DIR}/test_case_template_kernels.cl")
|
||||
build_kernel("test_case_template")
|
||||
|
||||
# P2P Memory Access
|
||||
#set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
|
||||
#set(CL_FILE_LIST "${KERNELS_DIR}/p2p_mem_access_kernels.cl")
|
||||
#build_kernel("p2p_mem_access")
|
||||
|
||||
# Dispatch Time
|
||||
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
|
||||
set(CL_FILE_LIST "${KERNELS_DIR}/dispatch_time_kernels.cl")
|
||||
build_kernel("dispatch_time")
|
||||
|
||||
# Build rules
|
||||
add_executable(${ROCRTST} ${performanceSources} ${common_srcs}
|
||||
${common_smi_srcs} ${testCommonSources})
|
||||
|
||||
target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt)
|
||||
|
||||
add_custom_target(rocrtst_kernels DEPENDS ${HSACO_TARG_LIST})
|
||||
INSTALL(TARGETS ${ROCRTST}
|
||||
ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
|
||||
LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
|
||||
RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void
|
||||
empty_kernel(void) {
|
||||
return;
|
||||
}
|
||||
|
||||
-54
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void
|
||||
square(__global int *dstArray, __global const int *srcArray, const int sz) {
|
||||
unsigned int id = get_global_id(0);
|
||||
if (id < sz) {
|
||||
dstArray[id] = srcArray[id] * srcArray[id];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,166 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "suites/performance/dispatch_time.h"
|
||||
#include "suites/performance/memory_async_copy.h"
|
||||
#include "suites/performance/test_case_template.h"
|
||||
#include "suites/performance/main.h"
|
||||
#include "suites/test_common/test_common.h"
|
||||
|
||||
#include "common/rocm_smi/rocm_smi.h"
|
||||
|
||||
|
||||
static RocrTstGlobals *sRocrtstGlvalues = nullptr;
|
||||
|
||||
static bool GetMonitorDevices(const std::shared_ptr<rocrtst::smi::Device> &d,
|
||||
void *p) {
|
||||
std::string val_str;
|
||||
|
||||
assert(p != nullptr);
|
||||
|
||||
std::vector<std::shared_ptr<rocrtst::smi::Device>> *device_list =
|
||||
reinterpret_cast<std::vector<std::shared_ptr<rocrtst::smi::Device>> *>(p);
|
||||
|
||||
if (d->monitor() != nullptr) {
|
||||
device_list->push_back(d);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static void RunTest(TestBase *test) {
|
||||
assert(sRocrtstGlvalues != nullptr);
|
||||
|
||||
test->set_verbosity(sRocrtstGlvalues->verbosity);
|
||||
test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity);
|
||||
test->set_num_iteration(sRocrtstGlvalues->num_iterations);
|
||||
test->set_monitor_devices(&sRocrtstGlvalues->monitor_devices);
|
||||
|
||||
test->DisplayTestInfo();
|
||||
test->SetUp();
|
||||
test->Run();
|
||||
test->DisplayResults();
|
||||
test->Close();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// TEST ENTRY TEMPLATE:
|
||||
// TEST(rocrtst, Perf_<test name>) {
|
||||
// <Test Implementation class> <test_obj>;
|
||||
//
|
||||
// // Copy and modify implementation of RunTest() if you need to deviate
|
||||
// // from the standard pattern implemented there.
|
||||
// RunTest(&<test_obj>);
|
||||
// }
|
||||
|
||||
TEST(rocrtst, Test_Example) {
|
||||
TestExample tst;
|
||||
|
||||
rocrtst::smi::RocmSMI hw;
|
||||
hw.DiscoverDevices();
|
||||
|
||||
RunTest(&tst);
|
||||
}
|
||||
|
||||
TEST(rocrtst, Perf_Memory_Async_Copy) {
|
||||
MemoryAsyncCopy mac;
|
||||
// To do full test, uncomment this:
|
||||
// mac.set_full_test(true);
|
||||
// To test only 1 path, add lines like this:
|
||||
// mac.set_src_pool(<src pool id>);
|
||||
// mac.set_dst_pool(<dst pool id>);
|
||||
// The default is to and from the cpu to 1 gpu, and to/from a gpu to
|
||||
// another gpu
|
||||
RunTest(&mac);
|
||||
}
|
||||
|
||||
TEST(rocrtst, Perf_Dispatch_Time_Single_SpinWait) {
|
||||
DispatchTime dt(true, true);
|
||||
RunTest(&dt);
|
||||
}
|
||||
|
||||
TEST(rocrtst, Perf_Dispatch_Time_Single_Interrupt) {
|
||||
DispatchTime dt(false, true);
|
||||
RunTest(&dt);
|
||||
}
|
||||
|
||||
TEST(rocrtst, Perf_Dispatch_Time_Multi_SpinWait) {
|
||||
DispatchTime dt(true, false);
|
||||
RunTest(&dt);
|
||||
}
|
||||
|
||||
TEST(rocrtst, Perf_Dispatch_Time_Multi_Interrupt) {
|
||||
DispatchTime dt(false, false);
|
||||
RunTest(&dt);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
RocrTstGlobals settings;
|
||||
|
||||
settings.verbosity = 1;
|
||||
settings.monitor_verbosity = 1;
|
||||
settings.num_iterations = 0;
|
||||
|
||||
if (ProcessCmdline(&settings, argc, argv)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
rocrtst::smi::RocmSMI hw;
|
||||
hw.DiscoverDevices();
|
||||
hw.IterateSMIDevices(
|
||||
GetMonitorDevices, reinterpret_cast<void *>(&settings.monitor_devices));
|
||||
|
||||
sRocrtstGlvalues = &settings;
|
||||
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef ROCRTST_SUITES_PERFORMANCE_MAIN_H_
|
||||
#define ROCRTST_SUITES_PERFORMANCE_MAIN_H_
|
||||
|
||||
#endif // ROCRTST_SUITES_PERFORMANCE_MAIN_H_
|
||||
|
||||
@@ -1,395 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
// The purpose of this test is to provide an example of the use of the
|
||||
// common RocrTest classes and utilities that are used in many examples.
|
||||
// It can be used as a template to start off with when writing new tests.
|
||||
// In many cases, the existing boilerplate code will be sufficient as is.
|
||||
// Otherwise, the boilerplate code can be either supplemented or replaced
|
||||
// by your own code in your example, as necessary.
|
||||
//
|
||||
// The comments provided are focused more on the use of the common rocrtst
|
||||
// utilities and boilerplate code, rather than the example app. itself.
|
||||
//
|
||||
// The boilerplate code includes code for:
|
||||
// * hsa initialization and clean up
|
||||
// * code to load pre-built kernels
|
||||
// * creating queues
|
||||
// * populating AQL packets
|
||||
// * checking for required profiles
|
||||
// * finding cpu and gpu agents (callbacks for common use cases)
|
||||
// * finding pools (having common requirements)
|
||||
// * allocating and setting kernel arguments
|
||||
// * somewhat standardized output
|
||||
// * handling additional command line arguments, beyond google-test arguments
|
||||
// * support for various level of verbosity, controlled from command line arg
|
||||
// * support for building OpenCL kernels
|
||||
// * timer support
|
||||
//
|
||||
// Overview of RocrTst code organization:
|
||||
// Classes:
|
||||
// * class BaseRocR (base_rocr.h) -- base class for all rocrtst examples and
|
||||
// tests. Most of the rocrtst common utilities act on BaseRocR objects
|
||||
//
|
||||
// * TestBase (test_base.h) -- derives from BaseRocR and is the base class
|
||||
// for all tests under <rocrtst root>/suites. The implementation in TestBase
|
||||
// methods are typically actions that are required for most/all tests and
|
||||
// should therefore be called from the derived implementions of the methods.
|
||||
//
|
||||
// Utilities:
|
||||
// * <rocrtst root>/common/base_rocr_utils.<cc/h> contains a set of utilities
|
||||
// that act on BaseRocR objects.
|
||||
//
|
||||
// * <rocrtst root>/common/common.<cc/h> contain other non-BaseRocR utilities
|
||||
//
|
||||
// Special Files:
|
||||
// * main.cc -- The main google test file from which the tests are invoked.
|
||||
// There should be an entry for each test to be run there.
|
||||
//
|
||||
// * kernels -- OpenCL kernel source files should go in the kernels directory
|
||||
//
|
||||
// * CMakeLists.txt -- Host code (*.cc and *.h files) should build without
|
||||
// modifying the CMakeList.txt file, if the files are place in the
|
||||
// "performance" directory. However, an entry for OpenCL kernels. For
|
||||
// each kernel to be built, the bitcode libraries must be indicated before
|
||||
// the call to "build_kernel()" is made. See existing code for examples.
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "suites/performance/test_case_template.h"
|
||||
#include "common/base_rocr_utils.h"
|
||||
#include "common/common.h"
|
||||
#include "common/helper_funcs.h"
|
||||
#include "common/hsatimer.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_finalize.h"
|
||||
|
||||
static const uint32_t kNumBufferElements = 256;
|
||||
|
||||
#define RET_IF_HSA_ERR(err) { \
|
||||
if ((err) != HSA_STATUS_SUCCESS) { \
|
||||
const char* msg = 0; \
|
||||
hsa_status_string(err, &msg); \
|
||||
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
|
||||
__FILE__ << ". Call returned " << err << std::endl; \
|
||||
std::cout << msg << std::endl; \
|
||||
return (err); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Many test cases want to perform an operation on memory sizes of various
|
||||
// granularities.
|
||||
#if 0
|
||||
static const int kNumGranularity = 20;
|
||||
const char* Str[kNumGranularity] = {"1k", "2K", "4K", "8K", "16K", "32K",
|
||||
"64K", "128K", "256K", "512K", "1M", "2M", "4M", "8M", "16M", "32M",
|
||||
"64M", "128M", "256M", "512M"};
|
||||
|
||||
const size_t Size[kNumGranularity] = {
|
||||
1024, 2*1024, 4*1024, 8*1024, 16*1024, 32*1024, 64*1024, 128*1024,
|
||||
256*1024, 512*1024, 1024*1024, 2048*1024, 4096*1024, 8*1024*1024,
|
||||
16*1024*1024, 32*1024*1024, 64*1024*1024, 128*1024*1024, 256*1024*1024,
|
||||
512*1024*1024};
|
||||
|
||||
static const int kMaxCopySize = Size[kNumGranularity - 1];
|
||||
#endif
|
||||
TestExample::TestExample(void) :
|
||||
TestBase() {
|
||||
set_num_iteration(10); // Number of iterations to execute of the main test;
|
||||
// This is a default value which can be overridden
|
||||
// on the command line.
|
||||
set_title("Test Case Example");
|
||||
set_description("Put a description of the test case here. Line breaks "
|
||||
"will be taken care of on output, not here.");
|
||||
|
||||
set_kernel_file_name("test_case_template_kernels.hsaco");
|
||||
set_kernel_name("square"); // kernel function name
|
||||
|
||||
#if 0
|
||||
// Set required profile to HSA_PROFILE_FULL or HSA_PROFILE_BASE if it
|
||||
// matters for this test. If either profile is fine, then leave with
|
||||
// default
|
||||
set_requires_profile(<value>);
|
||||
#endif
|
||||
}
|
||||
|
||||
TestExample::~TestExample(void) {
|
||||
}
|
||||
|
||||
// Any 1-time setup involving member variables used in the rest of the test
|
||||
// should be done here.
|
||||
void TestExample::SetUp(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
// TestBase::SetUp() will set HSA_ENABLE_INTERRUPT if enable_interrupt() is
|
||||
// true, and call hsa_init(). It also prints the SetUp header.
|
||||
TestBase::SetUp();
|
||||
|
||||
// SetDefaultAgents(this) will assign the first CPU and GPU found on
|
||||
// iterating through the agents and assign them to cpu_device_ and
|
||||
// gpu_device1_, respectively (cpu_device() and gpu_device1()). These
|
||||
// BaseRocR member variables are used in some utilities. Additionally,
|
||||
// SetDefaultAgents() checks the profile of the gpu and compares this
|
||||
// to any required profile.
|
||||
//
|
||||
// If SetDefaultAgents() is not used, if the profile of the target GPU
|
||||
// matters for this test, it should be set with set_profile() and
|
||||
// CheckProfileAndInform() should be called to check if it is the
|
||||
// required profile
|
||||
err = rocrtst::SetDefaultAgents(this);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
hsa_agent_t* gpu_dev = gpu_device1();
|
||||
|
||||
// Find and assign HSA_AMD_SEGMENT_GLOBAL pools for cpu, gpu and a kern_arg
|
||||
// pool
|
||||
err = rocrtst::SetPoolsTypical(this);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Create a queue
|
||||
hsa_queue_t* q = nullptr;
|
||||
rocrtst::CreateQueue(*gpu_dev, &q);
|
||||
ASSERT_NE(q, nullptr);
|
||||
set_main_queue(q);
|
||||
|
||||
err = rocrtst::LoadKernelFromObjFile(this);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Fill up the kernel packet (except header) with some values we've
|
||||
// collected so far, and some reasonable default values; this should be after
|
||||
// LoadKernelFromObjFile(). AllocAndSetKernArgs() will fill in the kern_args
|
||||
err = rocrtst::InitializeAQLPacket(this, &aql());
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()};
|
||||
|
||||
// Allocate a few buffers for our example
|
||||
err = hsa_amd_memory_pool_allocate(cpu_pool(),
|
||||
kNumBufferElements*sizeof(uint32_t),
|
||||
0, reinterpret_cast<void**>(&src_buffer_));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, src_buffer_);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Initialize the source buffer
|
||||
for (uint32_t i = 0; i < kNumBufferElements; ++i) {
|
||||
reinterpret_cast<uint32_t *>(src_buffer_)[i] = i;
|
||||
}
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(cpu_pool(),
|
||||
kNumBufferElements*sizeof(uint32_t),
|
||||
0, reinterpret_cast<void**>(&dst_buffer_));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst_buffer_);
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
// Set up Kernel arguments
|
||||
// See the meta-data for the compiled OpenCL kernel code to ascertain
|
||||
// the sizes, padding and alignment required for kernel arguments.
|
||||
// This can be seen by executing
|
||||
// $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
|
||||
// The kernel code will expect the following arguments aligned as shown.
|
||||
// typedef uint32_t uint4[4];
|
||||
struct __attribute__((aligned(16))) local_args_t {
|
||||
uint32_t* dstArray;
|
||||
uint32_t* srcArray;
|
||||
uint32_t size;
|
||||
uint32_t pad;
|
||||
uint64_t global_offset_x;
|
||||
uint64_t global_offset_y;
|
||||
uint64_t global_offset_z;
|
||||
} local_args;
|
||||
|
||||
local_args.dstArray = reinterpret_cast<uint32_t *>(dst_buffer_);
|
||||
local_args.srcArray = reinterpret_cast<uint32_t *>(src_buffer_);
|
||||
local_args.size = kNumBufferElements;
|
||||
local_args.global_offset_x = 0;
|
||||
local_args.global_offset_y = 0;
|
||||
local_args.global_offset_z = 0;
|
||||
|
||||
err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
|
||||
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// This wrapper atomically writes the provided header and setup to the
|
||||
// provided AQL packet. The provided AQL packet address should be in the
|
||||
// queue memory space.
|
||||
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
|
||||
hsa_kernel_dispatch_packet_t* queue_packet) {
|
||||
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
|
||||
header | (setup << 16), __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
// Do a few extra iterations as we toss out some of the inital and final
|
||||
// iterations when calculating statistics
|
||||
uint32_t TestExample::RealIterationNum(void) {
|
||||
return num_iteration() * 1.2 + 1;
|
||||
}
|
||||
|
||||
static bool VerifyResult(uint32_t *ar, size_t sz) {
|
||||
for (size_t i = sz; i < sz; ++i) {
|
||||
if (i*i != ar[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
void TestExample::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
|
||||
// Override whatever we need to...
|
||||
aql().workgroup_size_x = kNumBufferElements;
|
||||
aql().grid_size_x = kNumBufferElements;
|
||||
|
||||
std::vector<double> timer;
|
||||
|
||||
int it = RealIterationNum();
|
||||
hsa_kernel_dispatch_packet_t *queue_aql_packet;
|
||||
|
||||
rocrtst::PerfTimer p_timer;
|
||||
uint64_t index;
|
||||
|
||||
for (int i = 0; i < it; i++) {
|
||||
// This function simply copies the data we've collected so far into our
|
||||
// local AQL packet, except the the setup and header fields.
|
||||
queue_aql_packet = WriteAQLToQueue(this, &index);
|
||||
ASSERT_EQ(queue_aql_packet,
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t *>
|
||||
(main_queue()->base_address) + index);
|
||||
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
|
||||
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
|
||||
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
||||
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
|
||||
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
||||
|
||||
// Create and start a timer for this iteration
|
||||
int id = p_timer.CreateTimer();
|
||||
p_timer.StartTimer(id);
|
||||
|
||||
AtomicSetPacketHeader(aql_header, aql().setup, queue_aql_packet);
|
||||
|
||||
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
while (hsa_signal_wait_scacquire(aql().completion_signal,
|
||||
HSA_SIGNAL_CONDITION_LT, 1, (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE)) {
|
||||
}
|
||||
|
||||
// Stop the timer
|
||||
p_timer.StopTimer(id);
|
||||
|
||||
// Store time for later analysis
|
||||
timer.push_back(p_timer.ReadTimer(id));
|
||||
hsa_signal_store_screlease(aql().completion_signal, 1);
|
||||
|
||||
ASSERT_TRUE(VerifyResult(reinterpret_cast<uint32_t *>(dst_buffer_),
|
||||
kNumBufferElements));
|
||||
|
||||
// Pay attention to verbosity level for things like progress output
|
||||
if (verbosity() >= VERBOSE_PROGRESS) {
|
||||
std::cout << ".";
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity() >= VERBOSE_PROGRESS) {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Abandon the first result and after sort, delete the last 2% value
|
||||
timer.erase(timer.begin());
|
||||
std::sort(timer.begin(), timer.end());
|
||||
timer.erase(timer.begin() + num_iteration(), timer.end());
|
||||
|
||||
time_mean_ = rocrtst::CalcMean(timer);
|
||||
}
|
||||
|
||||
void TestExample::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void TestExample::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!rocrtst::CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::DisplayResults();
|
||||
std::cout << "The average time was: " << time_mean_ * 1e6 <<
|
||||
" uS" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
void TestExample::Close() {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_memory_pool_free(src_buffer_);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
err = hsa_amd_memory_pool_free(dst_buffer_);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
|
||||
#undef RET_IF_HSA_ERR
|
||||
@@ -48,6 +48,7 @@
|
||||
#include <memory>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "suites/functional/memory_basic.h"
|
||||
#include "suites/performance/dispatch_time.h"
|
||||
#include "suites/performance/memory_async_copy.h"
|
||||
#include "suites/test_common/test_case_template.h"
|
||||
@@ -74,21 +75,39 @@ static bool GetMonitorDevices(const std::shared_ptr<rocrtst::smi::Device> &d,
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static void RunTest(TestBase *test) {
|
||||
static void SetFlags(TestBase *test) {
|
||||
assert(sRocrtstGlvalues != nullptr);
|
||||
|
||||
test->set_verbosity(sRocrtstGlvalues->verbosity);
|
||||
test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity);
|
||||
test->set_num_iteration(sRocrtstGlvalues->num_iterations);
|
||||
test->set_monitor_devices(&sRocrtstGlvalues->monitor_devices);
|
||||
}
|
||||
|
||||
|
||||
static void RunCustomTestProlog(TestBase *test) {
|
||||
SetFlags(test);
|
||||
|
||||
test->DisplayTestInfo();
|
||||
test->SetUp();
|
||||
test->Run();
|
||||
return;
|
||||
}
|
||||
static void RunCustomTestEpilog(TestBase *test) {
|
||||
test->DisplayResults();
|
||||
test->Close();
|
||||
return;
|
||||
}
|
||||
|
||||
// If the test case one big test, you should use RunGenericTest()
|
||||
// to run the test case. OTOH, if the test case consists of multiple
|
||||
// functions to be run as separate tests, follow this pattern:
|
||||
// * RunCustomTestProlog(test) // Run() should contain minimal code
|
||||
// * <insert call to actual test function within test case>
|
||||
// * RunCustomTestEpilog(test)
|
||||
static void RunGenericTest(TestBase *test) {
|
||||
RunCustomTestProlog(test);
|
||||
RunCustomTestEpilog(test);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -96,9 +115,9 @@ static void RunTest(TestBase *test) {
|
||||
// TEST(rocrtst, Perf_<test name>) {
|
||||
// <Test Implementation class> <test_obj>;
|
||||
//
|
||||
// // Copy and modify implementation of RunTest() if you need to deviate
|
||||
// // Copy and modify implementation of RunGenericTest() if you need to deviate
|
||||
// // from the standard pattern implemented there.
|
||||
// RunTest(&<test_obj>);
|
||||
// RunGenericTest(&<test_obj>);
|
||||
// }
|
||||
|
||||
TEST(rocrtst, Test_Example) {
|
||||
@@ -107,7 +126,15 @@ TEST(rocrtst, Test_Example) {
|
||||
rocrtst::smi::RocmSMI hw;
|
||||
hw.DiscoverDevices();
|
||||
|
||||
RunTest(&tst);
|
||||
RunGenericTest(&tst);
|
||||
}
|
||||
|
||||
TEST(rocrtstFunc, Memory_Max_Mem) {
|
||||
MemoryTest mt;
|
||||
|
||||
RunCustomTestProlog(&mt);
|
||||
mt.MaxSingleAllocationTest();
|
||||
RunCustomTestEpilog(&mt);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, Memory_Async_Copy) {
|
||||
@@ -119,27 +146,27 @@ TEST(rocrtstPerf, Memory_Async_Copy) {
|
||||
// mac.set_dst_pool(<dst pool id>);
|
||||
// The default is to and from the cpu to 1 gpu, and to/from a gpu to
|
||||
// another gpu
|
||||
RunTest(&mac);
|
||||
RunGenericTest(&mac);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, AQL_Dispatch_Time_Single_SpinWait) {
|
||||
DispatchTime dt(true, true);
|
||||
RunTest(&dt);
|
||||
RunGenericTest(&dt);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, AQL_Dispatch_Time_Single_Interrupt) {
|
||||
DispatchTime dt(false, true);
|
||||
RunTest(&dt);
|
||||
RunGenericTest(&dt);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_SpinWait) {
|
||||
DispatchTime dt(true, false);
|
||||
RunTest(&dt);
|
||||
RunGenericTest(&dt);
|
||||
}
|
||||
|
||||
TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_Interrupt) {
|
||||
DispatchTime dt(false, false);
|
||||
RunTest(&dt);
|
||||
RunGenericTest(&dt);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user