From cf24f7bb7871eb0891e04b02e342d59cbdba9288 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Mon, 31 Jul 2017 12:04:55 -0500 Subject: [PATCH] Added max. single mem. allocation test. Change-Id: Ie81c6af0502fde56225b1e197801cf04b474feb2 --- rocrtst/.gitignore | 3 +- rocrtst/common/common.cc | 141 ++++--- rocrtst/common/common.h | 32 +- rocrtst/suites/functional/memory_basic.cc | 262 ++++++++++++ .../memory_basic.h} | 24 +- rocrtst/suites/performance/CMakeLists.txt | 295 ------------- .../kernels/dispatch_time_kernels.cl | 50 --- .../kernels/test_case_template_kernels.cl | 54 --- rocrtst/suites/performance/main.cc | 166 -------- rocrtst/suites/performance/main.h | 49 --- .../suites/performance/test_case_template.cc | 395 ------------------ rocrtst/suites/test_common/main.cc | 47 ++- 12 files changed, 435 insertions(+), 1083 deletions(-) create mode 100755 rocrtst/suites/functional/memory_basic.cc rename rocrtst/suites/{performance/test_case_template.h => functional/memory_basic.h} (84%) delete mode 100755 rocrtst/suites/performance/CMakeLists.txt delete mode 100755 rocrtst/suites/performance/kernels/dispatch_time_kernels.cl delete mode 100755 rocrtst/suites/performance/kernels/test_case_template_kernels.cl delete mode 100755 rocrtst/suites/performance/main.cc delete mode 100755 rocrtst/suites/performance/main.h delete mode 100755 rocrtst/suites/performance/test_case_template.cc diff --git a/rocrtst/.gitignore b/rocrtst/.gitignore index 3985e26408..388e9a35cf 100644 --- a/rocrtst/.gitignore +++ b/rocrtst/.gitignore @@ -8,5 +8,4 @@ *.bk *.old *.cmake -build/* - +build diff --git a/rocrtst/common/common.cc b/rocrtst/common/common.cc index 1ee4355e49..553998bb9e 100755 --- a/rocrtst/common/common.cc +++ b/rocrtst/common/common.cc @@ -49,6 +49,8 @@ #include #include #include +#include + namespace rocrtst { @@ -169,20 +171,52 @@ hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) { POOL_PROP_OFF, POOL_PROP_DONT_CARE); } -static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool, - std::string* out_str) { +// Populate the vector with handles to all agents and pools +hsa_status_t +GetAgentPools(std::vector> *agent_pools) { hsa_status_t err; - uint32_t global_flag = 0; + assert(agent_pools != nullptr); + + auto save_agent = [](hsa_agent_t a, void *data)->hsa_status_t { + std::vector> *ag_vec; + hsa_status_t err; + assert(data != nullptr); + ag_vec = + reinterpret_cast> *>(data); + std::shared_ptr ag(new agent_pools_t); + ag->agent = a; + + + auto save_pool = [](hsa_amd_memory_pool_t p, void *data)->hsa_status_t { + assert(data != nullptr); + std::vector *p_list = + reinterpret_cast *>(data); + p_list->push_back(p); + + return HSA_STATUS_SUCCESS; + }; + + err = hsa_amd_agent_iterate_memory_pools(a, save_pool, + reinterpret_cast(&ag->pools)); + assert(err == HSA_STATUS_SUCCESS); + + ag_vec->push_back(ag); + return HSA_STATUS_SUCCESS; + }; + + err = hsa_iterate_agents(save_agent, reinterpret_cast(agent_pools)); + return err; +} + +static hsa_status_t MakeGlobalFlagsString(const pool_info_t *pool_i, + std::string* out_str) { + uint32_t global_flag = pool_i->global_flag; assert(out_str != nullptr); *out_str = ""; - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag); - RET_IF_HSA_COMMON_ERR(err); - std::vector < std::string > flags; if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) { @@ -207,22 +241,17 @@ static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool, return HSA_STATUS_SUCCESS; } -static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool, +static hsa_status_t DumpSegment(const pool_info_t *pool_i, std::string const *ind_lvl) { - uint32_t segment; hsa_status_t err; - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, - &segment); - RET_IF_HSA_COMMON_ERR(err); - fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:"); std::string seg_str = ""; std::string tmp_str; - switch (segment) { + switch (pool_i->segment) { case HSA_AMD_SEGMENT_GLOBAL: - err = MakeGlobalFlagsString(pool, &tmp_str); + err = MakeGlobalFlagsString(pool_i, &tmp_str); RET_IF_HSA_COMMON_ERR(err); seg_str += "GLOBAL; FLAGS: " + tmp_str; @@ -250,57 +279,71 @@ static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool, return HSA_STATUS_SUCCESS; } -hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool, - uint32_t indent) { +hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, + pool_info_t *pool_i) { hsa_status_t err; - std::string ind_lvl(indent, ' '); - DumpSegment(pool, &ind_lvl); - - // Get the size of the POOL - size_t pool_size = 0; - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, - &pool_size); + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag); RET_IF_HSA_COMMON_ERR(err); - std::string sz_str = std::to_string(pool_size / 1024) + "KB"; + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, + &pool_i->segment); + RET_IF_HSA_COMMON_ERR(err); + + // Get the size of the POOL + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, + &pool_i->pool_size); + RET_IF_HSA_COMMON_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, + &pool_i->alloc_allowed); + RET_IF_HSA_COMMON_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, + &pool_i->alloc_granule); + RET_IF_HSA_COMMON_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, + &pool_i->pool_alloc_alignment); + RET_IF_HSA_COMMON_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, + &pool_i->pl_access); + RET_IF_HSA_COMMON_ERR(err); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i, + uint32_t indent) { + std::string ind_lvl(indent, ' '); + + DumpSegment(pool_i, &ind_lvl); + + std::string sz_str = std::to_string(pool_i->pool_size / 1024) + "KB"; fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:", sz_str.c_str()); - bool alloc_allowed = false; - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed); - RET_IF_HSA_COMMON_ERR(err); - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:", - (alloc_allowed ? "TRUE" : "FALSE")); + (pool_i->alloc_allowed ? "TRUE" : "FALSE")); - size_t alloc_granule = 0; - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &alloc_granule); - RET_IF_HSA_COMMON_ERR(err); - - std::string gr_str = std::to_string(alloc_granule / 1024) + "KB"; + std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB"; fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:", gr_str.c_str()); - size_t pool_alloc_alignment = 0; - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, - &pool_alloc_alignment); - RET_IF_HSA_COMMON_ERR(err); - std::string al_str = std::to_string(pool_alloc_alignment / 1024) + "KB"; + std::string al_str = + std::to_string(pool_i->pool_alloc_alignment / 1024) + "KB"; fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:", al_str.c_str()); - bool pl_access = 0; - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pl_access); - RET_IF_HSA_COMMON_ERR(err); - fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:", - (pl_access ? "TRUE" : "FALSE")); + (pool_i->pl_access ? "TRUE" : "FALSE")); return HSA_STATUS_SUCCESS; } diff --git a/rocrtst/common/common.h b/rocrtst/common/common.h index 08a59fa736..10abe8c19e 100755 --- a/rocrtst/common/common.h +++ b/rocrtst/common/common.h @@ -55,6 +55,8 @@ #include #include #include +#include + #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" #include "hsa/hsa_ext_finalize.h" @@ -75,6 +77,31 @@ namespace rocrtst { // refers to it has been corrected // #define HSA_ARGUMENT_ALIGN_BYTES 16 +// This structure holds memory pool information acquired through hsa info +// related calls, and is later used for reference when displaying the +// information. +typedef struct { + uint32_t segment; + size_t pool_size; + bool alloc_allowed; + size_t alloc_granule; + size_t pool_alloc_alignment; + bool pl_access; + uint32_t global_flag; +} pool_info_t; + + +struct agent_pools_t{ + hsa_agent_t agent; + std::vector pools; +}; + +/// Fill in the pool_info_t structure for the provided pool. +/// \param[in] pool Pool for which information will be retrieved +/// \param[out] pool_i Pointer to structure where pool info will be stored +/// \returns HSA_STATUS_SUCCESS if no errors are encountered. +hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i); + /// If the provided agent is associated with a GPU, return that agent through /// output parameter. This function is meant to be the call-back function used /// with hsa_iterate_agents to find GPU agents. @@ -132,7 +159,7 @@ hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data); /// \param[in] pool Pool to gather and dump information for /// \param[in] indent Number of spaces to indent output. /// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors -hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool, +hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i, uint32_t indent = 0); /// Dump information about a provided pointer to STDOUT. @@ -140,5 +167,8 @@ hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool, /// \returns HSA_STATUS_SUCCESS if there are no errors hsa_status_t DumpPointerInfo(void* ptr); +hsa_status_t GetAgentPools( + std::vector> *agent_pools); + } // namespace rocrtst #endif // ROCRTST_COMMON_COMMON_H_ diff --git a/rocrtst/suites/functional/memory_basic.cc b/rocrtst/suites/functional/memory_basic.cc new file mode 100755 index 0000000000..b633dda494 --- /dev/null +++ b/rocrtst/suites/functional/memory_basic.cc @@ -0,0 +1,262 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2017, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include +#include +#include + +#include "suites/functional/memory_basic.h" +#include "common/base_rocr_utils.h" +#include "common/common.h" +#include "common/helper_funcs.h" +#include "common/hsatimer.h" +#include "gtest/gtest.h" +#include "hsa/hsa.h" +#include "hsa/hsa_ext_finalize.h" + +static const uint32_t kNumBufferElements = 256; + +#define RET_IF_HSA_ERR(err) { \ + if ((err) != HSA_STATUS_SUCCESS) { \ + const char* msg = 0; \ + hsa_status_string(err, &msg); \ + std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \ + __FILE__ << ". Call returned " << err << std::endl; \ + std::cout << msg << std::endl; \ + return (err); \ + } \ +} + + +MemoryTest::MemoryTest(void) : + TestBase() { + set_num_iteration(10); // Number of iterations to execute of the main test; + // This is a default value which can be overridden + // on the command line. + set_title("RocR Memory Tests"); + set_description("This series of tests check memory allocation limits, extent" + " of GPU access to system memory and other memory related functionality."); +} + +MemoryTest::~MemoryTest(void) { +} + +// Any 1-time setup involving member variables used in the rest of the test +// should be done here. +void MemoryTest::SetUp(void) { + hsa_status_t err; + + TestBase::SetUp(); + + err = rocrtst::SetDefaultAgents(this); + ASSERT_EQ(HSA_STATUS_SUCCESS, err); + + err = rocrtst::SetPoolsTypical(this); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + return; +} + +void MemoryTest::Run(void) { + // Compare required profile for this test case with what we're actually + // running on + if (!rocrtst::CheckProfile(this)) { + return; + } + + TestBase::Run(); +} + +void MemoryTest::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void MemoryTest::DisplayResults(void) const { + // Compare required profile for this test case with what we're actually + // running on + if (!rocrtst::CheckProfile(this)) { + return; + } + + return; +} + +void MemoryTest::Close() { + // This will close handles opened within rocrtst utility calls and call + // hsa_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +hsa_status_t MemoryTest::TestAllocate(hsa_amd_memory_pool_t pool, size_t sz) { + void *ptr; + hsa_status_t err; + + err = hsa_amd_memory_pool_allocate(pool, sz, 0, &ptr); + + if (err == HSA_STATUS_SUCCESS) { + err = hsa_memory_free(ptr); + } + + return err; +} + +static const char kSubTestSeparator[] = " **************************"; + +static void PrintMemorySubtestHeader(const char *header) { + std::cout << " *** Memory Subtest: " << header << " ***" << std::endl; +} + +// Test Fixtures +void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag, + hsa_amd_memory_pool_t pool) { + hsa_status_t err; + + rocrtst::pool_info_t pool_i; + char ag_name[64]; + hsa_device_type_t ag_type; + + err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + if (verbosity() > 0) { + std::cout << " Agent: " << ag_name << " ("; + switch (ag_type) { + case HSA_DEVICE_TYPE_CPU: + std::cout << "CPU)"; + break; + case HSA_DEVICE_TYPE_GPU: + std::cout << "GPU)"; + break; + case HSA_DEVICE_TYPE_DSP: + std::cout << "DSP)"; + break; + } + std::cout << std::endl; + } + + err = rocrtst::AcquirePoolInfo(pool, &pool_i); + ASSERT_EQ(HSA_STATUS_SUCCESS, err); + + if (verbosity() > 0) { + rocrtst::DumpMemoryPoolInfo(&pool_i, 2); + } + + if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 || + pool_i.pool_alloc_alignment == 0) { + if (verbosity() > 0) { + std::cout << " Test not applicable. Skipping." << std::endl; + std::cout << kSubTestSeparator << std::endl; + } + return; + } + // Do everything in "granule" units + auto gran_sz = pool_i.alloc_granule; + auto pool_sz = pool_i.pool_size / gran_sz; + + // Neg. test: Try to allocate more than the pool size + err = TestAllocate(pool, pool_sz*gran_sz + gran_sz); + EXPECT_EQ(HSA_STATUS_ERROR_INVALID_ALLOCATION, err); + + auto max_alloc_size = pool_sz/2; + uint64_t upper_bound = pool_sz; + uint64_t lower_bound = 0; + + while (true) { + err = TestAllocate(pool, max_alloc_size * gran_sz); + ASSERT_TRUE(err == HSA_STATUS_SUCCESS || + err == HSA_STATUS_ERROR_OUT_OF_RESOURCES); + if (err == HSA_STATUS_SUCCESS) { + lower_bound = max_alloc_size; + max_alloc_size += (upper_bound - lower_bound)/2; + } else if (err == HSA_STATUS_ERROR_OUT_OF_RESOURCES) { + upper_bound = max_alloc_size; + max_alloc_size -= (upper_bound - lower_bound)/2; + } + + if ((upper_bound - lower_bound) < 2) { + break; + } + ASSERT_GT(upper_bound, lower_bound); + } + + if (verbosity() > 0) { + std::cout << " Biggest single allocation size for this pool is " << + (max_alloc_size * gran_sz)/1024 << "KB." << std::endl; + std::cout << " This is " << + static_cast(max_alloc_size)/pool_sz*100 << + "% of the total." << std::endl; + } + + if (ag_type == HSA_DEVICE_TYPE_GPU) { + EXPECT_GE((float)max_alloc_size/pool_sz, (float)15/16); + } + if (verbosity() > 0) { + std::cout << kSubTestSeparator << std::endl; + } +} + +void MemoryTest::MaxSingleAllocationTest(void) { + hsa_status_t err; + std::vector> agent_pools; + + PrintMemorySubtestHeader("Maximum Single Allocation in Memory Pools"); + + err = rocrtst::GetAgentPools(&agent_pools); + ASSERT_EQ(err, HSA_STATUS_SUCCESS); + + auto pool_idx = 0; + for (auto a : agent_pools) { + for (auto p : a->pools) { + std::cout << " Pool " << pool_idx++ << ":" << std::endl; + MaxSingleAllocationTest(a->agent, p); + } + } +} + +#undef RET_IF_HSA_ERR diff --git a/rocrtst/suites/performance/test_case_template.h b/rocrtst/suites/functional/memory_basic.h similarity index 84% rename from rocrtst/suites/performance/test_case_template.h rename to rocrtst/suites/functional/memory_basic.h index e20ed27d6b..e1c83eb35b 100755 --- a/rocrtst/suites/performance/test_case_template.h +++ b/rocrtst/suites/functional/memory_basic.h @@ -42,20 +42,20 @@ * DEALINGS WITH THE SOFTWARE. * */ +#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_ +#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_ -#ifndef ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_ -#define ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_ #include "common/base_rocr.h" #include "hsa/hsa.h" #include "suites/test_common/test_base.h" -class TestExample : public TestBase { +class MemoryTest : public TestBase { public: - TestExample(); + MemoryTest(); - // @Brief: Destructor for test case of TestExample - virtual ~TestExample(); + // @Brief: Destructor for test case of MemoryTest + virtual ~MemoryTest(); // @Brief: Setup the environment for measurement virtual void SetUp(); @@ -72,12 +72,12 @@ class TestExample : public TestBase { // @Brief: Display information about what this test does virtual void DisplayTestInfo(void); - private: - uint32_t RealIterationNum(void); + void MaxSingleAllocationTest(void); - double time_mean_; - void *src_buffer_; - void *dst_buffer_; + hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz); + + private: + void MaxSingleAllocationTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool); }; -#endif // ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_ +#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_ diff --git a/rocrtst/suites/performance/CMakeLists.txt b/rocrtst/suites/performance/CMakeLists.txt deleted file mode 100755 index 153b73e998..0000000000 --- a/rocrtst/suites/performance/CMakeLists.txt +++ /dev/null @@ -1,295 +0,0 @@ -# -# Minimum version of cmake required -# -cmake_minimum_required(VERSION 2.8.0) - -# -# GCC 4.8 or higher compiler required. -# -# Setup build environment -# -# 1) Set env. variable specifying the location of ROCR header files -# -# export ROCR_DIR="Root for RocR install" -# -# 2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release". -# If not set, the default value is "Debug" is bound. -# -# export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release -# -# 3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64" -# If not set, the default value of "64" is bound. -# -# export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64 -# -# 4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803, -# gfx900, ...) -# -# Building rocrtst Suite -# -# 1) Create build folder e.g. "rocrtst/build" - any name will do -# 2) Cd into build folder -# 3) Run "cmake .." -# 4) Run "make" -# - -# -# Currently support for Windows platform is not present -# -if(WIN32) - MESSAGE("rocrtst Suite is not supported on Windows platform") - RETURN() -endif() - -# -# Process environment variables relating to Build type, size and RT version -# -string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp) -if("${tmp}" STREQUAL debug) - set(BUILD_TYPE "Debug") - set(ISDEBUG 1) -else() - set(BUILD_TYPE "Release") - set(ISDEBUG 0) -endif() - -if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32) - set (ONLY64STR "") - set (IS64BIT 0) -else() - set (ONLY64STR "64") - set (IS64BIT 1) -endif() - -set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include) -set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib) - -# -# Determine ROCR Header files are present -# -if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h) - MESSAGE("ERROR: ${ROCR_INC_DIR}/hsa/hsa.h does not exist. Check ROCR_DIR env. variable.") - RETURN() -endif() - - -# Determine ROCR Library files are present -# -if (${IS64BIT} EQUAL 0) - if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime.so) - MESSAGE("ERROR: Environment variable ROCR_LIB_DIR pointing to ROCR libraries is not set") - RETURN() - endif() -else() - if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime64.so) - MESSAGE("ERROR: Environment variable ROCR_LIB_DIR pointing to ROCR libraries is not set") - RETURN() - endif() -endif() - -if (DEFINED ENV{OPENCL_DIR}) - set(CLANG $ENV{OPENCL_DIR}/bin/x86_64/clang) - set(OPENCL_DIR $ENV{OPENCL_DIR}) - if (NOT EXISTS ${CLANG}) - message("ERROR: path to clang (${CLANG}) is not valid. Is env. variable OPENCL_DIR correct?") - return() - endif() - - if (DEFINED ENV{OPENCL_VER}) - set(OPENCL_VER $ENV{OPENCL_VER}) - else() - message("OPENCL_VER environment variable is not set. Using default") - set(OPENCL_VER "2.0") - endif() -else() - message("WARNING: OPENCL_DIR environment variable is not set. Kernels will not be built.") -endif() - -if (DEFINED ENV{TARGET_DEVICE}) - set(TARGET_DEVICE $ENV{TARGET_DEVICE}) -else() - message("ERROR: TARGET_DEVICE environment variable is not defined.") - message("Please define a valid clang target (e.g., gfx803, gfx900,...).") - return() -endif() - -# -# Set Name for rocrtst Suite Project -# -set(ROCRTST_SUITE_NAME "rocrtst${ONLY64STR}") -project (${ROCRTST_SUITE_NAME}) - -# -# Print out the build configuration being used: -# -# Build Src directory -# Build Binary directory -# Build Type: Debug Vs Release, 32 Vs 64 -# Compiler Version, etc -# -message("") -message("Build Configuration:") -message("-------------IS64BIT: " ${IS64BIT}) -message("-----------BuildType: " ${BUILD_TYPE}) -message("------------Compiler: " ${CMAKE_CXX_COMPILER}) -message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION}) -message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR}) -message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR}) -message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib) -message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin) -message("-------Target Device: " ${TARGET_DEVICE}) -message("----------Clang path: " ${CLANG}) -message("-------OpenCL version " ${OPENCL_VER}) -message("") - -set(KERNELS_DIR ${PROJECT_SOURCE_DIR}/kernels) -# -# Set the build type based on user input -# -set(CMAKE_BUILD_TYPE ${BUILD_TYPE}) -# -# Flag to enable / disable verbose output. -# -SET( CMAKE_VERBOSE_MAKEFILE on ) -# -# Compiler pre-processor definitions. -# -# Define MACRO "DEBUG" if build type is "Debug" -if(${BUILD_TYPE} STREQUAL "Debug") -add_definitions(-DDEBUG) -endif() - -add_definitions(-D__linux__) -add_definitions(-DLITTLEENDIAN_CPU=1) - -# -# Linux Compiler options -# -set(CMAKE_CXX_FLAGS "-std=c++11 ") - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic") - - -# -# Extend the compiler flags for 64-bit builds -# -if (IS64BIT) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2") -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") -endif() - -# -# Add compiler flags to include symbol information for debug builds -# -if(ISDEBUG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0") -endif() -MESSAGE("ISDEBUG STEP:Done") - - -set(ROCRTST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) - -# Set Name for Google Test Framework and build it as a -# static library to be linked with user test programs -# -set(GOOGLE_TEST_FRWK_NAME "google-test-frwk${ONLY64STR}") -add_subdirectory(${ROCRTST_ROOT}/gtest "${PROJECT_BINARY_DIR}/gtest") -set (ROCRTST_LIBS ${ROCRTST_LIBS} ${GOOGLE_TEST_FRWK_NAME} - hsa-runtime-tools${ONLY64STR}) -MESSAGE("ROCRTST_LIBS SET STEP:Done") -# -# -# Other source directories -aux_source_directory(${ROCRTST_ROOT}/common common_srcs) -aux_source_directory(${ROCRTST_ROOT}/common/rocm_smi common_smi_srcs) - -# -# Specify the directory containing various libraries of ROCR -# to be linked against for building ROC Perf applications -# -LINK_DIRECTORIES(${ROCR_LIB_DIR}) - -# -# Extend the list of libraries to be used for linking ROC Perf Apps -# -set(ROCRTST_LIBS ${ROCRTST_LIBS} hsa-runtime${ONLY64STR}) - - -# Set Name for rocrtst -MESSAGE(${ROCRTST_LIBS}) -set(ROCRTST "rocrtst${ONLY64STR}") - -# -# Source files for building rocrtst -# -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} performanceSources) -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/test_common testCommonSources) -aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources) - -# Header file include path - -include_directories(${ROCR_INC_DIR}) -include_directories(${ROCRTST_ROOT}) -include_directories(${ROCRTST_ROOT}/gtest/include) - -# Use this function to build any samples that have kernels to be built -function(build_kernel S_NAME) - set(SNAME_KERNEL "${S_NAME}_kernels.hsaco") - set(TARG_NAME "${S_NAME}_hsaco") - set(HSACO_TARG_LIST ${HSACO_TARG_LIST} ${TARG_NAME} PARENT_SCOPE) - separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEVICE} -include ${OPENCL_DIR}/include/opencl-c.h ${BITCODE_LIBS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${PROJECT_BINARY_DIR}/${SNAME_KERNEL}") - add_custom_target(${TARG_NAME} ${CLANG} ${CLANG_ARG_LIST} - COMMENT "BUILDING KERNEL..." - VERBATIM) -endfunction(build_kernel) - -###################### -# Kernel Build Section -###################### -set(KERN_SUFFIX "kernels.hsaco") -set(BITCODE_PREF "-Xclang -mlink-bitcode-file -Xclang") -set(BITCODE_PREF "${BITCODE_PREF} ${OPENCL_DIR}/lib/x86_64/bitcode") - -set(COMMON_BITCODE_LIBS "${BITCODE_PREF}/opencl.amdgcn.bc") -set(COMMON_BITCODE_LIBS "${COMMON_BITCODE_LIBS} ${BITCODE_PREF}/ockl.amdgcn.bc") - -# To build kernels, repeat the pattern used below for the P2P kernel; this -# pattern sets the bitcode libraries required by the kernel which will be -# used in the build_kernel() call, which builds the kernel. - -# Test Case Template example -set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") -set(CL_FILE_LIST "${KERNELS_DIR}/test_case_template_kernels.cl") -build_kernel("test_case_template") - -# P2P Memory Access -#set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") -#set(CL_FILE_LIST "${KERNELS_DIR}/p2p_mem_access_kernels.cl") -#build_kernel("p2p_mem_access") - -# Dispatch Time -set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}") -set(CL_FILE_LIST "${KERNELS_DIR}/dispatch_time_kernels.cl") -build_kernel("dispatch_time") - -# Build rules -add_executable(${ROCRTST} ${performanceSources} ${common_srcs} - ${common_smi_srcs} ${testCommonSources}) - -target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt) - -add_custom_target(rocrtst_kernels DEPENDS ${HSACO_TARG_LIST}) -INSTALL(TARGETS ${ROCRTST} - ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib - LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib - RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin) - diff --git a/rocrtst/suites/performance/kernels/dispatch_time_kernels.cl b/rocrtst/suites/performance/kernels/dispatch_time_kernels.cl deleted file mode 100755 index 7f7b5e08ba..0000000000 --- a/rocrtst/suites/performance/kernels/dispatch_time_kernels.cl +++ /dev/null @@ -1,50 +0,0 @@ -/* - * ============================================================================= - * ROC Runtime Conformance Release License - * ============================================================================= - * The University of Illinois/NCSA - * Open Source License (NCSA) - * - * Copyright (c) 2017, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Developed by: - * - * AMD Research and AMD ROC Software Development - * - * Advanced Micro Devices, Inc. - * - * www.amd.com - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal with the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in - * the documentation and/or other materials provided with the distribution. - * - Neither the names of , - * nor the names of its contributors may be used to endorse or promote - * products derived from this Software without specific prior written - * permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS WITH THE SOFTWARE. - * - */ - -__kernel void -empty_kernel(void) { - return; -} - diff --git a/rocrtst/suites/performance/kernels/test_case_template_kernels.cl b/rocrtst/suites/performance/kernels/test_case_template_kernels.cl deleted file mode 100755 index b7408570f5..0000000000 --- a/rocrtst/suites/performance/kernels/test_case_template_kernels.cl +++ /dev/null @@ -1,54 +0,0 @@ -/* - * ============================================================================= - * ROC Runtime Conformance Release License - * ============================================================================= - * The University of Illinois/NCSA - * Open Source License (NCSA) - * - * Copyright (c) 2017, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Developed by: - * - * AMD Research and AMD ROC Software Development - * - * Advanced Micro Devices, Inc. - * - * www.amd.com - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal with the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in - * the documentation and/or other materials provided with the distribution. - * - Neither the names of , - * nor the names of its contributors may be used to endorse or promote - * products derived from this Software without specific prior written - * permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS WITH THE SOFTWARE. - * - */ - - __kernel void -square(__global int *dstArray, __global const int *srcArray, const int sz) { - unsigned int id = get_global_id(0); - if (id < sz) { - dstArray[id] = srcArray[id] * srcArray[id]; - } - return; -} - diff --git a/rocrtst/suites/performance/main.cc b/rocrtst/suites/performance/main.cc deleted file mode 100755 index 93a0219177..0000000000 --- a/rocrtst/suites/performance/main.cc +++ /dev/null @@ -1,166 +0,0 @@ -/* - * ============================================================================= - * ROC Runtime Conformance Release License - * ============================================================================= - * The University of Illinois/NCSA - * Open Source License (NCSA) - * - * Copyright (c) 2017, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Developed by: - * - * AMD Research and AMD ROC Software Development - * - * Advanced Micro Devices, Inc. - * - * www.amd.com - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal with the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in - * the documentation and/or other materials provided with the distribution. - * - Neither the names of , - * nor the names of its contributors may be used to endorse or promote - * products derived from this Software without specific prior written - * permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS WITH THE SOFTWARE. - * - */ - -#include -#include -#include - -#include "gtest/gtest.h" -#include "suites/performance/dispatch_time.h" -#include "suites/performance/memory_async_copy.h" -#include "suites/performance/test_case_template.h" -#include "suites/performance/main.h" -#include "suites/test_common/test_common.h" - -#include "common/rocm_smi/rocm_smi.h" - - -static RocrTstGlobals *sRocrtstGlvalues = nullptr; - -static bool GetMonitorDevices(const std::shared_ptr &d, - void *p) { - std::string val_str; - - assert(p != nullptr); - - std::vector> *device_list = - reinterpret_cast> *>(p); - - if (d->monitor() != nullptr) { - device_list->push_back(d); - } - return false; -} - - -static void RunTest(TestBase *test) { - assert(sRocrtstGlvalues != nullptr); - - test->set_verbosity(sRocrtstGlvalues->verbosity); - test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity); - test->set_num_iteration(sRocrtstGlvalues->num_iterations); - test->set_monitor_devices(&sRocrtstGlvalues->monitor_devices); - - test->DisplayTestInfo(); - test->SetUp(); - test->Run(); - test->DisplayResults(); - test->Close(); - - return; -} - -// TEST ENTRY TEMPLATE: -// TEST(rocrtst, Perf_) { -// ; -// -// // Copy and modify implementation of RunTest() if you need to deviate -// // from the standard pattern implemented there. -// RunTest(&); -// } - -TEST(rocrtst, Test_Example) { - TestExample tst; - - rocrtst::smi::RocmSMI hw; - hw.DiscoverDevices(); - - RunTest(&tst); -} - -TEST(rocrtst, Perf_Memory_Async_Copy) { - MemoryAsyncCopy mac; - // To do full test, uncomment this: - // mac.set_full_test(true); - // To test only 1 path, add lines like this: - // mac.set_src_pool(); - // mac.set_dst_pool(); - // The default is to and from the cpu to 1 gpu, and to/from a gpu to - // another gpu - RunTest(&mac); -} - -TEST(rocrtst, Perf_Dispatch_Time_Single_SpinWait) { - DispatchTime dt(true, true); - RunTest(&dt); -} - -TEST(rocrtst, Perf_Dispatch_Time_Single_Interrupt) { - DispatchTime dt(false, true); - RunTest(&dt); -} - -TEST(rocrtst, Perf_Dispatch_Time_Multi_SpinWait) { - DispatchTime dt(true, false); - RunTest(&dt); -} - -TEST(rocrtst, Perf_Dispatch_Time_Multi_Interrupt) { - DispatchTime dt(false, false); - RunTest(&dt); -} - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - - RocrTstGlobals settings; - - settings.verbosity = 1; - settings.monitor_verbosity = 1; - settings.num_iterations = 0; - - if (ProcessCmdline(&settings, argc, argv)) { - return 1; - } - - rocrtst::smi::RocmSMI hw; - hw.DiscoverDevices(); - hw.IterateSMIDevices( - GetMonitorDevices, reinterpret_cast(&settings.monitor_devices)); - - sRocrtstGlvalues = &settings; - - return RUN_ALL_TESTS(); -} diff --git a/rocrtst/suites/performance/main.h b/rocrtst/suites/performance/main.h deleted file mode 100755 index d7d4425f84..0000000000 --- a/rocrtst/suites/performance/main.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * ============================================================================= - * ROC Runtime Conformance Release License - * ============================================================================= - * The University of Illinois/NCSA - * Open Source License (NCSA) - * - * Copyright (c) 2017, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Developed by: - * - * AMD Research and AMD ROC Software Development - * - * Advanced Micro Devices, Inc. - * - * www.amd.com - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal with the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in - * the documentation and/or other materials provided with the distribution. - * - Neither the names of , - * nor the names of its contributors may be used to endorse or promote - * products derived from this Software without specific prior written - * permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS WITH THE SOFTWARE. - * - */ -#ifndef ROCRTST_SUITES_PERFORMANCE_MAIN_H_ -#define ROCRTST_SUITES_PERFORMANCE_MAIN_H_ - -#endif // ROCRTST_SUITES_PERFORMANCE_MAIN_H_ - diff --git a/rocrtst/suites/performance/test_case_template.cc b/rocrtst/suites/performance/test_case_template.cc deleted file mode 100755 index 65f24ae7b0..0000000000 --- a/rocrtst/suites/performance/test_case_template.cc +++ /dev/null @@ -1,395 +0,0 @@ -/* - * ============================================================================= - * ROC Runtime Conformance Release License - * ============================================================================= - * The University of Illinois/NCSA - * Open Source License (NCSA) - * - * Copyright (c) 2017, Advanced Micro Devices, Inc. - * All rights reserved. - * - * Developed by: - * - * AMD Research and AMD ROC Software Development - * - * Advanced Micro Devices, Inc. - * - * www.amd.com - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal with the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimers in - * the documentation and/or other materials provided with the distribution. - * - Neither the names of , - * nor the names of its contributors may be used to endorse or promote - * products derived from this Software without specific prior written - * permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS WITH THE SOFTWARE. - * - */ - -// The purpose of this test is to provide an example of the use of the -// common RocrTest classes and utilities that are used in many examples. -// It can be used as a template to start off with when writing new tests. -// In many cases, the existing boilerplate code will be sufficient as is. -// Otherwise, the boilerplate code can be either supplemented or replaced -// by your own code in your example, as necessary. -// -// The comments provided are focused more on the use of the common rocrtst -// utilities and boilerplate code, rather than the example app. itself. -// -// The boilerplate code includes code for: -// * hsa initialization and clean up -// * code to load pre-built kernels -// * creating queues -// * populating AQL packets -// * checking for required profiles -// * finding cpu and gpu agents (callbacks for common use cases) -// * finding pools (having common requirements) -// * allocating and setting kernel arguments -// * somewhat standardized output -// * handling additional command line arguments, beyond google-test arguments -// * support for various level of verbosity, controlled from command line arg -// * support for building OpenCL kernels -// * timer support -// -// Overview of RocrTst code organization: -// Classes: -// * class BaseRocR (base_rocr.h) -- base class for all rocrtst examples and -// tests. Most of the rocrtst common utilities act on BaseRocR objects -// -// * TestBase (test_base.h) -- derives from BaseRocR and is the base class -// for all tests under /suites. The implementation in TestBase -// methods are typically actions that are required for most/all tests and -// should therefore be called from the derived implementions of the methods. -// -// Utilities: -// * /common/base_rocr_utils. contains a set of utilities -// that act on BaseRocR objects. -// -// * /common/common. contain other non-BaseRocR utilities -// -// Special Files: -// * main.cc -- The main google test file from which the tests are invoked. -// There should be an entry for each test to be run there. -// -// * kernels -- OpenCL kernel source files should go in the kernels directory -// -// * CMakeLists.txt -- Host code (*.cc and *.h files) should build without -// modifying the CMakeList.txt file, if the files are place in the -// "performance" directory. However, an entry for OpenCL kernels. For -// each kernel to be built, the bitcode libraries must be indicated before -// the call to "build_kernel()" is made. See existing code for examples. - -#include -#include -#include - -#include "suites/performance/test_case_template.h" -#include "common/base_rocr_utils.h" -#include "common/common.h" -#include "common/helper_funcs.h" -#include "common/hsatimer.h" -#include "gtest/gtest.h" -#include "hsa/hsa.h" -#include "hsa/hsa_ext_finalize.h" - -static const uint32_t kNumBufferElements = 256; - -#define RET_IF_HSA_ERR(err) { \ - if ((err) != HSA_STATUS_SUCCESS) { \ - const char* msg = 0; \ - hsa_status_string(err, &msg); \ - std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \ - __FILE__ << ". Call returned " << err << std::endl; \ - std::cout << msg << std::endl; \ - return (err); \ - } \ -} - -// Many test cases want to perform an operation on memory sizes of various -// granularities. -#if 0 -static const int kNumGranularity = 20; -const char* Str[kNumGranularity] = {"1k", "2K", "4K", "8K", "16K", "32K", - "64K", "128K", "256K", "512K", "1M", "2M", "4M", "8M", "16M", "32M", - "64M", "128M", "256M", "512M"}; - -const size_t Size[kNumGranularity] = { - 1024, 2*1024, 4*1024, 8*1024, 16*1024, 32*1024, 64*1024, 128*1024, - 256*1024, 512*1024, 1024*1024, 2048*1024, 4096*1024, 8*1024*1024, - 16*1024*1024, 32*1024*1024, 64*1024*1024, 128*1024*1024, 256*1024*1024, - 512*1024*1024}; - -static const int kMaxCopySize = Size[kNumGranularity - 1]; -#endif -TestExample::TestExample(void) : - TestBase() { - set_num_iteration(10); // Number of iterations to execute of the main test; - // This is a default value which can be overridden - // on the command line. - set_title("Test Case Example"); - set_description("Put a description of the test case here. Line breaks " - "will be taken care of on output, not here."); - - set_kernel_file_name("test_case_template_kernels.hsaco"); - set_kernel_name("square"); // kernel function name - -#if 0 - // Set required profile to HSA_PROFILE_FULL or HSA_PROFILE_BASE if it - // matters for this test. If either profile is fine, then leave with - // default - set_requires_profile(); -#endif -} - -TestExample::~TestExample(void) { -} - -// Any 1-time setup involving member variables used in the rest of the test -// should be done here. -void TestExample::SetUp(void) { - hsa_status_t err; - - // TestBase::SetUp() will set HSA_ENABLE_INTERRUPT if enable_interrupt() is - // true, and call hsa_init(). It also prints the SetUp header. - TestBase::SetUp(); - - // SetDefaultAgents(this) will assign the first CPU and GPU found on - // iterating through the agents and assign them to cpu_device_ and - // gpu_device1_, respectively (cpu_device() and gpu_device1()). These - // BaseRocR member variables are used in some utilities. Additionally, - // SetDefaultAgents() checks the profile of the gpu and compares this - // to any required profile. - // - // If SetDefaultAgents() is not used, if the profile of the target GPU - // matters for this test, it should be set with set_profile() and - // CheckProfileAndInform() should be called to check if it is the - // required profile - err = rocrtst::SetDefaultAgents(this); - ASSERT_EQ(HSA_STATUS_SUCCESS, err); - - hsa_agent_t* gpu_dev = gpu_device1(); - - // Find and assign HSA_AMD_SEGMENT_GLOBAL pools for cpu, gpu and a kern_arg - // pool - err = rocrtst::SetPoolsTypical(this); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - // Create a queue - hsa_queue_t* q = nullptr; - rocrtst::CreateQueue(*gpu_dev, &q); - ASSERT_NE(q, nullptr); - set_main_queue(q); - - err = rocrtst::LoadKernelFromObjFile(this); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - // Fill up the kernel packet (except header) with some values we've - // collected so far, and some reasonable default values; this should be after - // LoadKernelFromObjFile(). AllocAndSetKernArgs() will fill in the kern_args - err = rocrtst::InitializeAQLPacket(this, &aql()); - ASSERT_EQ(HSA_STATUS_SUCCESS, err); - - hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()}; - - // Allocate a few buffers for our example - err = hsa_amd_memory_pool_allocate(cpu_pool(), - kNumBufferElements*sizeof(uint32_t), - 0, reinterpret_cast(&src_buffer_)); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - err = hsa_amd_agents_allow_access(2, ag_list, NULL, src_buffer_); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - // Initialize the source buffer - for (uint32_t i = 0; i < kNumBufferElements; ++i) { - reinterpret_cast(src_buffer_)[i] = i; - } - - err = hsa_amd_memory_pool_allocate(cpu_pool(), - kNumBufferElements*sizeof(uint32_t), - 0, reinterpret_cast(&dst_buffer_)); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst_buffer_); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - // Set up Kernel arguments - // See the meta-data for the compiled OpenCL kernel code to ascertain - // the sizes, padding and alignment required for kernel arguments. - // This can be seen by executing - // $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco - // The kernel code will expect the following arguments aligned as shown. -// typedef uint32_t uint4[4]; - struct __attribute__((aligned(16))) local_args_t { - uint32_t* dstArray; - uint32_t* srcArray; - uint32_t size; - uint32_t pad; - uint64_t global_offset_x; - uint64_t global_offset_y; - uint64_t global_offset_z; - } local_args; - - local_args.dstArray = reinterpret_cast(dst_buffer_); - local_args.srcArray = reinterpret_cast(src_buffer_); - local_args.size = kNumBufferElements; - local_args.global_offset_x = 0; - local_args.global_offset_y = 0; - local_args.global_offset_z = 0; - - err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args)); - ASSERT_EQ(err, HSA_STATUS_SUCCESS); - - return; -} - -// This wrapper atomically writes the provided header and setup to the -// provided AQL packet. The provided AQL packet address should be in the -// queue memory space. -static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup, - hsa_kernel_dispatch_packet_t* queue_packet) { - __atomic_store_n(reinterpret_cast(queue_packet), - header | (setup << 16), __ATOMIC_RELEASE); -} - -// Do a few extra iterations as we toss out some of the inital and final -// iterations when calculating statistics -uint32_t TestExample::RealIterationNum(void) { - return num_iteration() * 1.2 + 1; -} - -static bool VerifyResult(uint32_t *ar, size_t sz) { - for (size_t i = sz; i < sz; ++i) { - if (i*i != ar[i]) { - return false; - } - } - return true; -} -void TestExample::Run(void) { - // Compare required profile for this test case with what we're actually - // running on - if (!rocrtst::CheckProfile(this)) { - return; - } - - TestBase::Run(); - - // Override whatever we need to... - aql().workgroup_size_x = kNumBufferElements; - aql().grid_size_x = kNumBufferElements; - - std::vector timer; - - int it = RealIterationNum(); - hsa_kernel_dispatch_packet_t *queue_aql_packet; - - rocrtst::PerfTimer p_timer; - uint64_t index; - - for (int i = 0; i < it; i++) { - // This function simply copies the data we've collected so far into our - // local AQL packet, except the the setup and header fields. - queue_aql_packet = WriteAQLToQueue(this, &index); - ASSERT_EQ(queue_aql_packet, - reinterpret_cast - (main_queue()->base_address) + index); - uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH; - - aql_header |= HSA_FENCE_SCOPE_SYSTEM << - HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; - aql_header |= HSA_FENCE_SCOPE_SYSTEM << - HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; - - // Create and start a timer for this iteration - int id = p_timer.CreateTimer(); - p_timer.StartTimer(id); - - AtomicSetPacketHeader(aql_header, aql().setup, queue_aql_packet); - - hsa_signal_store_screlease(main_queue()->doorbell_signal, index); - - // Wait on the dispatch signal until the kernel is finished. - while (hsa_signal_wait_scacquire(aql().completion_signal, - HSA_SIGNAL_CONDITION_LT, 1, (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE)) { - } - - // Stop the timer - p_timer.StopTimer(id); - - // Store time for later analysis - timer.push_back(p_timer.ReadTimer(id)); - hsa_signal_store_screlease(aql().completion_signal, 1); - - ASSERT_TRUE(VerifyResult(reinterpret_cast(dst_buffer_), - kNumBufferElements)); - - // Pay attention to verbosity level for things like progress output - if (verbosity() >= VERBOSE_PROGRESS) { - std::cout << "."; - fflush(stdout); - } - } - - if (verbosity() >= VERBOSE_PROGRESS) { - std::cout << std::endl; - } - - // Abandon the first result and after sort, delete the last 2% value - timer.erase(timer.begin()); - std::sort(timer.begin(), timer.end()); - timer.erase(timer.begin() + num_iteration(), timer.end()); - - time_mean_ = rocrtst::CalcMean(timer); -} - -void TestExample::DisplayTestInfo(void) { - TestBase::DisplayTestInfo(); -} - -void TestExample::DisplayResults(void) const { - // Compare required profile for this test case with what we're actually - // running on - if (!rocrtst::CheckProfile(this)) { - return; - } - - TestBase::DisplayResults(); - std::cout << "The average time was: " << time_mean_ * 1e6 << - " uS" << std::endl; - return; -} - -void TestExample::Close() { - hsa_status_t err; - - err = hsa_amd_memory_pool_free(src_buffer_); - ASSERT_EQ(HSA_STATUS_SUCCESS, err); - - err = hsa_amd_memory_pool_free(dst_buffer_); - ASSERT_EQ(HSA_STATUS_SUCCESS, err); - - // This will close handles opened within rocrtst utility calls and call - // hsa_shut_down(), so it should be done after other hsa cleanup - TestBase::Close(); -} - - -#undef RET_IF_HSA_ERR diff --git a/rocrtst/suites/test_common/main.cc b/rocrtst/suites/test_common/main.cc index 02cd188a39..3e8adaae96 100755 --- a/rocrtst/suites/test_common/main.cc +++ b/rocrtst/suites/test_common/main.cc @@ -48,6 +48,7 @@ #include #include "gtest/gtest.h" +#include "suites/functional/memory_basic.h" #include "suites/performance/dispatch_time.h" #include "suites/performance/memory_async_copy.h" #include "suites/test_common/test_case_template.h" @@ -74,21 +75,39 @@ static bool GetMonitorDevices(const std::shared_ptr &d, return false; } - -static void RunTest(TestBase *test) { +static void SetFlags(TestBase *test) { assert(sRocrtstGlvalues != nullptr); test->set_verbosity(sRocrtstGlvalues->verbosity); test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity); test->set_num_iteration(sRocrtstGlvalues->num_iterations); test->set_monitor_devices(&sRocrtstGlvalues->monitor_devices); +} + + +static void RunCustomTestProlog(TestBase *test) { + SetFlags(test); test->DisplayTestInfo(); test->SetUp(); test->Run(); + return; +} +static void RunCustomTestEpilog(TestBase *test) { test->DisplayResults(); test->Close(); + return; +} +// If the test case one big test, you should use RunGenericTest() +// to run the test case. OTOH, if the test case consists of multiple +// functions to be run as separate tests, follow this pattern: +// * RunCustomTestProlog(test) // Run() should contain minimal code +// * +// * RunCustomTestEpilog(test) +static void RunGenericTest(TestBase *test) { + RunCustomTestProlog(test); + RunCustomTestEpilog(test); return; } @@ -96,9 +115,9 @@ static void RunTest(TestBase *test) { // TEST(rocrtst, Perf_) { // ; // -// // Copy and modify implementation of RunTest() if you need to deviate +// // Copy and modify implementation of RunGenericTest() if you need to deviate // // from the standard pattern implemented there. -// RunTest(&); +// RunGenericTest(&); // } TEST(rocrtst, Test_Example) { @@ -107,7 +126,15 @@ TEST(rocrtst, Test_Example) { rocrtst::smi::RocmSMI hw; hw.DiscoverDevices(); - RunTest(&tst); + RunGenericTest(&tst); +} + +TEST(rocrtstFunc, Memory_Max_Mem) { + MemoryTest mt; + + RunCustomTestProlog(&mt); + mt.MaxSingleAllocationTest(); + RunCustomTestEpilog(&mt); } TEST(rocrtstPerf, Memory_Async_Copy) { @@ -119,27 +146,27 @@ TEST(rocrtstPerf, Memory_Async_Copy) { // mac.set_dst_pool(); // The default is to and from the cpu to 1 gpu, and to/from a gpu to // another gpu - RunTest(&mac); + RunGenericTest(&mac); } TEST(rocrtstPerf, AQL_Dispatch_Time_Single_SpinWait) { DispatchTime dt(true, true); - RunTest(&dt); + RunGenericTest(&dt); } TEST(rocrtstPerf, AQL_Dispatch_Time_Single_Interrupt) { DispatchTime dt(false, true); - RunTest(&dt); + RunGenericTest(&dt); } TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_SpinWait) { DispatchTime dt(true, false); - RunTest(&dt); + RunGenericTest(&dt); } TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_Interrupt) { DispatchTime dt(false, false); - RunTest(&dt); + RunGenericTest(&dt); } int main(int argc, char** argv) {