Added max. single mem. allocation test.

Change-Id: Ie81c6af0502fde56225b1e197801cf04b474feb2


[ROCm/ROCR-Runtime commit: cf24f7bb78]
This commit is contained in:
Chris Freehill
2017-07-31 12:04:55 -05:00
parent b39089e54c
commit 9aae431f6d
12 ha cambiato i file con 435 aggiunte e 1083 eliminazioni
+1 -2
Vedi File
@@ -8,5 +8,4 @@
*.bk
*.old
*.cmake
build/*
build
@@ -49,6 +49,8 @@
#include <assert.h>
#include <sstream>
#include <string>
#include <memory>
namespace rocrtst {
@@ -169,20 +171,52 @@ hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
POOL_PROP_OFF, POOL_PROP_DONT_CARE);
}
static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool,
std::string* out_str) {
// Populate the vector with handles to all agents and pools
hsa_status_t
GetAgentPools(std::vector<std::shared_ptr<agent_pools_t>> *agent_pools) {
hsa_status_t err;
uint32_t global_flag = 0;
assert(agent_pools != nullptr);
auto save_agent = [](hsa_agent_t a, void *data)->hsa_status_t {
std::vector<std::shared_ptr<agent_pools_t>> *ag_vec;
hsa_status_t err;
assert(data != nullptr);
ag_vec =
reinterpret_cast<std::vector<std::shared_ptr<agent_pools_t>> *>(data);
std::shared_ptr<agent_pools_t> ag(new agent_pools_t);
ag->agent = a;
auto save_pool = [](hsa_amd_memory_pool_t p, void *data)->hsa_status_t {
assert(data != nullptr);
std::vector<hsa_amd_memory_pool_t> *p_list =
reinterpret_cast<std::vector<hsa_amd_memory_pool_t> *>(data);
p_list->push_back(p);
return HSA_STATUS_SUCCESS;
};
err = hsa_amd_agent_iterate_memory_pools(a, save_pool,
reinterpret_cast<void *>(&ag->pools));
assert(err == HSA_STATUS_SUCCESS);
ag_vec->push_back(ag);
return HSA_STATUS_SUCCESS;
};
err = hsa_iterate_agents(save_agent, reinterpret_cast<void *>(agent_pools));
return err;
}
static hsa_status_t MakeGlobalFlagsString(const pool_info_t *pool_i,
std::string* out_str) {
uint32_t global_flag = pool_i->global_flag;
assert(out_str != nullptr);
*out_str = "";
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag);
RET_IF_HSA_COMMON_ERR(err);
std::vector < std::string > flags;
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) {
@@ -207,22 +241,17 @@ static hsa_status_t MakeGlobalFlagsString(const hsa_amd_memory_pool_t pool,
return HSA_STATUS_SUCCESS;
}
static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool,
static hsa_status_t DumpSegment(const pool_info_t *pool_i,
std::string const *ind_lvl) {
uint32_t segment;
hsa_status_t err;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
&segment);
RET_IF_HSA_COMMON_ERR(err);
fprintf(stdout, "%s%-25s", ind_lvl->c_str(), "Pool Segment:");
std::string seg_str = "";
std::string tmp_str;
switch (segment) {
switch (pool_i->segment) {
case HSA_AMD_SEGMENT_GLOBAL:
err = MakeGlobalFlagsString(pool, &tmp_str);
err = MakeGlobalFlagsString(pool_i, &tmp_str);
RET_IF_HSA_COMMON_ERR(err);
seg_str += "GLOBAL; FLAGS: " + tmp_str;
@@ -250,57 +279,71 @@ static hsa_status_t DumpSegment(const hsa_amd_memory_pool_t pool,
return HSA_STATUS_SUCCESS;
}
hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
uint32_t indent) {
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
pool_info_t *pool_i) {
hsa_status_t err;
std::string ind_lvl(indent, ' ');
DumpSegment(pool, &ind_lvl);
// Get the size of the POOL
size_t pool_size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
&pool_size);
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
RET_IF_HSA_COMMON_ERR(err);
std::string sz_str = std::to_string(pool_size / 1024) + "KB";
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
&pool_i->segment);
RET_IF_HSA_COMMON_ERR(err);
// Get the size of the POOL
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
&pool_i->pool_size);
RET_IF_HSA_COMMON_ERR(err);
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
&pool_i->alloc_allowed);
RET_IF_HSA_COMMON_ERR(err);
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
&pool_i->alloc_granule);
RET_IF_HSA_COMMON_ERR(err);
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
&pool_i->pool_alloc_alignment);
RET_IF_HSA_COMMON_ERR(err);
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
&pool_i->pl_access);
RET_IF_HSA_COMMON_ERR(err);
return HSA_STATUS_SUCCESS;
}
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
uint32_t indent) {
std::string ind_lvl(indent, ' ');
DumpSegment(pool_i, &ind_lvl);
std::string sz_str = std::to_string(pool_i->pool_size / 1024) + "KB";
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Size:",
sz_str.c_str());
bool alloc_allowed = false;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
RET_IF_HSA_COMMON_ERR(err);
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Allocatable:",
(alloc_allowed ? "TRUE" : "FALSE"));
(pool_i->alloc_allowed ? "TRUE" : "FALSE"));
size_t alloc_granule = 0;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &alloc_granule);
RET_IF_HSA_COMMON_ERR(err);
std::string gr_str = std::to_string(alloc_granule / 1024) + "KB";
std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB";
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Granule:",
gr_str.c_str());
size_t pool_alloc_alignment = 0;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
&pool_alloc_alignment);
RET_IF_HSA_COMMON_ERR(err);
std::string al_str = std::to_string(pool_alloc_alignment / 1024) + "KB";
std::string al_str =
std::to_string(pool_i->pool_alloc_alignment / 1024) + "KB";
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Alloc Alignment:",
al_str.c_str());
bool pl_access = 0;
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &pl_access);
RET_IF_HSA_COMMON_ERR(err);
fprintf(stdout, "%s%-25s%-35s\n", ind_lvl.c_str(), "Pool Acessible by all:",
(pl_access ? "TRUE" : "FALSE"));
(pool_i->pl_access ? "TRUE" : "FALSE"));
return HSA_STATUS_SUCCESS;
}
@@ -55,6 +55,8 @@
#include <cstdlib>
#include <iostream>
#include <vector>
#include <memory>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/hsa_ext_finalize.h"
@@ -75,6 +77,31 @@ namespace rocrtst {
// refers to it has been corrected
// #define HSA_ARGUMENT_ALIGN_BYTES 16
// This structure holds memory pool information acquired through hsa info
// related calls, and is later used for reference when displaying the
// information.
typedef struct {
uint32_t segment;
size_t pool_size;
bool alloc_allowed;
size_t alloc_granule;
size_t pool_alloc_alignment;
bool pl_access;
uint32_t global_flag;
} pool_info_t;
struct agent_pools_t{
hsa_agent_t agent;
std::vector<hsa_amd_memory_pool_t> pools;
};
/// Fill in the pool_info_t structure for the provided pool.
/// \param[in] pool Pool for which information will be retrieved
/// \param[out] pool_i Pointer to structure where pool info will be stored
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i);
/// If the provided agent is associated with a GPU, return that agent through
/// output parameter. This function is meant to be the call-back function used
/// with hsa_iterate_agents to find GPU agents.
@@ -132,7 +159,7 @@ hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data);
/// \param[in] pool Pool to gather and dump information for
/// \param[in] indent Number of spaces to indent output.
/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors
hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
uint32_t indent = 0);
/// Dump information about a provided pointer to STDOUT.
@@ -140,5 +167,8 @@ hsa_status_t DumpMemoryPoolInfo(const hsa_amd_memory_pool_t pool,
/// \returns HSA_STATUS_SUCCESS if there are no errors
hsa_status_t DumpPointerInfo(void* ptr);
hsa_status_t GetAgentPools(
std::vector<std::shared_ptr<agent_pools_t>> *agent_pools);
} // namespace rocrtst
#endif // ROCRTST_COMMON_COMMON_H_
@@ -0,0 +1,262 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <algorithm>
#include <iostream>
#include <vector>
#include <memory>
#include "suites/functional/memory_basic.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
static const uint32_t kNumBufferElements = 256;
#define RET_IF_HSA_ERR(err) { \
if ((err) != HSA_STATUS_SUCCESS) { \
const char* msg = 0; \
hsa_status_string(err, &msg); \
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
__FILE__ << ". Call returned " << err << std::endl; \
std::cout << msg << std::endl; \
return (err); \
} \
}
MemoryTest::MemoryTest(void) :
TestBase() {
set_num_iteration(10); // Number of iterations to execute of the main test;
// This is a default value which can be overridden
// on the command line.
set_title("RocR Memory Tests");
set_description("This series of tests check memory allocation limits, extent"
" of GPU access to system memory and other memory related functionality.");
}
MemoryTest::~MemoryTest(void) {
}
// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void MemoryTest::SetUp(void) {
hsa_status_t err;
TestBase::SetUp();
err = rocrtst::SetDefaultAgents(this);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}
void MemoryTest::Run(void) {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::Run();
}
void MemoryTest::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
void MemoryTest::DisplayResults(void) const {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
return;
}
void MemoryTest::Close() {
// This will close handles opened within rocrtst utility calls and call
// hsa_shut_down(), so it should be done after other hsa cleanup
TestBase::Close();
}
hsa_status_t MemoryTest::TestAllocate(hsa_amd_memory_pool_t pool, size_t sz) {
void *ptr;
hsa_status_t err;
err = hsa_amd_memory_pool_allocate(pool, sz, 0, &ptr);
if (err == HSA_STATUS_SUCCESS) {
err = hsa_memory_free(ptr);
}
return err;
}
static const char kSubTestSeparator[] = " **************************";
static void PrintMemorySubtestHeader(const char *header) {
std::cout << " *** Memory Subtest: " << header << " ***" << std::endl;
}
// Test Fixtures
void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
hsa_amd_memory_pool_t pool) {
hsa_status_t err;
rocrtst::pool_info_t pool_i;
char ag_name[64];
hsa_device_type_t ag_type;
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
if (verbosity() > 0) {
std::cout << " Agent: " << ag_name << " (";
switch (ag_type) {
case HSA_DEVICE_TYPE_CPU:
std::cout << "CPU)";
break;
case HSA_DEVICE_TYPE_GPU:
std::cout << "GPU)";
break;
case HSA_DEVICE_TYPE_DSP:
std::cout << "DSP)";
break;
}
std::cout << std::endl;
}
err = rocrtst::AcquirePoolInfo(pool, &pool_i);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
if (verbosity() > 0) {
rocrtst::DumpMemoryPoolInfo(&pool_i, 2);
}
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 ||
pool_i.pool_alloc_alignment == 0) {
if (verbosity() > 0) {
std::cout << " Test not applicable. Skipping." << std::endl;
std::cout << kSubTestSeparator << std::endl;
}
return;
}
// Do everything in "granule" units
auto gran_sz = pool_i.alloc_granule;
auto pool_sz = pool_i.pool_size / gran_sz;
// Neg. test: Try to allocate more than the pool size
err = TestAllocate(pool, pool_sz*gran_sz + gran_sz);
EXPECT_EQ(HSA_STATUS_ERROR_INVALID_ALLOCATION, err);
auto max_alloc_size = pool_sz/2;
uint64_t upper_bound = pool_sz;
uint64_t lower_bound = 0;
while (true) {
err = TestAllocate(pool, max_alloc_size * gran_sz);
ASSERT_TRUE(err == HSA_STATUS_SUCCESS ||
err == HSA_STATUS_ERROR_OUT_OF_RESOURCES);
if (err == HSA_STATUS_SUCCESS) {
lower_bound = max_alloc_size;
max_alloc_size += (upper_bound - lower_bound)/2;
} else if (err == HSA_STATUS_ERROR_OUT_OF_RESOURCES) {
upper_bound = max_alloc_size;
max_alloc_size -= (upper_bound - lower_bound)/2;
}
if ((upper_bound - lower_bound) < 2) {
break;
}
ASSERT_GT(upper_bound, lower_bound);
}
if (verbosity() > 0) {
std::cout << " Biggest single allocation size for this pool is " <<
(max_alloc_size * gran_sz)/1024 << "KB." << std::endl;
std::cout << " This is " <<
static_cast<float>(max_alloc_size)/pool_sz*100 <<
"% of the total." << std::endl;
}
if (ag_type == HSA_DEVICE_TYPE_GPU) {
EXPECT_GE((float)max_alloc_size/pool_sz, (float)15/16);
}
if (verbosity() > 0) {
std::cout << kSubTestSeparator << std::endl;
}
}
void MemoryTest::MaxSingleAllocationTest(void) {
hsa_status_t err;
std::vector<std::shared_ptr<rocrtst::agent_pools_t>> agent_pools;
PrintMemorySubtestHeader("Maximum Single Allocation in Memory Pools");
err = rocrtst::GetAgentPools(&agent_pools);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
auto pool_idx = 0;
for (auto a : agent_pools) {
for (auto p : a->pools) {
std::cout << " Pool " << pool_idx++ << ":" << std::endl;
MaxSingleAllocationTest(a->agent, p);
}
}
}
#undef RET_IF_HSA_ERR
@@ -42,20 +42,20 @@
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
#define ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
#ifndef ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_
#define ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_
#include "common/base_rocr.h"
#include "hsa/hsa.h"
#include "suites/test_common/test_base.h"
class TestExample : public TestBase {
class MemoryTest : public TestBase {
public:
TestExample();
MemoryTest();
// @Brief: Destructor for test case of TestExample
virtual ~TestExample();
// @Brief: Destructor for test case of MemoryTest
virtual ~MemoryTest();
// @Brief: Setup the environment for measurement
virtual void SetUp();
@@ -72,12 +72,12 @@ class TestExample : public TestBase {
// @Brief: Display information about what this test does
virtual void DisplayTestInfo(void);
private:
uint32_t RealIterationNum(void);
void MaxSingleAllocationTest(void);
double time_mean_;
void *src_buffer_;
void *dst_buffer_;
hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz);
private:
void MaxSingleAllocationTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool);
};
#endif // ROCRTST_SUITES_PERFORMANCE_TEST_CASE_TEMPLATE_H_
#endif // ROCRTST_SUITES_FUNCTIONAL_MEMORY_BASIC_H_
@@ -1,295 +0,0 @@
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 2.8.0)
#
# GCC 4.8 or higher compiler required.
#
# Setup build environment
#
# 1) Set env. variable specifying the location of ROCR header files
#
# export ROCR_DIR="Root for RocR install"
#
# 2) Set env. variable ROCRTST_BLD_TYPE to either "Debug" or "Release".
# If not set, the default value is "Debug" is bound.
#
# export ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release
#
# 3) Set env. variable ROCRTST_BLD_BITS to either "32" or "64"
# If not set, the default value of "64" is bound.
#
# export ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64
#
# 4) Set env. variable TARGET_DEVICE to indicate gpu type (e.g., gfx803,
# gfx900, ...)
#
# Building rocrtst Suite
#
# 1) Create build folder e.g. "rocrtst/build" - any name will do
# 2) Cd into build folder
# 3) Run "cmake .."
# 4) Run "make"
#
#
# Currently support for Windows platform is not present
#
if(WIN32)
MESSAGE("rocrtst Suite is not supported on Windows platform")
RETURN()
endif()
#
# Process environment variables relating to Build type, size and RT version
#
string(TOLOWER "$ENV{ROCRTST_BLD_TYPE}" tmp)
if("${tmp}" STREQUAL debug)
set(BUILD_TYPE "Debug")
set(ISDEBUG 1)
else()
set(BUILD_TYPE "Release")
set(ISDEBUG 0)
endif()
if("$ENV{ROCRTST_BLD_BITS}" STREQUAL 32)
set (ONLY64STR "")
set (IS64BIT 0)
else()
set (ONLY64STR "64")
set (IS64BIT 1)
endif()
set(ROCR_INC_DIR $ENV{ROCR_DIR}/hsa/include)
set(ROCR_LIB_DIR $ENV{ROCR_DIR}/lib)
#
# Determine ROCR Header files are present
#
if(NOT EXISTS ${ROCR_INC_DIR}/hsa/hsa.h)
MESSAGE("ERROR: ${ROCR_INC_DIR}/hsa/hsa.h does not exist. Check ROCR_DIR env. variable.")
RETURN()
endif()
# Determine ROCR Library files are present
#
if (${IS64BIT} EQUAL 0)
if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime.so)
MESSAGE("ERROR: Environment variable ROCR_LIB_DIR pointing to ROCR libraries is not set")
RETURN()
endif()
else()
if(NOT EXISTS ${ROCR_LIB_DIR}/libhsa-runtime64.so)
MESSAGE("ERROR: Environment variable ROCR_LIB_DIR pointing to ROCR libraries is not set")
RETURN()
endif()
endif()
if (DEFINED ENV{OPENCL_DIR})
set(CLANG $ENV{OPENCL_DIR}/bin/x86_64/clang)
set(OPENCL_DIR $ENV{OPENCL_DIR})
if (NOT EXISTS ${CLANG})
message("ERROR: path to clang (${CLANG}) is not valid. Is env. variable OPENCL_DIR correct?")
return()
endif()
if (DEFINED ENV{OPENCL_VER})
set(OPENCL_VER $ENV{OPENCL_VER})
else()
message("OPENCL_VER environment variable is not set. Using default")
set(OPENCL_VER "2.0")
endif()
else()
message("WARNING: OPENCL_DIR environment variable is not set. Kernels will not be built.")
endif()
if (DEFINED ENV{TARGET_DEVICE})
set(TARGET_DEVICE $ENV{TARGET_DEVICE})
else()
message("ERROR: TARGET_DEVICE environment variable is not defined.")
message("Please define a valid clang target (e.g., gfx803, gfx900,...).")
return()
endif()
#
# Set Name for rocrtst Suite Project
#
set(ROCRTST_SUITE_NAME "rocrtst${ONLY64STR}")
project (${ROCRTST_SUITE_NAME})
#
# Print out the build configuration being used:
#
# Build Src directory
# Build Binary directory
# Build Type: Debug Vs Release, 32 Vs 64
# Compiler Version, etc
#
message("")
message("Build Configuration:")
message("-------------IS64BIT: " ${IS64BIT})
message("-----------BuildType: " ${BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("-------Target Device: " ${TARGET_DEVICE})
message("----------Clang path: " ${CLANG})
message("-------OpenCL version " ${OPENCL_VER})
message("")
set(KERNELS_DIR ${PROJECT_SOURCE_DIR}/kernels)
#
# Set the build type based on user input
#
set(CMAKE_BUILD_TYPE ${BUILD_TYPE})
#
# Flag to enable / disable verbose output.
#
SET( CMAKE_VERBOSE_MAKEFILE on )
#
# Compiler pre-processor definitions.
#
# Define MACRO "DEBUG" if build type is "Debug"
if(${BUILD_TYPE} STREQUAL "Debug")
add_definitions(-DDEBUG)
endif()
add_definitions(-D__linux__)
add_definitions(-DLITTLEENDIAN_CPU=1)
#
# Linux Compiler options
#
set(CMAKE_CXX_FLAGS "-std=c++11 ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
#
# Extend the compiler flags for 64-bit builds
#
if (IS64BIT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
endif()
#
# Add compiler flags to include symbol information for debug builds
#
if(ISDEBUG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0")
endif()
MESSAGE("ISDEBUG STEP:Done")
set(ROCRTST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
# Set Name for Google Test Framework and build it as a
# static library to be linked with user test programs
#
set(GOOGLE_TEST_FRWK_NAME "google-test-frwk${ONLY64STR}")
add_subdirectory(${ROCRTST_ROOT}/gtest "${PROJECT_BINARY_DIR}/gtest")
set (ROCRTST_LIBS ${ROCRTST_LIBS} ${GOOGLE_TEST_FRWK_NAME}
hsa-runtime-tools${ONLY64STR})
MESSAGE("ROCRTST_LIBS SET STEP:Done")
#
#
# Other source directories
aux_source_directory(${ROCRTST_ROOT}/common common_srcs)
aux_source_directory(${ROCRTST_ROOT}/common/rocm_smi common_smi_srcs)
#
# Specify the directory containing various libraries of ROCR
# to be linked against for building ROC Perf applications
#
LINK_DIRECTORIES(${ROCR_LIB_DIR})
#
# Extend the list of libraries to be used for linking ROC Perf Apps
#
set(ROCRTST_LIBS ${ROCRTST_LIBS} hsa-runtime${ONLY64STR})
# Set Name for rocrtst
MESSAGE(${ROCRTST_LIBS})
set(ROCRTST "rocrtst${ONLY64STR}")
#
# Source files for building rocrtst
#
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} performanceSources)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/test_common testCommonSources)
aux_source_directory(${ROCRTST_ROOT}/suites/test_common testCommonSources)
# Header file include path
include_directories(${ROCR_INC_DIR})
include_directories(${ROCRTST_ROOT})
include_directories(${ROCRTST_ROOT}/gtest/include)
# Use this function to build any samples that have kernels to be built
function(build_kernel S_NAME)
set(SNAME_KERNEL "${S_NAME}_kernels.hsaco")
set(TARG_NAME "${S_NAME}_hsaco")
set(HSACO_TARG_LIST ${HSACO_TARG_LIST} ${TARG_NAME} PARENT_SCOPE)
separate_arguments(CLANG_ARG_LIST UNIX_COMMAND "-target amdgcn-amdh-amdhsa -mcpu=${TARGET_DEVICE} -include ${OPENCL_DIR}/include/opencl-c.h ${BITCODE_LIBS} -cl-std=CL${OPENCL_VER} ${CL_FILE_LIST} -o ${PROJECT_BINARY_DIR}/${SNAME_KERNEL}")
add_custom_target(${TARG_NAME} ${CLANG} ${CLANG_ARG_LIST}
COMMENT "BUILDING KERNEL..."
VERBATIM)
endfunction(build_kernel)
######################
# Kernel Build Section
######################
set(KERN_SUFFIX "kernels.hsaco")
set(BITCODE_PREF "-Xclang -mlink-bitcode-file -Xclang")
set(BITCODE_PREF "${BITCODE_PREF} ${OPENCL_DIR}/lib/x86_64/bitcode")
set(COMMON_BITCODE_LIBS "${BITCODE_PREF}/opencl.amdgcn.bc")
set(COMMON_BITCODE_LIBS "${COMMON_BITCODE_LIBS} ${BITCODE_PREF}/ockl.amdgcn.bc")
# To build kernels, repeat the pattern used below for the P2P kernel; this
# pattern sets the bitcode libraries required by the kernel which will be
# used in the build_kernel() call, which builds the kernel.
# Test Case Template example
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/test_case_template_kernels.cl")
build_kernel("test_case_template")
# P2P Memory Access
#set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
#set(CL_FILE_LIST "${KERNELS_DIR}/p2p_mem_access_kernels.cl")
#build_kernel("p2p_mem_access")
# Dispatch Time
set(BITCODE_LIBS "${COMMON_BITCODE_LIBS}")
set(CL_FILE_LIST "${KERNELS_DIR}/dispatch_time_kernels.cl")
build_kernel("dispatch_time")
# Build rules
add_executable(${ROCRTST} ${performanceSources} ${common_srcs}
${common_smi_srcs} ${testCommonSources})
target_link_libraries(${ROCRTST} ${ROCRTST_LIBS} c stdc++ dl pthread rt)
add_custom_target(rocrtst_kernels DEPENDS ${HSACO_TARG_LIST})
INSTALL(TARGETS ${ROCRTST}
ARCHIVE DESTINATION ${PROJECT_BINARY_DIR}/lib
LIBRARY DESTINATION ${PROJECT_BINARY_DIR}/lib
RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin)
@@ -1,50 +0,0 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
__kernel void
empty_kernel(void) {
return;
}
@@ -1,54 +0,0 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
__kernel void
square(__global int *dstArray, __global const int *srcArray, const int sz) {
unsigned int id = get_global_id(0);
if (id < sz) {
dstArray[id] = srcArray[id] * srcArray[id];
}
return;
}
@@ -1,166 +0,0 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <string>
#include <vector>
#include <memory>
#include "gtest/gtest.h"
#include "suites/performance/dispatch_time.h"
#include "suites/performance/memory_async_copy.h"
#include "suites/performance/test_case_template.h"
#include "suites/performance/main.h"
#include "suites/test_common/test_common.h"
#include "common/rocm_smi/rocm_smi.h"
static RocrTstGlobals *sRocrtstGlvalues = nullptr;
static bool GetMonitorDevices(const std::shared_ptr<rocrtst::smi::Device> &d,
void *p) {
std::string val_str;
assert(p != nullptr);
std::vector<std::shared_ptr<rocrtst::smi::Device>> *device_list =
reinterpret_cast<std::vector<std::shared_ptr<rocrtst::smi::Device>> *>(p);
if (d->monitor() != nullptr) {
device_list->push_back(d);
}
return false;
}
static void RunTest(TestBase *test) {
assert(sRocrtstGlvalues != nullptr);
test->set_verbosity(sRocrtstGlvalues->verbosity);
test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity);
test->set_num_iteration(sRocrtstGlvalues->num_iterations);
test->set_monitor_devices(&sRocrtstGlvalues->monitor_devices);
test->DisplayTestInfo();
test->SetUp();
test->Run();
test->DisplayResults();
test->Close();
return;
}
// TEST ENTRY TEMPLATE:
// TEST(rocrtst, Perf_<test name>) {
// <Test Implementation class> <test_obj>;
//
// // Copy and modify implementation of RunTest() if you need to deviate
// // from the standard pattern implemented there.
// RunTest(&<test_obj>);
// }
TEST(rocrtst, Test_Example) {
TestExample tst;
rocrtst::smi::RocmSMI hw;
hw.DiscoverDevices();
RunTest(&tst);
}
TEST(rocrtst, Perf_Memory_Async_Copy) {
MemoryAsyncCopy mac;
// To do full test, uncomment this:
// mac.set_full_test(true);
// To test only 1 path, add lines like this:
// mac.set_src_pool(<src pool id>);
// mac.set_dst_pool(<dst pool id>);
// The default is to and from the cpu to 1 gpu, and to/from a gpu to
// another gpu
RunTest(&mac);
}
TEST(rocrtst, Perf_Dispatch_Time_Single_SpinWait) {
DispatchTime dt(true, true);
RunTest(&dt);
}
TEST(rocrtst, Perf_Dispatch_Time_Single_Interrupt) {
DispatchTime dt(false, true);
RunTest(&dt);
}
TEST(rocrtst, Perf_Dispatch_Time_Multi_SpinWait) {
DispatchTime dt(true, false);
RunTest(&dt);
}
TEST(rocrtst, Perf_Dispatch_Time_Multi_Interrupt) {
DispatchTime dt(false, false);
RunTest(&dt);
}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
RocrTstGlobals settings;
settings.verbosity = 1;
settings.monitor_verbosity = 1;
settings.num_iterations = 0;
if (ProcessCmdline(&settings, argc, argv)) {
return 1;
}
rocrtst::smi::RocmSMI hw;
hw.DiscoverDevices();
hw.IterateSMIDevices(
GetMonitorDevices, reinterpret_cast<void *>(&settings.monitor_devices));
sRocrtstGlvalues = &settings;
return RUN_ALL_TESTS();
}
@@ -1,49 +0,0 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCRTST_SUITES_PERFORMANCE_MAIN_H_
#define ROCRTST_SUITES_PERFORMANCE_MAIN_H_
#endif // ROCRTST_SUITES_PERFORMANCE_MAIN_H_
@@ -1,395 +0,0 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
// The purpose of this test is to provide an example of the use of the
// common RocrTest classes and utilities that are used in many examples.
// It can be used as a template to start off with when writing new tests.
// In many cases, the existing boilerplate code will be sufficient as is.
// Otherwise, the boilerplate code can be either supplemented or replaced
// by your own code in your example, as necessary.
//
// The comments provided are focused more on the use of the common rocrtst
// utilities and boilerplate code, rather than the example app. itself.
//
// The boilerplate code includes code for:
// * hsa initialization and clean up
// * code to load pre-built kernels
// * creating queues
// * populating AQL packets
// * checking for required profiles
// * finding cpu and gpu agents (callbacks for common use cases)
// * finding pools (having common requirements)
// * allocating and setting kernel arguments
// * somewhat standardized output
// * handling additional command line arguments, beyond google-test arguments
// * support for various level of verbosity, controlled from command line arg
// * support for building OpenCL kernels
// * timer support
//
// Overview of RocrTst code organization:
// Classes:
// * class BaseRocR (base_rocr.h) -- base class for all rocrtst examples and
// tests. Most of the rocrtst common utilities act on BaseRocR objects
//
// * TestBase (test_base.h) -- derives from BaseRocR and is the base class
// for all tests under <rocrtst root>/suites. The implementation in TestBase
// methods are typically actions that are required for most/all tests and
// should therefore be called from the derived implementions of the methods.
//
// Utilities:
// * <rocrtst root>/common/base_rocr_utils.<cc/h> contains a set of utilities
// that act on BaseRocR objects.
//
// * <rocrtst root>/common/common.<cc/h> contain other non-BaseRocR utilities
//
// Special Files:
// * main.cc -- The main google test file from which the tests are invoked.
// There should be an entry for each test to be run there.
//
// * kernels -- OpenCL kernel source files should go in the kernels directory
//
// * CMakeLists.txt -- Host code (*.cc and *.h files) should build without
// modifying the CMakeList.txt file, if the files are place in the
// "performance" directory. However, an entry for OpenCL kernels. For
// each kernel to be built, the bitcode libraries must be indicated before
// the call to "build_kernel()" is made. See existing code for examples.
#include <algorithm>
#include <iostream>
#include <vector>
#include "suites/performance/test_case_template.h"
#include "common/base_rocr_utils.h"
#include "common/common.h"
#include "common/helper_funcs.h"
#include "common/hsatimer.h"
#include "gtest/gtest.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_finalize.h"
static const uint32_t kNumBufferElements = 256;
#define RET_IF_HSA_ERR(err) { \
if ((err) != HSA_STATUS_SUCCESS) { \
const char* msg = 0; \
hsa_status_string(err, &msg); \
std::cout << "hsa api call failure at line " << __LINE__ << ", file: " << \
__FILE__ << ". Call returned " << err << std::endl; \
std::cout << msg << std::endl; \
return (err); \
} \
}
// Many test cases want to perform an operation on memory sizes of various
// granularities.
#if 0
static const int kNumGranularity = 20;
const char* Str[kNumGranularity] = {"1k", "2K", "4K", "8K", "16K", "32K",
"64K", "128K", "256K", "512K", "1M", "2M", "4M", "8M", "16M", "32M",
"64M", "128M", "256M", "512M"};
const size_t Size[kNumGranularity] = {
1024, 2*1024, 4*1024, 8*1024, 16*1024, 32*1024, 64*1024, 128*1024,
256*1024, 512*1024, 1024*1024, 2048*1024, 4096*1024, 8*1024*1024,
16*1024*1024, 32*1024*1024, 64*1024*1024, 128*1024*1024, 256*1024*1024,
512*1024*1024};
static const int kMaxCopySize = Size[kNumGranularity - 1];
#endif
TestExample::TestExample(void) :
TestBase() {
set_num_iteration(10); // Number of iterations to execute of the main test;
// This is a default value which can be overridden
// on the command line.
set_title("Test Case Example");
set_description("Put a description of the test case here. Line breaks "
"will be taken care of on output, not here.");
set_kernel_file_name("test_case_template_kernels.hsaco");
set_kernel_name("square"); // kernel function name
#if 0
// Set required profile to HSA_PROFILE_FULL or HSA_PROFILE_BASE if it
// matters for this test. If either profile is fine, then leave with
// default
set_requires_profile(<value>);
#endif
}
TestExample::~TestExample(void) {
}
// Any 1-time setup involving member variables used in the rest of the test
// should be done here.
void TestExample::SetUp(void) {
hsa_status_t err;
// TestBase::SetUp() will set HSA_ENABLE_INTERRUPT if enable_interrupt() is
// true, and call hsa_init(). It also prints the SetUp header.
TestBase::SetUp();
// SetDefaultAgents(this) will assign the first CPU and GPU found on
// iterating through the agents and assign them to cpu_device_ and
// gpu_device1_, respectively (cpu_device() and gpu_device1()). These
// BaseRocR member variables are used in some utilities. Additionally,
// SetDefaultAgents() checks the profile of the gpu and compares this
// to any required profile.
//
// If SetDefaultAgents() is not used, if the profile of the target GPU
// matters for this test, it should be set with set_profile() and
// CheckProfileAndInform() should be called to check if it is the
// required profile
err = rocrtst::SetDefaultAgents(this);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
hsa_agent_t* gpu_dev = gpu_device1();
// Find and assign HSA_AMD_SEGMENT_GLOBAL pools for cpu, gpu and a kern_arg
// pool
err = rocrtst::SetPoolsTypical(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Create a queue
hsa_queue_t* q = nullptr;
rocrtst::CreateQueue(*gpu_dev, &q);
ASSERT_NE(q, nullptr);
set_main_queue(q);
err = rocrtst::LoadKernelFromObjFile(this);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Fill up the kernel packet (except header) with some values we've
// collected so far, and some reasonable default values; this should be after
// LoadKernelFromObjFile(). AllocAndSetKernArgs() will fill in the kern_args
err = rocrtst::InitializeAQLPacket(this, &aql());
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
hsa_agent_t ag_list[2] = {*gpu_device1(), *cpu_device()};
// Allocate a few buffers for our example
err = hsa_amd_memory_pool_allocate(cpu_pool(),
kNumBufferElements*sizeof(uint32_t),
0, reinterpret_cast<void**>(&src_buffer_));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(2, ag_list, NULL, src_buffer_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Initialize the source buffer
for (uint32_t i = 0; i < kNumBufferElements; ++i) {
reinterpret_cast<uint32_t *>(src_buffer_)[i] = i;
}
err = hsa_amd_memory_pool_allocate(cpu_pool(),
kNumBufferElements*sizeof(uint32_t),
0, reinterpret_cast<void**>(&dst_buffer_));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
err = hsa_amd_agents_allow_access(2, ag_list, NULL, dst_buffer_);
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
// Set up Kernel arguments
// See the meta-data for the compiled OpenCL kernel code to ascertain
// the sizes, padding and alignment required for kernel arguments.
// This can be seen by executing
// $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
// The kernel code will expect the following arguments aligned as shown.
// typedef uint32_t uint4[4];
struct __attribute__((aligned(16))) local_args_t {
uint32_t* dstArray;
uint32_t* srcArray;
uint32_t size;
uint32_t pad;
uint64_t global_offset_x;
uint64_t global_offset_y;
uint64_t global_offset_z;
} local_args;
local_args.dstArray = reinterpret_cast<uint32_t *>(dst_buffer_);
local_args.srcArray = reinterpret_cast<uint32_t *>(src_buffer_);
local_args.size = kNumBufferElements;
local_args.global_offset_x = 0;
local_args.global_offset_y = 0;
local_args.global_offset_z = 0;
err = rocrtst::AllocAndSetKernArgs(this, &local_args, sizeof(local_args));
ASSERT_EQ(err, HSA_STATUS_SUCCESS);
return;
}
// This wrapper atomically writes the provided header and setup to the
// provided AQL packet. The provided AQL packet address should be in the
// queue memory space.
static inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
hsa_kernel_dispatch_packet_t* queue_packet) {
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
header | (setup << 16), __ATOMIC_RELEASE);
}
// Do a few extra iterations as we toss out some of the inital and final
// iterations when calculating statistics
uint32_t TestExample::RealIterationNum(void) {
return num_iteration() * 1.2 + 1;
}
static bool VerifyResult(uint32_t *ar, size_t sz) {
for (size_t i = sz; i < sz; ++i) {
if (i*i != ar[i]) {
return false;
}
}
return true;
}
void TestExample::Run(void) {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::Run();
// Override whatever we need to...
aql().workgroup_size_x = kNumBufferElements;
aql().grid_size_x = kNumBufferElements;
std::vector<double> timer;
int it = RealIterationNum();
hsa_kernel_dispatch_packet_t *queue_aql_packet;
rocrtst::PerfTimer p_timer;
uint64_t index;
for (int i = 0; i < it; i++) {
// This function simply copies the data we've collected so far into our
// local AQL packet, except the the setup and header fields.
queue_aql_packet = WriteAQLToQueue(this, &index);
ASSERT_EQ(queue_aql_packet,
reinterpret_cast<hsa_kernel_dispatch_packet_t *>
(main_queue()->base_address) + index);
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
// Create and start a timer for this iteration
int id = p_timer.CreateTimer();
p_timer.StartTimer(id);
AtomicSetPacketHeader(aql_header, aql().setup, queue_aql_packet);
hsa_signal_store_screlease(main_queue()->doorbell_signal, index);
// Wait on the dispatch signal until the kernel is finished.
while (hsa_signal_wait_scacquire(aql().completion_signal,
HSA_SIGNAL_CONDITION_LT, 1, (uint64_t) - 1, HSA_WAIT_STATE_ACTIVE)) {
}
// Stop the timer
p_timer.StopTimer(id);
// Store time for later analysis
timer.push_back(p_timer.ReadTimer(id));
hsa_signal_store_screlease(aql().completion_signal, 1);
ASSERT_TRUE(VerifyResult(reinterpret_cast<uint32_t *>(dst_buffer_),
kNumBufferElements));
// Pay attention to verbosity level for things like progress output
if (verbosity() >= VERBOSE_PROGRESS) {
std::cout << ".";
fflush(stdout);
}
}
if (verbosity() >= VERBOSE_PROGRESS) {
std::cout << std::endl;
}
// Abandon the first result and after sort, delete the last 2% value
timer.erase(timer.begin());
std::sort(timer.begin(), timer.end());
timer.erase(timer.begin() + num_iteration(), timer.end());
time_mean_ = rocrtst::CalcMean(timer);
}
void TestExample::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
void TestExample::DisplayResults(void) const {
// Compare required profile for this test case with what we're actually
// running on
if (!rocrtst::CheckProfile(this)) {
return;
}
TestBase::DisplayResults();
std::cout << "The average time was: " << time_mean_ * 1e6 <<
" uS" << std::endl;
return;
}
void TestExample::Close() {
hsa_status_t err;
err = hsa_amd_memory_pool_free(src_buffer_);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
err = hsa_amd_memory_pool_free(dst_buffer_);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
// This will close handles opened within rocrtst utility calls and call
// hsa_shut_down(), so it should be done after other hsa cleanup
TestBase::Close();
}
#undef RET_IF_HSA_ERR
@@ -48,6 +48,7 @@
#include <memory>
#include "gtest/gtest.h"
#include "suites/functional/memory_basic.h"
#include "suites/performance/dispatch_time.h"
#include "suites/performance/memory_async_copy.h"
#include "suites/test_common/test_case_template.h"
@@ -74,21 +75,39 @@ static bool GetMonitorDevices(const std::shared_ptr<rocrtst::smi::Device> &d,
return false;
}
static void RunTest(TestBase *test) {
static void SetFlags(TestBase *test) {
assert(sRocrtstGlvalues != nullptr);
test->set_verbosity(sRocrtstGlvalues->verbosity);
test->set_monitor_verbosity(sRocrtstGlvalues->monitor_verbosity);
test->set_num_iteration(sRocrtstGlvalues->num_iterations);
test->set_monitor_devices(&sRocrtstGlvalues->monitor_devices);
}
static void RunCustomTestProlog(TestBase *test) {
SetFlags(test);
test->DisplayTestInfo();
test->SetUp();
test->Run();
return;
}
static void RunCustomTestEpilog(TestBase *test) {
test->DisplayResults();
test->Close();
return;
}
// If the test case one big test, you should use RunGenericTest()
// to run the test case. OTOH, if the test case consists of multiple
// functions to be run as separate tests, follow this pattern:
// * RunCustomTestProlog(test) // Run() should contain minimal code
// * <insert call to actual test function within test case>
// * RunCustomTestEpilog(test)
static void RunGenericTest(TestBase *test) {
RunCustomTestProlog(test);
RunCustomTestEpilog(test);
return;
}
@@ -96,9 +115,9 @@ static void RunTest(TestBase *test) {
// TEST(rocrtst, Perf_<test name>) {
// <Test Implementation class> <test_obj>;
//
// // Copy and modify implementation of RunTest() if you need to deviate
// // Copy and modify implementation of RunGenericTest() if you need to deviate
// // from the standard pattern implemented there.
// RunTest(&<test_obj>);
// RunGenericTest(&<test_obj>);
// }
TEST(rocrtst, Test_Example) {
@@ -107,7 +126,15 @@ TEST(rocrtst, Test_Example) {
rocrtst::smi::RocmSMI hw;
hw.DiscoverDevices();
RunTest(&tst);
RunGenericTest(&tst);
}
TEST(rocrtstFunc, Memory_Max_Mem) {
MemoryTest mt;
RunCustomTestProlog(&mt);
mt.MaxSingleAllocationTest();
RunCustomTestEpilog(&mt);
}
TEST(rocrtstPerf, Memory_Async_Copy) {
@@ -119,27 +146,27 @@ TEST(rocrtstPerf, Memory_Async_Copy) {
// mac.set_dst_pool(<dst pool id>);
// The default is to and from the cpu to 1 gpu, and to/from a gpu to
// another gpu
RunTest(&mac);
RunGenericTest(&mac);
}
TEST(rocrtstPerf, AQL_Dispatch_Time_Single_SpinWait) {
DispatchTime dt(true, true);
RunTest(&dt);
RunGenericTest(&dt);
}
TEST(rocrtstPerf, AQL_Dispatch_Time_Single_Interrupt) {
DispatchTime dt(false, true);
RunTest(&dt);
RunGenericTest(&dt);
}
TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_SpinWait) {
DispatchTime dt(true, false);
RunTest(&dt);
RunGenericTest(&dt);
}
TEST(rocrtstPerf, AQL_Dispatch_Time_Multi_Interrupt) {
DispatchTime dt(false, false);
RunTest(&dt);
RunGenericTest(&dt);
}
int main(int argc, char** argv) {