Support GPU memory test and compute queue test using Rocr
A new diagnostic module librdc_rocr.so is created. The
module uses Rocr to test the memory allocation, memory access
and compute queue ready status.
Change-Id: I9098f4fc3209bf381b7cb3658a4e94c2e22f2fe9
[ROCm/rdc commit: 78e2f2486b]
This commit is contained in:
committed by
Shuzhou Liu
parent
ed96db8cba
commit
6b700f8005
@@ -35,6 +35,10 @@ option(BUILD_STANDALONE "Build targets for rdci and rdcd" ON)
|
||||
# which requires the ROCT-Thunk-Interface.
|
||||
option(BUILD_RASLIB "Build targets for raslib" OFF)
|
||||
|
||||
# When cmake -DBUILD_ROCRTEST=off, it will not build the librdc_rocr.so
|
||||
# which requires the Rocm run time.
|
||||
option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON)
|
||||
|
||||
if( NOT EXISTS "${CMAKE_SOURCE_DIR}/raslib/.git" AND BUILD_RASLIB)
|
||||
message( FATAL_ERROR "The git submodule raslib is not available. Please run
|
||||
git submodule update --init --recursive
|
||||
|
||||
@@ -76,6 +76,12 @@ If only the RDC libraries are needed (i.e. only "embedded mode" is required), th
|
||||
|
||||
$ cmake -DROCM_DIR=/opt/rocm -DBUILD_STANDALONE=off <-DCMAKE_INSTALL_PREFIX=<install dir>> ..
|
||||
|
||||
## Building RDC library without ROCM Run time (optional)
|
||||
|
||||
The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_ROCRTEST=off should be passed on the the cmake command line:
|
||||
|
||||
$ cmake -DROCM_DIR=/opt/rocm -DBUILD_ROCRTEST=off <-DCMAKE_INSTALL_PREFIX=<install dir>> ..
|
||||
|
||||
## Update System Library Path
|
||||
|
||||
The following commands need to be executed as root (sudo). It may be easiest to put them into a script and then run that script as root:
|
||||
|
||||
@@ -31,9 +31,7 @@ THE SOFTWARE.
|
||||
static std::string get_test_name(rdc_diag_test_cases_t test_case) {
|
||||
const std::map<rdc_diag_test_cases_t, std::string> test_desc = {
|
||||
{RDC_DIAG_COMPUTE_PROCESS, "No compute process"},
|
||||
{RDC_DIAG_SDMA_QUEUE, "SDMA Queue ready"},
|
||||
{RDC_DIAG_COMPUTE_QUEUE, "Compute Queue ready"},
|
||||
{RDC_DIAG_VRAM_CHECK, "VRAM check"},
|
||||
{RDC_DIAG_SYS_MEM_CHECK, "System memory check"},
|
||||
{RDC_DIAG_NODE_TOPOLOGY, "Node topology check"},
|
||||
{RDC_DIAG_GPU_PARAMETERS, "GPU parameters check"},
|
||||
@@ -160,7 +158,8 @@ int main(int, char **) {
|
||||
|
||||
std::cout << std::setw(22) << std::left
|
||||
<< get_test_name(RDC_DIAG_COMPUTE_PROCESS) + ":"
|
||||
<< rdc_diagnostic_result_string(test_result.status) << "\n";
|
||||
<< test_result.info << "\n";
|
||||
|
||||
|
||||
// Cleanup consists of shutting down RDC.
|
||||
cleanup:
|
||||
|
||||
@@ -421,9 +421,7 @@ typedef enum {
|
||||
RDC_DIAG_TEST_FIRST = 0,
|
||||
//!< The diagnostic test pass
|
||||
RDC_DIAG_COMPUTE_PROCESS = RDC_DIAG_TEST_FIRST,
|
||||
RDC_DIAG_SDMA_QUEUE, //!< The SDMA Queue is ready
|
||||
RDC_DIAG_COMPUTE_QUEUE, //!< The Compute Queue is ready
|
||||
RDC_DIAG_VRAM_CHECK, //!< Check VRAM
|
||||
RDC_DIAG_SYS_MEM_CHECK, //!< Check System memory
|
||||
RDC_DIAG_NODE_TOPOLOGY, //!< Report node topology
|
||||
RDC_DIAG_GPU_PARAMETERS, //!< GPU parameters in range
|
||||
|
||||
Executable
+88
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_RDC_LIB_RDCRdcPerfTimer_H_
|
||||
#define INCLUDE_RDC_LIB_RDCRdcPerfTimer_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
/// \file
|
||||
/// Timer related class.
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdcPerfTimer {
|
||||
private:
|
||||
struct Timer {
|
||||
std::string name; /* < name name of time object*/
|
||||
uint64_t _freq; /* < _freq frequency*/
|
||||
uint64_t _clocks; /* < _clocks number of ticks at end*/
|
||||
uint64_t _start; /* < _start start point ticks*/
|
||||
};
|
||||
|
||||
std::vector<Timer*> _timers; /*< _timers vector to Timer objects */
|
||||
double freq_in_100mhz;
|
||||
|
||||
public:
|
||||
RdcPerfTimer(void);
|
||||
~RdcPerfTimer(void);
|
||||
|
||||
/// Create a new timer.
|
||||
/// \returns A new timer instance index
|
||||
int CreateTimer(void);
|
||||
|
||||
/// Start the timer associated with the given index
|
||||
/// \param[in] index Index of the timer to start
|
||||
/// \returns int 0 for success, non-zero otherwise
|
||||
int StartTimer(int index);
|
||||
|
||||
/// Stop the timer associated with the given index
|
||||
/// \param[in] Index Index of the timer to stop
|
||||
/// \returns int 0 for success, non-zero otherwise
|
||||
int StopTimer(int index);
|
||||
|
||||
/// Reset the timer to 0
|
||||
/// param[in] Index of the timer to reset
|
||||
/// \returns void
|
||||
void ResetTimer(int index);
|
||||
|
||||
/// Read the time value of the timer associated with the provided index.
|
||||
/// Units are seconds
|
||||
/// \param[in] index Index of the timer to read
|
||||
/// \returns double Value of the timer
|
||||
double ReadTimer(int index);
|
||||
|
||||
private:
|
||||
void Error(std::string str);
|
||||
uint64_t CoarseTimestampUs();
|
||||
uint64_t MeasureTSCFreqHz();
|
||||
};
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // INCLUDE_RDC_LIB_RDCRdcPerfTimer_H_
|
||||
|
||||
@@ -29,6 +29,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/RdcDiagnostic.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcSmiLib.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
@@ -55,7 +56,8 @@ class RdcDiagnosticModule : public RdcDiagnostic {
|
||||
rdc_status_t rdc_diag_destroy() override;
|
||||
|
||||
explicit RdcDiagnosticModule(const RdcSmiLibPtr& smi_lib,
|
||||
const RdcRasLibPtr& ras_module);
|
||||
const RdcRasLibPtr& ras_module,
|
||||
const RdcRocrLibPtr& rocr_module);
|
||||
|
||||
private:
|
||||
//< Helper function to dispatch fields to module
|
||||
|
||||
@@ -28,6 +28,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/RdcTelemetry.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcSmiLib.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
@@ -46,6 +47,7 @@ class RdcModuleMgrImpl: public RdcModuleMgr {
|
||||
RdcRasLibPtr ras_lib_;
|
||||
RdcSmiLibPtr smi_lib_;
|
||||
RdcMetricFetcherPtr fetcher_;
|
||||
RdcRocrLibPtr rocr_lib_;
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCROCRLIB_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCROCRLIB_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "rdc_lib/RdcLibraryLoader.h"
|
||||
#include "rdc_lib/RdcDiagnostic.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdcRocrLib : public RdcDiagnostic {
|
||||
public:
|
||||
rdc_status_t rdc_diag_test_cases_query(
|
||||
rdc_diag_test_cases_t test_cases[MAX_TEST_CASES],
|
||||
uint32_t* test_case_count) override;
|
||||
|
||||
// Run a specific test case
|
||||
rdc_status_t rdc_test_case_run(
|
||||
rdc_diag_test_cases_t test_case,
|
||||
uint32_t gpu_index[RDC_MAX_NUM_DEVICES],
|
||||
uint32_t gpu_count,
|
||||
rdc_diag_test_result_t* result) override;
|
||||
|
||||
rdc_status_t rdc_diagnostic_run(
|
||||
const rdc_group_info_t& gpus,
|
||||
rdc_diag_level_t level,
|
||||
rdc_diag_response_t* response) override;
|
||||
|
||||
rdc_status_t rdc_diag_init(uint64_t flags) override;
|
||||
rdc_status_t rdc_diag_destroy() override;
|
||||
|
||||
explicit RdcRocrLib(const char* lib_name);
|
||||
|
||||
~RdcRocrLib();
|
||||
|
||||
private:
|
||||
RdcLibraryLoader lib_loader_;
|
||||
rdc_status_t (*test_case_run_)(rdc_diag_test_cases_t,
|
||||
uint32_t[RDC_MAX_NUM_DEVICES], uint32_t,
|
||||
rdc_diag_test_result_t*);
|
||||
rdc_status_t (*diag_test_cases_query_)(
|
||||
rdc_diag_test_cases_t[MAX_TEST_CASES], uint32_t*);
|
||||
rdc_status_t (*diag_init_)(uint64_t);
|
||||
rdc_status_t (*diag_destroy_)();
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<RdcRocrLib> RdcRocrLibPtr;
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCROCRLIB_H_
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* One instance of this kernel call is a thread.
|
||||
* Each thread finds out the segment in which it should look for the element.
|
||||
* After that, it checks if the element is between the lower bound and upper
|
||||
* bound of its segment. If yes, then this segment becomes the total
|
||||
* searchspace for the next pass.
|
||||
*
|
||||
* To achieve this, it writes the lower bound and upper bound to the output
|
||||
* array. In case the element at the left end (lower bound) matches the element
|
||||
* we are looking for, that is marked in the output and we no longer need to
|
||||
* look any further.
|
||||
*/
|
||||
|
||||
__kernel void
|
||||
binarySearch(__global uint4 * outputArray,
|
||||
__const __global uint2 * sortedArray,
|
||||
const unsigned int findMe) {
|
||||
unsigned int tid = get_global_id(0);
|
||||
|
||||
// Then we find the elements for this thread
|
||||
uint2 element = sortedArray[tid];
|
||||
|
||||
|
||||
// If the element to be found does not lie between
|
||||
// them, then nothing left to do in this thread
|
||||
if((element.x > findMe) || (element.y < findMe)) {
|
||||
return;
|
||||
} else {
|
||||
// However, if the element does lie between the lower
|
||||
// and upper bounds of this thread's searchspace
|
||||
// we need to narrow down the search further in this
|
||||
// search space
|
||||
// The search space for this thread is marked in the
|
||||
// output as being the total search space for the next pass
|
||||
outputArray[0].x = tid;
|
||||
outputArray[0].w = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
binarySearch_mulkeys(__global int *keys,
|
||||
__global uint *input,
|
||||
const unsigned int numKeys,
|
||||
__global int *output) {
|
||||
|
||||
int gid = get_global_id(0);
|
||||
int lBound = gid * 256;
|
||||
int uBound = lBound + 255;
|
||||
|
||||
for(int i = 0; i < numKeys; i++) {
|
||||
if(keys[i] >= input[lBound] && keys[i] <= input[uBound])
|
||||
output[i]=lBound;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
binarySearch_mulkeysConcurrent(__global uint *keys,
|
||||
__global uint *input,
|
||||
const unsigned int inputSize, // num. of inputs
|
||||
const unsigned int numSubdivisions,
|
||||
__global int *output) {
|
||||
|
||||
int lBound = (get_global_id(0) % numSubdivisions) * (inputSize / numSubdivisions);
|
||||
int uBound = lBound + inputSize / numSubdivisions;
|
||||
int myKey = keys[get_global_id(0) / numSubdivisions];
|
||||
int mid;
|
||||
|
||||
while(uBound >= lBound) {
|
||||
mid = (lBound + uBound) / 2;
|
||||
if(input[mid] == myKey) {
|
||||
output[get_global_id(0) / numSubdivisions] = mid;
|
||||
return;
|
||||
} else if(input[mid] > myKey) {
|
||||
uBound = mid - 1;
|
||||
} else {
|
||||
lBound = mid + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_MODULES_RDC_ROCR_COMPUTEQUEUETEST_H_
|
||||
#define RDC_MODULES_RDC_ROCR_COMPUTEQUEUETEST_H_
|
||||
|
||||
#include "rdc_modules/rdc_rocr/TestBase.h"
|
||||
#include "hsa/hsa.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Hold all the info specific to binary search
|
||||
typedef struct BinarySearch {
|
||||
// Binary Search parameters
|
||||
uint32_t length;
|
||||
uint32_t work_group_size;
|
||||
uint32_t work_grid_size;
|
||||
uint32_t num_sub_divisions;
|
||||
uint32_t find_me;
|
||||
|
||||
// Buffers needed for this application
|
||||
uint32_t* input;
|
||||
uint32_t* input_arr;
|
||||
uint32_t* input_arr_local;
|
||||
uint32_t* output;
|
||||
// Keneral argument buffers and addresses
|
||||
void* kern_arg_buffer; // Begin of allocated memory
|
||||
// this pointer to be deallocated
|
||||
void* kern_arg_address; // Properly aligned address to be used in aql
|
||||
// packet (don't use for deallocation)
|
||||
|
||||
// Kernel code
|
||||
std::string kernel_file_name;
|
||||
std::string kernel_name;
|
||||
uint32_t kernarg_size;
|
||||
uint32_t kernarg_align;
|
||||
|
||||
// HSA/RocR objects needed for this application
|
||||
hsa_agent_t gpu_dev;
|
||||
hsa_agent_t cpu_dev;
|
||||
hsa_signal_t signal;
|
||||
hsa_queue_t* queue;
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
hsa_amd_memory_pool_t kern_arg_pool;
|
||||
|
||||
// Other items we need to populate AQL packet
|
||||
uint64_t kernel_object;
|
||||
uint32_t group_segment_size; ///< Kernel group seg size
|
||||
uint32_t private_segment_size; ///< Kernel private seg size
|
||||
} BinarySearch;
|
||||
|
||||
|
||||
class ComputeQueueTest : public TestBase {
|
||||
public:
|
||||
explicit ComputeQueueTest(uint32_t gpu_index);
|
||||
|
||||
// @Brief: Destructor for test case of ComputeQueueTest
|
||||
virtual ~ComputeQueueTest();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual hsa_status_t SetUp();
|
||||
|
||||
// @Brief: Core measurement execution
|
||||
virtual void Run();
|
||||
|
||||
// @Brief: Clean up and retrive the resource
|
||||
virtual void Close();
|
||||
|
||||
// @Brief: Display results
|
||||
virtual void DisplayResults() const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
hsa_status_t RunBinarySearchTest(void);
|
||||
|
||||
private:
|
||||
void InitializeBinarySearch(BinarySearch* bs);
|
||||
hsa_status_t FindPools(BinarySearch* bs);
|
||||
hsa_status_t AllocateAndInitBuffers(BinarySearch* bs);
|
||||
hsa_status_t LoadKernelFromObjFile(BinarySearch* bs);
|
||||
hsa_status_t Run(BinarySearch* bs);
|
||||
hsa_status_t CleanUp(BinarySearch* bs);
|
||||
void PopulateAQLPacket(BinarySearch const* bs,
|
||||
hsa_kernel_dispatch_packet_t* aql);
|
||||
hsa_status_t AgentMemcpy(void* dst, const void* src,
|
||||
size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag);
|
||||
hsa_status_t AllocAndSetKernArgs(BinarySearch* bs, void* args,
|
||||
size_t arg_size, void** aql_buf_ptr);
|
||||
void WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql,
|
||||
hsa_queue_t* q);
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_COMPUTEQUEUETEST_H_
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef RDC_MODULES_RDC_ROCR_MEMORYACCESS_H_
|
||||
#define RDC_MODULES_RDC_ROCR_MEMORYACCESS_H_
|
||||
|
||||
|
||||
#include "rdc_modules/rdc_rocr/TestBase.h"
|
||||
#include "hsa/hsa.h"
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class MemoryAccessTest : public TestBase {
|
||||
public:
|
||||
explicit MemoryAccessTest(uint32_t gpu_index);
|
||||
|
||||
// @Brief: Destructor for test case of MemoryTest
|
||||
virtual ~MemoryAccessTest();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual hsa_status_t SetUp();
|
||||
|
||||
// @Brief: Core measurement execution
|
||||
virtual void Run();
|
||||
|
||||
// @Brief: Clean up and retrive the resource
|
||||
virtual void Close();
|
||||
|
||||
// @Brief: Display results
|
||||
virtual void DisplayResults() const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
|
||||
// @Brief: This test verify that CPU is able to Read & write GPU memory
|
||||
void CPUAccessToGPUMemoryTest(void);
|
||||
|
||||
// @Brief: This test verify that GPU is able to Read & write CPU memory
|
||||
void GPUAccessToCPUMemoryTest(void);
|
||||
|
||||
|
||||
private:
|
||||
void CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent,
|
||||
hsa_agent_t gpuAgent,
|
||||
hsa_amd_memory_pool_t pool);
|
||||
void GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent);
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_MEMORYACCESS_H_
|
||||
+64
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_MODULES_RDC_ROCR_MEMORYTEST_H_
|
||||
#define RDC_MODULES_RDC_ROCR_MEMORYTEST_H_
|
||||
|
||||
#include "rdc_modules/rdc_rocr/TestBase.h"
|
||||
#include "hsa/hsa.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
class MemoryTest : public TestBase {
|
||||
public:
|
||||
explicit MemoryTest(uint32_t gpu_index);
|
||||
|
||||
// @Brief: Destructor for test case of MemoryTest
|
||||
virtual ~MemoryTest();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual hsa_status_t SetUp();
|
||||
|
||||
// @Brief: Core measurement execution
|
||||
virtual void Run();
|
||||
|
||||
// @Brief: Clean up and retrive the resource
|
||||
virtual void Close();
|
||||
|
||||
// @Brief: Display results
|
||||
virtual void DisplayResults() const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
hsa_status_t MaxSingleAllocationTest(void);
|
||||
|
||||
hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz);
|
||||
|
||||
private:
|
||||
hsa_status_t MaxSingleAllocationTest(hsa_agent_t ag,
|
||||
hsa_amd_memory_pool_t pool);
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_MEMORYTEST_H_
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_MODULES_RDC_DIAGNOSTIC_RDCDIAGNOSTICLIB_H_
|
||||
#define RDC_MODULES_RDC_DIAGNOSTIC_RDCDIAGNOSTICLIB_H_
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcDiagnosticLibInterface.h"
|
||||
|
||||
|
||||
#endif // RDC_MODULES_RDC_DIAGNOSTIC_RDCDIAGNOSTICLIB_H_
|
||||
@@ -0,0 +1,269 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef RDC_MODULES_RDC_ROCR_RDCROCRBASE_H_
|
||||
#define RDC_MODULES_RDC_ROCR_RDCROCRBASE_H_
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "rdc_lib/RdcPerfTimer.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
/// Common interface for RocR tests and samples
|
||||
class RdcRocrBase {
|
||||
public:
|
||||
RdcRocrBase(void);
|
||||
|
||||
virtual ~RdcRocrBase(void);
|
||||
|
||||
///< Setters and Getters
|
||||
|
||||
void set_gpu_device1(hsa_agent_t in_dev) {
|
||||
gpu_device1_.handle = in_dev.handle;
|
||||
}
|
||||
hsa_agent_t* gpu_device1(void) {
|
||||
return &gpu_device1_;
|
||||
}
|
||||
|
||||
void set_cpu_device(hsa_agent_t in_dev) {
|
||||
cpu_device_.handle = in_dev.handle;
|
||||
}
|
||||
hsa_agent_t* cpu_device(void) {
|
||||
return &cpu_device_;
|
||||
}
|
||||
|
||||
void set_kernel_file_name(const char* in_file_name) {
|
||||
kernel_file_name_ = in_file_name;
|
||||
}
|
||||
std::string const kernel_file_name(void) const {
|
||||
return kernel_file_name_;
|
||||
}
|
||||
|
||||
void set_kernel_name(std::string in_kernel_name) {
|
||||
kernel_name_ = in_kernel_name;
|
||||
}
|
||||
std::string const kernel_name(void) const {
|
||||
return kernel_name_;
|
||||
}
|
||||
|
||||
void set_agent_name(std::string in_agent_name) {
|
||||
agent_name_ = in_agent_name;
|
||||
}
|
||||
|
||||
std::string const get_agent_name(void) const {
|
||||
return agent_name_;
|
||||
}
|
||||
|
||||
void set_kernel_object(uint64_t in_kernel_object) {
|
||||
kernel_object_ = in_kernel_object;
|
||||
}
|
||||
uint64_t kernel_object(void) const {
|
||||
return kernel_object_;
|
||||
}
|
||||
|
||||
void set_profile(hsa_profile_t in_prof) {
|
||||
profile_ = in_prof;
|
||||
}
|
||||
hsa_profile_t profile(void) const {
|
||||
return profile_;
|
||||
}
|
||||
|
||||
uint32_t private_segment_size(void) const {
|
||||
return private_segment_size_;
|
||||
}
|
||||
void set_private_segment_size(uint32_t sz) {
|
||||
private_segment_size_ = sz;
|
||||
}
|
||||
|
||||
void set_group_segment_size(uint32_t sz) {
|
||||
group_segment_size_ = sz;
|
||||
}
|
||||
uint32_t group_segment_size(void) const {
|
||||
return group_segment_size_;
|
||||
}
|
||||
|
||||
void set_group_size(uint32_t sz) {
|
||||
group_size_ = sz;
|
||||
}
|
||||
uint32_t group_size(void) const {
|
||||
return group_size_;
|
||||
}
|
||||
|
||||
void set_main_queue(hsa_queue_t* q) {
|
||||
main_queue_ = q;
|
||||
}
|
||||
hsa_queue_t* main_queue(void) const {
|
||||
return main_queue_;
|
||||
}
|
||||
|
||||
hsa_kernel_dispatch_packet_t& aql(void) {
|
||||
return aql_;
|
||||
}
|
||||
|
||||
void set_num_iteration(int num) {
|
||||
num_iteration_ = num;
|
||||
}
|
||||
uint32_t num_iteration(void) const {
|
||||
return num_iteration_;
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_t& device_pool(void) {
|
||||
return device_pool_;
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_t& cpu_pool(void) {
|
||||
return cpu_pool_;
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_t& kern_arg_pool(void) {
|
||||
return kern_arg_pool_;
|
||||
}
|
||||
|
||||
void set_kernarg_size(uint32_t sz) {
|
||||
kernarg_size_ = sz;
|
||||
}
|
||||
uint32_t kernarg_size(void) const {
|
||||
return kernarg_size_;
|
||||
}
|
||||
|
||||
void set_kernarg_align(uint32_t align) {
|
||||
kernarg_align_ = align;
|
||||
}
|
||||
uint32_t kernarg_align(void) const {
|
||||
return kernarg_align_;
|
||||
}
|
||||
|
||||
void* kernarg_buffer(void) const {
|
||||
return kernarg_buffer_;
|
||||
}
|
||||
void set_kernarg_buffer(void* buffer) {
|
||||
kernarg_buffer_ = buffer;
|
||||
}
|
||||
|
||||
int32_t requires_profile(void) const {
|
||||
return requires_profile_;
|
||||
}
|
||||
|
||||
char* orig_hsa_enable_interrupt() const {
|
||||
return orig_hsa_enable_interrupt_;
|
||||
}
|
||||
|
||||
bool enable_interrupt() const {
|
||||
return enable_interrupt_;
|
||||
}
|
||||
|
||||
void set_title(std::string name) {
|
||||
title_ = name;
|
||||
}
|
||||
std::string title(void) const {
|
||||
return title_;
|
||||
}
|
||||
|
||||
RdcPerfTimer* hsa_timer(void) {
|
||||
return &hsa_timer_;
|
||||
}
|
||||
|
||||
void set_verbosity(uint32_t v) {
|
||||
verbosity_ = v;
|
||||
}
|
||||
uint32_t verbosity(void) const {
|
||||
return verbosity_;
|
||||
}
|
||||
|
||||
void set_monitor_verbosity(uint32_t m) {
|
||||
monitor_verbosity_ = m;
|
||||
}
|
||||
uint32_t monitor_verbosity(void) const {
|
||||
return monitor_verbosity_;
|
||||
}
|
||||
|
||||
protected:
|
||||
void set_requires_profile(int32_t reqd_prof) {
|
||||
requires_profile_ = reqd_prof;
|
||||
}
|
||||
|
||||
void set_enable_interrupt(bool doEnable) {
|
||||
enable_interrupt_ = doEnable;
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t num_iteration_; ///< Number of times to execute test
|
||||
|
||||
hsa_queue_t* main_queue_; ///< AQL queue used for packets
|
||||
|
||||
hsa_agent_t gpu_device1_; ///< Handle to first GPU found
|
||||
|
||||
hsa_agent_t cpu_device_; ///< Handle to CPU
|
||||
|
||||
hsa_amd_memory_pool_t device_pool_; ///< Memory pool on gpu pool list
|
||||
|
||||
hsa_amd_memory_pool_t cpu_pool_; ///< Memory pool on cpu pool list
|
||||
|
||||
hsa_amd_memory_pool_t kern_arg_pool_; ///< Memory pool suitable for args
|
||||
|
||||
uint64_t kernel_object_; ///< Handle to kernel code
|
||||
|
||||
std::string kernel_file_name_; ///< Code object file name
|
||||
|
||||
std::string kernel_name_; ///< Kernel name
|
||||
|
||||
std::string agent_name_; ///< Agent name
|
||||
|
||||
hsa_kernel_dispatch_packet_t aql_; ///< Kernel dispatch packet
|
||||
|
||||
uint32_t group_segment_size_; ///< Kernel group seg size
|
||||
|
||||
uint32_t kernarg_size_; ///< Kernarg memory size
|
||||
|
||||
uint32_t kernarg_align_; ///< Alignment for kern argument memory
|
||||
|
||||
void* kernarg_buffer_; ///< Unaligned allocated kernel arg. buffer
|
||||
|
||||
hsa_profile_t profile_; ///< Device profile.
|
||||
|
||||
uint32_t group_size_; ///< Number of work items in one group
|
||||
|
||||
uint32_t private_segment_size_; ///< Kernel private seg size
|
||||
|
||||
int32_t requires_profile_; ///< Profile required by test (-1 if no req.)
|
||||
|
||||
char* orig_hsa_enable_interrupt_; ///< Orig. value of HSA_ENABLE_INTERRUPT
|
||||
|
||||
bool enable_interrupt_; ///< Whether to enable/disable interrupts for test
|
||||
|
||||
std::string title_; ///< Displayed title of test
|
||||
|
||||
uint32_t verbosity_; ///< How much additional output to produce
|
||||
|
||||
uint32_t monitor_verbosity_; ///< verbose or not
|
||||
|
||||
RdcPerfTimer hsa_timer_; ///< Timer to be used for timing parts of test
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_RDCROCRBASE_H_
|
||||
+80
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_MODULES_RDC_ROCR_TESTBASE_H_
|
||||
#define RDC_MODULES_RDC_ROCR_TESTBASE_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "rdc_modules/rdc_rocr/RdcRocrBase.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class TestBase : public RdcRocrBase {
|
||||
public:
|
||||
explicit TestBase(uint32_t gpu_index);
|
||||
|
||||
virtual ~TestBase(void);
|
||||
|
||||
enum VerboseLevel {VERBOSE_MIN = 0, VERBOSE_STANDARD, VERBOSE_PROGRESS};
|
||||
|
||||
// @Brief: Before run the core measure codes, do something to set up
|
||||
// i.e. init runtime, prepare packet...
|
||||
virtual hsa_status_t SetUp(void);
|
||||
|
||||
// @Brief: Core measurement codes executing here
|
||||
virtual void Run(void);
|
||||
|
||||
// @Brief: Do something clean up
|
||||
virtual void Close(void);
|
||||
|
||||
// @Brief: Display the results
|
||||
virtual void DisplayResults(void) const;
|
||||
|
||||
// @Brief: Display information about the test
|
||||
virtual void DisplayTestInfo(void);
|
||||
|
||||
const std::string & description(void) const {return description_;}
|
||||
|
||||
void set_description(std::string d);
|
||||
|
||||
const std::string & get_gpu_info() const { return gpu_info_;}
|
||||
const std::string & get_per_gpu_info() const { return per_gpu_info_;}
|
||||
|
||||
hsa_status_t FindGPUIndex(hsa_agent_t agent, void* data);
|
||||
// Return the agent by GPU index in rocm_smi
|
||||
hsa_status_t get_agent_by_gpu_index(uint32_t gpu_index, hsa_agent_t* agent);
|
||||
|
||||
protected:
|
||||
uint32_t gpu_index_;
|
||||
std::string gpu_info_;
|
||||
std::string per_gpu_info_;
|
||||
|
||||
private:
|
||||
std::string description_;
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_TESTBASE_H_
|
||||
+172
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef RDC_MODULES_RDC_ROCR_BASE_ROCR_UTILS_H_
|
||||
#define RDC_MODULES_RDC_ROCR_BASE_ROCR_UTILS_H_
|
||||
|
||||
/// \file
|
||||
/// Prototypes of utility functions that act on RdcRocrBase objects.
|
||||
|
||||
#include "rdc_modules/rdc_rocr/RdcRocrBase.h"
|
||||
#include <string>
|
||||
#include "rdc_modules/rdc_rocr/common.h"
|
||||
#include "hsa/hsa.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
/// Open binary kernel object file and set all member data related to the
|
||||
/// kernel. Assumes that input test already has the kernel file name,
|
||||
/// agent name and kernel function specifed
|
||||
/// \param[in] test Test for which the kernel will be loaded.
|
||||
/// \param[in] agent for which the kernel will be loaded .
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors
|
||||
hsa_status_t LoadKernelFromObjFile(RdcRocrBase* test, hsa_agent_t* agent);
|
||||
|
||||
/// Do initialization tasks for HSA test program.
|
||||
/// \param[in] test Test to initialize
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors
|
||||
hsa_status_t InitAndSetupHSA(RdcRocrBase* test);
|
||||
|
||||
/// Find and set the cpu and gpu agent member variables. Also checks that
|
||||
/// gpu agent meets test requirements (e.g., FULL profile vs. BASE profile).
|
||||
hsa_status_t SetDefaultAgents(RdcRocrBase* test);
|
||||
|
||||
/// For the provided device agent, create an AQL queue
|
||||
/// \param[in] device Device for which a queue is to be created
|
||||
/// \param[out] queue Address to which created queue pointer will be written
|
||||
/// \param[in] num_pkts Size of the queue to create
|
||||
/// \param[in] do_profile [Optional] Specificy whether profiled queue should
|
||||
/// be created
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors encountered
|
||||
hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
|
||||
uint32_t num_pkts = 0);
|
||||
|
||||
/// This function sets some reasonable default values for an AQL packet.
|
||||
/// Override any field as necessary after calling this function.
|
||||
/// \param[in] test Test from which information to populate aql packet can
|
||||
/// be drawn.
|
||||
/// \param[inout] aql Caller provided pointer to aql packet that will be
|
||||
/// populated
|
||||
/// \returns Appropriate hsa_status_t
|
||||
hsa_status_t InitializeAQLPacket(const RdcRocrBase* test,
|
||||
hsa_kernel_dispatch_packet_t* aql);
|
||||
|
||||
/// This function writes all of the aql packet fields to the queue besides
|
||||
/// "setup" and "header". This assumes all the aql fields have be set
|
||||
/// appropriately.
|
||||
/// \param[in] test Test containing the queue and aql packet to be written.
|
||||
/// \returns Pointer to dispatch packet in queue that was written to
|
||||
hsa_kernel_dispatch_packet_t* WriteAQLToQueue(RdcRocrBase* test, uint64_t *ind);
|
||||
|
||||
void WriteAQLToQueueLoc(hsa_queue_t *queue, uint64_t indx,
|
||||
hsa_kernel_dispatch_packet_t *aql_pkt);
|
||||
/// This function writes the first 32 bits of an aql packet to the provided
|
||||
/// aql packet. This function is meant to be called immediately before
|
||||
/// ringing door_bell signal.
|
||||
/// \param[in] header Value to be written to header field
|
||||
/// \param[in] setup Value to be written to setup field
|
||||
/// \param[in] queue_packet Start address of in queue memory of aql packet to
|
||||
/// be written
|
||||
/// \returns void
|
||||
inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
|
||||
hsa_kernel_dispatch_packet_t* queue_packet) {
|
||||
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
|
||||
header | (setup <<16), __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
/// Perform common operations to clean up after executing a test. Specifically,
|
||||
/// hsa_shut_down() is called and environment variables that were changed are
|
||||
/// reset to their original values.
|
||||
/// \param[in] test Test for which clean up with be performed
|
||||
/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA
|
||||
/// error code otherwise.
|
||||
hsa_status_t CommonCleanUp(RdcRocrBase* test);
|
||||
|
||||
/// Check to see if target machine has the necessary profile to run the
|
||||
/// provided test.
|
||||
/// \param[1] test The test that specifies the required profile.
|
||||
bool CheckProfile(RdcRocrBase const* test);
|
||||
|
||||
/// Allocate memory from the kernel args pool and write the provided argument
|
||||
/// data to the kernel arg memory. Assumes kern_arg memory pool has been
|
||||
/// assigned. The amount of memory allocated will actually be \p arg_size
|
||||
/// plus the alignment required by the kernel arguments. The argument will
|
||||
/// be written with the proper alignment within the allocated buffer.
|
||||
/// \p test kernarg_buffer() will point to the allocated buffer, and it should
|
||||
/// be freed when the kernel is no longer being used.
|
||||
/// \param test Test from which to find kern_arg pool to write arguments
|
||||
/// \param args pointer to block of data containing kernel arguments to be
|
||||
/// written. Arguments are assumed to be of the correct placement, length,
|
||||
/// and with any padding that is expected by the OpenCL kernel
|
||||
/// \param arg_size Size of the kernel arg data (including padding) to be
|
||||
/// written
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors
|
||||
hsa_status_t AllocAndSetKernArgs(RdcRocrBase* test, void* args,
|
||||
size_t arg_size);
|
||||
|
||||
/// Verify that the machine running the test has the required profile.
|
||||
/// This function will verify that the execution machine meets any specific
|
||||
/// test requirement for a profile (HSA_PROFILE_BASE or HSA_PROFILE_FULL).
|
||||
/// \param[in] test Test that provides profile requirements.
|
||||
/// \returns bool
|
||||
/// - true Machine meets test requirements
|
||||
/// - false Machine does not meet test requirements
|
||||
bool CheckProfileAndInform(RdcRocrBase* test);
|
||||
|
||||
/// This function will set the cpu and gpu memory pools to the type used in
|
||||
/// many applications.
|
||||
/// \param[in] test Test that provides profile requirements.
|
||||
/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA
|
||||
/// error code otherwise.
|
||||
hsa_status_t SetPoolsTypical(RdcRocrBase* test);
|
||||
|
||||
/// Work-around for hsa_amd_memory_fill, which is currently broken.
|
||||
/// \param[in] ptr Pointer to start of memory location to be filled
|
||||
/// \param[in] value Value to write to each byte of input buffer
|
||||
/// \param[in] count Size of buffer to fill
|
||||
/// \param[in] dst_ag Agent owning the buffer to be filled
|
||||
/// \param[in] src_ag Agent wanting to do the fill
|
||||
/// \param[in] test Test that has handles to cpu and gpu agents that can own
|
||||
/// either source or destination of fill
|
||||
/// \returns HSA_STATUS_OK if not errors
|
||||
hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value,
|
||||
size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, RdcRocrBase* test);
|
||||
|
||||
/// Get the library directory which is loaded by current process.
|
||||
/// It will search /proc/self/maps for it.
|
||||
/// return empty string if fail.
|
||||
std::string get_lib_dir(const char* lib_name);
|
||||
|
||||
/// Get the app dir by looking at link of /proc/self/exe
|
||||
std::string get_app_dir();
|
||||
|
||||
// Search multiple folder for the hsaco file
|
||||
// Return empty if cannot find it.
|
||||
std::string search_hsaco_full_path(const char* hsaco_file_name,
|
||||
const char* agent_name);
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_BASE_ROCR_UTILS_H_
|
||||
|
||||
+231
@@ -0,0 +1,231 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/// \file
|
||||
/// RocR related helper functions for sequeneces that come up frequently
|
||||
|
||||
#ifndef RDC_MODULES_RDC_ROCR_COMMON_H_
|
||||
#define RDC_MODULES_RDC_ROCR_COMMON_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#if defined(__GNUC__)
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif // __GNUC__
|
||||
#endif // _MSC_VER
|
||||
|
||||
#define MULTILINE(...) # __VA_ARGS__
|
||||
|
||||
#define ASSERT_EQ(a, b) (a==b)
|
||||
|
||||
void SetEnv(const char* env_var_name, const char* env_var_value);
|
||||
intptr_t AlignDown(intptr_t value, size_t alignment);
|
||||
void* AlignDown(void* value, size_t alignment);
|
||||
void* AlignUp(void* value, size_t alignment);
|
||||
|
||||
// define below should be deleted. Leaving in commented out until code that
|
||||
// refers to it has been corrected
|
||||
// #define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
|
||||
// This structure holds memory pool information acquired through hsa info
|
||||
// related calls, and is later used for reference when displaying the
|
||||
// information.
|
||||
typedef struct pool_info_t_ {
|
||||
uint32_t segment;
|
||||
size_t size;
|
||||
bool alloc_allowed;
|
||||
size_t alloc_granule;
|
||||
size_t alloc_alignment;
|
||||
bool accessible_by_all;
|
||||
uint32_t global_flag;
|
||||
uint64_t aggregate_alloc_max;
|
||||
inline bool operator==(const pool_info_t_ &a) {
|
||||
if (a.segment == segment && a.size == size
|
||||
&& a.alloc_allowed == alloc_allowed
|
||||
&& a.alloc_granule == alloc_granule
|
||||
&& a.alloc_alignment == alloc_alignment
|
||||
&& a.accessible_by_all == accessible_by_all
|
||||
&& a.aggregate_alloc_max == aggregate_alloc_max
|
||||
&& a.global_flag == global_flag )
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
} pool_info_t;
|
||||
|
||||
|
||||
struct agent_pools_t{
|
||||
hsa_agent_t agent;
|
||||
std::vector<hsa_amd_memory_pool_t> pools;
|
||||
};
|
||||
|
||||
/// Fill in the pool_info_t structure for the provided pool.
|
||||
/// \param[in] pool Pool for which information will be retrieved
|
||||
/// \param[out] pool_i Pointer to structure where pool info will be stored
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
|
||||
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i);
|
||||
|
||||
/// If the provided agent is associated with a GPU, return that agent through
|
||||
/// output parameter. This function is meant to be the call-back function used
|
||||
/// with hsa_iterate_agents to find GPU agents.
|
||||
/// \param[in] agent Agent to evaluate if GPU
|
||||
/// \param[out] data If agent is associated with a GPU, this pointer will point
|
||||
/// to the agent upon return
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
|
||||
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data);
|
||||
|
||||
/// If the provided agent is associated with a CPU, return that agent through
|
||||
/// output parameter. This function is meant to be the call-back function used
|
||||
/// with hsa_iterate_agents to find CPU agents.
|
||||
/// \param[in] agent Agent to evaluate if CPU
|
||||
/// \param[out] data If agent is associated with a CPU, this pointer will point
|
||||
/// to the agent upon return
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
|
||||
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data);
|
||||
|
||||
// TODO(cfreehil): get rid of FindGlobalPool and replace with FindStandardPool
|
||||
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
|
||||
/// If the provided agent is associated with a CPU, return that agent through
|
||||
/// output parameter. This function is meant to be the call-back function used
|
||||
/// with hsa_iterate_agents to find all the CPU agents.
|
||||
/// \param[in] agent Agent to evaluate if CPU
|
||||
/// \param[out] data If agent is associated with a CPU, this pointer will point
|
||||
/// to the agent upon return
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
|
||||
hsa_status_t IterateCPUAgents(hsa_agent_t agent, void *data);
|
||||
|
||||
/// If the provided agent is associated with a GPU, return that agent through
|
||||
/// output parameter. This function is meant to be the call-back function used
|
||||
/// with hsa_iterate_agents to find all the GPU agents.
|
||||
/// \param[in] agent Agent to evaluate if GPU
|
||||
/// \param[out] data If agent is associated with a GPU, this pointer will point
|
||||
/// to the agent upon return
|
||||
/// \returns HSA_STATUS_SUCCESS if no errors are encountered.
|
||||
hsa_status_t IterateGPUAgents(hsa_agent_t agent, void *data);
|
||||
|
||||
/// Find a GLOBAL memory pool. By this, we mean not a kernel args pool.
|
||||
/// This function is meant to be the call-back function used
|
||||
/// with hsa_amd_agent_iterate_memory_pools.
|
||||
/// \param[in] pool Pool to evaluate for required properties
|
||||
/// \param[in] data If pool meets criteria, this pointer will point
|
||||
/// to the pool upon return
|
||||
/// \returns hsa_status_t
|
||||
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
|
||||
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
|
||||
/// -else return an appropriate error code for any error encountered
|
||||
hsa_status_t GetGlobalMemoryPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
|
||||
/// Find a "kernel arg" pool.
|
||||
/// This function is meant to be the call-back function used
|
||||
/// with hsa_amd_agent_iterate_memory_pools.
|
||||
/// \param[in] pool Pool to evaluate for required properties
|
||||
/// \param[in] data If pool meets criteria, this pointer will point
|
||||
/// to the pool upon return
|
||||
/// \returns hsa_status_t
|
||||
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
|
||||
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
|
||||
/// -else return an appropriate error code for any error encountered
|
||||
hsa_status_t GetKernArgMemoryPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
|
||||
|
||||
/// Find a "standard" pool. By this, we mean not a kernel args pool.
|
||||
/// The pool found will have the following properties:
|
||||
/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
|
||||
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: Off
|
||||
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
|
||||
/// This function is meant to be the call-back function used
|
||||
/// with hsa_amd_agent_iterate_memory_pools.
|
||||
/// \param[in] pool Pool to evaluate for required properties
|
||||
/// \param[in] data If pool meets criteria, this pointer will point
|
||||
/// to the pool upon return
|
||||
/// \returns hsa_status_t
|
||||
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
|
||||
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
|
||||
/// -else return an appropriate error code for any error encountered
|
||||
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
hsa_status_t FindAPUStandardPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
|
||||
/// Find a "kernel arg" pool.
|
||||
/// The pool found will have the following properties:
|
||||
/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care
|
||||
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: On
|
||||
/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care
|
||||
/// This function is meant to be the call-back function used
|
||||
/// with hsa_amd_agent_iterate_memory_pools.
|
||||
/// \param[in] pool Pool to evaluate for required properties
|
||||
/// \param[in] data If pool meets criteria, this pointer will point
|
||||
/// to the pool upon return
|
||||
/// \returns hsa_status_t
|
||||
/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria
|
||||
/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria
|
||||
/// -else return an appropriate error code for any error encountered
|
||||
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data);
|
||||
|
||||
/// Dump information about provided memory pool to STDOUT
|
||||
/// \param[in] pool Pool to gather and dump information for
|
||||
/// \param[in] indent Number of spaces to indent output.
|
||||
/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors
|
||||
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
|
||||
uint32_t indent = 0);
|
||||
|
||||
/// Dump information about a provided pointer to STDOUT.
|
||||
/// \param[in] ptr Pointer about which information is dumped.
|
||||
/// \returns HSA_STATUS_SUCCESS if there are no errors
|
||||
hsa_status_t DumpPointerInfo(void* ptr);
|
||||
|
||||
hsa_status_t GetAgentPools(
|
||||
std::vector<std::shared_ptr<agent_pools_t>> *agent_pools);
|
||||
|
||||
void throw_if_error(hsa_status_t err, const std::string& msg = "");
|
||||
|
||||
void throw_if_skip(const std::string& msg);
|
||||
|
||||
// The customize exception when the test has to be skipped
|
||||
class SkipException : public std::exception {
|
||||
public:
|
||||
explicit SkipException(const char* msg): _msg(msg) {}
|
||||
virtual const char* what() const noexcept { return _msg.c_str(); }
|
||||
private:
|
||||
std::string _msg;
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_MODULES_RDC_ROCR_COMMON_H_
|
||||
@@ -161,12 +161,14 @@ set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcMetricsUpdaterIm
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcWatchTableImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcRasLib.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcSmiLib.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcRocrLib.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcTelemetryModule.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcDiagnosticModule.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcSmiDiagnosticImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcModuleMgrImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcNotificationImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RsmiUtils.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcPerfTimer.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_capabilities.cc")
|
||||
|
||||
@@ -183,6 +185,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcWatchTab
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcWatchTableImpl.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcRasLib.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcSmiLib.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcRocrLib.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcModuleMgrImpl.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcModuleMgr.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcTelemetry.h")
|
||||
@@ -194,6 +197,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcSmi
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcNotification.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcNotificationImpl.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RsmiUtils.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcPerfTimer.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_capabilities.h")
|
||||
|
||||
@@ -215,6 +219,54 @@ set_property(TARGET ${RDC_LIB} PROPERTY
|
||||
set_property(TARGET ${RDC_LIB} PROPERTY
|
||||
VERSION "${SO_VERSION_STRING}")
|
||||
|
||||
# librdc_rocr.so set up
|
||||
set(DIAGNOSTIC_LIB "rdc_rocr")
|
||||
set(DIAGNOSTIC_LIB_COMPONENT "lib${DIAGNOSTIC_LIB}")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST "${SRC_DIR}/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLogger.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/RdcRocrBase.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/MemoryTest.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/MemoryAccess.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/TestBase.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/base_rocr_utils.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/ComputeQueueTest.cc")
|
||||
set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/common.cc")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST "${RDC_LIB_INC_DIR}/rdc/rdc.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcDiagnosticLibInterface.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/rdc_common.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLogger.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/RdcDiagnosticLib.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/RdcRocrBase.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/TestBase.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/MemoryTest.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/MemoryAccess.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/base_rocr_utils.h")
|
||||
set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/common.h")
|
||||
|
||||
if(BUILD_ROCRTEST)
|
||||
message("Build librdc_rocr.so is enabled, make sure the Rocm run time is installed.")
|
||||
|
||||
message("DIAGNOSTIC_LIB_INC_LIST=${DIAGNOSTIC_LIB_INC_LIST}")
|
||||
|
||||
set(HSA_LIB "hsa-runtime64")
|
||||
add_library(${DIAGNOSTIC_LIB} SHARED ${DIAGNOSTIC_LIB_SRC_LIST} ${DIAGNOSTIC_LIB_INC_LIST})
|
||||
target_link_libraries(${DIAGNOSTIC_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} ${HSA_LIB} pthread dl)
|
||||
target_include_directories(${DIAGNOSTIC_LIB} PRIVATE
|
||||
"${RSMI_INC_DIR}"
|
||||
"${ROCM_DIR}/include"
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${PROJECT_SOURCE_DIR}/include"
|
||||
"${COMMON_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/include")
|
||||
|
||||
# TODO: set the properties for the library once we have one
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property(TARGET ${DIAGNOSTIC_LIB} PROPERTY
|
||||
SOVERSION "${VERSION_MAJOR}")
|
||||
set_property(TARGET ${DIAGNOSTIC_LIB} PROPERTY
|
||||
VERSION "${SO_VERSION_STRING}")
|
||||
endif()
|
||||
|
||||
if(BUILD_STANDALONE)
|
||||
# librdc_client.so set up
|
||||
file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h")
|
||||
@@ -274,12 +326,12 @@ target_include_directories(${BOOTSTRAP_LIB}
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
if(BUILD_STANDALONE)
|
||||
install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} ${RDCCLIENT_LIB}
|
||||
install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} ${RDCCLIENT_LIB} ${DIAGNOSTIC_LIB}
|
||||
EXPORT rdcTargets
|
||||
LIBRARY DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/lib
|
||||
COMPONENT ${CLIENT_COMPONENT})
|
||||
else()
|
||||
install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB}
|
||||
install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} ${DIAGNOSTIC_LIB}
|
||||
EXPORT rdcTargets
|
||||
LIBRARY DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/lib
|
||||
COMPONENT ${CLIENT_COMPONENT})
|
||||
@@ -289,6 +341,12 @@ install(FILES ${SOURCE_DIR}/include/rdc/rdc.h
|
||||
DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/include/rdc
|
||||
COMPONENT ${CLIENT_COMPONENT})
|
||||
|
||||
# Install the kernel files
|
||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/rdc_libs/rdc_modules/kernels/hsaco
|
||||
DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/lib
|
||||
COMPONENT ${CLIENT_COMPONENT})
|
||||
|
||||
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
message(" Finished Cmake RDC Lib ")
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
|
||||
@@ -83,8 +83,6 @@ rdc_status_t RdcDiagnosticModule::rdc_diagnostic_run(
|
||||
rdc_runs.push_back(RDC_DIAG_NODE_TOPOLOGY);
|
||||
rdc_runs.push_back(RDC_DIAG_GPU_PARAMETERS);
|
||||
rdc_runs.push_back(RDC_DIAG_COMPUTE_QUEUE);
|
||||
rdc_runs.push_back(RDC_DIAG_SDMA_QUEUE);
|
||||
rdc_runs.push_back(RDC_DIAG_VRAM_CHECK);
|
||||
rdc_runs.push_back(RDC_DIAG_SYS_MEM_CHECK);
|
||||
}
|
||||
|
||||
@@ -117,11 +115,13 @@ rdc_status_t RdcDiagnosticModule::RdcDiagnosticModule::rdc_diag_destroy() {
|
||||
}
|
||||
|
||||
RdcDiagnosticModule::RdcDiagnosticModule(const RdcSmiLibPtr& smi_lib,
|
||||
const RdcRasLibPtr& ras_module) {
|
||||
const RdcRasLibPtr& ras_module, const RdcRocrLibPtr& rocr_module) {
|
||||
if (smi_lib) {
|
||||
diagnostic_modules_.push_back(smi_lib);
|
||||
}
|
||||
|
||||
if (rocr_module) {
|
||||
diagnostic_modules_.push_back(rocr_module);
|
||||
}
|
||||
if (ras_module) {
|
||||
diagnostic_modules_.push_back(ras_module);
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcTelemetryModule.h"
|
||||
#include "rdc_lib/impl/RdcDiagnosticModule.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
@@ -60,9 +61,13 @@ RdcDiagnosticPtr RdcModuleMgrImpl::get_diagnostic_module() {
|
||||
ras_lib_.reset(new RdcRasLib("librdc_ras.so"));
|
||||
}
|
||||
|
||||
if (!rocr_lib_) {
|
||||
rocr_lib_.reset(new RdcRocrLib("librdc_rocr.so"));
|
||||
}
|
||||
|
||||
if (!rdc_diagnostic_module_) {
|
||||
rdc_diagnostic_module_.reset(
|
||||
new RdcDiagnosticModule(smi_lib_, ras_lib_));
|
||||
new RdcDiagnosticModule(smi_lib_, ras_lib_, rocr_lib_));
|
||||
}
|
||||
|
||||
return rdc_diagnostic_module_;
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rdc_lib/RdcPerfTimer.h"
|
||||
#include <x86intrin.h>
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
static const uint64_t kNanosecondsPerSecond = 1000000000;
|
||||
|
||||
RdcPerfTimer::RdcPerfTimer(void) {
|
||||
freq_in_100mhz = MeasureTSCFreqHz();
|
||||
}
|
||||
|
||||
RdcPerfTimer::~RdcPerfTimer() {
|
||||
while (!_timers.empty()) {
|
||||
Timer* temp = _timers.back();
|
||||
_timers.pop_back();
|
||||
delete temp;
|
||||
}
|
||||
}
|
||||
|
||||
int RdcPerfTimer::CreateTimer(void) {
|
||||
Timer* newTimer = new Timer;
|
||||
newTimer->_start = 0;
|
||||
newTimer->_clocks = 0;
|
||||
|
||||
newTimer->_freq = kNanosecondsPerSecond;
|
||||
|
||||
/* Push back the address of new Timer instance created */
|
||||
_timers.push_back(newTimer);
|
||||
return static_cast<int>(_timers.size() - 1);
|
||||
}
|
||||
|
||||
int RdcPerfTimer::StartTimer(int index) {
|
||||
if (index >= static_cast<int>(_timers.size())) {
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timespec s;
|
||||
clock_gettime(CLOCK_MONOTONIC, &s);
|
||||
_timers[index]->_start = (uint64_t) s.tv_sec * kNanosecondsPerSecond
|
||||
+ (uint64_t) s.tv_nsec;
|
||||
#else
|
||||
|
||||
// AMD timing method
|
||||
|
||||
unsigned int unused;
|
||||
_timers[index]->_start = __rdtscp(&unused);
|
||||
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RdcPerfTimer::StopTimer(int index) {
|
||||
uint64_t n = 0;
|
||||
|
||||
if (index >= static_cast<int>(_timers.size())) {
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timespec s;
|
||||
clock_gettime(CLOCK_MONOTONIC, &s);
|
||||
n = (uint64_t) s.tv_sec * kNanosecondsPerSecond + (uint64_t) s.tv_nsec;
|
||||
#else
|
||||
// AMD Linux timing
|
||||
|
||||
unsigned int unused;
|
||||
n = __rdtscp(&unused);
|
||||
#endif
|
||||
|
||||
n -= _timers[index]->_start;
|
||||
_timers[index]->_start = 0;
|
||||
|
||||
#ifndef _AMD
|
||||
_timers[index]->_clocks += n;
|
||||
#else
|
||||
// convert to ms
|
||||
_timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz;
|
||||
cout << "_AMD is enabled!!!" << endl;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RdcPerfTimer::Error(std::string str) {
|
||||
std::cout << str << std::endl;
|
||||
}
|
||||
|
||||
double RdcPerfTimer::ReadTimer(int index) {
|
||||
if (index >= static_cast<int>(_timers.size())) {
|
||||
Error("Cannot read timer. Invalid handle.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
double reading = static_cast<double>(_timers[index]->_clocks);
|
||||
|
||||
reading = static_cast<double>(reading / _timers[index]->_freq);
|
||||
|
||||
return reading;
|
||||
}
|
||||
|
||||
void RdcPerfTimer::ResetTimer(int index) {
|
||||
// Check if index value is over the timer's size
|
||||
if (index >= static_cast<int>(_timers.size())) {
|
||||
Error("Invalid index value\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
_timers[index]->_clocks = 0.0;
|
||||
_timers[index]->_start = 0.0;
|
||||
}
|
||||
|
||||
uint64_t RdcPerfTimer::CoarseTimestampUs() {
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
|
||||
}
|
||||
|
||||
uint64_t RdcPerfTimer::MeasureTSCFreqHz() {
|
||||
// Make a coarse interval measurement of TSC ticks for 1 gigacycles.
|
||||
unsigned int unused;
|
||||
uint64_t tscTicksEnd;
|
||||
|
||||
uint64_t coarseBeginUs = CoarseTimestampUs();
|
||||
uint64_t tscTicksBegin = __rdtscp(&unused);
|
||||
|
||||
do {
|
||||
tscTicksEnd = __rdtscp(&unused);
|
||||
} while (tscTicksEnd - tscTicksBegin < 1000000000);
|
||||
|
||||
uint64_t coarseEndUs = CoarseTimestampUs();
|
||||
|
||||
// Compute the TSC frequency and round to nearest 100MHz.
|
||||
uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000;
|
||||
uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin;
|
||||
return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <functional>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
RdcRocrLib::RdcRocrLib(const char* lib_name):
|
||||
test_case_run_(nullptr)
|
||||
, diag_test_cases_query_(nullptr)
|
||||
, diag_init_(nullptr)
|
||||
, diag_destroy_(nullptr) {
|
||||
rdc_status_t status = lib_loader_.load(lib_name);
|
||||
if (status != RDC_ST_OK) {
|
||||
RDC_LOG(RDC_ERROR, "Rocr related function will not work.");
|
||||
return;
|
||||
}
|
||||
|
||||
status = lib_loader_.load_symbol(&diag_init_,
|
||||
"rdc_diag_init");
|
||||
if (status != RDC_ST_OK) {
|
||||
diag_init_ = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
status = diag_init_(0);
|
||||
if (status != RDC_ST_OK) {
|
||||
RDC_LOG(RDC_ERROR, "Fail to init librdc_rocr.so:"
|
||||
<< rdc_status_string(status)
|
||||
<< ". Rocr related function will not work.");
|
||||
return;
|
||||
}
|
||||
|
||||
status = lib_loader_.load_symbol(&diag_destroy_,
|
||||
"rdc_diag_destroy");
|
||||
if (status != RDC_ST_OK) {
|
||||
diag_destroy_ = nullptr;
|
||||
}
|
||||
|
||||
status = lib_loader_.load_symbol(&test_case_run_,
|
||||
"rdc_diag_test_case_run");
|
||||
if (status != RDC_ST_OK) {
|
||||
test_case_run_ = nullptr;
|
||||
}
|
||||
status = lib_loader_.load_symbol(&diag_test_cases_query_,
|
||||
"rdc_diag_test_cases_query");
|
||||
if (status != RDC_ST_OK) {
|
||||
diag_test_cases_query_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
RdcRocrLib::~RdcRocrLib() {
|
||||
if (diag_destroy_) {
|
||||
diag_destroy_();
|
||||
}
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocrLib::rdc_diag_test_cases_query(
|
||||
rdc_diag_test_cases_t test_cases[MAX_TEST_CASES],
|
||||
uint32_t* test_case_count) {
|
||||
if (test_case_count == nullptr) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
if (!diag_test_cases_query_) {
|
||||
return RDC_ST_FAIL_LOAD_MODULE;
|
||||
}
|
||||
|
||||
rdc_status_t status = diag_test_cases_query_(test_cases, test_case_count);
|
||||
RDC_LOG(RDC_DEBUG, "Query " << *test_case_count << " test cases from Rocr: "
|
||||
<< rdc_status_string(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
// Run a specific test case
|
||||
rdc_status_t RdcRocrLib::rdc_test_case_run(
|
||||
rdc_diag_test_cases_t test_case,
|
||||
uint32_t gpu_index[RDC_MAX_NUM_DEVICES],
|
||||
uint32_t gpu_count,
|
||||
rdc_diag_test_result_t* result) {
|
||||
if (result == nullptr) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
if (!test_case_run_) {
|
||||
return RDC_ST_FAIL_LOAD_MODULE;
|
||||
}
|
||||
|
||||
rdc_status_t status = test_case_run_(test_case, gpu_index,
|
||||
gpu_count, result);
|
||||
RDC_LOG(RDC_DEBUG, "Run " << test_case << " test case from Rocr: "
|
||||
<< rdc_status_string(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocrLib::rdc_diagnostic_run(
|
||||
const rdc_group_info_t& gpus,
|
||||
rdc_diag_level_t level,
|
||||
rdc_diag_response_t* response) {
|
||||
(void)gpus;
|
||||
(void)level;
|
||||
(void)response;
|
||||
return RDC_ST_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocrLib::rdc_diag_init(uint64_t flags) {
|
||||
if (!diag_init_) {
|
||||
return RDC_ST_FAIL_LOAD_MODULE;
|
||||
}
|
||||
|
||||
return diag_init_(flags);
|
||||
}
|
||||
rdc_status_t RdcRocrLib::rdc_diag_destroy() {
|
||||
if (!diag_destroy_) {
|
||||
return RDC_ST_FAIL_LOAD_MODULE;
|
||||
}
|
||||
|
||||
return diag_destroy_();
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
@@ -61,13 +61,12 @@ rdc_status_t RdcSmiDiagnosticImpl::check_rsmi_process_info(
|
||||
result->status = RDC_DIAG_RESULT_PASS;
|
||||
result->per_gpu_result_count = 0;
|
||||
strncpy_with_null(result->info,
|
||||
"Do not have any compute process running on any devices",
|
||||
"No processes running on any devices.",
|
||||
MAX_DIAG_MSG_LENGTH);
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
std::string info = std::to_string(num_items)
|
||||
+ " compute process is using devices.";
|
||||
std::string info;
|
||||
// Find details of the process running on each GPU
|
||||
std::vector<rsmi_process_info_t> procs(num_items);
|
||||
err = rsmi_compute_process_info_get(
|
||||
@@ -81,6 +80,9 @@ rdc_status_t RdcSmiDiagnosticImpl::check_rsmi_process_info(
|
||||
|
||||
std::map<uint32_t, std::vector<uint32_t>> pids_per_gpu;
|
||||
for (uint32_t i=0; i < num_items; i++) {
|
||||
// Skip the process does not occupy any GPUs. The hsa_shutdown()
|
||||
// will not clear /proc sys file until the process is terminated.
|
||||
if (procs[i].cu_occupancy == 0 ) continue;
|
||||
info += " Process: " + std::to_string(procs[i].process_id)
|
||||
+= ", pasid: " + std::to_string(procs[i].pasid)
|
||||
+= ", vram_usage: " + std::to_string(procs[i].vram_usage)
|
||||
@@ -118,11 +120,13 @@ rdc_status_t RdcSmiDiagnosticImpl::check_rsmi_process_info(
|
||||
}
|
||||
} // end for (uint32_t i=0 ...)
|
||||
|
||||
result->status = RDC_DIAG_RESULT_PASS; // pass by default
|
||||
if (pids_per_gpu.size() == 0) {
|
||||
result->status = RDC_DIAG_RESULT_WARN;
|
||||
info += " Cannot detect the processes running in which devices.";
|
||||
} else {
|
||||
result->status = RDC_DIAG_RESULT_PASS; // pass by default
|
||||
result->per_gpu_result_count = 0;
|
||||
strncpy_with_null(result->info,
|
||||
"No processes running on any devices.",
|
||||
MAX_DIAG_MSG_LENGTH);
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
// Mark as fail
|
||||
@@ -359,7 +363,7 @@ rdc_diag_result_t RdcSmiDiagnosticImpl::check_temperature_level(
|
||||
int64_t critical_temp = 0;
|
||||
err = rsmi_dev_temp_metric_get(gpu_index,
|
||||
type, met, &critical_temp);
|
||||
if (err != RSMI_STATUS_SUCCESS) {
|
||||
if (err == RSMI_STATUS_SUCCESS) {
|
||||
if (current_temp >= critical_temp) {
|
||||
result = RDC_DIAG_RESULT_FAIL;
|
||||
per_gpu_info += "Critical ";
|
||||
@@ -384,7 +388,7 @@ rdc_diag_result_t RdcSmiDiagnosticImpl::check_temperature_level(
|
||||
int64_t emergency_temp = 0;
|
||||
err = rsmi_dev_temp_metric_get(gpu_index,
|
||||
type, met, &emergency_temp);
|
||||
if (err != RSMI_STATUS_SUCCESS) {
|
||||
if (err == RSMI_STATUS_SUCCESS) {
|
||||
if (current_temp >= critical_temp) {
|
||||
result = RDC_DIAG_RESULT_FAIL;
|
||||
per_gpu_info += "Emergency ";
|
||||
@@ -409,7 +413,7 @@ rdc_diag_result_t RdcSmiDiagnosticImpl::check_temperature_level(
|
||||
int64_t critical_min_temp = 0;
|
||||
err = rsmi_dev_temp_metric_get(gpu_index,
|
||||
type, met, &critical_min_temp);
|
||||
if (err != RSMI_STATUS_SUCCESS) {
|
||||
if (err == RSMI_STATUS_SUCCESS) {
|
||||
if (current_temp <= critical_min_temp) {
|
||||
result = RDC_DIAG_RESULT_FAIL;
|
||||
per_gpu_info += "Critical Min ";
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* One instance of this kernel call is a thread.
|
||||
* Each thread finds out the segment in which it should look for the element.
|
||||
* After that, it checks if the element is between the lower bound and upper
|
||||
* bound of its segment. If yes, then this segment becomes the total
|
||||
* searchspace for the next pass.
|
||||
*
|
||||
* To achieve this, it writes the lower bound and upper bound to the output
|
||||
* array. In case the element at the left end (lower bound) matches the element
|
||||
* we are looking for, that is marked in the output and we no longer need to
|
||||
* look any further.
|
||||
*/
|
||||
|
||||
__kernel void
|
||||
binarySearch(__global uint4 * outputArray,
|
||||
__const __global uint2 * sortedArray,
|
||||
const unsigned int findMe) {
|
||||
unsigned int tid = get_global_id(0);
|
||||
|
||||
// Then we find the elements for this thread
|
||||
uint2 element = sortedArray[tid];
|
||||
|
||||
|
||||
// If the element to be found does not lie between
|
||||
// them, then nothing left to do in this thread
|
||||
if((element.x > findMe) || (element.y < findMe)) {
|
||||
return;
|
||||
} else {
|
||||
// However, if the element does lie between the lower
|
||||
// and upper bounds of this thread's searchspace
|
||||
// we need to narrow down the search further in this
|
||||
// search space
|
||||
// The search space for this thread is marked in the
|
||||
// output as being the total search space for the next pass
|
||||
outputArray[0].x = tid;
|
||||
outputArray[0].w = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
binarySearch_mulkeys(__global int *keys,
|
||||
__global uint *input,
|
||||
const unsigned int numKeys,
|
||||
__global int *output) {
|
||||
|
||||
int gid = get_global_id(0);
|
||||
int lBound = gid * 256;
|
||||
int uBound = lBound + 255;
|
||||
|
||||
for(int i = 0; i < numKeys; i++) {
|
||||
if(keys[i] >= input[lBound] && keys[i] <= input[uBound])
|
||||
output[i]=lBound;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
__kernel void
|
||||
binarySearch_mulkeysConcurrent(__global uint *keys,
|
||||
__global uint *input,
|
||||
const unsigned int inputSize, // num. of inputs
|
||||
const unsigned int numSubdivisions,
|
||||
__global int *output) {
|
||||
|
||||
int lBound = (get_global_id(0) % numSubdivisions) * (inputSize / numSubdivisions);
|
||||
int uBound = lBound + inputSize / numSubdivisions;
|
||||
int myKey = keys[get_global_id(0) / numSubdivisions];
|
||||
int mid;
|
||||
|
||||
while(uBound >= lBound) {
|
||||
mid = (lBound + uBound) / 2;
|
||||
if(input[mid] == myKey) {
|
||||
output[get_global_id(0) / numSubdivisions] = mid;
|
||||
return;
|
||||
} else if(input[mid] > myKey) {
|
||||
uBound = mid - 1;
|
||||
} else {
|
||||
lBound = mid + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
__kernel void gpuReadWrite(__global const int * a,
|
||||
__global int * b, __global int * c) {
|
||||
int i = get_global_id(0);
|
||||
// Reading the system memory and writing to gpu memory
|
||||
c[i] = a[i]; // a[i] point to system memory while c[i] to gpu memory.
|
||||
//writing to system memory
|
||||
b[i] = i;
|
||||
}
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -0,0 +1,686 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <string>
|
||||
#include <climits>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "rdc_modules/rdc_rocr/common.h"
|
||||
#include "rdc_modules/rdc_rocr/ComputeQueueTest.h"
|
||||
#include "rdc_modules/rdc_rocr/base_rocr_utils.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
static const uint32_t kNumBufferElements = 256;
|
||||
|
||||
ComputeQueueTest::ComputeQueueTest(uint32_t gpu_index): TestBase(gpu_index) {
|
||||
set_num_iteration(10); // Number of iterations to execute of the main test;
|
||||
// This is a default value which can be overridden
|
||||
// on the command line.
|
||||
set_title("ComputeQueue Test");
|
||||
set_description("This test will run binary search compute task via AQL.");
|
||||
}
|
||||
|
||||
ComputeQueueTest::~ComputeQueueTest(void) {
|
||||
}
|
||||
|
||||
// Any 1-time setup involving member variables used in the rest of the test
|
||||
// should be done here.
|
||||
hsa_status_t ComputeQueueTest::SetUp(void) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
|
||||
TestBase::SetUp();
|
||||
|
||||
err = SetDefaultAgents(this);
|
||||
if ( err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
err = SetPoolsTypical(this);
|
||||
return err;
|
||||
}
|
||||
|
||||
void ComputeQueueTest::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
}
|
||||
|
||||
void ComputeQueueTest::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void ComputeQueueTest::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void ComputeQueueTest::Close() {
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
static const uint32_t kBinarySearchLength = 512;
|
||||
static const uint32_t kBinarySearchFindMe = 108;
|
||||
static const uint32_t kWorkGroupSize = 256;
|
||||
|
||||
void ComputeQueueTest::InitializeBinarySearch(BinarySearch* bs) {
|
||||
bs->kernel_file_name = "binary_search_kernels.hsaco";
|
||||
bs->kernel_name = "binarySearch.kd";
|
||||
bs->length = kBinarySearchLength;
|
||||
bs->find_me = kBinarySearchFindMe;
|
||||
bs->work_group_size = kWorkGroupSize;
|
||||
bs->num_sub_divisions = bs->length / bs->work_group_size;
|
||||
}
|
||||
|
||||
// This function shows how to do an asynchronous copy. We have to create a
|
||||
// signal and use the signal to notify us when the copy has completed.
|
||||
hsa_status_t ComputeQueueTest::AgentMemcpy(void* dst, const void* src,
|
||||
size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
|
||||
hsa_signal_t s;
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_signal_create(1, 0, NULL, &s);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);
|
||||
throw_if_error(err);
|
||||
|
||||
if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
err = HSA_STATUS_ERROR;
|
||||
RDC_LOG(RDC_ERROR, "Async copy signal error");
|
||||
|
||||
throw_if_error(err);
|
||||
}
|
||||
|
||||
err = hsa_signal_destroy(s);
|
||||
|
||||
throw_if_error(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
hsa_status_t ComputeQueueTest::FindPools(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(bs->cpu_dev, FindStandardPool,
|
||||
&bs->cpu_pool);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(bs->gpu_dev, FindStandardPool,
|
||||
&bs->gpu_pool);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(bs->cpu_dev,
|
||||
FindKernArgPool, &bs->kern_arg_pool);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Once the needed memory pools have been found and the BinarySearch structure
|
||||
// has been updated with these handles, this function is then used to allocate
|
||||
// memory from those pools.
|
||||
// Devices with which a pool is associated already have access to the pool.
|
||||
// However, other devices may also need to read or write to that memory. Below,
|
||||
// we see how we can grant access to other devices to address this issue.
|
||||
hsa_status_t ComputeQueueTest::AllocateAndInitBuffers(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
uint32_t out_length = 4 * sizeof(uint32_t);
|
||||
uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);
|
||||
|
||||
// In all of these examples, we want both the cpu and gpu to have access to
|
||||
// the buffer in question. We use the array of agents below in the susequent
|
||||
// calls to hsa_amd_agents_allow_access() for this purpose.
|
||||
hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
|
||||
reinterpret_cast<void**>(&bs->input));
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input);
|
||||
throw_if_error(err);
|
||||
(void)memset(bs->input, 0, in_length);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, out_length, 0,
|
||||
reinterpret_cast<void**>(&bs->output));
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->output);
|
||||
throw_if_error(err);
|
||||
(void)memset(bs->input, 0, in_length);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
|
||||
reinterpret_cast<void**>(&bs->input_arr));
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr);
|
||||
throw_if_error(err);
|
||||
(void)memset(bs->input, 0, in_length);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
|
||||
reinterpret_cast<void**>(&bs->input_arr_local));
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr_local);
|
||||
throw_if_error(err);
|
||||
|
||||
// Binary-search application specific code...
|
||||
// Initialize input buffer with random values in an increasing order
|
||||
uint32_t max = bs->length * 20;
|
||||
bs->input[0] = 0;
|
||||
|
||||
uint32_t seed = (unsigned int)time(NULL);
|
||||
srand(seed);
|
||||
|
||||
for (uint32_t i = 1; i < bs->length; ++i) {
|
||||
bs->input[i] = bs->input[i - 1] +
|
||||
static_cast<uint32_t>(max * rand_r(&seed) / static_cast<float>(RAND_MAX));
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// The code in this function illustrates how to load a kernel from
|
||||
// pre-compiled code. The goal is to get a handle that can be later
|
||||
// used in an AQL packet and also to extract information about kernel
|
||||
// that we will need. All of the information hand kernel handle will
|
||||
// be saved to the BinarySearch structure. It will be used when we
|
||||
// populate the AQL packet.
|
||||
hsa_status_t ComputeQueueTest::LoadKernelFromObjFile(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
char agent_name[512];
|
||||
hsa_code_object_reader_t code_obj_rdr = {0};
|
||||
hsa_executable_t executable = {0};
|
||||
|
||||
err = hsa_agent_get_info(bs->gpu_dev, HSA_AGENT_INFO_NAME, agent_name);
|
||||
throw_if_error(err);
|
||||
std::string kernel_file = search_hsaco_full_path(
|
||||
bs->kernel_file_name.c_str(), agent_name);
|
||||
if (kernel_file == "") {
|
||||
RDC_LOG(RDC_ERROR, "failed to open " << bs->kernel_file_name.c_str() <<
|
||||
" at line " << __LINE__ << ", errno: " << errno);
|
||||
std::string msg("fail to open ");
|
||||
msg += bs->kernel_file_name;
|
||||
throw_if_skip(msg);
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
hsa_file_t file_handle = open(kernel_file.c_str(), O_RDONLY);
|
||||
if (file_handle == -1) {
|
||||
RDC_LOG(RDC_ERROR, "failed to open " << bs->kernel_file_name.c_str() <<
|
||||
" at line " << __LINE__ << ", errno: " << errno);
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
|
||||
throw_if_error(err);
|
||||
close(file_handle);
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &executable);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_executable_load_agent_code_object(executable, bs->gpu_dev,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_executable_freeze(executable, NULL);
|
||||
throw_if_error(err);
|
||||
|
||||
hsa_executable_symbol_t kern_sym;
|
||||
err = hsa_executable_get_symbol(executable, NULL, bs->kernel_name.c_str(),
|
||||
bs->gpu_dev, 0, &kern_sym);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&bs->kernel_object);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&bs->private_segment_size);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
||||
&bs->group_segment_size);
|
||||
throw_if_error(err);
|
||||
|
||||
// Remaining queries not supported on code object v3.
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
||||
&bs->kernarg_size);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
||||
&bs->kernarg_align);
|
||||
throw_if_error(err);
|
||||
assert(bs->kernarg_align >= 16 && "Reported kernarg size is too small.");
|
||||
bs->kernarg_align = (bs->kernarg_align == 0) ? 16 : bs->kernarg_align;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// This function populates the AQL patch with the information
|
||||
// we have collected and stored in the BinarySearch structure thus far.
|
||||
void ComputeQueueTest::PopulateAQLPacket(BinarySearch const* bs,
|
||||
hsa_kernel_dispatch_packet_t* aql) {
|
||||
aql->header = 0; // Dummy val. for now. Set this right before doorbell ring
|
||||
aql->setup = 1;
|
||||
aql->workgroup_size_x = bs->work_group_size;
|
||||
aql->workgroup_size_y = 1;
|
||||
aql->workgroup_size_z = 1;
|
||||
aql->grid_size_x = bs->work_grid_size;
|
||||
aql->grid_size_y = 1;
|
||||
aql->grid_size_z = 1;
|
||||
aql->private_segment_size = bs->private_segment_size;
|
||||
aql->group_segment_size = bs->group_segment_size;
|
||||
aql->kernel_object = bs->kernel_object;
|
||||
aql->kernarg_address = bs->kern_arg_address;
|
||||
aql->completion_signal = bs->signal;
|
||||
}
|
||||
|
||||
void ComputeQueueTest::WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql,
|
||||
hsa_queue_t* q) {
|
||||
void* queue_base = q->base_address;
|
||||
const uint32_t queue_mask = q->size - 1;
|
||||
uint64_t que_idx = hsa_queue_add_write_index_relaxed(q, 1);
|
||||
|
||||
hsa_kernel_dispatch_packet_t* queue_aql_packet;
|
||||
|
||||
queue_aql_packet =
|
||||
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue_base))
|
||||
[que_idx & queue_mask];
|
||||
|
||||
queue_aql_packet->workgroup_size_x = in_aql->workgroup_size_x;
|
||||
queue_aql_packet->workgroup_size_y = in_aql->workgroup_size_y;
|
||||
queue_aql_packet->workgroup_size_z = in_aql->workgroup_size_z;
|
||||
queue_aql_packet->grid_size_x = in_aql->grid_size_x;
|
||||
queue_aql_packet->grid_size_y = in_aql->grid_size_y;
|
||||
queue_aql_packet->grid_size_z = in_aql->grid_size_z;
|
||||
queue_aql_packet->private_segment_size = in_aql->private_segment_size;
|
||||
queue_aql_packet->group_segment_size = in_aql->group_segment_size;
|
||||
queue_aql_packet->kernel_object = in_aql->kernel_object;
|
||||
queue_aql_packet->kernarg_address = in_aql->kernarg_address;
|
||||
queue_aql_packet->completion_signal = in_aql->completion_signal;
|
||||
}
|
||||
|
||||
|
||||
// This function allocates memory from the kern_arg pool we already found, and
|
||||
// then sets the argument values needed by the kernel code.
|
||||
hsa_status_t ComputeQueueTest::AllocAndSetKernArgs(BinarySearch* bs, void* args,
|
||||
size_t arg_size, void** aql_buf_ptr) {
|
||||
void* kern_arg_buf = nullptr;
|
||||
hsa_status_t err;
|
||||
size_t buf_size;
|
||||
size_t req_align;
|
||||
|
||||
// The kernel code must be written to memory at the correct alignment. We
|
||||
// already queried the executable to get the correct alignment, which is
|
||||
// stored in bs->kernarg_align. In case the memory returned from
|
||||
// hsa_amd_memory_pool is not of the correct alignment, we request a little
|
||||
// more than what we need in case we need to adjust.
|
||||
req_align = bs->kernarg_align;
|
||||
// Allocate enough extra space for alignment adjustments if ncessary
|
||||
buf_size = arg_size + (req_align << 1);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->kern_arg_pool, buf_size, 0,
|
||||
reinterpret_cast<void**>(&kern_arg_buf));
|
||||
throw_if_error(err);
|
||||
|
||||
// Address of the allocated buffer
|
||||
bs->kern_arg_buffer = kern_arg_buf;
|
||||
|
||||
// Addr. of kern arg start.
|
||||
bs->kern_arg_address = AlignUp(kern_arg_buf, req_align);
|
||||
|
||||
assert(arg_size >= bs->kernarg_size);
|
||||
assert(((uintptr_t)bs->kern_arg_address + arg_size) <
|
||||
((uintptr_t)bs->kern_arg_buffer + buf_size));
|
||||
|
||||
(void)memcpy(bs->kern_arg_address, args, arg_size);
|
||||
throw_if_error(err);
|
||||
|
||||
// Make sure both the CPU and GPU can access the kernel arguments
|
||||
hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->kern_arg_buffer);
|
||||
throw_if_error(err);
|
||||
|
||||
// Save this info in our BinarySearch structure for later.
|
||||
*aql_buf_ptr = bs->kern_arg_address;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Once all the required data for kernel execution is collected (in this
|
||||
// application it is stored in the BinarySearch structure) we can put it in
|
||||
// an AQL packet and ring the queue door bell to tell the command processor to
|
||||
// execute it.
|
||||
hsa_status_t ComputeQueueTest::Run(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
RDC_LOG(RDC_DEBUG, "Executing kernel " << bs->kernel_name);
|
||||
|
||||
// Adjust the size of workgroup
|
||||
// This is mostly application specific.
|
||||
if (bs->work_group_size > 64) {
|
||||
bs->work_group_size = 64;
|
||||
bs->num_sub_divisions = bs->length / bs->work_group_size;
|
||||
}
|
||||
if (bs->num_sub_divisions < bs->work_group_size) {
|
||||
bs->num_sub_divisions = bs->work_group_size;
|
||||
}
|
||||
|
||||
bs->work_grid_size = bs->num_sub_divisions;
|
||||
|
||||
// Explanation of BinarySearch algorithm.
|
||||
/*
|
||||
* Since a plain binary search on the GPU would not achieve much benefit
|
||||
* over the GPU we are doing an N'ary search. We split the array into N
|
||||
* segments every pass and therefore get log (base N) passes instead of log
|
||||
* (base 2) passes.
|
||||
*
|
||||
* In every pass, only the thread that can potentially have the element we
|
||||
* are looking for writes to the output array. For ex: if we are looking to
|
||||
* find 4567 in the array and every thread is searching over a segment of
|
||||
* 1000 values and the input array is 1, 2, 3, 4,... then the first thread
|
||||
* is searching in 1 to 1000, the second one from 1001 to 2000, etc. The
|
||||
* first one does not write to the output. The second one doesn't either.
|
||||
* The fifth one however is from 4001 to 5000. So it can potentially have
|
||||
* the element 4567 which lies between them.
|
||||
*
|
||||
* This particular thread writes to the output the lower bound, upper bound
|
||||
* and whether the element equals the lower bound element. So, it would be
|
||||
* 4001, 5000, 0
|
||||
*
|
||||
* The next pass would subdivide 4001 to 5000 into smaller segments and
|
||||
* continue the same process from there.
|
||||
*
|
||||
* When a pass returns 1 in the third element, it means the element has been
|
||||
* found and we can stop executing the kernel. If the element is not found,
|
||||
* then the execution stops after looking at segment of size 1.
|
||||
*/
|
||||
|
||||
uint32_t global_lower_bound = 0;
|
||||
uint32_t global_upper_bound = bs->length - 1;
|
||||
uint32_t sub_div_size = (global_upper_bound - global_lower_bound + 1) /
|
||||
bs->num_sub_divisions;
|
||||
|
||||
if ((bs->input[0] > bs->find_me) ||
|
||||
(bs->input[bs->length - 1] < bs->find_me)) {
|
||||
bs->output[0] = 0;
|
||||
bs->output[1] = bs->length - 1;
|
||||
bs->output[2] = 0;
|
||||
RDC_LOG(RDC_DEBUG, "Returning too early");
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
bs->output[3] = 1;
|
||||
|
||||
// Setup the kernel args
|
||||
// See the meta-data for the compiled OpenCL kernel code to ascertain
|
||||
// the sizes, padding and alignment required for kernel arguments.
|
||||
// This can be seen by executing
|
||||
// $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
|
||||
// The kernel code will expect the following arguments aligned as shown.
|
||||
typedef uint32_t uint2[2];
|
||||
typedef uint32_t uint4[4];
|
||||
struct __attribute__((aligned(16))) local_args_t {
|
||||
uint4* outputArray;
|
||||
uint2* sortedArray;
|
||||
uint32_t findMe;
|
||||
uint32_t pad;
|
||||
uint64_t global_offset_x;
|
||||
uint64_t global_offset_y;
|
||||
uint64_t global_offset_z;
|
||||
uint64_t printf_buffer;
|
||||
uint64_t default_queue;
|
||||
uint64_t completion_action;
|
||||
} local_args;
|
||||
|
||||
local_args.outputArray = reinterpret_cast<uint4*>(bs->output);
|
||||
local_args.sortedArray = reinterpret_cast<uint2*>(bs->input_arr_local);
|
||||
local_args.findMe = bs->find_me;
|
||||
local_args.global_offset_x = 0;
|
||||
local_args.global_offset_y = 0;
|
||||
local_args.global_offset_z = 0;
|
||||
local_args.printf_buffer = 0;
|
||||
local_args.default_queue = 0;
|
||||
local_args.completion_action = 0;
|
||||
|
||||
// Copy the kernel args structure into kernel arg memory
|
||||
err = AllocAndSetKernArgs(bs, &local_args, sizeof(local_args),
|
||||
&bs->kern_arg_address);
|
||||
throw_if_error(err);
|
||||
|
||||
// Populate an AQL packet with the info we've gathered
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
PopulateAQLPacket(bs, &aql);
|
||||
|
||||
uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);
|
||||
|
||||
while ((sub_div_size > 1) && (bs->output[3] != 0)) {
|
||||
for (uint32_t i = 0 ; i < bs->num_sub_divisions; i++) {
|
||||
int idx1 = i * sub_div_size;
|
||||
int idx2 = ((i + 1) * sub_div_size) - 1;
|
||||
bs->input_arr[2 * i] = bs->input[idx1];
|
||||
bs->input_arr[2 * i + 1] = bs->input[idx2];
|
||||
}
|
||||
|
||||
// Copy kernel parameter from system memory to local memory
|
||||
err = AgentMemcpy(reinterpret_cast<uint8_t*>(bs->input_arr_local),
|
||||
reinterpret_cast<uint8_t*>(bs->input_arr),
|
||||
in_length, bs->gpu_dev, bs->cpu_dev);
|
||||
|
||||
throw_if_error(err);
|
||||
|
||||
// Reset output buffer to zero
|
||||
bs->output[3] = 0;
|
||||
|
||||
// Dispatch kernel with global work size, work group size with ONE dimesion
|
||||
// and wait for kernel to complete
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(bs->queue);
|
||||
|
||||
const uint32_t mask = bs->queue->size - 1;
|
||||
|
||||
// This function simply copies the data we've collected so far into our
|
||||
// local AQL packet, except the the setup and header fields.
|
||||
WriteAQLToQueue(&aql, bs->queue);
|
||||
|
||||
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
|
||||
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
||||
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
|
||||
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
||||
|
||||
// Set the packet's type, acquire and release fences. This should be done
|
||||
// atomically after all the other fields have been set, using release
|
||||
// memory ordering to ensure all the fields are set when the door bell
|
||||
// signal is activated.
|
||||
void* q_base = bs->queue->base_address;
|
||||
|
||||
AtomicSetPacketHeader(aql_header, aql.setup,
|
||||
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
|
||||
(q_base))[que_idx & mask]);
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch kernel.
|
||||
hsa_queue_store_write_index_relaxed(bs->queue, (que_idx + 1));
|
||||
hsa_signal_store_relaxed(bs->queue->doorbell_signal, que_idx);
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Modify the wait condition to HSA_WAIT_STATE_ACTIVE (instead of
|
||||
// HSA_WAIT_STATE_BLOCKED) if polling is needed instead of blocking, as we
|
||||
// have below.
|
||||
// The call below will block until the condition is met. Below we have said
|
||||
// the condition is that the signal value (initiailzed to 1) associated with
|
||||
// the queue is less than 1. When the kernel associated with the queued AQL
|
||||
// packet has completed execution, the signal value is automatically
|
||||
// decremented by the packet processor.
|
||||
hsa_signal_value_t value = hsa_signal_wait_scacquire(bs->signal,
|
||||
HSA_SIGNAL_CONDITION_LT, 1,
|
||||
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// value should be 0, or we timed-out
|
||||
if (value) {
|
||||
RDC_LOG(RDC_ERROR, "Timed out waiting for kernel to complete?");
|
||||
throw_if_error(HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
// Reset the signal to its initial value for the next iteration
|
||||
hsa_signal_store_screlease(bs->signal, 1);
|
||||
|
||||
// Binary search algorithm stuff...
|
||||
global_lower_bound = bs->output[0] * sub_div_size;
|
||||
global_upper_bound = global_lower_bound + sub_div_size - 1;
|
||||
sub_div_size = (global_upper_bound - global_lower_bound + 1) /
|
||||
bs->num_sub_divisions;
|
||||
}
|
||||
|
||||
uint32_t element_index = UINT_MAX;
|
||||
|
||||
for (uint32_t i = global_lower_bound; i <= global_upper_bound; i++) {
|
||||
if (bs->input[i] == bs->find_me) {
|
||||
element_index = i;
|
||||
bs->output[0] = i;
|
||||
bs->output[1] = i + 1;
|
||||
bs->output[2] = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Element is not found in region specified
|
||||
// by global lower bound to global upper bound
|
||||
bs->output[2] = 0;
|
||||
}
|
||||
|
||||
uint32_t is_elem_found = bs->output[2];
|
||||
RDC_LOG(RDC_DEBUG, "Lower bound = " << global_lower_bound);
|
||||
RDC_LOG(RDC_DEBUG, "Upper bound = " << global_upper_bound);
|
||||
RDC_LOG(RDC_DEBUG, "Element search for = " << bs->find_me);
|
||||
|
||||
if (is_elem_found == 1) {
|
||||
RDC_LOG(RDC_DEBUG, "Element found at index " << element_index);
|
||||
} else {
|
||||
RDC_LOG(RDC_DEBUG, "Element value " << bs->find_me << " not found");
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Release all the RocR resources we have acquired in this application.
|
||||
hsa_status_t ComputeQueueTest::CleanUp(BinarySearch* bs) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->input);
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->output);
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->input_arr);
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->kern_arg_buffer);
|
||||
|
||||
err = hsa_queue_destroy(bs->queue);
|
||||
|
||||
err = hsa_signal_destroy(bs->signal);
|
||||
|
||||
// shutdown will be called at destructor
|
||||
// err = hsa_shut_down();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
hsa_status_t ComputeQueueTest::RunBinarySearchTest(void) {
|
||||
BinarySearch bs;
|
||||
hsa_status_t err;
|
||||
|
||||
InitializeBinarySearch(&bs);
|
||||
|
||||
hsa_agent_t current_gpu;
|
||||
err = get_agent_by_gpu_index(gpu_index_, ¤t_gpu);
|
||||
throw_if_error(err, "Get agent by GPU index fail.");
|
||||
bs.gpu_dev.handle = current_gpu.handle;
|
||||
|
||||
// find all cpu agents
|
||||
std::vector<hsa_agent_t> cpus;
|
||||
err = hsa_iterate_agents(IterateCPUAgents, &cpus);
|
||||
throw_if_error(err);
|
||||
bs.cpu_dev.handle = cpus[0].handle;
|
||||
|
||||
err = hsa_signal_create(1, 0, NULL, &bs.signal);
|
||||
throw_if_error(err, "Fail to create signal.");
|
||||
|
||||
err = hsa_queue_create(bs.gpu_dev, 128, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, &bs.queue);
|
||||
throw_if_error(err, "Fail to create queue.");
|
||||
|
||||
err = FindPools(&bs);
|
||||
throw_if_error(err, "Fail to find pools.");
|
||||
|
||||
// Allocate memory from the correct memory pool, and initialize them as
|
||||
// neeeded for the algorihm.
|
||||
err = AllocateAndInitBuffers(&bs);
|
||||
throw_if_error(err, "Allocate and initBuffers fail.");
|
||||
|
||||
err = LoadKernelFromObjFile(&bs);
|
||||
throw_if_error(err, "Load kernel from Object file fail.");
|
||||
|
||||
err = Run(&bs);
|
||||
throw_if_error(err, "Run binary search fail.");
|
||||
|
||||
CleanUp(&bs);
|
||||
|
||||
gpu_info_ += "Run binary search task on GPU ";
|
||||
gpu_info_ += std::to_string(gpu_index_);
|
||||
gpu_info_ += " Pass.";
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
+469
@@ -0,0 +1,469 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "rdc_modules/rdc_rocr/common.h"
|
||||
#include "rdc_modules/rdc_rocr/MemoryAccess.h"
|
||||
#include "rdc_modules/rdc_rocr/base_rocr_utils.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
MemoryAccessTest::MemoryAccessTest(uint32_t gpu_index): TestBase(gpu_index) {
|
||||
set_num_iteration(10); // Number of iterations to execute of the main test;
|
||||
// This is a default value which can be overridden
|
||||
// on the command line.
|
||||
|
||||
set_title("RocR Memory Access Tests");
|
||||
set_description("This series of tests check memory allocation"
|
||||
"on GPU and CPU, i.e. GPU access to system memory "
|
||||
"and CPU access to GPU memory.");
|
||||
}
|
||||
|
||||
MemoryAccessTest::~MemoryAccessTest(void) {
|
||||
}
|
||||
|
||||
// Any 1-time setup involving member variables used in the rest of the test
|
||||
// should be done here.
|
||||
hsa_status_t MemoryAccessTest::SetUp(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
TestBase::SetUp();
|
||||
|
||||
err = SetDefaultAgents(this);
|
||||
throw_if_error(err);
|
||||
|
||||
err = SetPoolsTypical(this);
|
||||
throw_if_error(err);
|
||||
return err;
|
||||
}
|
||||
|
||||
void MemoryAccessTest::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
}
|
||||
|
||||
void MemoryAccessTest::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void MemoryAccessTest::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryAccessTest::Close() {
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
|
||||
typedef struct __attribute__ ((aligned(16))) args_t {
|
||||
int *a;
|
||||
int *b;
|
||||
int *c;
|
||||
} args;
|
||||
|
||||
args *kernArgs = NULL;
|
||||
|
||||
static const char kSubTestSeparator[] = " **************************";
|
||||
|
||||
static void PrintMemorySubtestHeader(const char *header) {
|
||||
RDC_LOG(RDC_DEBUG, " *** Memory Subtest: " << header << " ***");
|
||||
}
|
||||
|
||||
#if ROCRTST_EMULATOR_BUILD
|
||||
static const int kMemoryAllocSize = 8;
|
||||
#else
|
||||
static const int kMemoryAllocSize = 1024;
|
||||
#endif
|
||||
|
||||
|
||||
// Test to check GPU can read & write to system memory
|
||||
void MemoryAccessTest::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent,
|
||||
hsa_agent_t gpuAgent) {
|
||||
hsa_status_t err;
|
||||
|
||||
// Get Global Memory Pool on the gpuAgent to allocate gpu buffers
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(gpuAgent,
|
||||
GetGlobalMemoryPool,
|
||||
&gpu_pool);
|
||||
throw_if_error(err);
|
||||
|
||||
hsa_amd_memory_pool_access_t access;
|
||||
hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
||||
&access);
|
||||
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
||||
// hsa objects
|
||||
hsa_queue_t *queue = NULL; // command queue
|
||||
hsa_signal_t signal = {0}; // completion signal
|
||||
|
||||
// get queue size
|
||||
uint32_t queue_size = 0;
|
||||
err = hsa_agent_get_info(gpuAgent,
|
||||
HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
|
||||
throw_if_error(err);
|
||||
|
||||
// create queue
|
||||
err = hsa_queue_create(gpuAgent,
|
||||
queue_size, HSA_QUEUE_TYPE_MULTI,
|
||||
NULL, NULL, 0, 0, &queue);
|
||||
throw_if_error(err);
|
||||
|
||||
// Get System Memory Pool on the cpuAgent to allocate host side buffers
|
||||
hsa_amd_memory_pool_t global_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
|
||||
GetGlobalMemoryPool,
|
||||
&global_pool);
|
||||
throw_if_error(err);
|
||||
|
||||
// Find a memory pool that supports kernel arguments.
|
||||
hsa_amd_memory_pool_t kernarg_pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(cpuAgent,
|
||||
GetKernArgMemoryPool,
|
||||
&kernarg_pool);
|
||||
throw_if_error(err);
|
||||
|
||||
// Allocate the host side buffers
|
||||
// (sys_data,dup_sys_data,cpuResult,kernArg) on system memory
|
||||
int *sys_data = NULL;
|
||||
int *dup_sys_data = NULL;
|
||||
int *cpuResult = NULL;
|
||||
int *gpuResult = NULL;
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(global_pool,
|
||||
kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&cpuResult));
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(global_pool,
|
||||
kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&sys_data));
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(global_pool,
|
||||
kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&dup_sys_data));
|
||||
throw_if_error(err);
|
||||
|
||||
|
||||
// Allocate the kernel argument buffer from the kernarg_pool.
|
||||
err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0,
|
||||
reinterpret_cast<void **>(&kernArgs));
|
||||
throw_if_error(err);
|
||||
|
||||
// initialize the host buffers
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
unsigned int seed = time(NULL);
|
||||
sys_data[i] = 1 + rand_r(&seed) % 1;
|
||||
dup_sys_data[i] = sys_data[i];
|
||||
}
|
||||
|
||||
memset(cpuResult, 0, kMemoryAllocSize * sizeof(int));
|
||||
|
||||
// for the dGPU, we have coarse grained local memory,
|
||||
// so allocate memory for it on the GPU's GLOBAL segment .
|
||||
|
||||
// Get local memory of GPU to allocate device side buffers
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0,
|
||||
reinterpret_cast<void **>(&gpuResult));
|
||||
throw_if_error(err);
|
||||
|
||||
|
||||
// Allow cpuAgent access to all allocated GPU memory.
|
||||
err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuResult);
|
||||
throw_if_error(err);
|
||||
memset(gpuResult, 0, kMemoryAllocSize * sizeof(int));
|
||||
|
||||
// Allow gpuAgent access to all allocated system memory.
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, cpuResult);
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data);
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, dup_sys_data);
|
||||
throw_if_error(err);
|
||||
err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgs);
|
||||
throw_if_error(err);
|
||||
|
||||
kernArgs->a = sys_data;
|
||||
kernArgs->b = cpuResult; // system memory passed to gpu for write
|
||||
kernArgs->c = gpuResult; // gpu memory to verify that gpu read system data
|
||||
|
||||
|
||||
// Create the executable, get symbol by name and load the code object
|
||||
set_kernel_file_name("gpuReadWrite_kernels.hsaco");
|
||||
set_kernel_name("gpuReadWrite");
|
||||
err = LoadKernelFromObjFile(this, &gpuAgent);
|
||||
throw_if_error(err);
|
||||
|
||||
// Fill the dispatch packet with
|
||||
// workgroup_size, grid_size, kernelArgs and completion signal
|
||||
// Put it on the queue and launch the kernel by ringing the doorbell
|
||||
|
||||
// create completion signal
|
||||
err = hsa_signal_create(1, 0, NULL, &signal);
|
||||
throw_if_error(err);
|
||||
|
||||
// create aql packet
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
|
||||
// initialize aql packet
|
||||
aql.workgroup_size_x = 256;
|
||||
aql.workgroup_size_y = 1;
|
||||
aql.workgroup_size_z = 1;
|
||||
aql.grid_size_x = kMemoryAllocSize;
|
||||
aql.grid_size_y = 1;
|
||||
aql.grid_size_z = 1;
|
||||
aql.private_segment_size = 0;
|
||||
aql.group_segment_size = 0;
|
||||
aql.kernel_object = kernel_object(); // kernel_code;
|
||||
aql.kernarg_address = kernArgs;
|
||||
aql.completion_signal = signal;
|
||||
|
||||
// const uint32_t queue_size = queue->size;
|
||||
const uint32_t queue_mask = queue->size - 1;
|
||||
|
||||
// write to command queue
|
||||
uint64_t index = hsa_queue_load_write_index_relaxed(queue);
|
||||
hsa_queue_store_write_index_relaxed(queue, index + 1);
|
||||
|
||||
WriteAQLToQueueLoc(queue, index, &aql);
|
||||
|
||||
hsa_kernel_dispatch_packet_t *q_base_addr =
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t *>(queue->base_address);
|
||||
AtomicSetPacketHeader(
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE),
|
||||
(1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS),
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t *>
|
||||
(&q_base_addr[index & queue_mask]));
|
||||
|
||||
// ringdoor bell
|
||||
hsa_signal_store_relaxed(queue->doorbell_signal, index);
|
||||
// wait for the signal and reset it for future use
|
||||
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
(uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { }
|
||||
hsa_signal_store_relaxed(signal, 1);
|
||||
|
||||
// compare device and host side results
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "check gpu has read the system memory");
|
||||
}
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
if (gpuResult[i] != dup_sys_data[i]) {
|
||||
throw_if_error(HSA_STATUS_ERROR,
|
||||
"gpuResult does not match dup_sys_data.");
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "gpu has read the system memory successfully");
|
||||
RDC_LOG(RDC_DEBUG, "check gpu has written to system memory");
|
||||
}
|
||||
for (int i = 0; i < kMemoryAllocSize; ++i) {
|
||||
if (cpuResult[i] != i) {
|
||||
throw_if_error(HSA_STATUS_ERROR,
|
||||
"The CPU memory size does not match the system memory size.");
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "gpu has written to system memory successfully");
|
||||
}
|
||||
|
||||
if (sys_data) { hsa_memory_free(sys_data); }
|
||||
if (dup_sys_data) { hsa_memory_free(dup_sys_data); }
|
||||
if (cpuResult) {hsa_memory_free(cpuResult); }
|
||||
if (gpuResult) {hsa_memory_free(gpuResult); }
|
||||
if (kernArgs) { hsa_memory_free(kernArgs); }
|
||||
if (signal.handle) { hsa_signal_destroy(signal); }
|
||||
if (queue) { hsa_queue_destroy(queue); }
|
||||
|
||||
} else {
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG,
|
||||
"Test not applicable as system is not large bar, skipping");
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Test to check cpu can read & write to GPU memory
|
||||
void MemoryAccessTest::CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent,
|
||||
hsa_agent_t,
|
||||
hsa_amd_memory_pool_t pool) {
|
||||
hsa_status_t err;
|
||||
|
||||
pool_info_t pool_i;
|
||||
err = AcquirePoolInfo(pool, &pool_i);
|
||||
throw_if_error(err);
|
||||
|
||||
if (pool_i.segment == HSA_AMD_SEGMENT_GLOBAL &&
|
||||
pool_i.global_flag == HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
hsa_amd_memory_pool_access_t access;
|
||||
hsa_amd_agent_memory_pool_get_info(cpuAgent, pool,
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
||||
&access);
|
||||
if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
||||
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 ||
|
||||
pool_i.alloc_alignment == 0) {
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "Test not applicable. Skipping.");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto gran_sz = pool_i.alloc_granule;
|
||||
auto pool_sz = pool_i.size / gran_sz;
|
||||
auto max_alloc_size = pool_sz/2;
|
||||
unsigned int max_element = max_alloc_size/sizeof(unsigned int);
|
||||
unsigned int *gpu_data;
|
||||
unsigned int *sys_data;
|
||||
sys_data = (unsigned int*)malloc(max_alloc_size);
|
||||
memset(sys_data, 0, max_alloc_size);
|
||||
for (unsigned int i = 1; i <= max_element; ++i) {
|
||||
sys_data[i] = i;
|
||||
}
|
||||
// err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data);
|
||||
// EXPECT_EQ(err, HSA_STATUS_SUCCESS);
|
||||
err = hsa_amd_memory_pool_allocate(pool, max_alloc_size, 0,
|
||||
reinterpret_cast<void**>(&gpu_data));
|
||||
throw_if_error(err);
|
||||
/*
|
||||
if (err == HSA_STATUS_ERROR) {
|
||||
err = hsa_amd_memory_pool_free(gpu_data);
|
||||
}*/
|
||||
|
||||
err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpu_data);
|
||||
throw_if_error(err);
|
||||
memset(gpu_data, 0, max_alloc_size);
|
||||
|
||||
// Verify CPU can read & write to GPU memory
|
||||
RDC_LOG(RDC_DEBUG, "Verify CPU can read & write to GPU memory");
|
||||
for (unsigned int i = 1; i <= max_element; ++i) {
|
||||
gpu_data[i] = i; // Write to gpu memory directly
|
||||
}
|
||||
|
||||
for (unsigned int i = 1; i <= max_element; ++i) {
|
||||
if (sys_data[i] != gpu_data[i]) { // Reading GPU memory
|
||||
fprintf(stdout, "Values not mathing !! sys_data[%d]:%d ,"
|
||||
"gpu_data[%d]\n", sys_data[i], i, gpu_data[i]);
|
||||
}
|
||||
}
|
||||
RDC_LOG(RDC_DEBUG, "CPU have read & write to GPU memory successfully");
|
||||
err = hsa_amd_memory_pool_free(gpu_data);
|
||||
free(sys_data);
|
||||
} else {
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG,
|
||||
"Test not applicable as system is not large bar, Skipping.");
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryAccessTest::CPUAccessToGPUMemoryTest(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
PrintMemorySubtestHeader("CPUAccessToGPUMemoryTest in Memory Pools");
|
||||
// find all cpu agents
|
||||
std::vector<hsa_agent_t> cpus;
|
||||
err = hsa_iterate_agents(IterateCPUAgents, &cpus);
|
||||
throw_if_error(err);
|
||||
// find all gpu agents
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
err = hsa_iterate_agents(IterateGPUAgents, &gpus);
|
||||
throw_if_error(err);
|
||||
for (unsigned int i = 0 ; i< gpus.size(); ++i) {
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
memset(&gpu_pool, 0, sizeof(gpu_pool));
|
||||
err = hsa_amd_agent_iterate_memory_pools(gpus[i],
|
||||
GetGlobalMemoryPool,
|
||||
&gpu_pool);
|
||||
throw_if_error(err);
|
||||
if (gpu_pool.handle == 0) {
|
||||
RDC_LOG(RDC_DEBUG, "no global mempool in gpu agent");
|
||||
return;
|
||||
}
|
||||
CPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool);
|
||||
}
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "subtest Passed");
|
||||
}
|
||||
per_gpu_info_ += "CPUAccessToGPUMemoryTest Pass.";
|
||||
gpu_info_ += "CPUAccessToGPUMemoryTest for GPU ";
|
||||
gpu_info_ += std::to_string(gpu_index_);
|
||||
gpu_info_ += " Pass. ";
|
||||
}
|
||||
|
||||
void MemoryAccessTest::GPUAccessToCPUMemoryTest(void) {
|
||||
hsa_status_t err;
|
||||
|
||||
PrintMemorySubtestHeader("GPUAccessToCPUMemoryTest in Memory Pools");
|
||||
// find all cpu agents
|
||||
std::vector<hsa_agent_t> cpus;
|
||||
err = hsa_iterate_agents(IterateCPUAgents, &cpus);
|
||||
throw_if_error(err);
|
||||
|
||||
// find current gpu
|
||||
hsa_agent_t current_gpu;
|
||||
err = get_agent_by_gpu_index(gpu_index_, ¤t_gpu);
|
||||
throw_if_error(err, "Get agent by GPU index fail.");
|
||||
|
||||
GPUAccessToCPUMemoryTest(cpus[0], current_gpu);
|
||||
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "subtest Passed");
|
||||
}
|
||||
|
||||
per_gpu_info_ += "GPUAccessToCPUMemoryTest Pass.";
|
||||
|
||||
gpu_info_ += "GPUAccessToCPUMemoryTest for GPU ";
|
||||
gpu_info_ += std::to_string(gpu_index_);
|
||||
gpu_info_ += " Pass. ";
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
+258
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "rdc_modules/rdc_rocr/common.h"
|
||||
#include "rdc_modules/rdc_rocr/MemoryTest.h"
|
||||
#include "rdc_modules/rdc_rocr/base_rocr_utils.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
static const uint32_t kNumBufferElements = 256;
|
||||
|
||||
MemoryTest::MemoryTest(uint32_t gpu_index): TestBase(gpu_index) {
|
||||
set_num_iteration(10); // Number of iterations to execute of the main test;
|
||||
// This is a default value which can be overridden
|
||||
// on the command line.
|
||||
set_title("Max Single Allocation Memory Test");
|
||||
set_description("This series of tests check memory allocation limits, extent"
|
||||
" of GPU access to system memory and other memory related functionality.");
|
||||
}
|
||||
|
||||
MemoryTest::~MemoryTest(void) {
|
||||
}
|
||||
|
||||
// Any 1-time setup involving member variables used in the rest of the test
|
||||
// should be done here.
|
||||
hsa_status_t MemoryTest::SetUp(void) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
|
||||
TestBase::SetUp();
|
||||
|
||||
err = SetDefaultAgents(this);
|
||||
if ( err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
err = SetPoolsTypical(this);
|
||||
return err;
|
||||
}
|
||||
|
||||
void MemoryTest::Run(void) {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
TestBase::Run();
|
||||
}
|
||||
|
||||
void MemoryTest::DisplayTestInfo(void) {
|
||||
TestBase::DisplayTestInfo();
|
||||
}
|
||||
|
||||
void MemoryTest::DisplayResults(void) const {
|
||||
// Compare required profile for this test case with what we're actually
|
||||
// running on
|
||||
if (!CheckProfile(this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void MemoryTest::Close() {
|
||||
// This will close handles opened within rocrtst utility calls and call
|
||||
// hsa_shut_down(), so it should be done after other hsa cleanup
|
||||
TestBase::Close();
|
||||
}
|
||||
|
||||
hsa_status_t MemoryTest::TestAllocate(hsa_amd_memory_pool_t pool, size_t sz) {
|
||||
void *ptr;
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(pool, sz, 0, &ptr);
|
||||
|
||||
if (err == HSA_STATUS_SUCCESS) {
|
||||
err = hsa_memory_free(ptr);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char kSubTestSeparator[] = " **************************";
|
||||
|
||||
static void PrintMemorySubtestHeader(const char *header) {
|
||||
RDC_LOG(RDC_DEBUG, " *** Memory Subtest: " << header << " ***");
|
||||
}
|
||||
|
||||
// Test Fixtures
|
||||
hsa_status_t MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
|
||||
hsa_amd_memory_pool_t pool) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
|
||||
pool_info_t pool_i;
|
||||
char ag_name[64];
|
||||
hsa_device_type_t ag_type;
|
||||
|
||||
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name);
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type);
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
uint32_t node = 0;
|
||||
err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NODE, &node);
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
if (verbosity() > 0) {
|
||||
std::string device_type;
|
||||
switch (ag_type) {
|
||||
case HSA_DEVICE_TYPE_CPU:
|
||||
device_type = "CPU";
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_GPU:
|
||||
device_type = "GPU";
|
||||
break;
|
||||
case HSA_DEVICE_TYPE_DSP:
|
||||
device_type = "DSP";
|
||||
break;
|
||||
}
|
||||
RDC_LOG(RDC_DEBUG, " Agent: " << ag_name << " Node " << node << " ("
|
||||
<< device_type << ")");
|
||||
}
|
||||
|
||||
err = AcquirePoolInfo(pool, &pool_i);
|
||||
if (err != HSA_STATUS_SUCCESS) return err;
|
||||
|
||||
if (verbosity() > 0) {
|
||||
DumpMemoryPoolInfo(&pool_i, 2);
|
||||
}
|
||||
|
||||
if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 ||
|
||||
pool_i.alloc_alignment == 0) {
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, " Test not applicable. Skipping.");
|
||||
}
|
||||
return err;
|
||||
}
|
||||
// Do everything in "granule" units
|
||||
auto gran_sz = pool_i.alloc_granule;
|
||||
auto pool_sz = pool_i.aggregate_alloc_max / gran_sz;
|
||||
|
||||
// Neg. test: Try to allocate more than the pool size
|
||||
err = TestAllocate(pool, pool_sz*gran_sz + gran_sz);
|
||||
if (err != HSA_STATUS_ERROR_INVALID_ALLOCATION) return err;
|
||||
|
||||
auto max_alloc_size = pool_sz/2;
|
||||
uint64_t upper_bound = pool_sz;
|
||||
uint64_t lower_bound = 0;
|
||||
|
||||
while (true) {
|
||||
err = TestAllocate(pool, max_alloc_size * gran_sz);
|
||||
|
||||
if (err != HSA_STATUS_SUCCESS ||
|
||||
err != HSA_STATUS_ERROR_OUT_OF_RESOURCES) return err;
|
||||
|
||||
if (err == HSA_STATUS_SUCCESS) {
|
||||
lower_bound = max_alloc_size;
|
||||
max_alloc_size += (upper_bound - lower_bound)/2;
|
||||
} else if (err == HSA_STATUS_ERROR_OUT_OF_RESOURCES) {
|
||||
upper_bound = max_alloc_size;
|
||||
max_alloc_size -= (upper_bound - lower_bound)/2;
|
||||
}
|
||||
|
||||
if ((upper_bound - lower_bound) < 2) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (upper_bound <= lower_bound) {
|
||||
RDC_LOG(RDC_ERROR, "Wrong upper bound and lower bound");
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
if (verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, " Biggest single allocation size for this pool is " <<
|
||||
(max_alloc_size * gran_sz)/1024 << "KB.");
|
||||
RDC_LOG(RDC_DEBUG, " This is " <<
|
||||
static_cast<float>(max_alloc_size)/pool_sz*100 <<
|
||||
"% of the total.");
|
||||
}
|
||||
|
||||
if (ag_type == HSA_DEVICE_TYPE_GPU) {
|
||||
if ((float)max_alloc_size/pool_sz < (float)15/16) {
|
||||
RDC_LOG(RDC_ERROR, "the allocate size is wrong");
|
||||
throw_if_error(HSA_STATUS_ERROR, "The allocate size is wrong");
|
||||
}
|
||||
// EXPECT_GE((float)max_alloc_size/pool_sz, (float)15/16);
|
||||
}
|
||||
if (verbosity() > 0) {
|
||||
std::cout << kSubTestSeparator << std::endl;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
hsa_status_t MemoryTest::MaxSingleAllocationTest(void) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
std::vector<std::shared_ptr<agent_pools_t>> agent_pools;
|
||||
|
||||
PrintMemorySubtestHeader("Maximum Single Allocation in Memory Pools");
|
||||
|
||||
err = GetAgentPools(&agent_pools);
|
||||
throw_if_error(err, "GetAgentPools pool fail.");
|
||||
|
||||
hsa_agent_t current_gpu;
|
||||
err = get_agent_by_gpu_index(gpu_index_, ¤t_gpu);
|
||||
throw_if_error(err, "Get agent by GPU index fail.");
|
||||
|
||||
auto pool_idx = 0;
|
||||
for (auto a : agent_pools) {
|
||||
if (a->agent.handle != current_gpu.handle)
|
||||
continue;
|
||||
for (auto p : a->pools) {
|
||||
pool_idx++;
|
||||
RDC_LOG(RDC_DEBUG, " Pool " << pool_idx << ":");
|
||||
err = MaxSingleAllocationTest(a->agent, p);
|
||||
throw_if_error(err, "MaxSingleAllocationTest .");
|
||||
per_gpu_info_ += title();
|
||||
per_gpu_info_ += " Pool ";
|
||||
per_gpu_info_ += std::to_string(pool_idx);
|
||||
per_gpu_info_ += " test pass. ";
|
||||
}
|
||||
}
|
||||
gpu_info_ += title();
|
||||
gpu_info_ += " for GPU ";
|
||||
gpu_info_ += std::to_string(gpu_index_);
|
||||
gpu_info_ += " Pass. ";
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_modules/rdc_rocr/common.h"
|
||||
#include "rdc_modules/rdc_rocr/RdcDiagnosticLib.h"
|
||||
#include "rdc_modules/rdc_rocr/MemoryTest.h"
|
||||
#include "rdc_modules/rdc_rocr/MemoryAccess.h"
|
||||
#include "rdc_modules/rdc_rocr/ComputeQueueTest.h"
|
||||
|
||||
rdc_status_t rdc_diag_init(uint64_t) {
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
rdc_status_t rdc_diag_destroy() {
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
rdc_status_t rdc_diag_test_cases_query(
|
||||
rdc_diag_test_cases_t test_cases[MAX_TEST_CASES],
|
||||
uint32_t* test_case_count) {
|
||||
if (test_case_count == nullptr) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
|
||||
*test_case_count = 2;
|
||||
test_cases[0] = RDC_DIAG_COMPUTE_QUEUE;
|
||||
test_cases[1] = RDC_DIAG_SYS_MEM_CHECK;
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
// Helper function to run the memory test on GPU
|
||||
static rdc_status_t run_memory_test(uint32_t gpu_index,
|
||||
rdc_diag_test_result_t* result) {
|
||||
std::string info = result->info;
|
||||
std::string per_gpu_info = result->gpu_results[gpu_index].gpu_result.msg;
|
||||
|
||||
try {
|
||||
amd::rdc::MemoryTest test(gpu_index);
|
||||
test.MaxSingleAllocationTest();
|
||||
|
||||
info += test.get_gpu_info();
|
||||
per_gpu_info += test.get_per_gpu_info();
|
||||
} catch (const amd::rdc::SkipException& e) {
|
||||
result->status = RDC_DIAG_RESULT_SKIP;
|
||||
per_gpu_info += "MaxSingleAllocationTest is skipped: ";
|
||||
per_gpu_info += e.what();
|
||||
info += "GPU ";
|
||||
info += std::to_string(gpu_index);
|
||||
info += " MaxSingleAllocationTest is skipped: ";
|
||||
info += e.what();
|
||||
info += ".";
|
||||
} catch (const std::exception& e) {
|
||||
result->status = RDC_DIAG_RESULT_FAIL;
|
||||
per_gpu_info += "MaxSingleAllocationTest returns with error ";
|
||||
per_gpu_info += e.what();
|
||||
info += "GPU ";
|
||||
info += std::to_string(gpu_index);
|
||||
info += " MaxSingleAllocationTest returns with error ";
|
||||
info += e.what();
|
||||
info += ".";
|
||||
}
|
||||
|
||||
try {
|
||||
amd::rdc::MemoryAccessTest test(gpu_index);
|
||||
test.CPUAccessToGPUMemoryTest();
|
||||
test.GPUAccessToCPUMemoryTest();
|
||||
info += test.get_gpu_info();
|
||||
per_gpu_info += test.get_per_gpu_info();
|
||||
} catch (const amd::rdc::SkipException& e) {
|
||||
result->status = RDC_DIAG_RESULT_SKIP;
|
||||
per_gpu_info += "Memory Access is skipped: ";
|
||||
per_gpu_info += e.what();
|
||||
info += "GPU ";
|
||||
info += std::to_string(gpu_index);
|
||||
info += " Memory Access is skipped: ";
|
||||
info += e.what();
|
||||
info += ".";
|
||||
} catch (const std::exception& e) {
|
||||
result->status = RDC_DIAG_RESULT_FAIL;
|
||||
per_gpu_info += "Memory Access returns with error ";
|
||||
per_gpu_info += e.what();
|
||||
info += "GPU ";
|
||||
info += std::to_string(gpu_index);
|
||||
info += " Memory Access returns with error ";
|
||||
info += e.what();
|
||||
info += ".";
|
||||
}
|
||||
|
||||
strncpy_with_null(result->info, info.c_str(),
|
||||
MAX_DIAG_MSG_LENGTH);
|
||||
strncpy_with_null(result->gpu_results[gpu_index].gpu_result.msg,
|
||||
per_gpu_info.c_str(), MAX_DIAG_MSG_LENGTH);
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
|
||||
static rdc_status_t run_compute_queue_test(uint32_t gpu_index,
|
||||
rdc_diag_test_result_t* result) {
|
||||
std::string info = result->info;
|
||||
std::string per_gpu_info = result->gpu_results[gpu_index].gpu_result.msg;
|
||||
|
||||
try {
|
||||
amd::rdc::ComputeQueueTest test(gpu_index);
|
||||
test.RunBinarySearchTest();
|
||||
info += test.get_gpu_info();
|
||||
per_gpu_info += test.get_per_gpu_info();
|
||||
} catch (const amd::rdc::SkipException& e) {
|
||||
result->status = RDC_DIAG_RESULT_SKIP;
|
||||
per_gpu_info += "Compute Queue test is skipped: ";
|
||||
per_gpu_info += e.what();
|
||||
info += "GPU ";
|
||||
info += std::to_string(gpu_index);
|
||||
info += " Compute Queue test is skipped: ";
|
||||
info += e.what();
|
||||
info += ".";
|
||||
} catch (const std::exception& e) {
|
||||
result->status = RDC_DIAG_RESULT_FAIL;
|
||||
per_gpu_info += "Compute Queue test returns with error ";
|
||||
per_gpu_info += e.what();
|
||||
info += "GPU ";
|
||||
info += std::to_string(gpu_index);
|
||||
info += " Compute Queue test returns with error ";
|
||||
info += e.what();
|
||||
info += ".";
|
||||
}
|
||||
|
||||
strncpy_with_null(result->info, info.c_str(),
|
||||
MAX_DIAG_MSG_LENGTH);
|
||||
strncpy_with_null(result->gpu_results[gpu_index].gpu_result.msg,
|
||||
per_gpu_info.c_str(), MAX_DIAG_MSG_LENGTH);
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
rdc_status_t rdc_diag_test_case_run(
|
||||
rdc_diag_test_cases_t test_case,
|
||||
uint32_t gpu_index[RDC_MAX_NUM_DEVICES],
|
||||
uint32_t gpu_count,
|
||||
rdc_diag_test_result_t* result) {
|
||||
if (result == nullptr ||
|
||||
gpu_count == 0 ) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
|
||||
if (test_case != RDC_DIAG_COMPUTE_QUEUE &&
|
||||
test_case != RDC_DIAG_SYS_MEM_CHECK) {
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
// init the return data
|
||||
*result = {};
|
||||
result->test_case = test_case;
|
||||
result->status = RDC_DIAG_RESULT_PASS;
|
||||
result->per_gpu_result_count = 0;
|
||||
|
||||
// Run test for each GPU. It will continue even
|
||||
// if one GPU test is fail.
|
||||
for (uint32_t i = 0; i < gpu_count; i++) {
|
||||
switch (test_case) {
|
||||
case RDC_DIAG_SYS_MEM_CHECK:
|
||||
run_memory_test(gpu_index[i], result);
|
||||
break;
|
||||
case RDC_DIAG_COMPUTE_QUEUE:
|
||||
run_compute_queue_test(gpu_index[i], result);
|
||||
break;
|
||||
default:
|
||||
result->status = RDC_DIAG_RESULT_SKIP;
|
||||
strncpy_with_null(result->info, "Not support yet"
|
||||
, MAX_DIAG_MSG_LENGTH);
|
||||
}
|
||||
}
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rdc_modules/rdc_rocr/RdcRocrBase.h"
|
||||
#include <string.h>
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
RdcRocrBase::RdcRocrBase(void) {
|
||||
num_iteration_ = 1;
|
||||
cpu_device_.handle = -1;
|
||||
gpu_device1_.handle = -1;
|
||||
device_pool_.handle = 0;
|
||||
kern_arg_pool_.handle = 0;
|
||||
main_queue_ = nullptr;
|
||||
kernarg_buffer_ = nullptr;
|
||||
kernel_object_ = 0;
|
||||
memset(&aql_, 0, sizeof(aql_));
|
||||
set_requires_profile(-1);
|
||||
set_enable_interrupt(false);
|
||||
set_kernel_file_name("");
|
||||
set_verbosity(1);
|
||||
set_monitor_verbosity(0);
|
||||
set_title("unset_title");
|
||||
orig_hsa_enable_interrupt_ = nullptr;
|
||||
}
|
||||
|
||||
RdcRocrBase::~RdcRocrBase() {
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
+139
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include "rdc_modules/rdc_rocr/TestBase.h"
|
||||
#include "rdc_modules/rdc_rocr/base_rocr_utils.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
static const int kOutputLineLength = 80;
|
||||
static const char kLabelDelimiter[] = "####";
|
||||
static const char kDescriptionLabel[] = "TEST DESCRIPTION";
|
||||
static const char kTitleLabel[] = "TEST NAME";
|
||||
static const char kSetupLabel[] = "TEST SETUP";
|
||||
static const char kRunLabel[] = "TEST EXECUTION";
|
||||
static const char kCloseLabel[] = "TEST CLEAN UP";
|
||||
static const char kResultsLabel[] = "TEST RESULTS";
|
||||
|
||||
|
||||
TestBase::TestBase(uint32_t gpu_index):
|
||||
gpu_index_(gpu_index), description_("") {
|
||||
SetUp();
|
||||
}
|
||||
TestBase::~TestBase() {
|
||||
Close();
|
||||
}
|
||||
|
||||
static void MakeHeaderStr(const char *inStr, std::string *outStr) {
|
||||
assert(outStr != nullptr);
|
||||
assert(inStr != nullptr);
|
||||
|
||||
outStr->clear();
|
||||
*outStr = kLabelDelimiter;
|
||||
*outStr += " ";
|
||||
*outStr += inStr;
|
||||
*outStr += " ";
|
||||
*outStr += kLabelDelimiter;
|
||||
}
|
||||
|
||||
hsa_status_t TestBase::SetUp(void) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
std::string label;
|
||||
MakeHeaderStr(kSetupLabel, &label);
|
||||
RDC_LOG(RDC_DEBUG, label);
|
||||
|
||||
err = InitAndSetupHSA(this);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void TestBase::Run(void) {
|
||||
std::string label;
|
||||
MakeHeaderStr(kRunLabel, &label);
|
||||
RDC_LOG(RDC_DEBUG, label);
|
||||
}
|
||||
|
||||
void TestBase::Close(void) {
|
||||
hsa_status_t err;
|
||||
std::string label;
|
||||
MakeHeaderStr(kCloseLabel, &label);
|
||||
RDC_LOG(RDC_DEBUG, label);
|
||||
|
||||
err = CommonCleanUp(this);
|
||||
throw_if_error(err);
|
||||
}
|
||||
|
||||
|
||||
void TestBase::DisplayResults(void) const {
|
||||
std::string label;
|
||||
MakeHeaderStr(kResultsLabel, &label);
|
||||
printf("\n\t%s\n", label.c_str());
|
||||
}
|
||||
|
||||
void TestBase::DisplayTestInfo(void) {
|
||||
printf("#########################################"
|
||||
"######################################\n");
|
||||
|
||||
std::string label;
|
||||
MakeHeaderStr(kTitleLabel, &label);
|
||||
printf("\n\t%s\n%s\n", label.c_str(), title().c_str());
|
||||
|
||||
if (verbosity() >= VERBOSE_STANDARD) {
|
||||
MakeHeaderStr(kDescriptionLabel, &label);
|
||||
printf("\n\t%s\n%s\n", label.c_str(), description().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void TestBase::set_description(std::string d) {
|
||||
int le = kOutputLineLength - 4;
|
||||
|
||||
description_ = d;
|
||||
size_t endlptr;
|
||||
|
||||
for (size_t i = le; i < description_.size(); i += le) {
|
||||
endlptr = description_.find_last_of(" ", i);
|
||||
description_.replace(endlptr, 1, "\n");
|
||||
i = endlptr;
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t TestBase::get_agent_by_gpu_index(uint32_t gpu_index,
|
||||
hsa_agent_t* agent) {
|
||||
hsa_status_t err = HSA_STATUS_SUCCESS;
|
||||
std::vector<hsa_agent_t> gpus;
|
||||
err = hsa_iterate_agents(IterateGPUAgents, &gpus);
|
||||
throw_if_error(err, "Fail to iterate agents.");
|
||||
if (gpu_index >= gpus.size()) {
|
||||
throw_if_error(err, "GPU index is too large.");
|
||||
}
|
||||
*agent = gpus[gpu_index];
|
||||
return err;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
@@ -0,0 +1,568 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "rdc_modules/rdc_rocr/base_rocr_utils.h"
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <libgen.h>
|
||||
#include <unistd.h>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include "hsa/hsa.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
|
||||
// Clean up some of the common handles and memory used by RdcRocrBase code, then
|
||||
// shut down hsa. Restore HSA_ENABLE_INTERRUPT to original value, if necessary
|
||||
hsa_status_t CommonCleanUp(RdcRocrBase* test) {
|
||||
hsa_status_t err;
|
||||
|
||||
assert(test != nullptr);
|
||||
|
||||
if (nullptr != test->kernarg_buffer()) {
|
||||
err = hsa_amd_memory_pool_free(test->kernarg_buffer());
|
||||
throw_if_error(err);
|
||||
test->set_kernarg_buffer(nullptr);
|
||||
}
|
||||
|
||||
if (nullptr != test->main_queue()) {
|
||||
err = hsa_queue_destroy(test->main_queue());
|
||||
throw_if_error(err);
|
||||
test->set_main_queue(nullptr);
|
||||
}
|
||||
|
||||
if (test->aql().completion_signal.handle != 0) {
|
||||
err = hsa_signal_destroy(test->aql().completion_signal);
|
||||
throw_if_error(err);
|
||||
}
|
||||
|
||||
err = hsa_shut_down();
|
||||
throw_if_error(err);
|
||||
|
||||
// Ensure that HSA is actually closed.
|
||||
hsa_status_t check = hsa_shut_down();
|
||||
if (check != HSA_STATUS_ERROR_NOT_INITIALIZED) {
|
||||
RDC_LOG(RDC_ERROR, "hsa_init reference count was too high.");
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
std::string intr_val;
|
||||
|
||||
if (test->orig_hsa_enable_interrupt() == nullptr) {
|
||||
intr_val = "";
|
||||
} else {
|
||||
intr_val = test->orig_hsa_enable_interrupt();
|
||||
}
|
||||
|
||||
SetEnv("HSA_ENABLE_INTERRUPT", intr_val.c_str());
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char* PROFILE_STR[] = {"HSA_PROFILE_BASE", "HSA_PROFILE_FULL", };
|
||||
|
||||
/// Verify that the machine running the test has the required profile.
|
||||
/// This function will verify that the execution machine meets any specific
|
||||
/// test requirement for a profile (HSA_PROFILE_BASE or HSA_PROFILE_FULL).
|
||||
/// \param[in] test Test that provides profile requirements.
|
||||
/// \returns bool
|
||||
/// - true Machine meets test requirements
|
||||
/// - false Machine does not meet test requirements
|
||||
bool CheckProfileAndInform(RdcRocrBase* test) {
|
||||
if (test->verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "Target HW Profile is "
|
||||
<< PROFILE_STR[test->profile()]);
|
||||
}
|
||||
|
||||
if (test->requires_profile() == -1) {
|
||||
if (test->verbosity() > 0) {
|
||||
RDC_LOG(RDC_DEBUG, "Test can run on any profile. OK.");
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
RDC_LOG(RDC_DEBUG, "Test requires " << PROFILE_STR[test->requires_profile()]
|
||||
<< ". ");
|
||||
if (test->requires_profile() != test->profile()) {
|
||||
RDC_LOG(RDC_DEBUG, "Not Running.");
|
||||
return false;
|
||||
} else {
|
||||
RDC_LOG(RDC_DEBUG, "OK.");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to process error returned from
|
||||
/// iterate function like hsa_amd_agent_iterate_memory_pools
|
||||
/// \param[in] Error returned from iterate call
|
||||
/// \returns HSA_STATUS_SUCCESS iff iterate call succeeds in finding
|
||||
/// what was being searched for
|
||||
static hsa_status_t ProcessIterateError(hsa_status_t err) {
|
||||
if (err == HSA_STATUS_INFO_BREAK) {
|
||||
err = HSA_STATUS_SUCCESS;
|
||||
} else if (err == HSA_STATUS_SUCCESS) {
|
||||
// This actually means no pool was found.
|
||||
err = HSA_STATUS_ERROR;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
// Find pools for cpu, gpu and for kernel arguments. These pools have
|
||||
// common basic requirements, but are not suitable for all cases. In
|
||||
// that case, set cpu_pool(), device_pool() and/or kern_arg_pool()
|
||||
// yourself instead of using this function.
|
||||
hsa_status_t SetPoolsTypical(RdcRocrBase* test) {
|
||||
hsa_status_t err;
|
||||
if (test->profile() == HSA_PROFILE_FULL) {
|
||||
err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
|
||||
FindAPUStandardPool, &test->cpu_pool());
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
|
||||
FindAPUStandardPool, &test->device_pool());
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
|
||||
FindAPUStandardPool, &test->kern_arg_pool());
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
|
||||
} else {
|
||||
err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
|
||||
FindStandardPool, &test->cpu_pool());
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(*test->gpu_device1(),
|
||||
FindStandardPool, &test->device_pool());
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(),
|
||||
FindKernArgPool, &test->kern_arg_pool());
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Enable interrupts if necessary, and call hsa_init()
|
||||
hsa_status_t InitAndSetupHSA(RdcRocrBase* test) {
|
||||
hsa_status_t err;
|
||||
|
||||
if (test->enable_interrupt()) {
|
||||
SetEnv("HSA_ENABLE_INTERRUPT", "1");
|
||||
}
|
||||
|
||||
err = hsa_init();
|
||||
throw_if_error(err);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Attempt to find and set test->cpu_device and test->gpu_device1
|
||||
hsa_status_t SetDefaultAgents(RdcRocrBase* test) {
|
||||
hsa_agent_t gpu_device1;
|
||||
hsa_agent_t cpu_device;
|
||||
hsa_status_t err;
|
||||
|
||||
gpu_device1.handle = 0;
|
||||
err = hsa_iterate_agents(FindGPUDevice, &gpu_device1);
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
test->set_gpu_device1(gpu_device1);
|
||||
|
||||
cpu_device.handle = 0;
|
||||
err = hsa_iterate_agents(FindCPUDevice, &cpu_device);
|
||||
throw_if_error(ProcessIterateError(err));
|
||||
test->set_cpu_device(cpu_device);
|
||||
|
||||
if (0 == gpu_device1.handle) {
|
||||
RDC_LOG(RDC_ERROR, "GPU Device is not Created properly!");
|
||||
throw_if_error(HSA_STATUS_ERROR, "GPU Device is not Created properly!");
|
||||
}
|
||||
|
||||
if (0 == cpu_device.handle) {
|
||||
RDC_LOG(RDC_ERROR, "CPU Device is not Created properly!");
|
||||
throw_if_error(HSA_STATUS_ERROR, "CPU Device is not Created properly!");
|
||||
}
|
||||
|
||||
if (test->verbosity() > 0) {
|
||||
char name[64] = {0};
|
||||
err = hsa_agent_get_info(gpu_device1, HSA_AGENT_INFO_NAME, name);
|
||||
throw_if_error(err);
|
||||
RDC_LOG(RDC_DEBUG, "The gpu device name is " << name);
|
||||
}
|
||||
|
||||
hsa_profile_t profile;
|
||||
err = hsa_agent_get_info(gpu_device1, HSA_AGENT_INFO_PROFILE, &profile);
|
||||
throw_if_error(err);
|
||||
test->set_profile(profile);
|
||||
|
||||
if (!CheckProfileAndInform(test)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// See if the profile of the target matches any required profile by the
|
||||
// test program.
|
||||
bool CheckProfile(RdcRocrBase const* test) {
|
||||
if (test->requires_profile() == -1) {
|
||||
return true;
|
||||
} else {
|
||||
return (test->requires_profile() == test->profile());
|
||||
}
|
||||
}
|
||||
// Load the specified kernel code from the specified file, inspect and fill
|
||||
// in RdcRocrBase member variables related to the kernel and executable.
|
||||
// Required Input RdcRocrBase member variables:
|
||||
// - gpu_device1()
|
||||
// - kernel_file_name()
|
||||
// - kernel_name()
|
||||
//
|
||||
// Written RdcRocrBase member variables:
|
||||
// -kernel_object()
|
||||
// -private_segment_size()
|
||||
// -group_segment_size()
|
||||
// -kernarg_size()
|
||||
// -kernarg_align()
|
||||
hsa_status_t LoadKernelFromObjFile(RdcRocrBase* test, hsa_agent_t* agent) {
|
||||
hsa_status_t err;
|
||||
hsa_code_object_reader_t code_obj_rdr = {0};
|
||||
hsa_executable_t executable = {0};
|
||||
|
||||
assert(test != nullptr);
|
||||
if (agent == nullptr) {
|
||||
agent = test->gpu_device1(); // Assume GPU agent for now
|
||||
}
|
||||
|
||||
// if agent name is not set, then set the agent name
|
||||
if (!test->get_agent_name().size()) {
|
||||
char agent_name[64];
|
||||
err = hsa_agent_get_info(*agent, HSA_AGENT_INFO_NAME, agent_name);
|
||||
throw_if_error(err);
|
||||
test->set_agent_name(agent_name);
|
||||
}
|
||||
|
||||
std::string kern_name = test->kernel_name();
|
||||
std::string obj_file = search_hsaco_full_path(
|
||||
test->kernel_file_name().c_str(), test->get_agent_name().c_str());
|
||||
if (obj_file == "") {
|
||||
RDC_LOG(RDC_ERROR, "failed to find " << test->kernel_file_name() <<
|
||||
" at line " << __LINE__ << ", errno: " << errno);
|
||||
std::string msg("fail to open ");
|
||||
msg += test->kernel_file_name();
|
||||
throw_if_skip(msg);
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
hsa_file_t file_handle = open(obj_file.c_str(), O_RDONLY);
|
||||
|
||||
if (file_handle == -1) {
|
||||
RDC_LOG(RDC_ERROR, "failed to open " << obj_file.c_str() << " at line "
|
||||
<< __LINE__ << ", file: " << __FILE__);
|
||||
return (hsa_status_t) errno;
|
||||
}
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
|
||||
throw_if_error(err);
|
||||
close(file_handle);
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
NULL, &executable);
|
||||
throw_if_error(err);
|
||||
err = hsa_executable_load_agent_code_object(executable, *agent, code_obj_rdr,
|
||||
NULL, NULL);
|
||||
throw_if_error(err);
|
||||
err = hsa_executable_freeze(executable, NULL);
|
||||
throw_if_error(err);
|
||||
|
||||
hsa_executable_symbol_t kern_sym;
|
||||
err = hsa_executable_get_symbol(executable, NULL, (kern_name + ".kd").c_str(), *agent,
|
||||
0, &kern_sym);
|
||||
throw_if_error(err);
|
||||
|
||||
uint64_t codeHandle;
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle);
|
||||
throw_if_error(err);
|
||||
test->set_kernel_object(codeHandle);
|
||||
|
||||
uint32_t val;
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &val);
|
||||
throw_if_error(err);
|
||||
test->set_private_segment_size(val);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &val);
|
||||
throw_if_error(err);
|
||||
test->set_group_segment_size(val);
|
||||
|
||||
// Remaining queries only supported on code object v3.
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &val);
|
||||
throw_if_error(err);
|
||||
test->set_kernarg_size(val);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &val);
|
||||
throw_if_error(err);
|
||||
assert(val >= 16 && "Reported kernarg size is too small.");
|
||||
val = (val == 0) ? 16 : val;
|
||||
test->set_kernarg_align(val);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue,
|
||||
uint32_t num_pkts) {
|
||||
hsa_status_t err;
|
||||
|
||||
if (num_pkts == 0) {
|
||||
err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
|
||||
&num_pkts);
|
||||
throw_if_error(err);
|
||||
}
|
||||
|
||||
err = hsa_queue_create(device, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL,
|
||||
NULL, UINT32_MAX, UINT32_MAX, queue);
|
||||
throw_if_error(err);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
// Initialize the provided aql packet with standard default values, and
|
||||
// values from provided RdcRocrBase object.
|
||||
hsa_status_t InitializeAQLPacket(const RdcRocrBase* test,
|
||||
hsa_kernel_dispatch_packet_t* aql) {
|
||||
hsa_status_t err;
|
||||
|
||||
assert(aql != nullptr);
|
||||
|
||||
if (aql == nullptr) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// Initialize Packet type as Invalid
|
||||
// Update packet type to Kernel Dispatch
|
||||
// right before ringing doorbell
|
||||
aql->header = 1;
|
||||
|
||||
aql->setup = 1;
|
||||
aql->workgroup_size_x = 256;
|
||||
aql->workgroup_size_y = 1;
|
||||
aql->workgroup_size_z = 1;
|
||||
|
||||
aql->grid_size_x = (uint64_t) 256; // manual_input*group_input; workg max sz
|
||||
aql->grid_size_y = 1;
|
||||
aql->grid_size_z = 1;
|
||||
|
||||
aql->private_segment_size = test->private_segment_size();
|
||||
|
||||
aql->group_segment_size = test->group_segment_size();
|
||||
|
||||
// Pin kernel code and the kernel argument buffer to the aql packet->
|
||||
aql->kernel_object = test->kernel_object();
|
||||
|
||||
// aql->kernarg_address may be filled in by AllocAndSetKernArgs() if it is
|
||||
// called before this function, so we don't want overwrite it, therefore
|
||||
// we ignore it in this function.
|
||||
|
||||
err = hsa_signal_create(1, 0, NULL, &aql->completion_signal);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// Copy RdcRocrBase aql object values to the RdcRocrBase object queue in the
|
||||
// specified queue position (ind)
|
||||
hsa_kernel_dispatch_packet_t * WriteAQLToQueue(RdcRocrBase* test, uint64_t *ind) {
|
||||
assert(test);
|
||||
assert(test->main_queue());
|
||||
|
||||
void *queue_base = test->main_queue()->base_address;
|
||||
const uint32_t queue_mask = test->main_queue()->size - 1;
|
||||
uint64_t que_idx = hsa_queue_add_write_index_relaxed(test->main_queue(), 1);
|
||||
*ind = que_idx;
|
||||
|
||||
hsa_kernel_dispatch_packet_t* staging_aql_packet = &test->aql();
|
||||
hsa_kernel_dispatch_packet_t* queue_aql_packet;
|
||||
|
||||
queue_aql_packet =
|
||||
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue_base))
|
||||
[que_idx & queue_mask];
|
||||
|
||||
queue_aql_packet->workgroup_size_x = staging_aql_packet->workgroup_size_x;
|
||||
queue_aql_packet->workgroup_size_y = staging_aql_packet->workgroup_size_y;
|
||||
queue_aql_packet->workgroup_size_z = staging_aql_packet->workgroup_size_z;
|
||||
queue_aql_packet->grid_size_x = staging_aql_packet->grid_size_x;
|
||||
queue_aql_packet->grid_size_y = staging_aql_packet->grid_size_y;
|
||||
queue_aql_packet->grid_size_z = staging_aql_packet->grid_size_z;
|
||||
queue_aql_packet->private_segment_size =
|
||||
staging_aql_packet->private_segment_size;
|
||||
queue_aql_packet->group_segment_size =
|
||||
staging_aql_packet->group_segment_size;
|
||||
queue_aql_packet->kernel_object = staging_aql_packet->kernel_object;
|
||||
queue_aql_packet->kernarg_address = staging_aql_packet->kernarg_address;
|
||||
queue_aql_packet->completion_signal = staging_aql_packet->completion_signal;
|
||||
|
||||
return queue_aql_packet;
|
||||
}
|
||||
|
||||
void
|
||||
WriteAQLToQueueLoc(hsa_queue_t *queue, uint64_t indx,
|
||||
hsa_kernel_dispatch_packet_t *aql_pkt) {
|
||||
assert(queue);
|
||||
assert(aql_pkt);
|
||||
|
||||
void *queue_base = queue->base_address;
|
||||
const uint32_t queue_mask = queue->size - 1;
|
||||
hsa_kernel_dispatch_packet_t* queue_aql_packet;
|
||||
|
||||
queue_aql_packet =
|
||||
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue_base))
|
||||
[indx & queue_mask];
|
||||
|
||||
queue_aql_packet->workgroup_size_x = aql_pkt->workgroup_size_x;
|
||||
queue_aql_packet->workgroup_size_y = aql_pkt->workgroup_size_y;
|
||||
queue_aql_packet->workgroup_size_z = aql_pkt->workgroup_size_z;
|
||||
queue_aql_packet->grid_size_x = aql_pkt->grid_size_x;
|
||||
queue_aql_packet->grid_size_y = aql_pkt->grid_size_y;
|
||||
queue_aql_packet->grid_size_z = aql_pkt->grid_size_z;
|
||||
queue_aql_packet->private_segment_size =
|
||||
aql_pkt->private_segment_size;
|
||||
queue_aql_packet->group_segment_size =
|
||||
aql_pkt->group_segment_size;
|
||||
queue_aql_packet->kernel_object = aql_pkt->kernel_object;
|
||||
queue_aql_packet->kernarg_address = aql_pkt->kernarg_address;
|
||||
queue_aql_packet->completion_signal = aql_pkt->completion_signal;
|
||||
}
|
||||
|
||||
// Allocate a buffer in the kern_arg_pool for the kernel arguments and write
|
||||
// the arguments to buffer
|
||||
hsa_status_t AllocAndSetKernArgs(RdcRocrBase* test, void* args, size_t arg_size) {
|
||||
void* kern_arg_buf = nullptr;
|
||||
hsa_status_t err;
|
||||
size_t buf_size;
|
||||
size_t req_align;
|
||||
assert(args != nullptr);
|
||||
assert(test != nullptr);
|
||||
|
||||
req_align = test->kernarg_align();
|
||||
// Allocate enough extra space for alignment adjustments if ncessary
|
||||
buf_size = arg_size + (req_align << 1);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(test->kern_arg_pool(), buf_size, 0,
|
||||
reinterpret_cast<void**>(&kern_arg_buf));
|
||||
throw_if_error(err);
|
||||
|
||||
test->set_kernarg_buffer(kern_arg_buf);
|
||||
|
||||
void *adj_kern_arg_buf = AlignUp(kern_arg_buf, req_align);
|
||||
|
||||
assert(arg_size >= test->kernarg_size());
|
||||
assert(((uintptr_t)adj_kern_arg_buf + arg_size) <
|
||||
((uintptr_t)kern_arg_buf + buf_size));
|
||||
|
||||
hsa_agent_t ag_list[2] = {*test->gpu_device1(), *test->cpu_device()};
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, kern_arg_buf);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_memory_copy(adj_kern_arg_buf, args, arg_size);
|
||||
throw_if_error(err);
|
||||
|
||||
test->aql().kernarg_address = adj_kern_arg_buf;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
std::string get_lib_dir(const char* lib_name) {
|
||||
std::string result;
|
||||
char line[1024*8];
|
||||
|
||||
FILE* file = fopen("/proc/self/maps", "r");
|
||||
if (file == NULL)
|
||||
return result;
|
||||
std::string lib_path = "/";
|
||||
lib_path += lib_name;
|
||||
// 7f4eacb46000 r-xp 00000 08:01 17183106 /lib/x86_64-linux-gnu/libc-2.27.so
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
char* end = strstr(line, lib_path.c_str());
|
||||
if (end != NULL) {
|
||||
char* start = end;
|
||||
while (start > line) {
|
||||
if (isspace(*start)) {
|
||||
start++;
|
||||
break;
|
||||
}
|
||||
start--;
|
||||
}
|
||||
result = std::string(start, end-start);
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string get_app_dir() {
|
||||
char buf[1024*8];
|
||||
int ret = readlink("/proc/self/exe", buf, 1024*8);
|
||||
if ((ret != -1) && ret < (1024*8 - 1)) {
|
||||
buf[ret] = '\0';
|
||||
return dirname(buf);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string search_hsaco_full_path(const char* hsaco_file_name,
|
||||
const char* agent_name) {
|
||||
const std::string lib_dir = get_lib_dir("librdc_rocr.so");
|
||||
const std::string app_dir = get_app_dir();
|
||||
|
||||
std::vector<std::string> path_to_search;
|
||||
path_to_search.push_back(std::string("./")+hsaco_file_name);
|
||||
path_to_search.push_back(app_dir+"/"+hsaco_file_name);
|
||||
path_to_search.push_back(lib_dir+"/"+hsaco_file_name);
|
||||
path_to_search.push_back(lib_dir+"/hsaco/"+ agent_name
|
||||
+ "/" + hsaco_file_name);
|
||||
// for dev structure
|
||||
path_to_search.push_back(lib_dir+"/../../rdc_libs/rdc_modules/kernels/hsaco/"
|
||||
+ agent_name + "/" + hsaco_file_name);
|
||||
for (std::size_t i = 0; i < path_to_search.size(); i++) {
|
||||
if ( ::access(path_to_search[i].c_str(), F_OK) == 0 ) {
|
||||
RDC_LOG(RDC_DEBUG, "Use the file " << path_to_search[i]);
|
||||
return path_to_search[i];
|
||||
}
|
||||
RDC_LOG(RDC_DEBUG, "Skip not exists file " << path_to_search[i]);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
+527
@@ -0,0 +1,527 @@
|
||||
/*
|
||||
Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/// \file
|
||||
/// Implementation of utility functions used by RocR applications
|
||||
#include "rdc_modules/rdc_rocr/common.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
void throw_if_error(hsa_status_t err, const std::string& msg) {
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
const char* errstr = 0;
|
||||
hsa_status_string(err, &errstr);
|
||||
throw std::runtime_error(msg + " hsa error code: "
|
||||
+ std::to_string(err) + " " + errstr);
|
||||
}
|
||||
}
|
||||
|
||||
void throw_if_skip(const std::string& msg) {
|
||||
throw SkipException(msg.c_str());
|
||||
}
|
||||
|
||||
void SetEnv(const char* env_var_name, const char* env_var_value) {
|
||||
int err = setenv(env_var_name, env_var_value, 1);
|
||||
|
||||
if (0 != err) {
|
||||
RDC_LOG(RDC_ERROR, "Set environment variable failed!");
|
||||
throw_if_error(HSA_STATUS_ERROR, "Set environment variable failed");
|
||||
}
|
||||
}
|
||||
|
||||
intptr_t
|
||||
AlignDown(intptr_t value, size_t alignment) {
|
||||
assert(alignment != 0 && "Zero alignment");
|
||||
return (intptr_t) (value & ~(alignment - 1));
|
||||
}
|
||||
|
||||
void *
|
||||
AlignDown(void* value, size_t alignment) {
|
||||
return reinterpret_cast<void*>(AlignDown(
|
||||
reinterpret_cast<uintptr_t>(value), alignment));
|
||||
}
|
||||
|
||||
void *
|
||||
AlignUp(void* value, size_t alignment) {
|
||||
return reinterpret_cast<void*>(
|
||||
AlignDown((uintptr_t)(reinterpret_cast<uintptr_t>(value) + alignment - 1),
|
||||
alignment));
|
||||
}
|
||||
|
||||
|
||||
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
|
||||
hsa_device_type_t dev_type) {
|
||||
assert(data != nullptr);
|
||||
|
||||
if (data == nullptr) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
hsa_device_type_t hsa_device_type;
|
||||
hsa_status_t hsa_error_code = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE,
|
||||
&hsa_device_type);
|
||||
throw_if_error(hsa_error_code);
|
||||
|
||||
if (hsa_device_type == dev_type) {
|
||||
*(reinterpret_cast<hsa_agent_t*>(data)) = agent;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Find CPU Agents
|
||||
hsa_status_t IterateCPUAgents(hsa_agent_t agent, void *data) {
|
||||
hsa_status_t status;
|
||||
assert(data != nullptr);
|
||||
if (data == nullptr) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
std::vector<hsa_agent_t>* cpus = static_cast<std::vector<hsa_agent_t>*>(data);
|
||||
hsa_device_type_t device_type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
||||
throw_if_error(status);
|
||||
if (HSA_STATUS_SUCCESS == status && HSA_DEVICE_TYPE_CPU == device_type) {
|
||||
cpus->push_back(agent);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Find GPU Agents
|
||||
hsa_status_t IterateGPUAgents(hsa_agent_t agent, void *data) {
|
||||
hsa_status_t status;
|
||||
assert(data != nullptr);
|
||||
if (data == nullptr) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
std::vector<hsa_agent_t>* gpus = static_cast<std::vector<hsa_agent_t>*>(data);
|
||||
hsa_device_type_t device_type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
||||
throw_if_error(status);
|
||||
if (HSA_STATUS_SUCCESS == status && HSA_DEVICE_TYPE_GPU == device_type) {
|
||||
gpus->push_back(agent);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
// Find coarse grained system memory.
|
||||
hsa_status_t GetGlobalMemoryPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
hsa_amd_segment_t segment;
|
||||
hsa_status_t err;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&segment);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment)
|
||||
return err;
|
||||
|
||||
hsa_amd_memory_pool_global_flag_t flags;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
|
||||
&flags);
|
||||
throw_if_error(err);
|
||||
|
||||
// this is valid for dGPUs. But on APUs, it has to be FINE_GRAINED
|
||||
if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
hsa_amd_memory_pool_t* ret =
|
||||
reinterpret_cast<hsa_amd_memory_pool_t*>(data);
|
||||
*ret = pool;
|
||||
} else { // this is for APUs
|
||||
if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) {
|
||||
hsa_amd_memory_pool_t* ret =
|
||||
reinterpret_cast<hsa_amd_memory_pool_t*>(data);
|
||||
*ret = pool;
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
// Find a memory pool that can be used for kernarg locations.
|
||||
hsa_status_t GetKernArgMemoryPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
hsa_status_t err;
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
hsa_amd_segment_t segment;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&segment);
|
||||
throw_if_error(err);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_amd_memory_pool_global_flag_t flags;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
|
||||
&flags);
|
||||
throw_if_error(err);
|
||||
|
||||
if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) {
|
||||
hsa_amd_memory_pool_t* ret =
|
||||
reinterpret_cast<hsa_amd_memory_pool_t*>(data);
|
||||
*ret = pool;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
|
||||
return FindAgent(agent, data, HSA_DEVICE_TYPE_GPU);
|
||||
}
|
||||
|
||||
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
|
||||
return FindAgent(agent, data, HSA_DEVICE_TYPE_CPU);
|
||||
}
|
||||
|
||||
/// Ennumeration that indicates whether a pool property must be present or not.
|
||||
/// This is meant to be used by FindPool
|
||||
typedef enum {
|
||||
POOL_PROP_OFF = 0, ///< The property must be present.
|
||||
POOL_PROP_ON, ///< The property must not be present.
|
||||
POOL_PROP_DONT_CARE ///< We don't care if the property is present or not.
|
||||
} pool_prop_t;
|
||||
|
||||
static hsa_status_t
|
||||
FindPool(hsa_amd_memory_pool_t pool, void* data, hsa_amd_segment_t in_segment,
|
||||
pool_prop_t accessible_by_all, pool_prop_t kern_arg,
|
||||
pool_prop_t fine_grain) {
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
hsa_status_t err;
|
||||
hsa_amd_segment_t segment;
|
||||
uint32_t flag;
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&segment);
|
||||
throw_if_error(err);
|
||||
|
||||
if (in_segment != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
if (HSA_AMD_SEGMENT_GLOBAL == in_segment) {
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
throw_if_error(err);
|
||||
|
||||
if (kern_arg != POOL_PROP_DONT_CARE) {
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
if ((karg_st == 0 && kern_arg == POOL_PROP_ON) ||
|
||||
(karg_st != 0 && kern_arg == POOL_PROP_OFF)) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
if (fine_grain != POOL_PROP_DONT_CARE) {
|
||||
uint32_t fg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED;
|
||||
if ((fg_st == 0 && fine_grain == POOL_PROP_ON) ||
|
||||
(fg_st != 0 && fine_grain == POOL_PROP_OFF)) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (accessible_by_all != POOL_PROP_DONT_CARE) {
|
||||
bool access_read;
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
(hsa_amd_memory_pool_info_t)
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access_read);
|
||||
throw_if_error(err);
|
||||
|
||||
if (((!access_read) && accessible_by_all == POOL_PROP_ON) ||
|
||||
(access_read && (accessible_by_all == POOL_PROP_OFF))) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE,
|
||||
POOL_PROP_OFF, POOL_PROP_DONT_CARE);
|
||||
}
|
||||
|
||||
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE,
|
||||
POOL_PROP_ON, POOL_PROP_DONT_CARE);
|
||||
}
|
||||
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_ON,
|
||||
POOL_PROP_OFF, POOL_PROP_DONT_CARE);
|
||||
}
|
||||
|
||||
hsa_status_t FindAPUStandardPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE,
|
||||
POOL_PROP_DONT_CARE, POOL_PROP_DONT_CARE);
|
||||
}
|
||||
|
||||
// Populate the vector with handles to all agents and pools
|
||||
hsa_status_t
|
||||
GetAgentPools(std::vector<std::shared_ptr<agent_pools_t>> *agent_pools) {
|
||||
hsa_status_t err;
|
||||
|
||||
assert(agent_pools != nullptr);
|
||||
|
||||
auto save_agent = [](hsa_agent_t a, void *data)->hsa_status_t {
|
||||
std::vector<std::shared_ptr<agent_pools_t>> *ag_vec;
|
||||
hsa_status_t err;
|
||||
assert(data != nullptr);
|
||||
ag_vec =
|
||||
reinterpret_cast<std::vector<std::shared_ptr<agent_pools_t>> *>(data);
|
||||
std::shared_ptr<agent_pools_t> ag(new agent_pools_t);
|
||||
ag->agent = a;
|
||||
|
||||
|
||||
auto save_pool = [](hsa_amd_memory_pool_t p, void *data)->hsa_status_t {
|
||||
assert(data != nullptr);
|
||||
std::vector<hsa_amd_memory_pool_t> *p_list =
|
||||
reinterpret_cast<std::vector<hsa_amd_memory_pool_t> *>(data);
|
||||
p_list->push_back(p);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(a, save_pool,
|
||||
reinterpret_cast<void *>(&ag->pools));
|
||||
ag_vec->push_back(ag);
|
||||
return err;
|
||||
};
|
||||
|
||||
err = hsa_iterate_agents(save_agent, reinterpret_cast<void *>(agent_pools));
|
||||
return err;
|
||||
}
|
||||
|
||||
static hsa_status_t MakeGlobalFlagsString(const pool_info_t *pool_i,
|
||||
std::string* out_str) {
|
||||
uint32_t global_flag = pool_i->global_flag;
|
||||
|
||||
assert(out_str != nullptr);
|
||||
|
||||
*out_str = "";
|
||||
|
||||
std::vector < std::string > flags;
|
||||
|
||||
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) {
|
||||
flags.push_back("KERNARG");
|
||||
}
|
||||
|
||||
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) {
|
||||
flags.push_back("FINE GRAINED");
|
||||
}
|
||||
|
||||
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & global_flag) {
|
||||
flags.push_back("COARSE GRAINED");
|
||||
}
|
||||
|
||||
if (flags.size() > 0) {
|
||||
*out_str += flags[0];
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < flags.size(); i++) {
|
||||
*out_str += ", " + flags[i];
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
static hsa_status_t DumpSegment(const pool_info_t *pool_i,
|
||||
std::string const *ind_lvl) {
|
||||
hsa_status_t err;
|
||||
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Segment:");
|
||||
std::string seg_str = "";
|
||||
std::string tmp_str;
|
||||
|
||||
switch (pool_i->segment) {
|
||||
case HSA_AMD_SEGMENT_GLOBAL:
|
||||
err = MakeGlobalFlagsString(pool_i, &tmp_str);
|
||||
throw_if_error(err);
|
||||
|
||||
seg_str += "GLOBAL; FLAGS: " + tmp_str;
|
||||
break;
|
||||
|
||||
case HSA_AMD_SEGMENT_READONLY:
|
||||
seg_str += "READONLY";
|
||||
break;
|
||||
|
||||
case HSA_AMD_SEGMENT_PRIVATE:
|
||||
seg_str += "PRIVATE";
|
||||
break;
|
||||
|
||||
case HSA_AMD_SEGMENT_GROUP:
|
||||
seg_str += "GROUP";
|
||||
break;
|
||||
|
||||
default:
|
||||
RDC_LOG(RDC_DEBUG, "Not Supported");
|
||||
break;
|
||||
}
|
||||
|
||||
RDC_LOG(RDC_DEBUG, seg_str);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
|
||||
pool_info_t *pool_i) {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&pool_i->segment);
|
||||
throw_if_error(err);
|
||||
|
||||
// Get the size of the POOL
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE,
|
||||
&pool_i->size);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
|
||||
&pool_i->alloc_allowed);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
||||
&pool_i->alloc_granule);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT,
|
||||
&pool_i->alloc_alignment);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL,
|
||||
&pool_i->accessible_by_all);
|
||||
throw_if_error(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE,
|
||||
&pool_i->aggregate_alloc_max);
|
||||
throw_if_error(err);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i,
|
||||
uint32_t indent) {
|
||||
std::string ind_lvl(indent, ' ');
|
||||
|
||||
DumpSegment(pool_i, &ind_lvl);
|
||||
|
||||
std::string sz_str = std::to_string(pool_i->size / 1024) + "KB";
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Size:" << sz_str);
|
||||
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Allocatable:"
|
||||
<< (pool_i->alloc_allowed ? "TRUE" : "FALSE"));
|
||||
|
||||
std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB";
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Alloc Granule:" << gr_str);
|
||||
|
||||
std::string al_str =
|
||||
std::to_string(pool_i->alloc_alignment / 1024) + "KB";
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Alloc Alignment:" << al_str);
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Acessible by all:" <<
|
||||
(pool_i->accessible_by_all ? "TRUE" : "FALSE"));
|
||||
|
||||
std::string agg_str =
|
||||
std::to_string(pool_i->aggregate_alloc_max / 1024) + "KB";
|
||||
RDC_LOG(RDC_DEBUG, ind_lvl << "Pool Aggregate Alloc Size:" << agg_str);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static const char* Types[] = {"HSA_EXT_POINTER_TYPE_UNKNOWN",
|
||||
"HSA_EXT_POINTER_TYPE_HSA",
|
||||
"HSA_EXT_POINTER_TYPE_LOCKED",
|
||||
"HSA_EXT_POINTER_TYPE_GRAPHICS",
|
||||
"HSA_EXT_POINTER_TYPE_IPC"
|
||||
};
|
||||
|
||||
hsa_status_t DumpPointerInfo(void* ptr) {
|
||||
hsa_amd_pointer_info_t info;
|
||||
hsa_agent_t* agents;
|
||||
uint32_t count;
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_pointer_info(ptr, &info, malloc, &count, &agents);
|
||||
throw_if_error(err);
|
||||
|
||||
std::cout << "Info for ptr: " << ptr << std::endl;
|
||||
std::cout << "CPU ptr: " << reinterpret_cast<void*>(info.hostBaseAddress) <<
|
||||
std::endl;
|
||||
std::cout << "GPU ptr: " << reinterpret_cast<void*>(info.agentBaseAddress)
|
||||
<< std::endl;
|
||||
std::cout << "Size: " << info.sizeInBytes << std::endl;
|
||||
std::cout << "Type: " << Types[info.type] << std::endl;
|
||||
std::cout << "UsrPtr " << reinterpret_cast<void*>(info.userData) <<
|
||||
std::endl;
|
||||
std::cout << "Accessible by: ";
|
||||
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
std::cout << agents[i].handle << " ";
|
||||
}
|
||||
|
||||
std::cout << " ;[EOM]" << std::endl;
|
||||
free(agents);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*! \brief Writes to the buffer and increments the write pointer to the
|
||||
* buffer. Also, ensures that the argument is written to an
|
||||
* aligned memory as specified. Return the new write pointer.
|
||||
*
|
||||
* @param dst The write pointer to the buffer
|
||||
* @param src The source pointer
|
||||
* @param size The size in bytes to copy
|
||||
* @param alignment The alignment to follow while writing to the buffer
|
||||
*/
|
||||
#if 0
|
||||
inline void *
|
||||
addArg(void * dst, const void* src, size_t size, uint32_t alignment) {
|
||||
dst = rocrtst::AlignUp(dst, alignment);
|
||||
::memcpy(dst, src, size);
|
||||
return dst + size;
|
||||
}
|
||||
#endif
|
||||
#undef throw_if_error
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
@@ -138,9 +138,7 @@ std::string RdciDiagSubSystem::get_test_name
|
||||
(rdc_diag_test_cases_t test_case) const {
|
||||
const std::map<rdc_diag_test_cases_t, std::string> test_desc = {
|
||||
{RDC_DIAG_COMPUTE_PROCESS, "No compute process"},
|
||||
{RDC_DIAG_SDMA_QUEUE, "SDMA Queue ready"},
|
||||
{RDC_DIAG_COMPUTE_QUEUE, "Compute Queue ready"},
|
||||
{RDC_DIAG_VRAM_CHECK, "VRAM check"},
|
||||
{RDC_DIAG_SYS_MEM_CHECK, "System memory check"},
|
||||
{RDC_DIAG_NODE_TOPOLOGY, "Node topology check"},
|
||||
{RDC_DIAG_GPU_PARAMETERS, "GPU parameters check"},
|
||||
|
||||
Reference in New Issue
Block a user