diff --git a/projects/rdc/CMakeLists.txt b/projects/rdc/CMakeLists.txt index 5a59b78b24..f40e212f9e 100755 --- a/projects/rdc/CMakeLists.txt +++ b/projects/rdc/CMakeLists.txt @@ -35,6 +35,10 @@ option(BUILD_STANDALONE "Build targets for rdci and rdcd" ON) # which requires the ROCT-Thunk-Interface. option(BUILD_RASLIB "Build targets for raslib" OFF) +# When cmake -DBUILD_ROCRTEST=off, it will not build the librdc_rocr.so +# which requires the Rocm run time. +option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON) + if( NOT EXISTS "${CMAKE_SOURCE_DIR}/raslib/.git" AND BUILD_RASLIB) message( FATAL_ERROR "The git submodule raslib is not available. Please run git submodule update --init --recursive diff --git a/projects/rdc/README.md b/projects/rdc/README.md index 0b68dbce6e..20a8689180 100644 --- a/projects/rdc/README.md +++ b/projects/rdc/README.md @@ -76,6 +76,12 @@ If only the RDC libraries are needed (i.e. only "embedded mode" is required), th $ cmake -DROCM_DIR=/opt/rocm -DBUILD_STANDALONE=off <-DCMAKE_INSTALL_PREFIX=> .. +## Building RDC library without ROCM Run time (optional) + +The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_ROCRTEST=off should be passed on the the cmake command line: + + $ cmake -DROCM_DIR=/opt/rocm -DBUILD_ROCRTEST=off <-DCMAKE_INSTALL_PREFIX=> .. + ## Update System Library Path The following commands need to be executed as root (sudo). It may be easiest to put them into a script and then run that script as root: diff --git a/projects/rdc/example/diagnostic_example.cc b/projects/rdc/example/diagnostic_example.cc index b34db7c3a0..72973cc85f 100644 --- a/projects/rdc/example/diagnostic_example.cc +++ b/projects/rdc/example/diagnostic_example.cc @@ -31,9 +31,7 @@ THE SOFTWARE. static std::string get_test_name(rdc_diag_test_cases_t test_case) { const std::map test_desc = { {RDC_DIAG_COMPUTE_PROCESS, "No compute process"}, - {RDC_DIAG_SDMA_QUEUE, "SDMA Queue ready"}, {RDC_DIAG_COMPUTE_QUEUE, "Compute Queue ready"}, - {RDC_DIAG_VRAM_CHECK, "VRAM check"}, {RDC_DIAG_SYS_MEM_CHECK, "System memory check"}, {RDC_DIAG_NODE_TOPOLOGY, "Node topology check"}, {RDC_DIAG_GPU_PARAMETERS, "GPU parameters check"}, @@ -160,7 +158,8 @@ int main(int, char **) { std::cout << std::setw(22) << std::left << get_test_name(RDC_DIAG_COMPUTE_PROCESS) + ":" - << rdc_diagnostic_result_string(test_result.status) << "\n"; + << test_result.info << "\n"; + // Cleanup consists of shutting down RDC. cleanup: diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index 0198c07d96..8b7dd1e4fe 100755 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -421,9 +421,7 @@ typedef enum { RDC_DIAG_TEST_FIRST = 0, //!< The diagnostic test pass RDC_DIAG_COMPUTE_PROCESS = RDC_DIAG_TEST_FIRST, - RDC_DIAG_SDMA_QUEUE, //!< The SDMA Queue is ready RDC_DIAG_COMPUTE_QUEUE, //!< The Compute Queue is ready - RDC_DIAG_VRAM_CHECK, //!< Check VRAM RDC_DIAG_SYS_MEM_CHECK, //!< Check System memory RDC_DIAG_NODE_TOPOLOGY, //!< Report node topology RDC_DIAG_GPU_PARAMETERS, //!< GPU parameters in range diff --git a/projects/rdc/include/rdc_lib/RdcPerfTimer.h b/projects/rdc/include/rdc_lib/RdcPerfTimer.h new file mode 100755 index 0000000000..fe4b86fbc6 --- /dev/null +++ b/projects/rdc/include/rdc_lib/RdcPerfTimer.h @@ -0,0 +1,88 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef INCLUDE_RDC_LIB_RDCRdcPerfTimer_H_ +#define INCLUDE_RDC_LIB_RDCRdcPerfTimer_H_ + +#include +#include +#include +#include +/// \file +/// Timer related class. + +namespace amd { +namespace rdc { + +class RdcPerfTimer { + private: + struct Timer { + std::string name; /* < name name of time object*/ + uint64_t _freq; /* < _freq frequency*/ + uint64_t _clocks; /* < _clocks number of ticks at end*/ + uint64_t _start; /* < _start start point ticks*/ + }; + + std::vector _timers; /*< _timers vector to Timer objects */ + double freq_in_100mhz; + + public: + RdcPerfTimer(void); + ~RdcPerfTimer(void); + + /// Create a new timer. + /// \returns A new timer instance index + int CreateTimer(void); + + /// Start the timer associated with the given index + /// \param[in] index Index of the timer to start + /// \returns int 0 for success, non-zero otherwise + int StartTimer(int index); + + /// Stop the timer associated with the given index + /// \param[in] Index Index of the timer to stop + /// \returns int 0 for success, non-zero otherwise + int StopTimer(int index); + + /// Reset the timer to 0 + /// param[in] Index of the timer to reset + /// \returns void + void ResetTimer(int index); + + /// Read the time value of the timer associated with the provided index. + /// Units are seconds + /// \param[in] index Index of the timer to read + /// \returns double Value of the timer + double ReadTimer(int index); + + private: + void Error(std::string str); + uint64_t CoarseTimestampUs(); + uint64_t MeasureTSCFreqHz(); +}; + + +} // namespace rdc +} // namespace amd + +#endif // INCLUDE_RDC_LIB_RDCRdcPerfTimer_H_ + diff --git a/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h b/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h index 9ad1debb14..36cd9748eb 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h +++ b/projects/rdc/include/rdc_lib/impl/RdcDiagnosticModule.h @@ -29,6 +29,7 @@ THE SOFTWARE. #include "rdc_lib/RdcDiagnostic.h" #include "rdc_lib/impl/RdcRasLib.h" #include "rdc_lib/impl/RdcSmiLib.h" +#include "rdc_lib/impl/RdcRocrLib.h" namespace amd { namespace rdc { @@ -55,7 +56,8 @@ class RdcDiagnosticModule : public RdcDiagnostic { rdc_status_t rdc_diag_destroy() override; explicit RdcDiagnosticModule(const RdcSmiLibPtr& smi_lib, - const RdcRasLibPtr& ras_module); + const RdcRasLibPtr& ras_module, + const RdcRocrLibPtr& rocr_module); private: //< Helper function to dispatch fields to module diff --git a/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h b/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h index e7bffaad24..591e6ddf40 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h @@ -28,6 +28,7 @@ THE SOFTWARE. #include "rdc_lib/RdcTelemetry.h" #include "rdc_lib/impl/RdcRasLib.h" #include "rdc_lib/impl/RdcSmiLib.h" +#include "rdc_lib/impl/RdcRocrLib.h" namespace amd { namespace rdc { @@ -46,6 +47,7 @@ class RdcModuleMgrImpl: public RdcModuleMgr { RdcRasLibPtr ras_lib_; RdcSmiLibPtr smi_lib_; RdcMetricFetcherPtr fetcher_; + RdcRocrLibPtr rocr_lib_; }; } // namespace rdc diff --git a/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h b/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h new file mode 100644 index 0000000000..ff89e01d7e --- /dev/null +++ b/projects/rdc/include/rdc_lib/impl/RdcRocrLib.h @@ -0,0 +1,74 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCROCRLIB_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCROCRLIB_H_ + +#include +#include +#include "rdc_lib/RdcLibraryLoader.h" +#include "rdc_lib/RdcDiagnostic.h" + +namespace amd { +namespace rdc { + +class RdcRocrLib : public RdcDiagnostic { + public: + rdc_status_t rdc_diag_test_cases_query( + rdc_diag_test_cases_t test_cases[MAX_TEST_CASES], + uint32_t* test_case_count) override; + + // Run a specific test case + rdc_status_t rdc_test_case_run( + rdc_diag_test_cases_t test_case, + uint32_t gpu_index[RDC_MAX_NUM_DEVICES], + uint32_t gpu_count, + rdc_diag_test_result_t* result) override; + + rdc_status_t rdc_diagnostic_run( + const rdc_group_info_t& gpus, + rdc_diag_level_t level, + rdc_diag_response_t* response) override; + + rdc_status_t rdc_diag_init(uint64_t flags) override; + rdc_status_t rdc_diag_destroy() override; + + explicit RdcRocrLib(const char* lib_name); + + ~RdcRocrLib(); + + private: + RdcLibraryLoader lib_loader_; + rdc_status_t (*test_case_run_)(rdc_diag_test_cases_t, + uint32_t[RDC_MAX_NUM_DEVICES], uint32_t, + rdc_diag_test_result_t*); + rdc_status_t (*diag_test_cases_query_)( + rdc_diag_test_cases_t[MAX_TEST_CASES], uint32_t*); + rdc_status_t (*diag_init_)(uint64_t); + rdc_status_t (*diag_destroy_)(); +}; + +typedef std::shared_ptr RdcRocrLibPtr; + +} // namespace rdc +} // namespace amd + +#endif // INCLUDE_RDC_LIB_IMPL_RDCROCRLIB_H_ diff --git a/projects/rdc/include/rdc_modules/kernels/binary_search_kernels.cl b/projects/rdc/include/rdc_modules/kernels/binary_search_kernels.cl new file mode 100755 index 0000000000..eb3cca6c86 --- /dev/null +++ b/projects/rdc/include/rdc_modules/kernels/binary_search_kernels.cl @@ -0,0 +1,127 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2017, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * One instance of this kernel call is a thread. + * Each thread finds out the segment in which it should look for the element. + * After that, it checks if the element is between the lower bound and upper + * bound of its segment. If yes, then this segment becomes the total + * searchspace for the next pass. + * + * To achieve this, it writes the lower bound and upper bound to the output + * array. In case the element at the left end (lower bound) matches the element + * we are looking for, that is marked in the output and we no longer need to + * look any further. + */ + +__kernel void +binarySearch(__global uint4 * outputArray, + __const __global uint2 * sortedArray, + const unsigned int findMe) { + unsigned int tid = get_global_id(0); + + // Then we find the elements for this thread + uint2 element = sortedArray[tid]; + + + // If the element to be found does not lie between + // them, then nothing left to do in this thread + if((element.x > findMe) || (element.y < findMe)) { + return; + } else { + // However, if the element does lie between the lower + // and upper bounds of this thread's searchspace + // we need to narrow down the search further in this + // search space + // The search space for this thread is marked in the + // output as being the total search space for the next pass + outputArray[0].x = tid; + outputArray[0].w = 1; + } +} + + +__kernel void +binarySearch_mulkeys(__global int *keys, + __global uint *input, + const unsigned int numKeys, + __global int *output) { + + int gid = get_global_id(0); + int lBound = gid * 256; + int uBound = lBound + 255; + + for(int i = 0; i < numKeys; i++) { + if(keys[i] >= input[lBound] && keys[i] <= input[uBound]) + output[i]=lBound; + } + +} + + +__kernel void +binarySearch_mulkeysConcurrent(__global uint *keys, + __global uint *input, + const unsigned int inputSize, // num. of inputs + const unsigned int numSubdivisions, + __global int *output) { + + int lBound = (get_global_id(0) % numSubdivisions) * (inputSize / numSubdivisions); + int uBound = lBound + inputSize / numSubdivisions; + int myKey = keys[get_global_id(0) / numSubdivisions]; + int mid; + + while(uBound >= lBound) { + mid = (lBound + uBound) / 2; + if(input[mid] == myKey) { + output[get_global_id(0) / numSubdivisions] = mid; + return; + } else if(input[mid] > myKey) { + uBound = mid - 1; + } else { + lBound = mid + 1; + } + } +} diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/ComputeQueueTest.h b/projects/rdc/include/rdc_modules/rdc_rocr/ComputeQueueTest.h new file mode 100755 index 0000000000..ff7b65b254 --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/ComputeQueueTest.h @@ -0,0 +1,117 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef RDC_MODULES_RDC_ROCR_COMPUTEQUEUETEST_H_ +#define RDC_MODULES_RDC_ROCR_COMPUTEQUEUETEST_H_ + +#include "rdc_modules/rdc_rocr/TestBase.h" +#include "hsa/hsa.h" + +namespace amd { +namespace rdc { + +// Hold all the info specific to binary search +typedef struct BinarySearch { + // Binary Search parameters + uint32_t length; + uint32_t work_group_size; + uint32_t work_grid_size; + uint32_t num_sub_divisions; + uint32_t find_me; + + // Buffers needed for this application + uint32_t* input; + uint32_t* input_arr; + uint32_t* input_arr_local; + uint32_t* output; + // Keneral argument buffers and addresses + void* kern_arg_buffer; // Begin of allocated memory + // this pointer to be deallocated + void* kern_arg_address; // Properly aligned address to be used in aql + // packet (don't use for deallocation) + + // Kernel code + std::string kernel_file_name; + std::string kernel_name; + uint32_t kernarg_size; + uint32_t kernarg_align; + + // HSA/RocR objects needed for this application + hsa_agent_t gpu_dev; + hsa_agent_t cpu_dev; + hsa_signal_t signal; + hsa_queue_t* queue; + hsa_amd_memory_pool_t cpu_pool; + hsa_amd_memory_pool_t gpu_pool; + hsa_amd_memory_pool_t kern_arg_pool; + + // Other items we need to populate AQL packet + uint64_t kernel_object; + uint32_t group_segment_size; ///< Kernel group seg size + uint32_t private_segment_size; ///< Kernel private seg size +} BinarySearch; + + +class ComputeQueueTest : public TestBase { + public: + explicit ComputeQueueTest(uint32_t gpu_index); + + // @Brief: Destructor for test case of ComputeQueueTest + virtual ~ComputeQueueTest(); + + // @Brief: Setup the environment for measurement + virtual hsa_status_t SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + hsa_status_t RunBinarySearchTest(void); + + private: + void InitializeBinarySearch(BinarySearch* bs); + hsa_status_t FindPools(BinarySearch* bs); + hsa_status_t AllocateAndInitBuffers(BinarySearch* bs); + hsa_status_t LoadKernelFromObjFile(BinarySearch* bs); + hsa_status_t Run(BinarySearch* bs); + hsa_status_t CleanUp(BinarySearch* bs); + void PopulateAQLPacket(BinarySearch const* bs, + hsa_kernel_dispatch_packet_t* aql); + hsa_status_t AgentMemcpy(void* dst, const void* src, + size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag); + hsa_status_t AllocAndSetKernArgs(BinarySearch* bs, void* args, + size_t arg_size, void** aql_buf_ptr); + void WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql, + hsa_queue_t* q); +}; + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_COMPUTEQUEUETEST_H_ diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/MemoryAccess.h b/projects/rdc/include/rdc_modules/rdc_rocr/MemoryAccess.h new file mode 100755 index 0000000000..4d1b3b7267 --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/MemoryAccess.h @@ -0,0 +1,72 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef RDC_MODULES_RDC_ROCR_MEMORYACCESS_H_ +#define RDC_MODULES_RDC_ROCR_MEMORYACCESS_H_ + + +#include "rdc_modules/rdc_rocr/TestBase.h" +#include "hsa/hsa.h" +namespace amd { +namespace rdc { + +class MemoryAccessTest : public TestBase { + public: + explicit MemoryAccessTest(uint32_t gpu_index); + + // @Brief: Destructor for test case of MemoryTest + virtual ~MemoryAccessTest(); + + // @Brief: Setup the environment for measurement + virtual hsa_status_t SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + + // @Brief: This test verify that CPU is able to Read & write GPU memory + void CPUAccessToGPUMemoryTest(void); + + // @Brief: This test verify that GPU is able to Read & write CPU memory + void GPUAccessToCPUMemoryTest(void); + + + private: + void CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent, + hsa_agent_t gpuAgent, + hsa_amd_memory_pool_t pool); + void GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_agent_t gpuAgent); +}; + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_MEMORYACCESS_H_ diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/MemoryTest.h b/projects/rdc/include/rdc_modules/rdc_rocr/MemoryTest.h new file mode 100755 index 0000000000..5813a8835e --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/MemoryTest.h @@ -0,0 +1,64 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef RDC_MODULES_RDC_ROCR_MEMORYTEST_H_ +#define RDC_MODULES_RDC_ROCR_MEMORYTEST_H_ + +#include "rdc_modules/rdc_rocr/TestBase.h" +#include "hsa/hsa.h" + +namespace amd { +namespace rdc { +class MemoryTest : public TestBase { + public: + explicit MemoryTest(uint32_t gpu_index); + + // @Brief: Destructor for test case of MemoryTest + virtual ~MemoryTest(); + + // @Brief: Setup the environment for measurement + virtual hsa_status_t SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + hsa_status_t MaxSingleAllocationTest(void); + + hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz); + + private: + hsa_status_t MaxSingleAllocationTest(hsa_agent_t ag, + hsa_amd_memory_pool_t pool); +}; + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_MEMORYTEST_H_ diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/RdcDiagnosticLib.h b/projects/rdc/include/rdc_modules/rdc_rocr/RdcDiagnosticLib.h new file mode 100644 index 0000000000..652915a27b --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/RdcDiagnosticLib.h @@ -0,0 +1,28 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef RDC_MODULES_RDC_DIAGNOSTIC_RDCDIAGNOSTICLIB_H_ +#define RDC_MODULES_RDC_DIAGNOSTIC_RDCDIAGNOSTICLIB_H_ +#include "rdc/rdc.h" +#include "rdc_lib/RdcDiagnosticLibInterface.h" + + +#endif // RDC_MODULES_RDC_DIAGNOSTIC_RDCDIAGNOSTICLIB_H_ diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/RdcRocrBase.h b/projects/rdc/include/rdc_modules/rdc_rocr/RdcRocrBase.h new file mode 100644 index 0000000000..84cac0d8a1 --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/RdcRocrBase.h @@ -0,0 +1,269 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef RDC_MODULES_RDC_ROCR_RDCROCRBASE_H_ +#define RDC_MODULES_RDC_ROCR_RDCROCRBASE_H_ +#include +#include +#include +#include "rdc_lib/RdcPerfTimer.h" +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" + +namespace amd { +namespace rdc { + +/// Common interface for RocR tests and samples +class RdcRocrBase { + public: + RdcRocrBase(void); + + virtual ~RdcRocrBase(void); + + ///< Setters and Getters + + void set_gpu_device1(hsa_agent_t in_dev) { + gpu_device1_.handle = in_dev.handle; + } + hsa_agent_t* gpu_device1(void) { + return &gpu_device1_; + } + + void set_cpu_device(hsa_agent_t in_dev) { + cpu_device_.handle = in_dev.handle; + } + hsa_agent_t* cpu_device(void) { + return &cpu_device_; + } + + void set_kernel_file_name(const char* in_file_name) { + kernel_file_name_ = in_file_name; + } + std::string const kernel_file_name(void) const { + return kernel_file_name_; + } + + void set_kernel_name(std::string in_kernel_name) { + kernel_name_ = in_kernel_name; + } + std::string const kernel_name(void) const { + return kernel_name_; + } + + void set_agent_name(std::string in_agent_name) { + agent_name_ = in_agent_name; + } + + std::string const get_agent_name(void) const { + return agent_name_; + } + + void set_kernel_object(uint64_t in_kernel_object) { + kernel_object_ = in_kernel_object; + } + uint64_t kernel_object(void) const { + return kernel_object_; + } + + void set_profile(hsa_profile_t in_prof) { + profile_ = in_prof; + } + hsa_profile_t profile(void) const { + return profile_; + } + + uint32_t private_segment_size(void) const { + return private_segment_size_; + } + void set_private_segment_size(uint32_t sz) { + private_segment_size_ = sz; + } + + void set_group_segment_size(uint32_t sz) { + group_segment_size_ = sz; + } + uint32_t group_segment_size(void) const { + return group_segment_size_; + } + + void set_group_size(uint32_t sz) { + group_size_ = sz; + } + uint32_t group_size(void) const { + return group_size_; + } + + void set_main_queue(hsa_queue_t* q) { + main_queue_ = q; + } + hsa_queue_t* main_queue(void) const { + return main_queue_; + } + + hsa_kernel_dispatch_packet_t& aql(void) { + return aql_; + } + + void set_num_iteration(int num) { + num_iteration_ = num; + } + uint32_t num_iteration(void) const { + return num_iteration_; + } + + hsa_amd_memory_pool_t& device_pool(void) { + return device_pool_; + } + + hsa_amd_memory_pool_t& cpu_pool(void) { + return cpu_pool_; + } + + hsa_amd_memory_pool_t& kern_arg_pool(void) { + return kern_arg_pool_; + } + + void set_kernarg_size(uint32_t sz) { + kernarg_size_ = sz; + } + uint32_t kernarg_size(void) const { + return kernarg_size_; + } + + void set_kernarg_align(uint32_t align) { + kernarg_align_ = align; + } + uint32_t kernarg_align(void) const { + return kernarg_align_; + } + + void* kernarg_buffer(void) const { + return kernarg_buffer_; + } + void set_kernarg_buffer(void* buffer) { + kernarg_buffer_ = buffer; + } + + int32_t requires_profile(void) const { + return requires_profile_; + } + + char* orig_hsa_enable_interrupt() const { + return orig_hsa_enable_interrupt_; + } + + bool enable_interrupt() const { + return enable_interrupt_; + } + + void set_title(std::string name) { + title_ = name; + } + std::string title(void) const { + return title_; + } + + RdcPerfTimer* hsa_timer(void) { + return &hsa_timer_; + } + + void set_verbosity(uint32_t v) { + verbosity_ = v; + } + uint32_t verbosity(void) const { + return verbosity_; + } + + void set_monitor_verbosity(uint32_t m) { + monitor_verbosity_ = m; + } + uint32_t monitor_verbosity(void) const { + return monitor_verbosity_; + } + + protected: + void set_requires_profile(int32_t reqd_prof) { + requires_profile_ = reqd_prof; + } + + void set_enable_interrupt(bool doEnable) { + enable_interrupt_ = doEnable; + } + + private: + uint64_t num_iteration_; ///< Number of times to execute test + + hsa_queue_t* main_queue_; ///< AQL queue used for packets + + hsa_agent_t gpu_device1_; ///< Handle to first GPU found + + hsa_agent_t cpu_device_; ///< Handle to CPU + + hsa_amd_memory_pool_t device_pool_; ///< Memory pool on gpu pool list + + hsa_amd_memory_pool_t cpu_pool_; ///< Memory pool on cpu pool list + + hsa_amd_memory_pool_t kern_arg_pool_; ///< Memory pool suitable for args + + uint64_t kernel_object_; ///< Handle to kernel code + + std::string kernel_file_name_; ///< Code object file name + + std::string kernel_name_; ///< Kernel name + + std::string agent_name_; ///< Agent name + + hsa_kernel_dispatch_packet_t aql_; ///< Kernel dispatch packet + + uint32_t group_segment_size_; ///< Kernel group seg size + + uint32_t kernarg_size_; ///< Kernarg memory size + + uint32_t kernarg_align_; ///< Alignment for kern argument memory + + void* kernarg_buffer_; ///< Unaligned allocated kernel arg. buffer + + hsa_profile_t profile_; ///< Device profile. + + uint32_t group_size_; ///< Number of work items in one group + + uint32_t private_segment_size_; ///< Kernel private seg size + + int32_t requires_profile_; ///< Profile required by test (-1 if no req.) + + char* orig_hsa_enable_interrupt_; ///< Orig. value of HSA_ENABLE_INTERRUPT + + bool enable_interrupt_; ///< Whether to enable/disable interrupts for test + + std::string title_; ///< Displayed title of test + + uint32_t verbosity_; ///< How much additional output to produce + + uint32_t monitor_verbosity_; ///< verbose or not + + RdcPerfTimer hsa_timer_; ///< Timer to be used for timing parts of test +}; + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_RDCROCRBASE_H_ diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/TestBase.h b/projects/rdc/include/rdc_modules/rdc_rocr/TestBase.h new file mode 100755 index 0000000000..e0787828d0 --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/TestBase.h @@ -0,0 +1,80 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef RDC_MODULES_RDC_ROCR_TESTBASE_H_ +#define RDC_MODULES_RDC_ROCR_TESTBASE_H_ + +#include +#include +#include +#include "rdc_modules/rdc_rocr/RdcRocrBase.h" + +namespace amd { +namespace rdc { + +class TestBase : public RdcRocrBase { + public: + explicit TestBase(uint32_t gpu_index); + + virtual ~TestBase(void); + + enum VerboseLevel {VERBOSE_MIN = 0, VERBOSE_STANDARD, VERBOSE_PROGRESS}; + + // @Brief: Before run the core measure codes, do something to set up + // i.e. init runtime, prepare packet... + virtual hsa_status_t SetUp(void); + + // @Brief: Core measurement codes executing here + virtual void Run(void); + + // @Brief: Do something clean up + virtual void Close(void); + + // @Brief: Display the results + virtual void DisplayResults(void) const; + + // @Brief: Display information about the test + virtual void DisplayTestInfo(void); + + const std::string & description(void) const {return description_;} + + void set_description(std::string d); + + const std::string & get_gpu_info() const { return gpu_info_;} + const std::string & get_per_gpu_info() const { return per_gpu_info_;} + + hsa_status_t FindGPUIndex(hsa_agent_t agent, void* data); + // Return the agent by GPU index in rocm_smi + hsa_status_t get_agent_by_gpu_index(uint32_t gpu_index, hsa_agent_t* agent); + + protected: + uint32_t gpu_index_; + std::string gpu_info_; + std::string per_gpu_info_; + + private: + std::string description_; +}; + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_TESTBASE_H_ diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/base_rocr_utils.h b/projects/rdc/include/rdc_modules/rdc_rocr/base_rocr_utils.h new file mode 100755 index 0000000000..bd3ffaa176 --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/base_rocr_utils.h @@ -0,0 +1,172 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef RDC_MODULES_RDC_ROCR_BASE_ROCR_UTILS_H_ +#define RDC_MODULES_RDC_ROCR_BASE_ROCR_UTILS_H_ + +/// \file +/// Prototypes of utility functions that act on RdcRocrBase objects. + +#include "rdc_modules/rdc_rocr/RdcRocrBase.h" +#include +#include "rdc_modules/rdc_rocr/common.h" +#include "hsa/hsa.h" + +namespace amd { +namespace rdc { + +/// Open binary kernel object file and set all member data related to the +/// kernel. Assumes that input test already has the kernel file name, +/// agent name and kernel function specifed +/// \param[in] test Test for which the kernel will be loaded. +/// \param[in] agent for which the kernel will be loaded . +/// \returns HSA_STATUS_SUCCESS if no errors +hsa_status_t LoadKernelFromObjFile(RdcRocrBase* test, hsa_agent_t* agent); + +/// Do initialization tasks for HSA test program. +/// \param[in] test Test to initialize +/// \returns HSA_STATUS_SUCCESS if no errors +hsa_status_t InitAndSetupHSA(RdcRocrBase* test); + +/// Find and set the cpu and gpu agent member variables. Also checks that +/// gpu agent meets test requirements (e.g., FULL profile vs. BASE profile). +hsa_status_t SetDefaultAgents(RdcRocrBase* test); + +/// For the provided device agent, create an AQL queue +/// \param[in] device Device for which a queue is to be created +/// \param[out] queue Address to which created queue pointer will be written +/// \param[in] num_pkts Size of the queue to create +/// \param[in] do_profile [Optional] Specificy whether profiled queue should +/// be created +/// \returns HSA_STATUS_SUCCESS if no errors encountered +hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue, + uint32_t num_pkts = 0); + +/// This function sets some reasonable default values for an AQL packet. +/// Override any field as necessary after calling this function. +/// \param[in] test Test from which information to populate aql packet can +/// be drawn. +/// \param[inout] aql Caller provided pointer to aql packet that will be +/// populated +/// \returns Appropriate hsa_status_t +hsa_status_t InitializeAQLPacket(const RdcRocrBase* test, + hsa_kernel_dispatch_packet_t* aql); + +/// This function writes all of the aql packet fields to the queue besides +/// "setup" and "header". This assumes all the aql fields have be set +/// appropriately. +/// \param[in] test Test containing the queue and aql packet to be written. +/// \returns Pointer to dispatch packet in queue that was written to +hsa_kernel_dispatch_packet_t* WriteAQLToQueue(RdcRocrBase* test, uint64_t *ind); + +void WriteAQLToQueueLoc(hsa_queue_t *queue, uint64_t indx, + hsa_kernel_dispatch_packet_t *aql_pkt); +/// This function writes the first 32 bits of an aql packet to the provided +/// aql packet. This function is meant to be called immediately before +/// ringing door_bell signal. +/// \param[in] header Value to be written to header field +/// \param[in] setup Value to be written to setup field +/// \param[in] queue_packet Start address of in queue memory of aql packet to +/// be written +/// \returns void +inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup, + hsa_kernel_dispatch_packet_t* queue_packet) { + __atomic_store_n(reinterpret_cast(queue_packet), + header | (setup <<16), __ATOMIC_RELEASE); +} + +/// Perform common operations to clean up after executing a test. Specifically, +/// hsa_shut_down() is called and environment variables that were changed are +/// reset to their original values. +/// \param[in] test Test for which clean up with be performed +/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA +/// error code otherwise. +hsa_status_t CommonCleanUp(RdcRocrBase* test); + +/// Check to see if target machine has the necessary profile to run the +/// provided test. +/// \param[1] test The test that specifies the required profile. +bool CheckProfile(RdcRocrBase const* test); + +/// Allocate memory from the kernel args pool and write the provided argument +/// data to the kernel arg memory. Assumes kern_arg memory pool has been +/// assigned. The amount of memory allocated will actually be \p arg_size +/// plus the alignment required by the kernel arguments. The argument will +/// be written with the proper alignment within the allocated buffer. +/// \p test kernarg_buffer() will point to the allocated buffer, and it should +/// be freed when the kernel is no longer being used. +/// \param test Test from which to find kern_arg pool to write arguments +/// \param args pointer to block of data containing kernel arguments to be +/// written. Arguments are assumed to be of the correct placement, length, +/// and with any padding that is expected by the OpenCL kernel +/// \param arg_size Size of the kernel arg data (including padding) to be +/// written +/// \returns HSA_STATUS_SUCCESS if no errors +hsa_status_t AllocAndSetKernArgs(RdcRocrBase* test, void* args, + size_t arg_size); + +/// Verify that the machine running the test has the required profile. +/// This function will verify that the execution machine meets any specific +/// test requirement for a profile (HSA_PROFILE_BASE or HSA_PROFILE_FULL). +/// \param[in] test Test that provides profile requirements. +/// \returns bool +/// - true Machine meets test requirements +/// - false Machine does not meet test requirements +bool CheckProfileAndInform(RdcRocrBase* test); + +/// This function will set the cpu and gpu memory pools to the type used in +/// many applications. +/// \param[in] test Test that provides profile requirements. +/// \returns HSA_STATUS_SUCCESS if everything cleaned up ok, or appropriate HSA +/// error code otherwise. +hsa_status_t SetPoolsTypical(RdcRocrBase* test); + +/// Work-around for hsa_amd_memory_fill, which is currently broken. +/// \param[in] ptr Pointer to start of memory location to be filled +/// \param[in] value Value to write to each byte of input buffer +/// \param[in] count Size of buffer to fill +/// \param[in] dst_ag Agent owning the buffer to be filled +/// \param[in] src_ag Agent wanting to do the fill +/// \param[in] test Test that has handles to cpu and gpu agents that can own +/// either source or destination of fill +/// \returns HSA_STATUS_OK if not errors +hsa_status_t hsa_memory_fill_workaround_gen(void* ptr, uint32_t value, + size_t count, hsa_agent_t dst_ag, hsa_agent_t src_ag, RdcRocrBase* test); + +/// Get the library directory which is loaded by current process. +/// It will search /proc/self/maps for it. +/// return empty string if fail. +std::string get_lib_dir(const char* lib_name); + +/// Get the app dir by looking at link of /proc/self/exe +std::string get_app_dir(); + +// Search multiple folder for the hsaco file +// Return empty if cannot find it. +std::string search_hsaco_full_path(const char* hsaco_file_name, + const char* agent_name); + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_BASE_ROCR_UTILS_H_ + diff --git a/projects/rdc/include/rdc_modules/rdc_rocr/common.h b/projects/rdc/include/rdc_modules/rdc_rocr/common.h new file mode 100755 index 0000000000..3b9ff89a6d --- /dev/null +++ b/projects/rdc/include/rdc_modules/rdc_rocr/common.h @@ -0,0 +1,231 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/// \file +/// RocR related helper functions for sequeneces that come up frequently + +#ifndef RDC_MODULES_RDC_ROCR_COMMON_H_ +#define RDC_MODULES_RDC_ROCR_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" + +namespace amd { +namespace rdc { + +#if defined(_MSC_VER) +#define ALIGNED_(x) __declspec(align(x)) +#else +#if defined(__GNUC__) +#define ALIGNED_(x) __attribute__ ((aligned(x))) +#endif // __GNUC__ +#endif // _MSC_VER + +#define MULTILINE(...) # __VA_ARGS__ + +#define ASSERT_EQ(a, b) (a==b) + +void SetEnv(const char* env_var_name, const char* env_var_value); +intptr_t AlignDown(intptr_t value, size_t alignment); +void* AlignDown(void* value, size_t alignment); +void* AlignUp(void* value, size_t alignment); + +// define below should be deleted. Leaving in commented out until code that +// refers to it has been corrected +// #define HSA_ARGUMENT_ALIGN_BYTES 16 + +// This structure holds memory pool information acquired through hsa info +// related calls, and is later used for reference when displaying the +// information. +typedef struct pool_info_t_ { + uint32_t segment; + size_t size; + bool alloc_allowed; + size_t alloc_granule; + size_t alloc_alignment; + bool accessible_by_all; + uint32_t global_flag; + uint64_t aggregate_alloc_max; + inline bool operator==(const pool_info_t_ &a) { + if (a.segment == segment && a.size == size + && a.alloc_allowed == alloc_allowed + && a.alloc_granule == alloc_granule + && a.alloc_alignment == alloc_alignment + && a.accessible_by_all == accessible_by_all + && a.aggregate_alloc_max == aggregate_alloc_max + && a.global_flag == global_flag ) + return true; + else + return false; + } +} pool_info_t; + + +struct agent_pools_t{ + hsa_agent_t agent; + std::vector pools; +}; + +/// Fill in the pool_info_t structure for the provided pool. +/// \param[in] pool Pool for which information will be retrieved +/// \param[out] pool_i Pointer to structure where pool info will be stored +/// \returns HSA_STATUS_SUCCESS if no errors are encountered. +hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, pool_info_t *pool_i); + +/// If the provided agent is associated with a GPU, return that agent through +/// output parameter. This function is meant to be the call-back function used +/// with hsa_iterate_agents to find GPU agents. +/// \param[in] agent Agent to evaluate if GPU +/// \param[out] data If agent is associated with a GPU, this pointer will point +/// to the agent upon return +/// \returns HSA_STATUS_SUCCESS if no errors are encountered. +hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data); + +/// If the provided agent is associated with a CPU, return that agent through +/// output parameter. This function is meant to be the call-back function used +/// with hsa_iterate_agents to find CPU agents. +/// \param[in] agent Agent to evaluate if CPU +/// \param[out] data If agent is associated with a CPU, this pointer will point +/// to the agent upon return +/// \returns HSA_STATUS_SUCCESS if no errors are encountered. +hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data); + +// TODO(cfreehil): get rid of FindGlobalPool and replace with FindStandardPool +hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data); + +/// If the provided agent is associated with a CPU, return that agent through +/// output parameter. This function is meant to be the call-back function used +/// with hsa_iterate_agents to find all the CPU agents. +/// \param[in] agent Agent to evaluate if CPU +/// \param[out] data If agent is associated with a CPU, this pointer will point +/// to the agent upon return +/// \returns HSA_STATUS_SUCCESS if no errors are encountered. +hsa_status_t IterateCPUAgents(hsa_agent_t agent, void *data); + +/// If the provided agent is associated with a GPU, return that agent through +/// output parameter. This function is meant to be the call-back function used +/// with hsa_iterate_agents to find all the GPU agents. +/// \param[in] agent Agent to evaluate if GPU +/// \param[out] data If agent is associated with a GPU, this pointer will point +/// to the agent upon return +/// \returns HSA_STATUS_SUCCESS if no errors are encountered. +hsa_status_t IterateGPUAgents(hsa_agent_t agent, void *data); + +/// Find a GLOBAL memory pool. By this, we mean not a kernel args pool. +/// This function is meant to be the call-back function used +/// with hsa_amd_agent_iterate_memory_pools. +/// \param[in] pool Pool to evaluate for required properties +/// \param[in] data If pool meets criteria, this pointer will point +/// to the pool upon return +/// \returns hsa_status_t +/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria +/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria +/// -else return an appropriate error code for any error encountered +hsa_status_t GetGlobalMemoryPool(hsa_amd_memory_pool_t pool, void* data); + +/// Find a "kernel arg" pool. +/// This function is meant to be the call-back function used +/// with hsa_amd_agent_iterate_memory_pools. +/// \param[in] pool Pool to evaluate for required properties +/// \param[in] data If pool meets criteria, this pointer will point +/// to the pool upon return +/// \returns hsa_status_t +/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria +/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria +/// -else return an appropriate error code for any error encountered +hsa_status_t GetKernArgMemoryPool(hsa_amd_memory_pool_t pool, void* data); + + +/// Find a "standard" pool. By this, we mean not a kernel args pool. +/// The pool found will have the following properties: +/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care +/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: Off +/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care +/// This function is meant to be the call-back function used +/// with hsa_amd_agent_iterate_memory_pools. +/// \param[in] pool Pool to evaluate for required properties +/// \param[in] data If pool meets criteria, this pointer will point +/// to the pool upon return +/// \returns hsa_status_t +/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria +/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria +/// -else return an appropriate error code for any error encountered +hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data); +hsa_status_t FindAPUStandardPool(hsa_amd_memory_pool_t pool, void* data); + +/// Find a "kernel arg" pool. +/// The pool found will have the following properties: +/// HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL: Don't care +/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT: On +/// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED: Don't care +/// This function is meant to be the call-back function used +/// with hsa_amd_agent_iterate_memory_pools. +/// \param[in] pool Pool to evaluate for required properties +/// \param[in] data If pool meets criteria, this pointer will point +/// to the pool upon return +/// \returns hsa_status_t +/// -HSA_STATUS_INFO_BREAK - we found a pool that meets criteria +/// -HSA_STATUS_SUCCESS - we did not find a pool that meets the criteria +/// -else return an appropriate error code for any error encountered +hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data); + +/// Dump information about provided memory pool to STDOUT +/// \param[in] pool Pool to gather and dump information for +/// \param[in] indent Number of spaces to indent output. +/// \returns hsa_status_t HSA_STATUS_SUCCESS if no errors +hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i, + uint32_t indent = 0); + +/// Dump information about a provided pointer to STDOUT. +/// \param[in] ptr Pointer about which information is dumped. +/// \returns HSA_STATUS_SUCCESS if there are no errors +hsa_status_t DumpPointerInfo(void* ptr); + +hsa_status_t GetAgentPools( + std::vector> *agent_pools); + +void throw_if_error(hsa_status_t err, const std::string& msg = ""); + +void throw_if_skip(const std::string& msg); + +// The customize exception when the test has to be skipped +class SkipException : public std::exception { + public: + explicit SkipException(const char* msg): _msg(msg) {} + virtual const char* what() const noexcept { return _msg.c_str(); } + private: + std::string _msg; +}; + +} // namespace rdc +} // namespace amd + +#endif // RDC_MODULES_RDC_ROCR_COMMON_H_ diff --git a/projects/rdc/rdc_libs/CMakeLists.txt b/projects/rdc/rdc_libs/CMakeLists.txt index 8bc463a062..786fc69eb9 100755 --- a/projects/rdc/rdc_libs/CMakeLists.txt +++ b/projects/rdc/rdc_libs/CMakeLists.txt @@ -161,12 +161,14 @@ set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcMetricsUpdaterIm set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcWatchTableImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcRasLib.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcSmiLib.cc") +set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcRocrLib.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcTelemetryModule.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcDiagnosticModule.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcSmiDiagnosticImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcModuleMgrImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcNotificationImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RsmiUtils.cc") +set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcPerfTimer.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_capabilities.cc") @@ -183,6 +185,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcWatchTab set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcWatchTableImpl.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcRasLib.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcSmiLib.h") +set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcRocrLib.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcModuleMgrImpl.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcModuleMgr.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcTelemetry.h") @@ -194,6 +197,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcSmi set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcNotification.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcNotificationImpl.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RsmiUtils.h") +set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcPerfTimer.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_capabilities.h") @@ -215,6 +219,54 @@ set_property(TARGET ${RDC_LIB} PROPERTY set_property(TARGET ${RDC_LIB} PROPERTY VERSION "${SO_VERSION_STRING}") +# librdc_rocr.so set up +set(DIAGNOSTIC_LIB "rdc_rocr") +set(DIAGNOSTIC_LIB_COMPONENT "lib${DIAGNOSTIC_LIB}") +set(DIAGNOSTIC_LIB_SRC_LIST "${SRC_DIR}/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLogger.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/RdcRocrBase.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/MemoryTest.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/MemoryAccess.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/TestBase.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/base_rocr_utils.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/ComputeQueueTest.cc") +set(DIAGNOSTIC_LIB_SRC_LIST ${DIAGNOSTIC_LIB_SRC_LIST} "${SRC_DIR}/rdc_modules/rdc_rocr/common.cc") +set(DIAGNOSTIC_LIB_INC_LIST "${RDC_LIB_INC_DIR}/rdc/rdc.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcDiagnosticLibInterface.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/rdc_common.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLogger.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/RdcDiagnosticLib.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/RdcRocrBase.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/TestBase.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/MemoryTest.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/MemoryAccess.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/base_rocr_utils.h") +set(DIAGNOSTIC_LIB_INC_LIST ${DIAGNOSTIC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_modules/rdc_rocr/common.h") + +if(BUILD_ROCRTEST) + message("Build librdc_rocr.so is enabled, make sure the Rocm run time is installed.") + + message("DIAGNOSTIC_LIB_INC_LIST=${DIAGNOSTIC_LIB_INC_LIST}") + + set(HSA_LIB "hsa-runtime64") + add_library(${DIAGNOSTIC_LIB} SHARED ${DIAGNOSTIC_LIB_SRC_LIST} ${DIAGNOSTIC_LIB_INC_LIST}) + target_link_libraries(${DIAGNOSTIC_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} ${HSA_LIB} pthread dl) + target_include_directories(${DIAGNOSTIC_LIB} PRIVATE + "${RSMI_INC_DIR}" + "${ROCM_DIR}/include" + "${PROJECT_SOURCE_DIR}" + "${PROJECT_SOURCE_DIR}/include" + "${COMMON_DIR}" + "${CMAKE_CURRENT_SOURCE_DIR}/include") + + # TODO: set the properties for the library once we have one + ## Set the VERSION and SOVERSION values + set_property(TARGET ${DIAGNOSTIC_LIB} PROPERTY + SOVERSION "${VERSION_MAJOR}") + set_property(TARGET ${DIAGNOSTIC_LIB} PROPERTY + VERSION "${SO_VERSION_STRING}") +endif() + if(BUILD_STANDALONE) # librdc_client.so set up file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h") @@ -274,12 +326,12 @@ target_include_directories(${BOOTSTRAP_LIB} ## Add the install directives for the runtime library. if(BUILD_STANDALONE) - install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} ${RDCCLIENT_LIB} + install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} ${RDCCLIENT_LIB} ${DIAGNOSTIC_LIB} EXPORT rdcTargets LIBRARY DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/lib COMPONENT ${CLIENT_COMPONENT}) else() - install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} + install(TARGETS ${BOOTSTRAP_LIB} ${RDC_LIB} ${DIAGNOSTIC_LIB} EXPORT rdcTargets LIBRARY DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/lib COMPONENT ${CLIENT_COMPONENT}) @@ -289,6 +341,12 @@ install(FILES ${SOURCE_DIR}/include/rdc/rdc.h DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/include/rdc COMPONENT ${CLIENT_COMPONENT}) +# Install the kernel files +install(DIRECTORY ${PROJECT_SOURCE_DIR}/rdc_libs/rdc_modules/kernels/hsaco + DESTINATION ${RDC_CLIENT_INSTALL_PREFIX}/${RDC}/lib + COMPONENT ${CLIENT_COMPONENT}) + + message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") message(" Finished Cmake RDC Lib ") message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") diff --git a/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc b/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc index 39e65572c4..b1806e9ded 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcDiagnosticModule.cc @@ -83,8 +83,6 @@ rdc_status_t RdcDiagnosticModule::rdc_diagnostic_run( rdc_runs.push_back(RDC_DIAG_NODE_TOPOLOGY); rdc_runs.push_back(RDC_DIAG_GPU_PARAMETERS); rdc_runs.push_back(RDC_DIAG_COMPUTE_QUEUE); - rdc_runs.push_back(RDC_DIAG_SDMA_QUEUE); - rdc_runs.push_back(RDC_DIAG_VRAM_CHECK); rdc_runs.push_back(RDC_DIAG_SYS_MEM_CHECK); } @@ -117,11 +115,13 @@ rdc_status_t RdcDiagnosticModule::RdcDiagnosticModule::rdc_diag_destroy() { } RdcDiagnosticModule::RdcDiagnosticModule(const RdcSmiLibPtr& smi_lib, - const RdcRasLibPtr& ras_module) { + const RdcRasLibPtr& ras_module, const RdcRocrLibPtr& rocr_module) { if (smi_lib) { diagnostic_modules_.push_back(smi_lib); } - + if (rocr_module) { + diagnostic_modules_.push_back(rocr_module); + } if (ras_module) { diagnostic_modules_.push_back(ras_module); } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc index 1c7330d5fc..3bde698db6 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc @@ -23,6 +23,7 @@ THE SOFTWARE. #include "rdc_lib/impl/RdcTelemetryModule.h" #include "rdc_lib/impl/RdcDiagnosticModule.h" #include "rdc_lib/impl/RdcRasLib.h" +#include "rdc_lib/impl/RdcRocrLib.h" namespace amd { namespace rdc { @@ -60,9 +61,13 @@ RdcDiagnosticPtr RdcModuleMgrImpl::get_diagnostic_module() { ras_lib_.reset(new RdcRasLib("librdc_ras.so")); } + if (!rocr_lib_) { + rocr_lib_.reset(new RdcRocrLib("librdc_rocr.so")); + } + if (!rdc_diagnostic_module_) { rdc_diagnostic_module_.reset( - new RdcDiagnosticModule(smi_lib_, ras_lib_)); + new RdcDiagnosticModule(smi_lib_, ras_lib_, rocr_lib_)); } return rdc_diagnostic_module_; diff --git a/projects/rdc/rdc_libs/rdc/src/RdcPerfTimer.cc b/projects/rdc/rdc_libs/rdc/src/RdcPerfTimer.cc new file mode 100644 index 0000000000..a77e3f1fb3 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc/src/RdcPerfTimer.cc @@ -0,0 +1,168 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rdc_lib/RdcPerfTimer.h" +#include + +namespace amd { +namespace rdc { + +static const uint64_t kNanosecondsPerSecond = 1000000000; + +RdcPerfTimer::RdcPerfTimer(void) { + freq_in_100mhz = MeasureTSCFreqHz(); +} + +RdcPerfTimer::~RdcPerfTimer() { + while (!_timers.empty()) { + Timer* temp = _timers.back(); + _timers.pop_back(); + delete temp; + } +} + +int RdcPerfTimer::CreateTimer(void) { + Timer* newTimer = new Timer; + newTimer->_start = 0; + newTimer->_clocks = 0; + + newTimer->_freq = kNanosecondsPerSecond; + + /* Push back the address of new Timer instance created */ + _timers.push_back(newTimer); + return static_cast(_timers.size() - 1); +} + +int RdcPerfTimer::StartTimer(int index) { + if (index >= static_cast(_timers.size())) { + Error("Cannot reset timer. Invalid handle."); + return 1; + } + +// General Linux timing method +#ifndef _AMD + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + _timers[index]->_start = (uint64_t) s.tv_sec * kNanosecondsPerSecond + + (uint64_t) s.tv_nsec; +#else + + // AMD timing method + + unsigned int unused; + _timers[index]->_start = __rdtscp(&unused); + +#endif + + return 0; +} + +int RdcPerfTimer::StopTimer(int index) { + uint64_t n = 0; + + if (index >= static_cast(_timers.size())) { + Error("Cannot reset timer. Invalid handle."); + return 1; + } + + // General Linux timing method +#ifndef _AMD + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + n = (uint64_t) s.tv_sec * kNanosecondsPerSecond + (uint64_t) s.tv_nsec; +#else + // AMD Linux timing + + unsigned int unused; + n = __rdtscp(&unused); +#endif + + n -= _timers[index]->_start; + _timers[index]->_start = 0; + +#ifndef _AMD + _timers[index]->_clocks += n; +#else + // convert to ms + _timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz; + cout << "_AMD is enabled!!!" << endl; +#endif + + return 0; +} + +void RdcPerfTimer::Error(std::string str) { + std::cout << str << std::endl; +} + +double RdcPerfTimer::ReadTimer(int index) { + if (index >= static_cast(_timers.size())) { + Error("Cannot read timer. Invalid handle."); + return 1; + } + + double reading = static_cast(_timers[index]->_clocks); + + reading = static_cast(reading / _timers[index]->_freq); + + return reading; +} + +void RdcPerfTimer::ResetTimer(int index) { + // Check if index value is over the timer's size + if (index >= static_cast(_timers.size())) { + Error("Invalid index value\n"); + exit(1); + } + + _timers[index]->_clocks = 0.0; + _timers[index]->_start = 0.0; +} + +uint64_t RdcPerfTimer::CoarseTimestampUs() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000; +} + +uint64_t RdcPerfTimer::MeasureTSCFreqHz() { + // Make a coarse interval measurement of TSC ticks for 1 gigacycles. + unsigned int unused; + uint64_t tscTicksEnd; + + uint64_t coarseBeginUs = CoarseTimestampUs(); + uint64_t tscTicksBegin = __rdtscp(&unused); + + do { + tscTicksEnd = __rdtscp(&unused); + } while (tscTicksEnd - tscTicksBegin < 1000000000); + + uint64_t coarseEndUs = CoarseTimestampUs(); + + // Compute the TSC frequency and round to nearest 100MHz. + uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000; + uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin; + return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs; +} + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc b/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc new file mode 100644 index 0000000000..0133c82474 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc/src/RdcRocrLib.cc @@ -0,0 +1,143 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include "rdc_lib/rdc_common.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/impl/RdcRocrLib.h" + +namespace amd { +namespace rdc { + +RdcRocrLib::RdcRocrLib(const char* lib_name): + test_case_run_(nullptr) + , diag_test_cases_query_(nullptr) + , diag_init_(nullptr) + , diag_destroy_(nullptr) { + rdc_status_t status = lib_loader_.load(lib_name); + if (status != RDC_ST_OK) { + RDC_LOG(RDC_ERROR, "Rocr related function will not work."); + return; + } + + status = lib_loader_.load_symbol(&diag_init_, + "rdc_diag_init"); + if (status != RDC_ST_OK) { + diag_init_ = nullptr; + return; + } + + status = diag_init_(0); + if (status != RDC_ST_OK) { + RDC_LOG(RDC_ERROR, "Fail to init librdc_rocr.so:" + << rdc_status_string(status) + << ". Rocr related function will not work."); + return; + } + + status = lib_loader_.load_symbol(&diag_destroy_, + "rdc_diag_destroy"); + if (status != RDC_ST_OK) { + diag_destroy_ = nullptr; + } + + status = lib_loader_.load_symbol(&test_case_run_, + "rdc_diag_test_case_run"); + if (status != RDC_ST_OK) { + test_case_run_ = nullptr; + } + status = lib_loader_.load_symbol(&diag_test_cases_query_, + "rdc_diag_test_cases_query"); + if (status != RDC_ST_OK) { + diag_test_cases_query_ = nullptr; + } +} + +RdcRocrLib::~RdcRocrLib() { + if (diag_destroy_) { + diag_destroy_(); + } +} + +rdc_status_t RdcRocrLib::rdc_diag_test_cases_query( + rdc_diag_test_cases_t test_cases[MAX_TEST_CASES], + uint32_t* test_case_count) { + if (test_case_count == nullptr) { + return RDC_ST_BAD_PARAMETER; + } + if (!diag_test_cases_query_) { + return RDC_ST_FAIL_LOAD_MODULE; + } + + rdc_status_t status = diag_test_cases_query_(test_cases, test_case_count); + RDC_LOG(RDC_DEBUG, "Query " << *test_case_count << " test cases from Rocr: " + << rdc_status_string(status)); + return status; +} + + // Run a specific test case +rdc_status_t RdcRocrLib::rdc_test_case_run( + rdc_diag_test_cases_t test_case, + uint32_t gpu_index[RDC_MAX_NUM_DEVICES], + uint32_t gpu_count, + rdc_diag_test_result_t* result) { + if (result == nullptr) { + return RDC_ST_BAD_PARAMETER; + } + if (!test_case_run_) { + return RDC_ST_FAIL_LOAD_MODULE; + } + + rdc_status_t status = test_case_run_(test_case, gpu_index, + gpu_count, result); + RDC_LOG(RDC_DEBUG, "Run " << test_case << " test case from Rocr: " + << rdc_status_string(status)); + return status; +} + +rdc_status_t RdcRocrLib::rdc_diagnostic_run( + const rdc_group_info_t& gpus, + rdc_diag_level_t level, + rdc_diag_response_t* response) { + (void)gpus; + (void)level; + (void)response; + return RDC_ST_NOT_SUPPORTED; +} + +rdc_status_t RdcRocrLib::rdc_diag_init(uint64_t flags) { + if (!diag_init_) { + return RDC_ST_FAIL_LOAD_MODULE; + } + + return diag_init_(flags); +} +rdc_status_t RdcRocrLib::rdc_diag_destroy() { + if (!diag_destroy_) { + return RDC_ST_FAIL_LOAD_MODULE; + } + + return diag_destroy_(); +} + +} // namespace rdc +} // namespace amd + diff --git a/projects/rdc/rdc_libs/rdc/src/RdcSmiDiagnosticImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcSmiDiagnosticImpl.cc index 79639b5310..204276bd38 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcSmiDiagnosticImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcSmiDiagnosticImpl.cc @@ -61,13 +61,12 @@ rdc_status_t RdcSmiDiagnosticImpl::check_rsmi_process_info( result->status = RDC_DIAG_RESULT_PASS; result->per_gpu_result_count = 0; strncpy_with_null(result->info, - "Do not have any compute process running on any devices", + "No processes running on any devices.", MAX_DIAG_MSG_LENGTH); return RDC_ST_OK; } - std::string info = std::to_string(num_items) - + " compute process is using devices."; + std::string info; // Find details of the process running on each GPU std::vector procs(num_items); err = rsmi_compute_process_info_get( @@ -81,6 +80,9 @@ rdc_status_t RdcSmiDiagnosticImpl::check_rsmi_process_info( std::map> pids_per_gpu; for (uint32_t i=0; i < num_items; i++) { + // Skip the process does not occupy any GPUs. The hsa_shutdown() + // will not clear /proc sys file until the process is terminated. + if (procs[i].cu_occupancy == 0 ) continue; info += " Process: " + std::to_string(procs[i].process_id) += ", pasid: " + std::to_string(procs[i].pasid) += ", vram_usage: " + std::to_string(procs[i].vram_usage) @@ -118,11 +120,13 @@ rdc_status_t RdcSmiDiagnosticImpl::check_rsmi_process_info( } } // end for (uint32_t i=0 ...) + result->status = RDC_DIAG_RESULT_PASS; // pass by default if (pids_per_gpu.size() == 0) { - result->status = RDC_DIAG_RESULT_WARN; - info += " Cannot detect the processes running in which devices."; - } else { - result->status = RDC_DIAG_RESULT_PASS; // pass by default + result->per_gpu_result_count = 0; + strncpy_with_null(result->info, + "No processes running on any devices.", + MAX_DIAG_MSG_LENGTH); + return RDC_ST_OK; } // Mark as fail @@ -359,7 +363,7 @@ rdc_diag_result_t RdcSmiDiagnosticImpl::check_temperature_level( int64_t critical_temp = 0; err = rsmi_dev_temp_metric_get(gpu_index, type, met, &critical_temp); - if (err != RSMI_STATUS_SUCCESS) { + if (err == RSMI_STATUS_SUCCESS) { if (current_temp >= critical_temp) { result = RDC_DIAG_RESULT_FAIL; per_gpu_info += "Critical "; @@ -384,7 +388,7 @@ rdc_diag_result_t RdcSmiDiagnosticImpl::check_temperature_level( int64_t emergency_temp = 0; err = rsmi_dev_temp_metric_get(gpu_index, type, met, &emergency_temp); - if (err != RSMI_STATUS_SUCCESS) { + if (err == RSMI_STATUS_SUCCESS) { if (current_temp >= critical_temp) { result = RDC_DIAG_RESULT_FAIL; per_gpu_info += "Emergency "; @@ -409,7 +413,7 @@ rdc_diag_result_t RdcSmiDiagnosticImpl::check_temperature_level( int64_t critical_min_temp = 0; err = rsmi_dev_temp_metric_get(gpu_index, type, met, &critical_min_temp); - if (err != RSMI_STATUS_SUCCESS) { + if (err == RSMI_STATUS_SUCCESS) { if (current_temp <= critical_min_temp) { result = RDC_DIAG_RESULT_FAIL; per_gpu_info += "Critical Min "; diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/binary_search_kernels.cl b/projects/rdc/rdc_libs/rdc_modules/kernels/binary_search_kernels.cl new file mode 100755 index 0000000000..eb3cca6c86 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/kernels/binary_search_kernels.cl @@ -0,0 +1,127 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2017, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +/** + * One instance of this kernel call is a thread. + * Each thread finds out the segment in which it should look for the element. + * After that, it checks if the element is between the lower bound and upper + * bound of its segment. If yes, then this segment becomes the total + * searchspace for the next pass. + * + * To achieve this, it writes the lower bound and upper bound to the output + * array. In case the element at the left end (lower bound) matches the element + * we are looking for, that is marked in the output and we no longer need to + * look any further. + */ + +__kernel void +binarySearch(__global uint4 * outputArray, + __const __global uint2 * sortedArray, + const unsigned int findMe) { + unsigned int tid = get_global_id(0); + + // Then we find the elements for this thread + uint2 element = sortedArray[tid]; + + + // If the element to be found does not lie between + // them, then nothing left to do in this thread + if((element.x > findMe) || (element.y < findMe)) { + return; + } else { + // However, if the element does lie between the lower + // and upper bounds of this thread's searchspace + // we need to narrow down the search further in this + // search space + // The search space for this thread is marked in the + // output as being the total search space for the next pass + outputArray[0].x = tid; + outputArray[0].w = 1; + } +} + + +__kernel void +binarySearch_mulkeys(__global int *keys, + __global uint *input, + const unsigned int numKeys, + __global int *output) { + + int gid = get_global_id(0); + int lBound = gid * 256; + int uBound = lBound + 255; + + for(int i = 0; i < numKeys; i++) { + if(keys[i] >= input[lBound] && keys[i] <= input[uBound]) + output[i]=lBound; + } + +} + + +__kernel void +binarySearch_mulkeysConcurrent(__global uint *keys, + __global uint *input, + const unsigned int inputSize, // num. of inputs + const unsigned int numSubdivisions, + __global int *output) { + + int lBound = (get_global_id(0) % numSubdivisions) * (inputSize / numSubdivisions); + int uBound = lBound + inputSize / numSubdivisions; + int myKey = keys[get_global_id(0) / numSubdivisions]; + int mid; + + while(uBound >= lBound) { + mid = (lBound + uBound) / 2; + if(input[mid] == myKey) { + output[get_global_id(0) / numSubdivisions] = mid; + return; + } else if(input[mid] > myKey) { + uBound = mid - 1; + } else { + lBound = mid + 1; + } + } +} diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/gpuReadWrite_kernels.cl b/projects/rdc/rdc_libs/rdc_modules/kernels/gpuReadWrite_kernels.cl new file mode 100755 index 0000000000..ac45e079f6 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/kernels/gpuReadWrite_kernels.cl @@ -0,0 +1,53 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2017, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +__kernel void gpuReadWrite(__global const int * a, + __global int * b, __global int * c) { + int i = get_global_id(0); + // Reading the system memory and writing to gpu memory + c[i] = a[i]; // a[i] point to system memory while c[i] to gpu memory. + //writing to system memory + b[i] = i; +} diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/binary_search_kernels.hsaco new file mode 100755 index 0000000000..410b5d212b Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..0c5f45d55a Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/binary_search_kernels.hsaco new file mode 100755 index 0000000000..c3c1a92a5b Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..c3645a7288 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/binary_search_kernels.hsaco new file mode 100755 index 0000000000..ea84b552e6 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..837296bfaa Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/binary_search_kernels.hsaco new file mode 100755 index 0000000000..79e656a486 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..b8fb253765 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/binary_search_kernels.hsaco new file mode 100755 index 0000000000..ac6d8ed0fc Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..e1bd35b48b Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/binary_search_kernels.hsaco new file mode 100755 index 0000000000..c2173563fd Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..26ef63ea07 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/binary_search_kernels.hsaco new file mode 100755 index 0000000000..6d3e050690 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..e5b48db1c2 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/binary_search_kernels.hsaco new file mode 100755 index 0000000000..86ad1b4753 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..4369508731 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/binary_search_kernels.hsaco new file mode 100755 index 0000000000..d1acb7f5b3 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..a54133cb04 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/binary_search_kernels.hsaco new file mode 100755 index 0000000000..6739875390 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..756d5ada24 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/binary_search_kernels.hsaco new file mode 100755 index 0000000000..867a27c891 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..df46c70410 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/binary_search_kernels.hsaco new file mode 100755 index 0000000000..f4fbdf1d10 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..3289cc5fdd Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/binary_search_kernels.hsaco new file mode 100755 index 0000000000..169c5ed88b Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..b84291559f Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/binary_search_kernels.hsaco new file mode 100755 index 0000000000..5aaef82139 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..f87b76560e Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/binary_search_kernels.hsaco new file mode 100755 index 0000000000..346fbf59e5 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..f58792e8b6 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/binary_search_kernels.hsaco new file mode 100755 index 0000000000..5350361b78 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..0c90e1f726 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/binary_search_kernels.hsaco new file mode 100755 index 0000000000..8ad506ea69 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..0b536501bc Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/binary_search_kernels.hsaco new file mode 100755 index 0000000000..742a924960 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..c1919b3737 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/binary_search_kernels.hsaco new file mode 100755 index 0000000000..5c1cf09a26 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..4ef6e91146 Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/binary_search_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/binary_search_kernels.hsaco new file mode 100755 index 0000000000..2474e7ebff Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/binary_search_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/gpuReadWrite_kernels.hsaco b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/gpuReadWrite_kernels.hsaco new file mode 100755 index 0000000000..afa53cb4db Binary files /dev/null and b/projects/rdc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/gpuReadWrite_kernels.hsaco differ diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/ComputeQueueTest.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/ComputeQueueTest.cc new file mode 100755 index 0000000000..42ed69ba0d --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/ComputeQueueTest.cc @@ -0,0 +1,686 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "rdc_modules/rdc_rocr/common.h" +#include "rdc_modules/rdc_rocr/ComputeQueueTest.h" +#include "rdc_modules/rdc_rocr/base_rocr_utils.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" + +namespace amd { +namespace rdc { + +static const uint32_t kNumBufferElements = 256; + +ComputeQueueTest::ComputeQueueTest(uint32_t gpu_index): TestBase(gpu_index) { + set_num_iteration(10); // Number of iterations to execute of the main test; + // This is a default value which can be overridden + // on the command line. + set_title("ComputeQueue Test"); + set_description("This test will run binary search compute task via AQL."); +} + +ComputeQueueTest::~ComputeQueueTest(void) { +} + +// Any 1-time setup involving member variables used in the rest of the test +// should be done here. +hsa_status_t ComputeQueueTest::SetUp(void) { + hsa_status_t err = HSA_STATUS_SUCCESS; + + TestBase::SetUp(); + + err = SetDefaultAgents(this); + if ( err != HSA_STATUS_SUCCESS) return err; + + err = SetPoolsTypical(this); + return err; +} + +void ComputeQueueTest::Run(void) { + // Compare required profile for this test case with what we're actually + // running on + if (!CheckProfile(this)) { + return; + } + + TestBase::Run(); +} + +void ComputeQueueTest::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void ComputeQueueTest::DisplayResults(void) const { + // Compare required profile for this test case with what we're actually + // running on + if (!CheckProfile(this)) { + return; + } + + return; +} + +void ComputeQueueTest::Close() { + // This will close handles opened within rocrtst utility calls and call + // hsa_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +static const uint32_t kBinarySearchLength = 512; +static const uint32_t kBinarySearchFindMe = 108; +static const uint32_t kWorkGroupSize = 256; + +void ComputeQueueTest::InitializeBinarySearch(BinarySearch* bs) { + bs->kernel_file_name = "binary_search_kernels.hsaco"; + bs->kernel_name = "binarySearch.kd"; + bs->length = kBinarySearchLength; + bs->find_me = kBinarySearchFindMe; + bs->work_group_size = kWorkGroupSize; + bs->num_sub_divisions = bs->length / bs->work_group_size; +} + +// This function shows how to do an asynchronous copy. We have to create a +// signal and use the signal to notify us when the copy has completed. +hsa_status_t ComputeQueueTest::AgentMemcpy(void* dst, const void* src, + size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) { + hsa_signal_t s; + hsa_status_t err; + + err = hsa_signal_create(1, 0, NULL, &s); + throw_if_error(err); + + err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s); + throw_if_error(err); + + if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) { + err = HSA_STATUS_ERROR; + RDC_LOG(RDC_ERROR, "Async copy signal error"); + + throw_if_error(err); + } + + err = hsa_signal_destroy(s); + + throw_if_error(err); + + return err; +} + +hsa_status_t ComputeQueueTest::FindPools(BinarySearch* bs) { + hsa_status_t err; + + err = hsa_amd_agent_iterate_memory_pools(bs->cpu_dev, FindStandardPool, + &bs->cpu_pool); + + if (err != HSA_STATUS_INFO_BREAK) { + return HSA_STATUS_ERROR; + } + + err = hsa_amd_agent_iterate_memory_pools(bs->gpu_dev, FindStandardPool, + &bs->gpu_pool); + + if (err != HSA_STATUS_INFO_BREAK) { + return HSA_STATUS_ERROR; + } + + err = hsa_amd_agent_iterate_memory_pools(bs->cpu_dev, + FindKernArgPool, &bs->kern_arg_pool); + + if (err != HSA_STATUS_INFO_BREAK) { + return HSA_STATUS_ERROR; + } + + return HSA_STATUS_SUCCESS; +} + +// Once the needed memory pools have been found and the BinarySearch structure +// has been updated with these handles, this function is then used to allocate +// memory from those pools. +// Devices with which a pool is associated already have access to the pool. +// However, other devices may also need to read or write to that memory. Below, +// we see how we can grant access to other devices to address this issue. +hsa_status_t ComputeQueueTest::AllocateAndInitBuffers(BinarySearch* bs) { + hsa_status_t err; + uint32_t out_length = 4 * sizeof(uint32_t); + uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t); + + // In all of these examples, we want both the cpu and gpu to have access to + // the buffer in question. We use the array of agents below in the susequent + // calls to hsa_amd_agents_allow_access() for this purpose. + hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev}; + + err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0, + reinterpret_cast(&bs->input)); + throw_if_error(err); + err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input); + throw_if_error(err); + (void)memset(bs->input, 0, in_length); + + err = hsa_amd_memory_pool_allocate(bs->cpu_pool, out_length, 0, + reinterpret_cast(&bs->output)); + throw_if_error(err); + err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->output); + throw_if_error(err); + (void)memset(bs->input, 0, in_length); + + err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0, + reinterpret_cast(&bs->input_arr)); + throw_if_error(err); + err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr); + throw_if_error(err); + (void)memset(bs->input, 0, in_length); + + err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0, + reinterpret_cast(&bs->input_arr_local)); + throw_if_error(err); + err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr_local); + throw_if_error(err); + + // Binary-search application specific code... + // Initialize input buffer with random values in an increasing order + uint32_t max = bs->length * 20; + bs->input[0] = 0; + + uint32_t seed = (unsigned int)time(NULL); + srand(seed); + + for (uint32_t i = 1; i < bs->length; ++i) { + bs->input[i] = bs->input[i - 1] + + static_cast(max * rand_r(&seed) / static_cast(RAND_MAX)); + } + + return err; +} + +// The code in this function illustrates how to load a kernel from +// pre-compiled code. The goal is to get a handle that can be later +// used in an AQL packet and also to extract information about kernel +// that we will need. All of the information hand kernel handle will +// be saved to the BinarySearch structure. It will be used when we +// populate the AQL packet. +hsa_status_t ComputeQueueTest::LoadKernelFromObjFile(BinarySearch* bs) { + hsa_status_t err; + char agent_name[512]; + hsa_code_object_reader_t code_obj_rdr = {0}; + hsa_executable_t executable = {0}; + + err = hsa_agent_get_info(bs->gpu_dev, HSA_AGENT_INFO_NAME, agent_name); + throw_if_error(err); + std::string kernel_file = search_hsaco_full_path( + bs->kernel_file_name.c_str(), agent_name); + if (kernel_file == "") { + RDC_LOG(RDC_ERROR, "failed to open " << bs->kernel_file_name.c_str() << + " at line " << __LINE__ << ", errno: " << errno); + std::string msg("fail to open "); + msg += bs->kernel_file_name; + throw_if_skip(msg); + return HSA_STATUS_ERROR; + } + + hsa_file_t file_handle = open(kernel_file.c_str(), O_RDONLY); + if (file_handle == -1) { + RDC_LOG(RDC_ERROR, "failed to open " << bs->kernel_file_name.c_str() << + " at line " << __LINE__ << ", errno: " << errno); + return HSA_STATUS_ERROR; + } + + err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); + throw_if_error(err); + close(file_handle); + + err = hsa_executable_create_alt(HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &executable); + throw_if_error(err); + + err = hsa_executable_load_agent_code_object(executable, bs->gpu_dev, + code_obj_rdr, NULL, NULL); + throw_if_error(err); + + err = hsa_executable_freeze(executable, NULL); + throw_if_error(err); + + hsa_executable_symbol_t kern_sym; + err = hsa_executable_get_symbol(executable, NULL, bs->kernel_name.c_str(), + bs->gpu_dev, 0, &kern_sym); + throw_if_error(err); + + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &bs->kernel_object); + throw_if_error(err); + + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &bs->private_segment_size); + throw_if_error(err); + + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &bs->group_segment_size); + throw_if_error(err); + + // Remaining queries not supported on code object v3. + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, + &bs->kernarg_size); + throw_if_error(err); + + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, + &bs->kernarg_align); + throw_if_error(err); + assert(bs->kernarg_align >= 16 && "Reported kernarg size is too small."); + bs->kernarg_align = (bs->kernarg_align == 0) ? 16 : bs->kernarg_align; + + return err; +} + +// This function populates the AQL patch with the information +// we have collected and stored in the BinarySearch structure thus far. +void ComputeQueueTest::PopulateAQLPacket(BinarySearch const* bs, + hsa_kernel_dispatch_packet_t* aql) { + aql->header = 0; // Dummy val. for now. Set this right before doorbell ring + aql->setup = 1; + aql->workgroup_size_x = bs->work_group_size; + aql->workgroup_size_y = 1; + aql->workgroup_size_z = 1; + aql->grid_size_x = bs->work_grid_size; + aql->grid_size_y = 1; + aql->grid_size_z = 1; + aql->private_segment_size = bs->private_segment_size; + aql->group_segment_size = bs->group_segment_size; + aql->kernel_object = bs->kernel_object; + aql->kernarg_address = bs->kern_arg_address; + aql->completion_signal = bs->signal; +} + +void ComputeQueueTest::WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql, + hsa_queue_t* q) { + void* queue_base = q->base_address; + const uint32_t queue_mask = q->size - 1; + uint64_t que_idx = hsa_queue_add_write_index_relaxed(q, 1); + + hsa_kernel_dispatch_packet_t* queue_aql_packet; + + queue_aql_packet = + &(reinterpret_cast(queue_base)) + [que_idx & queue_mask]; + + queue_aql_packet->workgroup_size_x = in_aql->workgroup_size_x; + queue_aql_packet->workgroup_size_y = in_aql->workgroup_size_y; + queue_aql_packet->workgroup_size_z = in_aql->workgroup_size_z; + queue_aql_packet->grid_size_x = in_aql->grid_size_x; + queue_aql_packet->grid_size_y = in_aql->grid_size_y; + queue_aql_packet->grid_size_z = in_aql->grid_size_z; + queue_aql_packet->private_segment_size = in_aql->private_segment_size; + queue_aql_packet->group_segment_size = in_aql->group_segment_size; + queue_aql_packet->kernel_object = in_aql->kernel_object; + queue_aql_packet->kernarg_address = in_aql->kernarg_address; + queue_aql_packet->completion_signal = in_aql->completion_signal; +} + + +// This function allocates memory from the kern_arg pool we already found, and +// then sets the argument values needed by the kernel code. +hsa_status_t ComputeQueueTest::AllocAndSetKernArgs(BinarySearch* bs, void* args, + size_t arg_size, void** aql_buf_ptr) { + void* kern_arg_buf = nullptr; + hsa_status_t err; + size_t buf_size; + size_t req_align; + + // The kernel code must be written to memory at the correct alignment. We + // already queried the executable to get the correct alignment, which is + // stored in bs->kernarg_align. In case the memory returned from + // hsa_amd_memory_pool is not of the correct alignment, we request a little + // more than what we need in case we need to adjust. + req_align = bs->kernarg_align; + // Allocate enough extra space for alignment adjustments if ncessary + buf_size = arg_size + (req_align << 1); + + err = hsa_amd_memory_pool_allocate(bs->kern_arg_pool, buf_size, 0, + reinterpret_cast(&kern_arg_buf)); + throw_if_error(err); + + // Address of the allocated buffer + bs->kern_arg_buffer = kern_arg_buf; + + // Addr. of kern arg start. + bs->kern_arg_address = AlignUp(kern_arg_buf, req_align); + + assert(arg_size >= bs->kernarg_size); + assert(((uintptr_t)bs->kern_arg_address + arg_size) < + ((uintptr_t)bs->kern_arg_buffer + buf_size)); + + (void)memcpy(bs->kern_arg_address, args, arg_size); + throw_if_error(err); + + // Make sure both the CPU and GPU can access the kernel arguments + hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev}; + err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->kern_arg_buffer); + throw_if_error(err); + + // Save this info in our BinarySearch structure for later. + *aql_buf_ptr = bs->kern_arg_address; + + return HSA_STATUS_SUCCESS; +} + +// Once all the required data for kernel execution is collected (in this +// application it is stored in the BinarySearch structure) we can put it in +// an AQL packet and ring the queue door bell to tell the command processor to +// execute it. +hsa_status_t ComputeQueueTest::Run(BinarySearch* bs) { + hsa_status_t err; + RDC_LOG(RDC_DEBUG, "Executing kernel " << bs->kernel_name); + + // Adjust the size of workgroup + // This is mostly application specific. + if (bs->work_group_size > 64) { + bs->work_group_size = 64; + bs->num_sub_divisions = bs->length / bs->work_group_size; + } + if (bs->num_sub_divisions < bs->work_group_size) { + bs->num_sub_divisions = bs->work_group_size; + } + + bs->work_grid_size = bs->num_sub_divisions; + + // Explanation of BinarySearch algorithm. + /* + * Since a plain binary search on the GPU would not achieve much benefit + * over the GPU we are doing an N'ary search. We split the array into N + * segments every pass and therefore get log (base N) passes instead of log + * (base 2) passes. + * + * In every pass, only the thread that can potentially have the element we + * are looking for writes to the output array. For ex: if we are looking to + * find 4567 in the array and every thread is searching over a segment of + * 1000 values and the input array is 1, 2, 3, 4,... then the first thread + * is searching in 1 to 1000, the second one from 1001 to 2000, etc. The + * first one does not write to the output. The second one doesn't either. + * The fifth one however is from 4001 to 5000. So it can potentially have + * the element 4567 which lies between them. + * + * This particular thread writes to the output the lower bound, upper bound + * and whether the element equals the lower bound element. So, it would be + * 4001, 5000, 0 + * + * The next pass would subdivide 4001 to 5000 into smaller segments and + * continue the same process from there. + * + * When a pass returns 1 in the third element, it means the element has been + * found and we can stop executing the kernel. If the element is not found, + * then the execution stops after looking at segment of size 1. + */ + + uint32_t global_lower_bound = 0; + uint32_t global_upper_bound = bs->length - 1; + uint32_t sub_div_size = (global_upper_bound - global_lower_bound + 1) / + bs->num_sub_divisions; + + if ((bs->input[0] > bs->find_me) || + (bs->input[bs->length - 1] < bs->find_me)) { + bs->output[0] = 0; + bs->output[1] = bs->length - 1; + bs->output[2] = 0; + RDC_LOG(RDC_DEBUG, "Returning too early"); + return HSA_STATUS_SUCCESS; + } + + bs->output[3] = 1; + + // Setup the kernel args + // See the meta-data for the compiled OpenCL kernel code to ascertain + // the sizes, padding and alignment required for kernel arguments. + // This can be seen by executing + // $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco + // The kernel code will expect the following arguments aligned as shown. + typedef uint32_t uint2[2]; + typedef uint32_t uint4[4]; + struct __attribute__((aligned(16))) local_args_t { + uint4* outputArray; + uint2* sortedArray; + uint32_t findMe; + uint32_t pad; + uint64_t global_offset_x; + uint64_t global_offset_y; + uint64_t global_offset_z; + uint64_t printf_buffer; + uint64_t default_queue; + uint64_t completion_action; + } local_args; + + local_args.outputArray = reinterpret_cast(bs->output); + local_args.sortedArray = reinterpret_cast(bs->input_arr_local); + local_args.findMe = bs->find_me; + local_args.global_offset_x = 0; + local_args.global_offset_y = 0; + local_args.global_offset_z = 0; + local_args.printf_buffer = 0; + local_args.default_queue = 0; + local_args.completion_action = 0; + + // Copy the kernel args structure into kernel arg memory + err = AllocAndSetKernArgs(bs, &local_args, sizeof(local_args), + &bs->kern_arg_address); + throw_if_error(err); + + // Populate an AQL packet with the info we've gathered + hsa_kernel_dispatch_packet_t aql; + PopulateAQLPacket(bs, &aql); + + uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t); + + while ((sub_div_size > 1) && (bs->output[3] != 0)) { + for (uint32_t i = 0 ; i < bs->num_sub_divisions; i++) { + int idx1 = i * sub_div_size; + int idx2 = ((i + 1) * sub_div_size) - 1; + bs->input_arr[2 * i] = bs->input[idx1]; + bs->input_arr[2 * i + 1] = bs->input[idx2]; + } + + // Copy kernel parameter from system memory to local memory + err = AgentMemcpy(reinterpret_cast(bs->input_arr_local), + reinterpret_cast(bs->input_arr), + in_length, bs->gpu_dev, bs->cpu_dev); + + throw_if_error(err); + + // Reset output buffer to zero + bs->output[3] = 0; + + // Dispatch kernel with global work size, work group size with ONE dimesion + // and wait for kernel to complete + + // Compute the write index of queue and copy Aql packet into it + uint64_t que_idx = hsa_queue_load_write_index_relaxed(bs->queue); + + const uint32_t mask = bs->queue->size - 1; + + // This function simply copies the data we've collected so far into our + // local AQL packet, except the the setup and header fields. + WriteAQLToQueue(&aql, bs->queue); + + uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH; + aql_header |= HSA_FENCE_SCOPE_SYSTEM << + HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + aql_header |= HSA_FENCE_SCOPE_SYSTEM << + HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + + // Set the packet's type, acquire and release fences. This should be done + // atomically after all the other fields have been set, using release + // memory ordering to ensure all the fields are set when the door bell + // signal is activated. + void* q_base = bs->queue->base_address; + + AtomicSetPacketHeader(aql_header, aql.setup, + &(reinterpret_cast + (q_base))[que_idx & mask]); + + // Increment the write index and ring the doorbell to dispatch kernel. + hsa_queue_store_write_index_relaxed(bs->queue, (que_idx + 1)); + hsa_signal_store_relaxed(bs->queue->doorbell_signal, que_idx); + + // Wait on the dispatch signal until the kernel is finished. + // Modify the wait condition to HSA_WAIT_STATE_ACTIVE (instead of + // HSA_WAIT_STATE_BLOCKED) if polling is needed instead of blocking, as we + // have below. + // The call below will block until the condition is met. Below we have said + // the condition is that the signal value (initiailzed to 1) associated with + // the queue is less than 1. When the kernel associated with the queued AQL + // packet has completed execution, the signal value is automatically + // decremented by the packet processor. + hsa_signal_value_t value = hsa_signal_wait_scacquire(bs->signal, + HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // value should be 0, or we timed-out + if (value) { + RDC_LOG(RDC_ERROR, "Timed out waiting for kernel to complete?"); + throw_if_error(HSA_STATUS_ERROR); + } + + // Reset the signal to its initial value for the next iteration + hsa_signal_store_screlease(bs->signal, 1); + + // Binary search algorithm stuff... + global_lower_bound = bs->output[0] * sub_div_size; + global_upper_bound = global_lower_bound + sub_div_size - 1; + sub_div_size = (global_upper_bound - global_lower_bound + 1) / + bs->num_sub_divisions; + } + + uint32_t element_index = UINT_MAX; + + for (uint32_t i = global_lower_bound; i <= global_upper_bound; i++) { + if (bs->input[i] == bs->find_me) { + element_index = i; + bs->output[0] = i; + bs->output[1] = i + 1; + bs->output[2] = 1; + break; + } + + // Element is not found in region specified + // by global lower bound to global upper bound + bs->output[2] = 0; + } + + uint32_t is_elem_found = bs->output[2]; + RDC_LOG(RDC_DEBUG, "Lower bound = " << global_lower_bound); + RDC_LOG(RDC_DEBUG, "Upper bound = " << global_upper_bound); + RDC_LOG(RDC_DEBUG, "Element search for = " << bs->find_me); + + if (is_elem_found == 1) { + RDC_LOG(RDC_DEBUG, "Element found at index " << element_index); + } else { + RDC_LOG(RDC_DEBUG, "Element value " << bs->find_me << " not found"); + } + + return HSA_STATUS_SUCCESS; +} + +// Release all the RocR resources we have acquired in this application. +hsa_status_t ComputeQueueTest::CleanUp(BinarySearch* bs) { + hsa_status_t err = HSA_STATUS_SUCCESS; + + err = hsa_amd_memory_pool_free(bs->input); + + err = hsa_amd_memory_pool_free(bs->output); + + err = hsa_amd_memory_pool_free(bs->input_arr); + + err = hsa_amd_memory_pool_free(bs->kern_arg_buffer); + + err = hsa_queue_destroy(bs->queue); + + err = hsa_signal_destroy(bs->signal); + + // shutdown will be called at destructor + // err = hsa_shut_down(); + + return err; +} + +hsa_status_t ComputeQueueTest::RunBinarySearchTest(void) { + BinarySearch bs; + hsa_status_t err; + + InitializeBinarySearch(&bs); + + hsa_agent_t current_gpu; + err = get_agent_by_gpu_index(gpu_index_, ¤t_gpu); + throw_if_error(err, "Get agent by GPU index fail."); + bs.gpu_dev.handle = current_gpu.handle; + + // find all cpu agents + std::vector cpus; + err = hsa_iterate_agents(IterateCPUAgents, &cpus); + throw_if_error(err); + bs.cpu_dev.handle = cpus[0].handle; + + err = hsa_signal_create(1, 0, NULL, &bs.signal); + throw_if_error(err, "Fail to create signal."); + + err = hsa_queue_create(bs.gpu_dev, 128, HSA_QUEUE_TYPE_MULTI, NULL, NULL, + UINT32_MAX, UINT32_MAX, &bs.queue); + throw_if_error(err, "Fail to create queue."); + + err = FindPools(&bs); + throw_if_error(err, "Fail to find pools."); + + // Allocate memory from the correct memory pool, and initialize them as + // neeeded for the algorihm. + err = AllocateAndInitBuffers(&bs); + throw_if_error(err, "Allocate and initBuffers fail."); + + err = LoadKernelFromObjFile(&bs); + throw_if_error(err, "Load kernel from Object file fail."); + + err = Run(&bs); + throw_if_error(err, "Run binary search fail."); + + CleanUp(&bs); + + gpu_info_ += "Run binary search task on GPU "; + gpu_info_ += std::to_string(gpu_index_); + gpu_info_ += " Pass."; + + return err; +} + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/MemoryAccess.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/MemoryAccess.cc new file mode 100755 index 0000000000..51a8c75dbb --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/MemoryAccess.cc @@ -0,0 +1,469 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include + +#include "rdc_modules/rdc_rocr/common.h" +#include "rdc_modules/rdc_rocr/MemoryAccess.h" +#include "rdc_modules/rdc_rocr/base_rocr_utils.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" + +namespace amd { +namespace rdc { + +MemoryAccessTest::MemoryAccessTest(uint32_t gpu_index): TestBase(gpu_index) { + set_num_iteration(10); // Number of iterations to execute of the main test; + // This is a default value which can be overridden + // on the command line. + + set_title("RocR Memory Access Tests"); + set_description("This series of tests check memory allocation" + "on GPU and CPU, i.e. GPU access to system memory " + "and CPU access to GPU memory."); +} + +MemoryAccessTest::~MemoryAccessTest(void) { +} + +// Any 1-time setup involving member variables used in the rest of the test +// should be done here. +hsa_status_t MemoryAccessTest::SetUp(void) { + hsa_status_t err; + + TestBase::SetUp(); + + err = SetDefaultAgents(this); + throw_if_error(err); + + err = SetPoolsTypical(this); + throw_if_error(err); + return err; +} + +void MemoryAccessTest::Run(void) { + // Compare required profile for this test case with what we're actually + // running on + if (!CheckProfile(this)) { + return; + } + + TestBase::Run(); +} + +void MemoryAccessTest::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void MemoryAccessTest::DisplayResults(void) const { + // Compare required profile for this test case with what we're actually + // running on + if (!CheckProfile(this)) { + return; + } +} + +void MemoryAccessTest::Close() { + // This will close handles opened within rocrtst utility calls and call + // hsa_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + + +typedef struct __attribute__ ((aligned(16))) args_t { + int *a; + int *b; + int *c; + } args; + + args *kernArgs = NULL; + +static const char kSubTestSeparator[] = " **************************"; + +static void PrintMemorySubtestHeader(const char *header) { + RDC_LOG(RDC_DEBUG, " *** Memory Subtest: " << header << " ***"); +} + +#if ROCRTST_EMULATOR_BUILD +static const int kMemoryAllocSize = 8; +#else +static const int kMemoryAllocSize = 1024; +#endif + + +// Test to check GPU can read & write to system memory +void MemoryAccessTest::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, + hsa_agent_t gpuAgent) { + hsa_status_t err; + + // Get Global Memory Pool on the gpuAgent to allocate gpu buffers + hsa_amd_memory_pool_t gpu_pool; + err = hsa_amd_agent_iterate_memory_pools(gpuAgent, + GetGlobalMemoryPool, + &gpu_pool); + throw_if_error(err); + + hsa_amd_memory_pool_access_t access; + hsa_amd_agent_memory_pool_get_info(cpuAgent, gpu_pool, + HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, + &access); + if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) { + // hsa objects + hsa_queue_t *queue = NULL; // command queue + hsa_signal_t signal = {0}; // completion signal + + // get queue size + uint32_t queue_size = 0; + err = hsa_agent_get_info(gpuAgent, + HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); + throw_if_error(err); + + // create queue + err = hsa_queue_create(gpuAgent, + queue_size, HSA_QUEUE_TYPE_MULTI, + NULL, NULL, 0, 0, &queue); + throw_if_error(err); + + // Get System Memory Pool on the cpuAgent to allocate host side buffers + hsa_amd_memory_pool_t global_pool; + err = hsa_amd_agent_iterate_memory_pools(cpuAgent, + GetGlobalMemoryPool, + &global_pool); + throw_if_error(err); + + // Find a memory pool that supports kernel arguments. + hsa_amd_memory_pool_t kernarg_pool; + err = hsa_amd_agent_iterate_memory_pools(cpuAgent, + GetKernArgMemoryPool, + &kernarg_pool); + throw_if_error(err); + + // Allocate the host side buffers + // (sys_data,dup_sys_data,cpuResult,kernArg) on system memory + int *sys_data = NULL; + int *dup_sys_data = NULL; + int *cpuResult = NULL; + int *gpuResult = NULL; + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&cpuResult)); + throw_if_error(err); + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&sys_data)); + throw_if_error(err); + + err = hsa_amd_memory_pool_allocate(global_pool, + kMemoryAllocSize, 0, + reinterpret_cast(&dup_sys_data)); + throw_if_error(err); + + + // Allocate the kernel argument buffer from the kernarg_pool. + err = hsa_amd_memory_pool_allocate(kernarg_pool, sizeof(args_t), 0, + reinterpret_cast(&kernArgs)); + throw_if_error(err); + + // initialize the host buffers + for (int i = 0; i < kMemoryAllocSize; ++i) { + unsigned int seed = time(NULL); + sys_data[i] = 1 + rand_r(&seed) % 1; + dup_sys_data[i] = sys_data[i]; + } + + memset(cpuResult, 0, kMemoryAllocSize * sizeof(int)); + + // for the dGPU, we have coarse grained local memory, + // so allocate memory for it on the GPU's GLOBAL segment . + + // Get local memory of GPU to allocate device side buffers + + err = hsa_amd_memory_pool_allocate(gpu_pool, kMemoryAllocSize, 0, + reinterpret_cast(&gpuResult)); + throw_if_error(err); + + + // Allow cpuAgent access to all allocated GPU memory. + err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpuResult); + throw_if_error(err); + memset(gpuResult, 0, kMemoryAllocSize * sizeof(int)); + + // Allow gpuAgent access to all allocated system memory. + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, cpuResult); + throw_if_error(err); + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data); + throw_if_error(err); + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, dup_sys_data); + throw_if_error(err); + err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, kernArgs); + throw_if_error(err); + + kernArgs->a = sys_data; + kernArgs->b = cpuResult; // system memory passed to gpu for write + kernArgs->c = gpuResult; // gpu memory to verify that gpu read system data + + + // Create the executable, get symbol by name and load the code object + set_kernel_file_name("gpuReadWrite_kernels.hsaco"); + set_kernel_name("gpuReadWrite"); + err = LoadKernelFromObjFile(this, &gpuAgent); + throw_if_error(err); + + // Fill the dispatch packet with + // workgroup_size, grid_size, kernelArgs and completion signal + // Put it on the queue and launch the kernel by ringing the doorbell + + // create completion signal + err = hsa_signal_create(1, 0, NULL, &signal); + throw_if_error(err); + + // create aql packet + hsa_kernel_dispatch_packet_t aql; + memset(&aql, 0, sizeof(aql)); + + // initialize aql packet + aql.workgroup_size_x = 256; + aql.workgroup_size_y = 1; + aql.workgroup_size_z = 1; + aql.grid_size_x = kMemoryAllocSize; + aql.grid_size_y = 1; + aql.grid_size_z = 1; + aql.private_segment_size = 0; + aql.group_segment_size = 0; + aql.kernel_object = kernel_object(); // kernel_code; + aql.kernarg_address = kernArgs; + aql.completion_signal = signal; + + // const uint32_t queue_size = queue->size; + const uint32_t queue_mask = queue->size - 1; + + // write to command queue + uint64_t index = hsa_queue_load_write_index_relaxed(queue); + hsa_queue_store_write_index_relaxed(queue, index + 1); + + WriteAQLToQueueLoc(queue, index, &aql); + + hsa_kernel_dispatch_packet_t *q_base_addr = + reinterpret_cast(queue->base_address); + AtomicSetPacketHeader( + (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE), + (1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS), + reinterpret_cast + (&q_base_addr[index & queue_mask])); + + // ringdoor bell + hsa_signal_store_relaxed(queue->doorbell_signal, index); + // wait for the signal and reset it for future use + while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, + (uint64_t)-1, HSA_WAIT_STATE_ACTIVE)) { } + hsa_signal_store_relaxed(signal, 1); + + // compare device and host side results + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "check gpu has read the system memory"); + } + for (int i = 0; i < kMemoryAllocSize; ++i) { + if (gpuResult[i] != dup_sys_data[i]) { + throw_if_error(HSA_STATUS_ERROR, + "gpuResult does not match dup_sys_data."); + } + } + + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "gpu has read the system memory successfully"); + RDC_LOG(RDC_DEBUG, "check gpu has written to system memory"); + } + for (int i = 0; i < kMemoryAllocSize; ++i) { + if (cpuResult[i] != i) { + throw_if_error(HSA_STATUS_ERROR, + "The CPU memory size does not match the system memory size."); + } + } + + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "gpu has written to system memory successfully"); + } + + if (sys_data) { hsa_memory_free(sys_data); } + if (dup_sys_data) { hsa_memory_free(dup_sys_data); } + if (cpuResult) {hsa_memory_free(cpuResult); } + if (gpuResult) {hsa_memory_free(gpuResult); } + if (kernArgs) { hsa_memory_free(kernArgs); } + if (signal.handle) { hsa_signal_destroy(signal); } + if (queue) { hsa_queue_destroy(queue); } + + } else { + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, + "Test not applicable as system is not large bar, skipping"); + } + return; + } +} + +// Test to check cpu can read & write to GPU memory +void MemoryAccessTest::CPUAccessToGPUMemoryTest(hsa_agent_t cpuAgent, + hsa_agent_t, + hsa_amd_memory_pool_t pool) { + hsa_status_t err; + + pool_info_t pool_i; + err = AcquirePoolInfo(pool, &pool_i); + throw_if_error(err); + + if (pool_i.segment == HSA_AMD_SEGMENT_GLOBAL && + pool_i.global_flag == HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) { + hsa_amd_memory_pool_access_t access; + hsa_amd_agent_memory_pool_get_info(cpuAgent, pool, + HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, + &access); + if (access != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) { + if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 || + pool_i.alloc_alignment == 0) { + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "Test not applicable. Skipping."); + } + return; + } + + auto gran_sz = pool_i.alloc_granule; + auto pool_sz = pool_i.size / gran_sz; + auto max_alloc_size = pool_sz/2; + unsigned int max_element = max_alloc_size/sizeof(unsigned int); + unsigned int *gpu_data; + unsigned int *sys_data; + sys_data = (unsigned int*)malloc(max_alloc_size); + memset(sys_data, 0, max_alloc_size); + for (unsigned int i = 1; i <= max_element; ++i) { + sys_data[i] = i; + } + // err = hsa_amd_agents_allow_access(1, &gpuAgent, NULL, sys_data); + // EXPECT_EQ(err, HSA_STATUS_SUCCESS); + err = hsa_amd_memory_pool_allocate(pool, max_alloc_size, 0, + reinterpret_cast(&gpu_data)); + throw_if_error(err); + /* + if (err == HSA_STATUS_ERROR) { + err = hsa_amd_memory_pool_free(gpu_data); + }*/ + + err = hsa_amd_agents_allow_access(1, &cpuAgent, NULL, gpu_data); + throw_if_error(err); + memset(gpu_data, 0, max_alloc_size); + + // Verify CPU can read & write to GPU memory + RDC_LOG(RDC_DEBUG, "Verify CPU can read & write to GPU memory"); + for (unsigned int i = 1; i <= max_element; ++i) { + gpu_data[i] = i; // Write to gpu memory directly + } + + for (unsigned int i = 1; i <= max_element; ++i) { + if (sys_data[i] != gpu_data[i]) { // Reading GPU memory + fprintf(stdout, "Values not mathing !! sys_data[%d]:%d ," + "gpu_data[%d]\n", sys_data[i], i, gpu_data[i]); + } + } + RDC_LOG(RDC_DEBUG, "CPU have read & write to GPU memory successfully"); + err = hsa_amd_memory_pool_free(gpu_data); + free(sys_data); + } else { + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, + "Test not applicable as system is not large bar, Skipping."); + } + return; + } + } +} + +void MemoryAccessTest::CPUAccessToGPUMemoryTest(void) { + hsa_status_t err; + + PrintMemorySubtestHeader("CPUAccessToGPUMemoryTest in Memory Pools"); + // find all cpu agents + std::vector cpus; + err = hsa_iterate_agents(IterateCPUAgents, &cpus); + throw_if_error(err); + // find all gpu agents + std::vector gpus; + err = hsa_iterate_agents(IterateGPUAgents, &gpus); + throw_if_error(err); + for (unsigned int i = 0 ; i< gpus.size(); ++i) { + hsa_amd_memory_pool_t gpu_pool; + memset(&gpu_pool, 0, sizeof(gpu_pool)); + err = hsa_amd_agent_iterate_memory_pools(gpus[i], + GetGlobalMemoryPool, + &gpu_pool); + throw_if_error(err); + if (gpu_pool.handle == 0) { + RDC_LOG(RDC_DEBUG, "no global mempool in gpu agent"); + return; + } + CPUAccessToGPUMemoryTest(cpus[0], gpus[i], gpu_pool); + } + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "subtest Passed"); + } + per_gpu_info_ += "CPUAccessToGPUMemoryTest Pass."; + gpu_info_ += "CPUAccessToGPUMemoryTest for GPU "; + gpu_info_ += std::to_string(gpu_index_); + gpu_info_ += " Pass. "; +} + +void MemoryAccessTest::GPUAccessToCPUMemoryTest(void) { + hsa_status_t err; + + PrintMemorySubtestHeader("GPUAccessToCPUMemoryTest in Memory Pools"); + // find all cpu agents + std::vector cpus; + err = hsa_iterate_agents(IterateCPUAgents, &cpus); + throw_if_error(err); + + // find current gpu + hsa_agent_t current_gpu; + err = get_agent_by_gpu_index(gpu_index_, ¤t_gpu); + throw_if_error(err, "Get agent by GPU index fail."); + + GPUAccessToCPUMemoryTest(cpus[0], current_gpu); + + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "subtest Passed"); + } + + per_gpu_info_ += "GPUAccessToCPUMemoryTest Pass."; + + gpu_info_ += "GPUAccessToCPUMemoryTest for GPU "; + gpu_info_ += std::to_string(gpu_index_); + gpu_info_ += " Pass. "; +} + +} // namespace rdc +} // namespace amd \ No newline at end of file diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/MemoryTest.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/MemoryTest.cc new file mode 100755 index 0000000000..4c5b09f142 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/MemoryTest.cc @@ -0,0 +1,258 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include "rdc_modules/rdc_rocr/common.h" +#include "rdc_modules/rdc_rocr/MemoryTest.h" +#include "rdc_modules/rdc_rocr/base_rocr_utils.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" + +namespace amd { +namespace rdc { + +static const uint32_t kNumBufferElements = 256; + +MemoryTest::MemoryTest(uint32_t gpu_index): TestBase(gpu_index) { + set_num_iteration(10); // Number of iterations to execute of the main test; + // This is a default value which can be overridden + // on the command line. + set_title("Max Single Allocation Memory Test"); + set_description("This series of tests check memory allocation limits, extent" + " of GPU access to system memory and other memory related functionality."); +} + +MemoryTest::~MemoryTest(void) { +} + +// Any 1-time setup involving member variables used in the rest of the test +// should be done here. +hsa_status_t MemoryTest::SetUp(void) { + hsa_status_t err = HSA_STATUS_SUCCESS; + + TestBase::SetUp(); + + err = SetDefaultAgents(this); + if ( err != HSA_STATUS_SUCCESS) return err; + + err = SetPoolsTypical(this); + return err; +} + +void MemoryTest::Run(void) { + // Compare required profile for this test case with what we're actually + // running on + if (!CheckProfile(this)) { + return; + } + + TestBase::Run(); +} + +void MemoryTest::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void MemoryTest::DisplayResults(void) const { + // Compare required profile for this test case with what we're actually + // running on + if (!CheckProfile(this)) { + return; + } + + return; +} + +void MemoryTest::Close() { + // This will close handles opened within rocrtst utility calls and call + // hsa_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +hsa_status_t MemoryTest::TestAllocate(hsa_amd_memory_pool_t pool, size_t sz) { + void *ptr; + hsa_status_t err; + + err = hsa_amd_memory_pool_allocate(pool, sz, 0, &ptr); + + if (err == HSA_STATUS_SUCCESS) { + err = hsa_memory_free(ptr); + } + + return err; +} + +static const char kSubTestSeparator[] = " **************************"; + +static void PrintMemorySubtestHeader(const char *header) { + RDC_LOG(RDC_DEBUG, " *** Memory Subtest: " << header << " ***"); +} + +// Test Fixtures +hsa_status_t MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag, + hsa_amd_memory_pool_t pool) { + hsa_status_t err = HSA_STATUS_SUCCESS; + + pool_info_t pool_i; + char ag_name[64]; + hsa_device_type_t ag_type; + + err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NAME, ag_name); + if (err != HSA_STATUS_SUCCESS) return err; + + err = hsa_agent_get_info(ag, HSA_AGENT_INFO_DEVICE, &ag_type); + if (err != HSA_STATUS_SUCCESS) return err; + + uint32_t node = 0; + err = hsa_agent_get_info(ag, HSA_AGENT_INFO_NODE, &node); + if (err != HSA_STATUS_SUCCESS) return err; + + if (verbosity() > 0) { + std::string device_type; + switch (ag_type) { + case HSA_DEVICE_TYPE_CPU: + device_type = "CPU"; + break; + case HSA_DEVICE_TYPE_GPU: + device_type = "GPU"; + break; + case HSA_DEVICE_TYPE_DSP: + device_type = "DSP"; + break; + } + RDC_LOG(RDC_DEBUG, " Agent: " << ag_name << " Node " << node << " (" + << device_type << ")"); + } + + err = AcquirePoolInfo(pool, &pool_i); + if (err != HSA_STATUS_SUCCESS) return err; + + if (verbosity() > 0) { + DumpMemoryPoolInfo(&pool_i, 2); + } + + if (!pool_i.alloc_allowed || pool_i.alloc_granule == 0 || + pool_i.alloc_alignment == 0) { + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, " Test not applicable. Skipping."); + } + return err; + } + // Do everything in "granule" units + auto gran_sz = pool_i.alloc_granule; + auto pool_sz = pool_i.aggregate_alloc_max / gran_sz; + + // Neg. test: Try to allocate more than the pool size + err = TestAllocate(pool, pool_sz*gran_sz + gran_sz); + if (err != HSA_STATUS_ERROR_INVALID_ALLOCATION) return err; + + auto max_alloc_size = pool_sz/2; + uint64_t upper_bound = pool_sz; + uint64_t lower_bound = 0; + + while (true) { + err = TestAllocate(pool, max_alloc_size * gran_sz); + + if (err != HSA_STATUS_SUCCESS || + err != HSA_STATUS_ERROR_OUT_OF_RESOURCES) return err; + + if (err == HSA_STATUS_SUCCESS) { + lower_bound = max_alloc_size; + max_alloc_size += (upper_bound - lower_bound)/2; + } else if (err == HSA_STATUS_ERROR_OUT_OF_RESOURCES) { + upper_bound = max_alloc_size; + max_alloc_size -= (upper_bound - lower_bound)/2; + } + + if ((upper_bound - lower_bound) < 2) { + break; + } + + if (upper_bound <= lower_bound) { + RDC_LOG(RDC_ERROR, "Wrong upper bound and lower bound"); + return err; + } + } + + if (verbosity() > 0) { + RDC_LOG(RDC_DEBUG, " Biggest single allocation size for this pool is " << + (max_alloc_size * gran_sz)/1024 << "KB."); + RDC_LOG(RDC_DEBUG, " This is " << + static_cast(max_alloc_size)/pool_sz*100 << + "% of the total."); + } + + if (ag_type == HSA_DEVICE_TYPE_GPU) { + if ((float)max_alloc_size/pool_sz < (float)15/16) { + RDC_LOG(RDC_ERROR, "the allocate size is wrong"); + throw_if_error(HSA_STATUS_ERROR, "The allocate size is wrong"); + } + // EXPECT_GE((float)max_alloc_size/pool_sz, (float)15/16); + } + if (verbosity() > 0) { + std::cout << kSubTestSeparator << std::endl; + } + + return err; +} + +hsa_status_t MemoryTest::MaxSingleAllocationTest(void) { + hsa_status_t err = HSA_STATUS_SUCCESS; + std::vector> agent_pools; + + PrintMemorySubtestHeader("Maximum Single Allocation in Memory Pools"); + + err = GetAgentPools(&agent_pools); + throw_if_error(err, "GetAgentPools pool fail."); + + hsa_agent_t current_gpu; + err = get_agent_by_gpu_index(gpu_index_, ¤t_gpu); + throw_if_error(err, "Get agent by GPU index fail."); + + auto pool_idx = 0; + for (auto a : agent_pools) { + if (a->agent.handle != current_gpu.handle) + continue; + for (auto p : a->pools) { + pool_idx++; + RDC_LOG(RDC_DEBUG, " Pool " << pool_idx << ":"); + err = MaxSingleAllocationTest(a->agent, p); + throw_if_error(err, "MaxSingleAllocationTest ."); + per_gpu_info_ += title(); + per_gpu_info_ += " Pool "; + per_gpu_info_ += std::to_string(pool_idx); + per_gpu_info_ += " test pass. "; + } + } + gpu_info_ += title(); + gpu_info_ += " for GPU "; + gpu_info_ += std::to_string(gpu_index_); + gpu_info_ += " Pass. "; + + return err; +} + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc new file mode 100644 index 0000000000..f91b1034f3 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcDiagnosticLib.cc @@ -0,0 +1,198 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include "rdc_lib/rdc_common.h" +#include "rdc_modules/rdc_rocr/common.h" +#include "rdc_modules/rdc_rocr/RdcDiagnosticLib.h" +#include "rdc_modules/rdc_rocr/MemoryTest.h" +#include "rdc_modules/rdc_rocr/MemoryAccess.h" +#include "rdc_modules/rdc_rocr/ComputeQueueTest.h" + +rdc_status_t rdc_diag_init(uint64_t) { + return RDC_ST_OK; +} + +rdc_status_t rdc_diag_destroy() { + return RDC_ST_OK; +} + +rdc_status_t rdc_diag_test_cases_query( + rdc_diag_test_cases_t test_cases[MAX_TEST_CASES], + uint32_t* test_case_count) { + if (test_case_count == nullptr) { + return RDC_ST_BAD_PARAMETER; + } + + *test_case_count = 2; + test_cases[0] = RDC_DIAG_COMPUTE_QUEUE; + test_cases[1] = RDC_DIAG_SYS_MEM_CHECK; + + return RDC_ST_OK; +} + +// Helper function to run the memory test on GPU +static rdc_status_t run_memory_test(uint32_t gpu_index, + rdc_diag_test_result_t* result) { + std::string info = result->info; + std::string per_gpu_info = result->gpu_results[gpu_index].gpu_result.msg; + + try { + amd::rdc::MemoryTest test(gpu_index); + test.MaxSingleAllocationTest(); + + info += test.get_gpu_info(); + per_gpu_info += test.get_per_gpu_info(); + } catch (const amd::rdc::SkipException& e) { + result->status = RDC_DIAG_RESULT_SKIP; + per_gpu_info += "MaxSingleAllocationTest is skipped: "; + per_gpu_info += e.what(); + info += "GPU "; + info += std::to_string(gpu_index); + info += " MaxSingleAllocationTest is skipped: "; + info += e.what(); + info += "."; + } catch (const std::exception& e) { + result->status = RDC_DIAG_RESULT_FAIL; + per_gpu_info += "MaxSingleAllocationTest returns with error "; + per_gpu_info += e.what(); + info += "GPU "; + info += std::to_string(gpu_index); + info += " MaxSingleAllocationTest returns with error "; + info += e.what(); + info += "."; + } + + try { + amd::rdc::MemoryAccessTest test(gpu_index); + test.CPUAccessToGPUMemoryTest(); + test.GPUAccessToCPUMemoryTest(); + info += test.get_gpu_info(); + per_gpu_info += test.get_per_gpu_info(); + } catch (const amd::rdc::SkipException& e) { + result->status = RDC_DIAG_RESULT_SKIP; + per_gpu_info += "Memory Access is skipped: "; + per_gpu_info += e.what(); + info += "GPU "; + info += std::to_string(gpu_index); + info += " Memory Access is skipped: "; + info += e.what(); + info += "."; + } catch (const std::exception& e) { + result->status = RDC_DIAG_RESULT_FAIL; + per_gpu_info += "Memory Access returns with error "; + per_gpu_info += e.what(); + info += "GPU "; + info += std::to_string(gpu_index); + info += " Memory Access returns with error "; + info += e.what(); + info += "."; + } + + strncpy_with_null(result->info, info.c_str(), + MAX_DIAG_MSG_LENGTH); + strncpy_with_null(result->gpu_results[gpu_index].gpu_result.msg, + per_gpu_info.c_str(), MAX_DIAG_MSG_LENGTH); + + return RDC_ST_OK; +} + + +static rdc_status_t run_compute_queue_test(uint32_t gpu_index, + rdc_diag_test_result_t* result) { + std::string info = result->info; + std::string per_gpu_info = result->gpu_results[gpu_index].gpu_result.msg; + + try { + amd::rdc::ComputeQueueTest test(gpu_index); + test.RunBinarySearchTest(); + info += test.get_gpu_info(); + per_gpu_info += test.get_per_gpu_info(); + } catch (const amd::rdc::SkipException& e) { + result->status = RDC_DIAG_RESULT_SKIP; + per_gpu_info += "Compute Queue test is skipped: "; + per_gpu_info += e.what(); + info += "GPU "; + info += std::to_string(gpu_index); + info += " Compute Queue test is skipped: "; + info += e.what(); + info += "."; + } catch (const std::exception& e) { + result->status = RDC_DIAG_RESULT_FAIL; + per_gpu_info += "Compute Queue test returns with error "; + per_gpu_info += e.what(); + info += "GPU "; + info += std::to_string(gpu_index); + info += " Compute Queue test returns with error "; + info += e.what(); + info += "."; + } + + strncpy_with_null(result->info, info.c_str(), + MAX_DIAG_MSG_LENGTH); + strncpy_with_null(result->gpu_results[gpu_index].gpu_result.msg, + per_gpu_info.c_str(), MAX_DIAG_MSG_LENGTH); + + return RDC_ST_OK; +} + +rdc_status_t rdc_diag_test_case_run( + rdc_diag_test_cases_t test_case, + uint32_t gpu_index[RDC_MAX_NUM_DEVICES], + uint32_t gpu_count, + rdc_diag_test_result_t* result) { + if (result == nullptr || + gpu_count == 0 ) { + return RDC_ST_BAD_PARAMETER; + } + + if (test_case != RDC_DIAG_COMPUTE_QUEUE && + test_case != RDC_DIAG_SYS_MEM_CHECK) { + return RDC_ST_OK; + } + + // init the return data + *result = {}; + result->test_case = test_case; + result->status = RDC_DIAG_RESULT_PASS; + result->per_gpu_result_count = 0; + + // Run test for each GPU. It will continue even + // if one GPU test is fail. + for (uint32_t i = 0; i < gpu_count; i++) { + switch (test_case) { + case RDC_DIAG_SYS_MEM_CHECK: + run_memory_test(gpu_index[i], result); + break; + case RDC_DIAG_COMPUTE_QUEUE: + run_compute_queue_test(gpu_index[i], result); + break; + default: + result->status = RDC_DIAG_RESULT_SKIP; + strncpy_with_null(result->info, "Not support yet" + , MAX_DIAG_MSG_LENGTH); + } + } + + return RDC_ST_OK; +} diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcRocrBase.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcRocrBase.cc new file mode 100644 index 0000000000..29dcc8b792 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/RdcRocrBase.cc @@ -0,0 +1,52 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "rdc_modules/rdc_rocr/RdcRocrBase.h" +#include + +namespace amd { +namespace rdc { + +RdcRocrBase::RdcRocrBase(void) { + num_iteration_ = 1; + cpu_device_.handle = -1; + gpu_device1_.handle = -1; + device_pool_.handle = 0; + kern_arg_pool_.handle = 0; + main_queue_ = nullptr; + kernarg_buffer_ = nullptr; + kernel_object_ = 0; + memset(&aql_, 0, sizeof(aql_)); + set_requires_profile(-1); + set_enable_interrupt(false); + set_kernel_file_name(""); + set_verbosity(1); + set_monitor_verbosity(0); + set_title("unset_title"); + orig_hsa_enable_interrupt_ = nullptr; +} + +RdcRocrBase::~RdcRocrBase() { +} + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/TestBase.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/TestBase.cc new file mode 100755 index 0000000000..c769780bfb --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/TestBase.cc @@ -0,0 +1,139 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include "rdc_modules/rdc_rocr/TestBase.h" +#include "rdc_modules/rdc_rocr/base_rocr_utils.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" + +namespace amd { +namespace rdc { + +static const int kOutputLineLength = 80; +static const char kLabelDelimiter[] = "####"; +static const char kDescriptionLabel[] = "TEST DESCRIPTION"; +static const char kTitleLabel[] = "TEST NAME"; +static const char kSetupLabel[] = "TEST SETUP"; +static const char kRunLabel[] = "TEST EXECUTION"; +static const char kCloseLabel[] = "TEST CLEAN UP"; +static const char kResultsLabel[] = "TEST RESULTS"; + + +TestBase::TestBase(uint32_t gpu_index): + gpu_index_(gpu_index), description_("") { + SetUp(); +} +TestBase::~TestBase() { + Close(); +} + +static void MakeHeaderStr(const char *inStr, std::string *outStr) { + assert(outStr != nullptr); + assert(inStr != nullptr); + + outStr->clear(); + *outStr = kLabelDelimiter; + *outStr += " "; + *outStr += inStr; + *outStr += " "; + *outStr += kLabelDelimiter; +} + +hsa_status_t TestBase::SetUp(void) { + hsa_status_t err = HSA_STATUS_SUCCESS; + std::string label; + MakeHeaderStr(kSetupLabel, &label); + RDC_LOG(RDC_DEBUG, label); + + err = InitAndSetupHSA(this); + + return err; +} + +void TestBase::Run(void) { + std::string label; + MakeHeaderStr(kRunLabel, &label); + RDC_LOG(RDC_DEBUG, label); +} + +void TestBase::Close(void) { + hsa_status_t err; + std::string label; + MakeHeaderStr(kCloseLabel, &label); + RDC_LOG(RDC_DEBUG, label); + + err = CommonCleanUp(this); + throw_if_error(err); +} + + +void TestBase::DisplayResults(void) const { + std::string label; + MakeHeaderStr(kResultsLabel, &label); + printf("\n\t%s\n", label.c_str()); +} + +void TestBase::DisplayTestInfo(void) { + printf("#########################################" + "######################################\n"); + + std::string label; + MakeHeaderStr(kTitleLabel, &label); + printf("\n\t%s\n%s\n", label.c_str(), title().c_str()); + + if (verbosity() >= VERBOSE_STANDARD) { + MakeHeaderStr(kDescriptionLabel, &label); + printf("\n\t%s\n%s\n", label.c_str(), description().c_str()); + } +} + +void TestBase::set_description(std::string d) { + int le = kOutputLineLength - 4; + + description_ = d; + size_t endlptr; + + for (size_t i = le; i < description_.size(); i += le) { + endlptr = description_.find_last_of(" ", i); + description_.replace(endlptr, 1, "\n"); + i = endlptr; + } +} + +hsa_status_t TestBase::get_agent_by_gpu_index(uint32_t gpu_index, + hsa_agent_t* agent) { + hsa_status_t err = HSA_STATUS_SUCCESS; + std::vector gpus; + err = hsa_iterate_agents(IterateGPUAgents, &gpus); + throw_if_error(err, "Fail to iterate agents."); + if (gpu_index >= gpus.size()) { + throw_if_error(err, "GPU index is too large."); + } + *agent = gpus[gpu_index]; + return err; +} + +} // namespace rdc +} // namespace amd + diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/base_rocr_utils.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/base_rocr_utils.cc new file mode 100755 index 0000000000..944f826682 --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/base_rocr_utils.cc @@ -0,0 +1,568 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#include "rdc_modules/rdc_rocr/base_rocr_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "hsa/hsa.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" + +namespace amd { +namespace rdc { + + +// Clean up some of the common handles and memory used by RdcRocrBase code, then +// shut down hsa. Restore HSA_ENABLE_INTERRUPT to original value, if necessary +hsa_status_t CommonCleanUp(RdcRocrBase* test) { + hsa_status_t err; + + assert(test != nullptr); + + if (nullptr != test->kernarg_buffer()) { + err = hsa_amd_memory_pool_free(test->kernarg_buffer()); + throw_if_error(err); + test->set_kernarg_buffer(nullptr); + } + + if (nullptr != test->main_queue()) { + err = hsa_queue_destroy(test->main_queue()); + throw_if_error(err); + test->set_main_queue(nullptr); + } + + if (test->aql().completion_signal.handle != 0) { + err = hsa_signal_destroy(test->aql().completion_signal); + throw_if_error(err); + } + + err = hsa_shut_down(); + throw_if_error(err); + + // Ensure that HSA is actually closed. + hsa_status_t check = hsa_shut_down(); + if (check != HSA_STATUS_ERROR_NOT_INITIALIZED) { + RDC_LOG(RDC_ERROR, "hsa_init reference count was too high."); + return HSA_STATUS_ERROR; + } + + std::string intr_val; + + if (test->orig_hsa_enable_interrupt() == nullptr) { + intr_val = ""; + } else { + intr_val = test->orig_hsa_enable_interrupt(); + } + + SetEnv("HSA_ENABLE_INTERRUPT", intr_val.c_str()); + + return err; +} + +static const char* PROFILE_STR[] = {"HSA_PROFILE_BASE", "HSA_PROFILE_FULL", }; + +/// Verify that the machine running the test has the required profile. +/// This function will verify that the execution machine meets any specific +/// test requirement for a profile (HSA_PROFILE_BASE or HSA_PROFILE_FULL). +/// \param[in] test Test that provides profile requirements. +/// \returns bool +/// - true Machine meets test requirements +/// - false Machine does not meet test requirements +bool CheckProfileAndInform(RdcRocrBase* test) { + if (test->verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "Target HW Profile is " + << PROFILE_STR[test->profile()]); + } + + if (test->requires_profile() == -1) { + if (test->verbosity() > 0) { + RDC_LOG(RDC_DEBUG, "Test can run on any profile. OK."); + } + return true; + } else { + RDC_LOG(RDC_DEBUG, "Test requires " << PROFILE_STR[test->requires_profile()] + << ". "); + if (test->requires_profile() != test->profile()) { + RDC_LOG(RDC_DEBUG, "Not Running."); + return false; + } else { + RDC_LOG(RDC_DEBUG, "OK."); + return true; + } + } +} + +/// Helper function to process error returned from +/// iterate function like hsa_amd_agent_iterate_memory_pools +/// \param[in] Error returned from iterate call +/// \returns HSA_STATUS_SUCCESS iff iterate call succeeds in finding +/// what was being searched for +static hsa_status_t ProcessIterateError(hsa_status_t err) { + if (err == HSA_STATUS_INFO_BREAK) { + err = HSA_STATUS_SUCCESS; + } else if (err == HSA_STATUS_SUCCESS) { + // This actually means no pool was found. + err = HSA_STATUS_ERROR; + } + return err; +} + +// Find pools for cpu, gpu and for kernel arguments. These pools have +// common basic requirements, but are not suitable for all cases. In +// that case, set cpu_pool(), device_pool() and/or kern_arg_pool() +// yourself instead of using this function. +hsa_status_t SetPoolsTypical(RdcRocrBase* test) { + hsa_status_t err; + if (test->profile() == HSA_PROFILE_FULL) { + err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(), + FindAPUStandardPool, &test->cpu_pool()); + throw_if_error(ProcessIterateError(err)); + + err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(), + FindAPUStandardPool, &test->device_pool()); + throw_if_error(ProcessIterateError(err)); + + err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(), + FindAPUStandardPool, &test->kern_arg_pool()); + throw_if_error(ProcessIterateError(err)); + + } else { + err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(), + FindStandardPool, &test->cpu_pool()); + throw_if_error(ProcessIterateError(err)); + + err = hsa_amd_agent_iterate_memory_pools(*test->gpu_device1(), + FindStandardPool, &test->device_pool()); + throw_if_error(ProcessIterateError(err)); + + err = hsa_amd_agent_iterate_memory_pools(*test->cpu_device(), + FindKernArgPool, &test->kern_arg_pool()); + throw_if_error(ProcessIterateError(err)); + } + + return HSA_STATUS_SUCCESS; +} + +// Enable interrupts if necessary, and call hsa_init() +hsa_status_t InitAndSetupHSA(RdcRocrBase* test) { + hsa_status_t err; + + if (test->enable_interrupt()) { + SetEnv("HSA_ENABLE_INTERRUPT", "1"); + } + + err = hsa_init(); + throw_if_error(err); + + return HSA_STATUS_SUCCESS; +} + +// Attempt to find and set test->cpu_device and test->gpu_device1 +hsa_status_t SetDefaultAgents(RdcRocrBase* test) { + hsa_agent_t gpu_device1; + hsa_agent_t cpu_device; + hsa_status_t err; + + gpu_device1.handle = 0; + err = hsa_iterate_agents(FindGPUDevice, &gpu_device1); + throw_if_error(ProcessIterateError(err)); + test->set_gpu_device1(gpu_device1); + + cpu_device.handle = 0; + err = hsa_iterate_agents(FindCPUDevice, &cpu_device); + throw_if_error(ProcessIterateError(err)); + test->set_cpu_device(cpu_device); + + if (0 == gpu_device1.handle) { + RDC_LOG(RDC_ERROR, "GPU Device is not Created properly!"); + throw_if_error(HSA_STATUS_ERROR, "GPU Device is not Created properly!"); + } + + if (0 == cpu_device.handle) { + RDC_LOG(RDC_ERROR, "CPU Device is not Created properly!"); + throw_if_error(HSA_STATUS_ERROR, "CPU Device is not Created properly!"); + } + + if (test->verbosity() > 0) { + char name[64] = {0}; + err = hsa_agent_get_info(gpu_device1, HSA_AGENT_INFO_NAME, name); + throw_if_error(err); + RDC_LOG(RDC_DEBUG, "The gpu device name is " << name); + } + + hsa_profile_t profile; + err = hsa_agent_get_info(gpu_device1, HSA_AGENT_INFO_PROFILE, &profile); + throw_if_error(err); + test->set_profile(profile); + + if (!CheckProfileAndInform(test)) { + return HSA_STATUS_ERROR; + } + return HSA_STATUS_SUCCESS; +} + +// See if the profile of the target matches any required profile by the +// test program. +bool CheckProfile(RdcRocrBase const* test) { + if (test->requires_profile() == -1) { + return true; + } else { + return (test->requires_profile() == test->profile()); + } +} +// Load the specified kernel code from the specified file, inspect and fill +// in RdcRocrBase member variables related to the kernel and executable. +// Required Input RdcRocrBase member variables: +// - gpu_device1() +// - kernel_file_name() +// - kernel_name() +// +// Written RdcRocrBase member variables: +// -kernel_object() +// -private_segment_size() +// -group_segment_size() +// -kernarg_size() +// -kernarg_align() +hsa_status_t LoadKernelFromObjFile(RdcRocrBase* test, hsa_agent_t* agent) { + hsa_status_t err; + hsa_code_object_reader_t code_obj_rdr = {0}; + hsa_executable_t executable = {0}; + + assert(test != nullptr); + if (agent == nullptr) { + agent = test->gpu_device1(); // Assume GPU agent for now + } + + // if agent name is not set, then set the agent name + if (!test->get_agent_name().size()) { + char agent_name[64]; + err = hsa_agent_get_info(*agent, HSA_AGENT_INFO_NAME, agent_name); + throw_if_error(err); + test->set_agent_name(agent_name); + } + + std::string kern_name = test->kernel_name(); + std::string obj_file = search_hsaco_full_path( + test->kernel_file_name().c_str(), test->get_agent_name().c_str()); + if (obj_file == "") { + RDC_LOG(RDC_ERROR, "failed to find " << test->kernel_file_name() << + " at line " << __LINE__ << ", errno: " << errno); + std::string msg("fail to open "); + msg += test->kernel_file_name(); + throw_if_skip(msg); + return HSA_STATUS_ERROR; + } + + hsa_file_t file_handle = open(obj_file.c_str(), O_RDONLY); + + if (file_handle == -1) { + RDC_LOG(RDC_ERROR, "failed to open " << obj_file.c_str() << " at line " + << __LINE__ << ", file: " << __FILE__); + return (hsa_status_t) errno; + } + + err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); + throw_if_error(err); + close(file_handle); + + err = hsa_executable_create_alt(HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, &executable); + throw_if_error(err); + err = hsa_executable_load_agent_code_object(executable, *agent, code_obj_rdr, + NULL, NULL); + throw_if_error(err); + err = hsa_executable_freeze(executable, NULL); + throw_if_error(err); + + hsa_executable_symbol_t kern_sym; + err = hsa_executable_get_symbol(executable, NULL, (kern_name + ".kd").c_str(), *agent, + 0, &kern_sym); + throw_if_error(err); + + uint64_t codeHandle; + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &codeHandle); + throw_if_error(err); + test->set_kernel_object(codeHandle); + + uint32_t val; + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &val); + throw_if_error(err); + test->set_private_segment_size(val); + + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &val); + throw_if_error(err); + test->set_group_segment_size(val); + + // Remaining queries only supported on code object v3. + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &val); + throw_if_error(err); + test->set_kernarg_size(val); + + err = hsa_executable_symbol_get_info(kern_sym, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &val); + throw_if_error(err); + assert(val >= 16 && "Reported kernarg size is too small."); + val = (val == 0) ? 16 : val; + test->set_kernarg_align(val); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t CreateQueue(hsa_agent_t device, hsa_queue_t** queue, + uint32_t num_pkts) { + hsa_status_t err; + + if (num_pkts == 0) { + err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &num_pkts); + throw_if_error(err); + } + + err = hsa_queue_create(device, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, + NULL, UINT32_MAX, UINT32_MAX, queue); + throw_if_error(err); + + return HSA_STATUS_SUCCESS; +} +// Initialize the provided aql packet with standard default values, and +// values from provided RdcRocrBase object. +hsa_status_t InitializeAQLPacket(const RdcRocrBase* test, + hsa_kernel_dispatch_packet_t* aql) { + hsa_status_t err; + + assert(aql != nullptr); + + if (aql == nullptr) { + return HSA_STATUS_ERROR; + } + + // Initialize Packet type as Invalid + // Update packet type to Kernel Dispatch + // right before ringing doorbell + aql->header = 1; + + aql->setup = 1; + aql->workgroup_size_x = 256; + aql->workgroup_size_y = 1; + aql->workgroup_size_z = 1; + + aql->grid_size_x = (uint64_t) 256; // manual_input*group_input; workg max sz + aql->grid_size_y = 1; + aql->grid_size_z = 1; + + aql->private_segment_size = test->private_segment_size(); + + aql->group_segment_size = test->group_segment_size(); + + // Pin kernel code and the kernel argument buffer to the aql packet-> + aql->kernel_object = test->kernel_object(); + + // aql->kernarg_address may be filled in by AllocAndSetKernArgs() if it is + // called before this function, so we don't want overwrite it, therefore + // we ignore it in this function. + + err = hsa_signal_create(1, 0, NULL, &aql->completion_signal); + + return err; +} + +// Copy RdcRocrBase aql object values to the RdcRocrBase object queue in the +// specified queue position (ind) +hsa_kernel_dispatch_packet_t * WriteAQLToQueue(RdcRocrBase* test, uint64_t *ind) { + assert(test); + assert(test->main_queue()); + + void *queue_base = test->main_queue()->base_address; + const uint32_t queue_mask = test->main_queue()->size - 1; + uint64_t que_idx = hsa_queue_add_write_index_relaxed(test->main_queue(), 1); + *ind = que_idx; + + hsa_kernel_dispatch_packet_t* staging_aql_packet = &test->aql(); + hsa_kernel_dispatch_packet_t* queue_aql_packet; + + queue_aql_packet = + &(reinterpret_cast(queue_base)) + [que_idx & queue_mask]; + + queue_aql_packet->workgroup_size_x = staging_aql_packet->workgroup_size_x; + queue_aql_packet->workgroup_size_y = staging_aql_packet->workgroup_size_y; + queue_aql_packet->workgroup_size_z = staging_aql_packet->workgroup_size_z; + queue_aql_packet->grid_size_x = staging_aql_packet->grid_size_x; + queue_aql_packet->grid_size_y = staging_aql_packet->grid_size_y; + queue_aql_packet->grid_size_z = staging_aql_packet->grid_size_z; + queue_aql_packet->private_segment_size = + staging_aql_packet->private_segment_size; + queue_aql_packet->group_segment_size = + staging_aql_packet->group_segment_size; + queue_aql_packet->kernel_object = staging_aql_packet->kernel_object; + queue_aql_packet->kernarg_address = staging_aql_packet->kernarg_address; + queue_aql_packet->completion_signal = staging_aql_packet->completion_signal; + + return queue_aql_packet; +} + +void +WriteAQLToQueueLoc(hsa_queue_t *queue, uint64_t indx, + hsa_kernel_dispatch_packet_t *aql_pkt) { + assert(queue); + assert(aql_pkt); + + void *queue_base = queue->base_address; + const uint32_t queue_mask = queue->size - 1; + hsa_kernel_dispatch_packet_t* queue_aql_packet; + + queue_aql_packet = + &(reinterpret_cast(queue_base)) + [indx & queue_mask]; + + queue_aql_packet->workgroup_size_x = aql_pkt->workgroup_size_x; + queue_aql_packet->workgroup_size_y = aql_pkt->workgroup_size_y; + queue_aql_packet->workgroup_size_z = aql_pkt->workgroup_size_z; + queue_aql_packet->grid_size_x = aql_pkt->grid_size_x; + queue_aql_packet->grid_size_y = aql_pkt->grid_size_y; + queue_aql_packet->grid_size_z = aql_pkt->grid_size_z; + queue_aql_packet->private_segment_size = + aql_pkt->private_segment_size; + queue_aql_packet->group_segment_size = + aql_pkt->group_segment_size; + queue_aql_packet->kernel_object = aql_pkt->kernel_object; + queue_aql_packet->kernarg_address = aql_pkt->kernarg_address; + queue_aql_packet->completion_signal = aql_pkt->completion_signal; +} + +// Allocate a buffer in the kern_arg_pool for the kernel arguments and write +// the arguments to buffer +hsa_status_t AllocAndSetKernArgs(RdcRocrBase* test, void* args, size_t arg_size) { + void* kern_arg_buf = nullptr; + hsa_status_t err; + size_t buf_size; + size_t req_align; + assert(args != nullptr); + assert(test != nullptr); + + req_align = test->kernarg_align(); + // Allocate enough extra space for alignment adjustments if ncessary + buf_size = arg_size + (req_align << 1); + + err = hsa_amd_memory_pool_allocate(test->kern_arg_pool(), buf_size, 0, + reinterpret_cast(&kern_arg_buf)); + throw_if_error(err); + + test->set_kernarg_buffer(kern_arg_buf); + + void *adj_kern_arg_buf = AlignUp(kern_arg_buf, req_align); + + assert(arg_size >= test->kernarg_size()); + assert(((uintptr_t)adj_kern_arg_buf + arg_size) < + ((uintptr_t)kern_arg_buf + buf_size)); + + hsa_agent_t ag_list[2] = {*test->gpu_device1(), *test->cpu_device()}; + err = hsa_amd_agents_allow_access(2, ag_list, NULL, kern_arg_buf); + throw_if_error(err); + + err = hsa_memory_copy(adj_kern_arg_buf, args, arg_size); + throw_if_error(err); + + test->aql().kernarg_address = adj_kern_arg_buf; + + return HSA_STATUS_SUCCESS; +} + +std::string get_lib_dir(const char* lib_name) { + std::string result; + char line[1024*8]; + + FILE* file = fopen("/proc/self/maps", "r"); + if (file == NULL) + return result; + std::string lib_path = "/"; + lib_path += lib_name; + // 7f4eacb46000 r-xp 00000 08:01 17183106 /lib/x86_64-linux-gnu/libc-2.27.so + while (fgets(line, sizeof(line), file)) { + char* end = strstr(line, lib_path.c_str()); + if (end != NULL) { + char* start = end; + while (start > line) { + if (isspace(*start)) { + start++; + break; + } + start--; + } + result = std::string(start, end-start); + break; + } + } + fclose(file); + + return result; +} + +std::string get_app_dir() { + char buf[1024*8]; + int ret = readlink("/proc/self/exe", buf, 1024*8); + if ((ret != -1) && ret < (1024*8 - 1)) { + buf[ret] = '\0'; + return dirname(buf); + } + return ""; +} + +std::string search_hsaco_full_path(const char* hsaco_file_name, + const char* agent_name) { + const std::string lib_dir = get_lib_dir("librdc_rocr.so"); + const std::string app_dir = get_app_dir(); + + std::vector path_to_search; + path_to_search.push_back(std::string("./")+hsaco_file_name); + path_to_search.push_back(app_dir+"/"+hsaco_file_name); + path_to_search.push_back(lib_dir+"/"+hsaco_file_name); + path_to_search.push_back(lib_dir+"/hsaco/"+ agent_name + + "/" + hsaco_file_name); + // for dev structure + path_to_search.push_back(lib_dir+"/../../rdc_libs/rdc_modules/kernels/hsaco/" + + agent_name + "/" + hsaco_file_name); + for (std::size_t i = 0; i < path_to_search.size(); i++) { + if ( ::access(path_to_search[i].c_str(), F_OK) == 0 ) { + RDC_LOG(RDC_DEBUG, "Use the file " << path_to_search[i]); + return path_to_search[i]; + } + RDC_LOG(RDC_DEBUG, "Skip not exists file " << path_to_search[i]); + } + return ""; +} + + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/common.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/common.cc new file mode 100755 index 0000000000..634702a8ae --- /dev/null +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocr/common.cc @@ -0,0 +1,527 @@ +/* +Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/// \file +/// Implementation of utility functions used by RocR applications +#include "rdc_modules/rdc_rocr/common.h" +#include +#include +#include +#include +#include +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" + +namespace amd { +namespace rdc { + +void throw_if_error(hsa_status_t err, const std::string& msg) { + if (err != HSA_STATUS_SUCCESS) { + const char* errstr = 0; + hsa_status_string(err, &errstr); + throw std::runtime_error(msg + " hsa error code: " + + std::to_string(err) + " " + errstr); + } +} + +void throw_if_skip(const std::string& msg) { + throw SkipException(msg.c_str()); +} + +void SetEnv(const char* env_var_name, const char* env_var_value) { + int err = setenv(env_var_name, env_var_value, 1); + + if (0 != err) { + RDC_LOG(RDC_ERROR, "Set environment variable failed!"); + throw_if_error(HSA_STATUS_ERROR, "Set environment variable failed"); + } +} + +intptr_t +AlignDown(intptr_t value, size_t alignment) { + assert(alignment != 0 && "Zero alignment"); + return (intptr_t) (value & ~(alignment - 1)); +} + +void * +AlignDown(void* value, size_t alignment) { + return reinterpret_cast(AlignDown( + reinterpret_cast(value), alignment)); +} + +void * +AlignUp(void* value, size_t alignment) { + return reinterpret_cast( + AlignDown((uintptr_t)(reinterpret_cast(value) + alignment - 1), + alignment)); +} + + +static hsa_status_t FindAgent(hsa_agent_t agent, void* data, + hsa_device_type_t dev_type) { + assert(data != nullptr); + + if (data == nullptr) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + hsa_device_type_t hsa_device_type; + hsa_status_t hsa_error_code = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, + &hsa_device_type); + throw_if_error(hsa_error_code); + + if (hsa_device_type == dev_type) { + *(reinterpret_cast(data)) = agent; + return HSA_STATUS_INFO_BREAK; + } + + return HSA_STATUS_SUCCESS; +} + +// Find CPU Agents +hsa_status_t IterateCPUAgents(hsa_agent_t agent, void *data) { + hsa_status_t status; + assert(data != nullptr); + if (data == nullptr) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + std::vector* cpus = static_cast*>(data); + hsa_device_type_t device_type; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); + throw_if_error(status); + if (HSA_STATUS_SUCCESS == status && HSA_DEVICE_TYPE_CPU == device_type) { + cpus->push_back(agent); + } + return status; +} + + + +// Find GPU Agents +hsa_status_t IterateGPUAgents(hsa_agent_t agent, void *data) { + hsa_status_t status; + assert(data != nullptr); + if (data == nullptr) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + std::vector* gpus = static_cast*>(data); + hsa_device_type_t device_type; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); + throw_if_error(status); + if (HSA_STATUS_SUCCESS == status && HSA_DEVICE_TYPE_GPU == device_type) { + gpus->push_back(agent); + } + return status; +} + +// Find coarse grained system memory. +hsa_status_t GetGlobalMemoryPool(hsa_amd_memory_pool_t pool, void* data) { + hsa_amd_segment_t segment; + hsa_status_t err; + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_SEGMENT, + &segment); + if (HSA_AMD_SEGMENT_GLOBAL != segment) + return err; + + hsa_amd_memory_pool_global_flag_t flags; + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, + &flags); + throw_if_error(err); + + // this is valid for dGPUs. But on APUs, it has to be FINE_GRAINED + if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) { + hsa_amd_memory_pool_t* ret = + reinterpret_cast(data); + *ret = pool; + } else { // this is for APUs + if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) { + hsa_amd_memory_pool_t* ret = + reinterpret_cast(data); + *ret = pool; + } + } + return err; +} + +// Find a memory pool that can be used for kernarg locations. +hsa_status_t GetKernArgMemoryPool(hsa_amd_memory_pool_t pool, void* data) { + hsa_status_t err; + if (nullptr == data) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + hsa_amd_segment_t segment; + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_SEGMENT, + &segment); + throw_if_error(err); + if (HSA_AMD_SEGMENT_GLOBAL != segment) { + return HSA_STATUS_SUCCESS; + } + + hsa_amd_memory_pool_global_flag_t flags; + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, + &flags); + throw_if_error(err); + + if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) { + hsa_amd_memory_pool_t* ret = + reinterpret_cast(data); + *ret = pool; + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) { + return FindAgent(agent, data, HSA_DEVICE_TYPE_GPU); +} + +hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) { + return FindAgent(agent, data, HSA_DEVICE_TYPE_CPU); +} + +/// Ennumeration that indicates whether a pool property must be present or not. +/// This is meant to be used by FindPool +typedef enum { + POOL_PROP_OFF = 0, ///< The property must be present. + POOL_PROP_ON, ///< The property must not be present. + POOL_PROP_DONT_CARE ///< We don't care if the property is present or not. +} pool_prop_t; + +static hsa_status_t +FindPool(hsa_amd_memory_pool_t pool, void* data, hsa_amd_segment_t in_segment, + pool_prop_t accessible_by_all, pool_prop_t kern_arg, + pool_prop_t fine_grain) { + if (nullptr == data) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + hsa_status_t err; + hsa_amd_segment_t segment; + uint32_t flag; + + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, + &segment); + throw_if_error(err); + + if (in_segment != segment) { + return HSA_STATUS_SUCCESS; + } + + if (HSA_AMD_SEGMENT_GLOBAL == in_segment) { + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + throw_if_error(err); + + if (kern_arg != POOL_PROP_DONT_CARE) { + uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; + if ((karg_st == 0 && kern_arg == POOL_PROP_ON) || + (karg_st != 0 && kern_arg == POOL_PROP_OFF)) { + return HSA_STATUS_SUCCESS; + } + } + if (fine_grain != POOL_PROP_DONT_CARE) { + uint32_t fg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED; + if ((fg_st == 0 && fine_grain == POOL_PROP_ON) || + (fg_st != 0 && fine_grain == POOL_PROP_OFF)) { + return HSA_STATUS_SUCCESS; + } + } + } + + if (accessible_by_all != POOL_PROP_DONT_CARE) { + bool access_read; + err = hsa_amd_memory_pool_get_info(pool, + (hsa_amd_memory_pool_info_t) + HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access_read); + throw_if_error(err); + + if (((!access_read) && accessible_by_all == POOL_PROP_ON) || + (access_read && (accessible_by_all == POOL_PROP_OFF))) { + return HSA_STATUS_SUCCESS; + } + } + + *(reinterpret_cast(data)) = pool; + return HSA_STATUS_INFO_BREAK; +} + +hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) { + return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE, + POOL_PROP_OFF, POOL_PROP_DONT_CARE); +} + +hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) { + return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE, + POOL_PROP_ON, POOL_PROP_DONT_CARE); +} +hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data) { + return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_ON, + POOL_PROP_OFF, POOL_PROP_DONT_CARE); +} + +hsa_status_t FindAPUStandardPool(hsa_amd_memory_pool_t pool, void* data) { + return FindPool(pool, data, HSA_AMD_SEGMENT_GLOBAL, POOL_PROP_DONT_CARE, + POOL_PROP_DONT_CARE, POOL_PROP_DONT_CARE); +} + +// Populate the vector with handles to all agents and pools +hsa_status_t +GetAgentPools(std::vector> *agent_pools) { + hsa_status_t err; + + assert(agent_pools != nullptr); + + auto save_agent = [](hsa_agent_t a, void *data)->hsa_status_t { + std::vector> *ag_vec; + hsa_status_t err; + assert(data != nullptr); + ag_vec = + reinterpret_cast> *>(data); + std::shared_ptr ag(new agent_pools_t); + ag->agent = a; + + + auto save_pool = [](hsa_amd_memory_pool_t p, void *data)->hsa_status_t { + assert(data != nullptr); + std::vector *p_list = + reinterpret_cast *>(data); + p_list->push_back(p); + + return HSA_STATUS_SUCCESS; + }; + + err = hsa_amd_agent_iterate_memory_pools(a, save_pool, + reinterpret_cast(&ag->pools)); + ag_vec->push_back(ag); + return err; + }; + + err = hsa_iterate_agents(save_agent, reinterpret_cast(agent_pools)); + return err; +} + +static hsa_status_t MakeGlobalFlagsString(const pool_info_t *pool_i, + std::string* out_str) { + uint32_t global_flag = pool_i->global_flag; + + assert(out_str != nullptr); + + *out_str = ""; + + std::vector < std::string > flags; + + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) { + flags.push_back("KERNARG"); + } + + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) { + flags.push_back("FINE GRAINED"); + } + + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & global_flag) { + flags.push_back("COARSE GRAINED"); + } + + if (flags.size() > 0) { + *out_str += flags[0]; + } + + for (size_t i = 1; i < flags.size(); i++) { + *out_str += ", " + flags[i]; + } + + return HSA_STATUS_SUCCESS; +} +static hsa_status_t DumpSegment(const pool_info_t *pool_i, + std::string const *ind_lvl) { + hsa_status_t err; + + RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Segment:"); + std::string seg_str = ""; + std::string tmp_str; + + switch (pool_i->segment) { + case HSA_AMD_SEGMENT_GLOBAL: + err = MakeGlobalFlagsString(pool_i, &tmp_str); + throw_if_error(err); + + seg_str += "GLOBAL; FLAGS: " + tmp_str; + break; + + case HSA_AMD_SEGMENT_READONLY: + seg_str += "READONLY"; + break; + + case HSA_AMD_SEGMENT_PRIVATE: + seg_str += "PRIVATE"; + break; + + case HSA_AMD_SEGMENT_GROUP: + seg_str += "GROUP"; + break; + + default: + RDC_LOG(RDC_DEBUG, "Not Supported"); + break; + } + + RDC_LOG(RDC_DEBUG, seg_str); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, + pool_info_t *pool_i) { + hsa_status_t err; + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag); + throw_if_error(err); + + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, + &pool_i->segment); + throw_if_error(err); + + // Get the size of the POOL + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, + &pool_i->size); + throw_if_error(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, + &pool_i->alloc_allowed); + throw_if_error(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, + &pool_i->alloc_granule); + throw_if_error(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, + &pool_i->alloc_alignment); + throw_if_error(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, + &pool_i->accessible_by_all); + throw_if_error(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE, + &pool_i->aggregate_alloc_max); + throw_if_error(err); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t DumpMemoryPoolInfo(const pool_info_t *pool_i, + uint32_t indent) { + std::string ind_lvl(indent, ' '); + + DumpSegment(pool_i, &ind_lvl); + + std::string sz_str = std::to_string(pool_i->size / 1024) + "KB"; + RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Size:" << sz_str); + + RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Allocatable:" + << (pool_i->alloc_allowed ? "TRUE" : "FALSE")); + + std::string gr_str = std::to_string(pool_i->alloc_granule / 1024) + "KB"; + RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Alloc Granule:" << gr_str); + + std::string al_str = + std::to_string(pool_i->alloc_alignment / 1024) + "KB"; + RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Alloc Alignment:" << al_str); + RDC_LOG(RDC_DEBUG, ind_lvl << " Pool Acessible by all:" << + (pool_i->accessible_by_all ? "TRUE" : "FALSE")); + + std::string agg_str = + std::to_string(pool_i->aggregate_alloc_max / 1024) + "KB"; + RDC_LOG(RDC_DEBUG, ind_lvl << "Pool Aggregate Alloc Size:" << agg_str); + + return HSA_STATUS_SUCCESS; +} + +static const char* Types[] = {"HSA_EXT_POINTER_TYPE_UNKNOWN", + "HSA_EXT_POINTER_TYPE_HSA", + "HSA_EXT_POINTER_TYPE_LOCKED", + "HSA_EXT_POINTER_TYPE_GRAPHICS", + "HSA_EXT_POINTER_TYPE_IPC" + }; + +hsa_status_t DumpPointerInfo(void* ptr) { + hsa_amd_pointer_info_t info; + hsa_agent_t* agents; + uint32_t count; + hsa_status_t err; + + err = hsa_amd_pointer_info(ptr, &info, malloc, &count, &agents); + throw_if_error(err); + + std::cout << "Info for ptr: " << ptr << std::endl; + std::cout << "CPU ptr: " << reinterpret_cast(info.hostBaseAddress) << + std::endl; + std::cout << "GPU ptr: " << reinterpret_cast(info.agentBaseAddress) + << std::endl; + std::cout << "Size: " << info.sizeInBytes << std::endl; + std::cout << "Type: " << Types[info.type] << std::endl; + std::cout << "UsrPtr " << reinterpret_cast(info.userData) << + std::endl; + std::cout << "Accessible by: "; + + for (uint32_t i = 0; i < count; i++) { + std::cout << agents[i].handle << " "; + } + + std::cout << " ;[EOM]" << std::endl; + free(agents); + return HSA_STATUS_SUCCESS; +} + + +/*! \brief Writes to the buffer and increments the write pointer to the + * buffer. Also, ensures that the argument is written to an + * aligned memory as specified. Return the new write pointer. + * + * @param dst The write pointer to the buffer + * @param src The source pointer + * @param size The size in bytes to copy + * @param alignment The alignment to follow while writing to the buffer + */ +#if 0 +inline void * +addArg(void * dst, const void* src, size_t size, uint32_t alignment) { + dst = rocrtst::AlignUp(dst, alignment); + ::memcpy(dst, src, size); + return dst + size; +} +#endif +#undef throw_if_error + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/rdci/src/RdciDiagSubSystem.cc b/projects/rdc/rdci/src/RdciDiagSubSystem.cc index cd871773e0..3dfecc5758 100644 --- a/projects/rdc/rdci/src/RdciDiagSubSystem.cc +++ b/projects/rdc/rdci/src/RdciDiagSubSystem.cc @@ -138,9 +138,7 @@ std::string RdciDiagSubSystem::get_test_name (rdc_diag_test_cases_t test_case) const { const std::map test_desc = { {RDC_DIAG_COMPUTE_PROCESS, "No compute process"}, - {RDC_DIAG_SDMA_QUEUE, "SDMA Queue ready"}, {RDC_DIAG_COMPUTE_QUEUE, "Compute Queue ready"}, - {RDC_DIAG_VRAM_CHECK, "VRAM check"}, {RDC_DIAG_SYS_MEM_CHECK, "System memory check"}, {RDC_DIAG_NODE_TOPOLOGY, "Node topology check"}, {RDC_DIAG_GPU_PARAMETERS, "GPU parameters check"},