2021-06-15 18:07:58 +05:30
|
|
|
/*
|
2022-06-13 11:05:02 +01:00
|
|
|
Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
2021-09-20 10:47:52 +05:30
|
|
|
|
2021-06-15 18:07:58 +05:30
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
|
all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2021-05-21 02:31:28 -07:00
|
|
|
#pragma once
|
2023-08-14 12:06:14 +05:30
|
|
|
#pragma clang diagnostic ignored "-Wsign-compare"
|
2021-04-08 14:09:19 +05:30
|
|
|
#include "hip_test_context.hh"
|
2023-06-21 21:27:36 +05:30
|
|
|
|
2025-12-03 16:53:17 +00:00
|
|
|
#include <catch2/catch_all.hpp>
|
2022-06-20 10:37:13 +01:00
|
|
|
#include <atomic>
|
|
|
|
|
#include <chrono>
|
2023-07-20 16:11:03 +01:00
|
|
|
#include <cstring>
|
|
|
|
|
#include <cstdlib>
|
2022-06-20 10:37:13 +01:00
|
|
|
#include <iostream>
|
|
|
|
|
#include <iomanip>
|
|
|
|
|
#include <mutex>
|
|
|
|
|
#include <cstdlib>
|
2023-06-21 21:27:36 +05:30
|
|
|
#include <thread>
|
2024-06-28 17:17:31 -04:00
|
|
|
#include "hip_test_features.hh"
|
2021-05-21 02:31:28 -07:00
|
|
|
|
2025-09-15 15:58:41 +02:00
|
|
|
#if HT_LINUX
|
|
|
|
|
#include <sys/resource.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-09-25 10:58:59 -04:00
|
|
|
#if !defined(__HIP_ATOMIC_BACKWARD_COMPAT)
|
|
|
|
|
#define __HIP_ATOMIC_BACKWARD_COMPAT 1
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-10-07 08:28:56 -07:00
|
|
|
#if HT_AMD
|
2025-09-25 10:58:59 -04:00
|
|
|
#if defined(__has_extension) && __has_extension(clang_atomic_attributes) && __HIP_ATOMIC_BACKWARD_COMPAT
|
|
|
|
|
#define HIP_TEST_ATOMIC_BACKWARD_COMPAT_MEMORY [[clang::atomic(fine_grained_memory, remote_memory)]]
|
|
|
|
|
#else
|
|
|
|
|
#define HIP_TEST_ATOMIC_BACKWARD_COMPAT_MEMORY
|
|
|
|
|
#endif
|
2025-10-07 08:28:56 -07:00
|
|
|
#elif HT_NVIDIA
|
|
|
|
|
#define HIP_TEST_ATOMIC_BACKWARD_COMPAT_MEMORY
|
|
|
|
|
#endif
|
2025-09-25 10:58:59 -04:00
|
|
|
|
2024-03-15 17:54:34 +01:00
|
|
|
#ifdef TEST_CLOCK_CYCLE
|
|
|
|
|
#define clock_function() clock64()
|
|
|
|
|
#else
|
|
|
|
|
#define clock_function() wall_clock64()
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-05-21 02:31:28 -07:00
|
|
|
#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
|
|
|
|
|
|
2025-08-11 16:53:04 -04:00
|
|
|
#define CHAR_BUF_SIZE 512
|
|
|
|
|
|
|
|
|
|
#define CONSOLE_PRINT(fmt, ...) \
|
|
|
|
|
do { \
|
|
|
|
|
std::printf(fmt "\n", ##__VA_ARGS__); \
|
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
|
// DEBUG_PRINT: If ENABLE_DEBUG is defined, prints immediately to console.
|
|
|
|
|
// Otherwise, uses Catch2 INFO() - debug messages will only appear if the test fails.
|
|
|
|
|
#if defined(ENABLE_DEBUG)
|
|
|
|
|
#define DEBUG_PRINT(fmt, ...) CONSOLE_PRINT("[DEBUG]: " fmt, ##__VA_ARGS__)
|
|
|
|
|
#else
|
|
|
|
|
#define DEBUG_PRINT(fmt, ...) \
|
|
|
|
|
do { \
|
|
|
|
|
char buf[CHAR_BUF_SIZE]; \
|
|
|
|
|
std::snprintf(buf, CHAR_BUF_SIZE, "[INFO]: " fmt, ##__VA_ARGS__); \
|
|
|
|
|
INFO(buf); \
|
|
|
|
|
} while (0)
|
|
|
|
|
#endif
|
|
|
|
|
|
2022-05-25 07:20:59 +01:00
|
|
|
// Not thread-safe
|
2021-06-15 18:07:58 +05:30
|
|
|
#define HIP_CHECK(error) \
|
2021-05-21 02:31:28 -07:00
|
|
|
{ \
|
|
|
|
|
hipError_t localError = error; \
|
|
|
|
|
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
2022-06-20 10:37:13 +01:00
|
|
|
INFO("Error: " << hipGetErrorString(localError) << "\n Code: " << localError \
|
|
|
|
|
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
|
|
|
|
<< "\n At line: " << __LINE__); \
|
2021-05-21 02:31:28 -07:00
|
|
|
REQUIRE(false); \
|
|
|
|
|
} \
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-11 14:09:37 -04:00
|
|
|
#define HIP_CHECK_IGNORED_RETURN(error, ignoredError) \
|
|
|
|
|
{ \
|
|
|
|
|
hipError_t localError = error; \
|
|
|
|
|
if ((localError == ignoredError)) { \
|
|
|
|
|
INFO("Skipped: " << hipGetErrorString(localError) << "\n Code: " << localError \
|
|
|
|
|
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
|
|
|
|
<< "\n At line: " << __LINE__); \
|
|
|
|
|
return; \
|
|
|
|
|
} \
|
|
|
|
|
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
|
|
|
|
INFO("Error: " << hipGetErrorString(localError) << "\n Code: " << localError \
|
|
|
|
|
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
|
|
|
|
<< "\n At line: " << __LINE__); \
|
|
|
|
|
REQUIRE(false); \
|
|
|
|
|
} \
|
|
|
|
|
}
|
|
|
|
|
|
2022-06-20 10:37:13 +01:00
|
|
|
// Threaded HIP_CHECKs
|
|
|
|
|
#define HIP_CHECK_THREAD(error) \
|
|
|
|
|
{ \
|
|
|
|
|
/*To see if error has occured in previous threads, stop execution */ \
|
|
|
|
|
if (TestContext::get().hasErrorOccured() == true) { \
|
|
|
|
|
return; /*This will only work with std::thread and not with std::async*/ \
|
|
|
|
|
} \
|
|
|
|
|
auto localError = error; \
|
|
|
|
|
HCResult result(__LINE__, __FILE__, localError, #error); \
|
|
|
|
|
TestContext::get().addResults(result); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define REQUIRE_THREAD(condition) \
|
|
|
|
|
{ \
|
|
|
|
|
/*To see if error has occured in previous threads, stop execution */ \
|
|
|
|
|
if (TestContext::get().hasErrorOccured() == true) { \
|
|
|
|
|
return; /*This will only work with std::thread and not with std::async*/ \
|
|
|
|
|
} \
|
|
|
|
|
auto localResult = (condition); \
|
|
|
|
|
HCResult result(__LINE__, __FILE__, hipSuccess, #condition, localResult); \
|
|
|
|
|
TestContext::get().addResults(result); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Do not call before all threads have joined
|
|
|
|
|
#define HIP_CHECK_THREAD_FINALIZE() \
|
|
|
|
|
{ TestContext::get().finalizeResults(); }
|
|
|
|
|
|
|
|
|
|
|
2022-05-25 07:20:59 +01:00
|
|
|
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
|
|
|
|
|
#define HIP_CHECK_ERROR(errorExpr, expectedError) \
|
|
|
|
|
{ \
|
|
|
|
|
hipError_t localError = errorExpr; \
|
|
|
|
|
INFO("Matching Errors: " \
|
2022-06-20 10:37:13 +01:00
|
|
|
<< "\n Expected Error: " << hipGetErrorString(expectedError) \
|
|
|
|
|
<< "\n Expected Code: " << expectedError << '\n' \
|
2022-05-25 07:20:59 +01:00
|
|
|
<< " Actual Error: " << hipGetErrorString(localError) \
|
2022-06-20 10:37:13 +01:00
|
|
|
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
|
|
|
|
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
2022-05-25 07:20:59 +01:00
|
|
|
REQUIRE(localError == expectedError); \
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-11 14:09:37 -04:00
|
|
|
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError or
|
|
|
|
|
// expectedError1.
|
|
|
|
|
#define HIP_CHECK_ERRORS(errorExpr, expectedError, expectedError1) \
|
|
|
|
|
{ \
|
|
|
|
|
hipError_t localError = errorExpr; \
|
|
|
|
|
INFO("Matching Errors: " \
|
|
|
|
|
<< "\n Expected Error: " << hipGetErrorString(expectedError) \
|
|
|
|
|
<< "\n Expected Code: " << expectedError << " or " << expectedError << '\n' \
|
|
|
|
|
<< " Actual Error: " << hipGetErrorString(localError) \
|
|
|
|
|
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
|
|
|
|
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
|
|
|
|
REQUIRE((localError == expectedError || localError == expectedError1)); \
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-25 07:20:59 +01:00
|
|
|
// Not thread-safe
|
2021-07-20 04:13:19 -07:00
|
|
|
#define HIPRTC_CHECK(error) \
|
|
|
|
|
{ \
|
|
|
|
|
auto localError = error; \
|
|
|
|
|
if (localError != HIPRTC_SUCCESS) { \
|
2022-06-20 10:37:13 +01:00
|
|
|
INFO("Error: " << hiprtcGetErrorString(localError) << "\n Code: " << localError \
|
|
|
|
|
<< "\n Str: " << #error << "\n In File: " << __FILE__ \
|
|
|
|
|
<< "\n At line: " << __LINE__); \
|
2021-07-20 04:13:19 -07:00
|
|
|
REQUIRE(false); \
|
|
|
|
|
} \
|
|
|
|
|
}
|
2022-05-25 07:20:59 +01:00
|
|
|
|
2023-11-26 19:44:41 +00:00
|
|
|
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
|
|
|
|
|
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
|
|
|
|
|
{ \
|
|
|
|
|
auto localError = errorExpr; \
|
|
|
|
|
INFO("Matching Errors: " \
|
|
|
|
|
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
|
|
|
|
|
<< "\n Expected Code: " << expectedError << '\n' \
|
|
|
|
|
<< " Actual Error: " << hiprtcGetErrorString(localError) \
|
|
|
|
|
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
|
|
|
|
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
|
|
|
|
REQUIRE(localError == expectedError); \
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-15 18:07:58 +05:30
|
|
|
// Although its assert, it will be evaluated at runtime
|
|
|
|
|
#define HIP_ASSERT(x) \
|
|
|
|
|
{ REQUIRE((x)); }
|
|
|
|
|
|
2021-07-06 23:48:24 +05:30
|
|
|
#define HIPCHECK(error) \
|
2022-05-25 07:20:59 +01:00
|
|
|
{ \
|
|
|
|
|
hipError_t localError = error; \
|
|
|
|
|
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
|
|
|
|
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), localError, \
|
|
|
|
|
#error, __FILE__, __LINE__); \
|
|
|
|
|
abort(); \
|
|
|
|
|
} \
|
|
|
|
|
}
|
2021-07-06 23:48:24 +05:30
|
|
|
|
2023-12-28 17:41:54 +00:00
|
|
|
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
|
|
|
|
|
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
|
|
|
|
|
{ \
|
|
|
|
|
auto localError = errorExpr; \
|
|
|
|
|
INFO("Matching Errors: " \
|
|
|
|
|
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
|
|
|
|
|
<< "\n Expected Code: " << expectedError << '\n' \
|
|
|
|
|
<< " Actual Error: " << hiprtcGetErrorString(localError) \
|
|
|
|
|
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
|
|
|
|
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
|
|
|
|
REQUIRE(localError == expectedError); \
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-06 23:48:24 +05:30
|
|
|
#define HIPASSERT(condition) \
|
2022-06-20 10:37:13 +01:00
|
|
|
if (!(condition)) { \
|
|
|
|
|
printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \
|
|
|
|
|
abort(); \
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-17 15:01:15 +05:30
|
|
|
#if HT_NVIDIA
|
2022-06-20 10:37:13 +01:00
|
|
|
#define CTX_CREATE() \
|
|
|
|
|
hipCtx_t context; \
|
2022-05-17 15:01:15 +05:30
|
|
|
initHipCtx(&context);
|
|
|
|
|
#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context));
|
|
|
|
|
#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array));
|
|
|
|
|
#define HIP_TEX_REFERENCE hipTexRef
|
2023-09-15 14:44:12 -04:00
|
|
|
#define HIP_ARRAY hipArray_t
|
2022-06-20 10:37:13 +01:00
|
|
|
static void initHipCtx(hipCtx_t* pcontext) {
|
2022-05-17 15:01:15 +05:30
|
|
|
HIPCHECK(hipInit(0));
|
|
|
|
|
hipDevice_t device;
|
|
|
|
|
HIPCHECK(hipDeviceGet(&device, 0));
|
|
|
|
|
HIPCHECK(hipCtxCreate(pcontext, 0, device));
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
#define CTX_CREATE()
|
|
|
|
|
#define CTX_DESTROY()
|
|
|
|
|
#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array));
|
|
|
|
|
#define HIP_TEX_REFERENCE textureReference*
|
2023-09-15 14:44:12 -04:00
|
|
|
#define HIP_ARRAY hipArray_t
|
2022-05-17 15:01:15 +05:30
|
|
|
#endif
|
2021-07-06 23:48:24 +05:30
|
|
|
|
2024-03-21 16:28:33 +05:30
|
|
|
static inline int getWarpSize() {
|
|
|
|
|
#if HT_NVIDIA
|
|
|
|
|
return 32;
|
|
|
|
|
#elif HT_AMD
|
|
|
|
|
int device = -1;
|
|
|
|
|
int warpSize = -1;
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
|
|
|
|
HIP_CHECK(hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize, device));
|
|
|
|
|
return warpSize;
|
|
|
|
|
#else
|
|
|
|
|
std::cout<<"Have to be either Nvidia or AMD platform, asserting"<<std::endl;
|
|
|
|
|
assert(false);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-07 04:35:21 +05:30
|
|
|
static inline bool IsGfx11() {
|
2023-02-10 02:59:44 +05:30
|
|
|
#if HT_NVIDIA
|
2023-01-07 04:35:21 +05:30
|
|
|
return false;
|
2023-02-10 02:59:44 +05:30
|
|
|
#elif HT_AMD
|
2023-01-07 04:35:21 +05:30
|
|
|
int device = -1;
|
|
|
|
|
hipDeviceProp_t props{};
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
|
|
|
|
HIP_CHECK(hipGetDeviceProperties(&props, device));
|
2024-01-22 20:09:00 +05:30
|
|
|
// Get GCN Arch Name and compare to check if it is gfx11
|
2023-01-07 04:35:21 +05:30
|
|
|
std::string arch = std::string(props.gcnArchName);
|
2023-02-14 03:40:31 +05:30
|
|
|
auto pos = arch.find("gfx11");
|
2023-01-07 04:35:21 +05:30
|
|
|
if (pos != std::string::npos)
|
2023-02-14 03:40:31 +05:30
|
|
|
return true;
|
|
|
|
|
else
|
|
|
|
|
return false;
|
2023-01-07 04:35:21 +05:30
|
|
|
#else
|
2024-01-22 20:09:00 +05:30
|
|
|
std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
|
2023-01-07 04:35:21 +05:30
|
|
|
assert(false);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-21 19:25:28 -04:00
|
|
|
static inline bool IsNavi4X() {
|
|
|
|
|
#if HT_NVIDIA
|
|
|
|
|
return false;
|
|
|
|
|
#elif HT_AMD
|
|
|
|
|
int device = -1;
|
|
|
|
|
hipDeviceProp_t props{};
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
|
|
|
|
HIP_CHECK(hipGetDeviceProperties(&props, device));
|
|
|
|
|
std::string arch = std::string(props.gcnArchName);
|
|
|
|
|
if (arch.find("gfx1200") != std::string::npos ||
|
|
|
|
|
arch.find("gfx1201") != std::string::npos) {
|
|
|
|
|
// gfx1200 = Navi44, gfx1201 = Navi48
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
|
|
|
|
|
assert(false);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-15 18:07:58 +05:30
|
|
|
// Utility Functions
|
|
|
|
|
namespace HipTest {
|
2021-06-25 08:28:59 +00:00
|
|
|
static inline int getDeviceCount() {
|
|
|
|
|
int dev = 0;
|
|
|
|
|
HIP_CHECK(hipGetDeviceCount(&dev));
|
|
|
|
|
return dev;
|
|
|
|
|
}
|
2021-06-21 13:19:44 -04:00
|
|
|
|
|
|
|
|
// Returns the current system time in microseconds
|
|
|
|
|
static inline long long get_time() {
|
2022-05-25 07:20:59 +01:00
|
|
|
return std::chrono::high_resolution_clock::now().time_since_epoch() /
|
|
|
|
|
std::chrono::microseconds(1);
|
2021-06-21 13:19:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline double elapsed_time(long long startTimeUs, long long stopTimeUs) {
|
|
|
|
|
return ((double)(stopTimeUs - startTimeUs)) / ((double)(1000));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
|
2022-06-20 10:37:13 +01:00
|
|
|
int device{0};
|
2021-06-21 13:19:44 -04:00
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
2022-06-20 10:37:13 +01:00
|
|
|
hipDeviceProp_t props{};
|
2021-06-21 13:19:44 -04:00
|
|
|
HIP_CHECK(hipGetDeviceProperties(&props, device));
|
|
|
|
|
|
|
|
|
|
unsigned blocks = props.multiProcessorCount * blocksPerCU;
|
2022-07-29 04:05:27 +01:00
|
|
|
if (blocks * threadsPerBlock < N) {
|
2021-06-21 13:19:44 -04:00
|
|
|
blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return blocks;
|
|
|
|
|
}
|
2021-11-25 21:32:29 -08:00
|
|
|
|
2022-06-20 10:37:13 +01:00
|
|
|
// Threaded version of setNumBlocks - to be used in multi threaded test
|
|
|
|
|
// Why? because catch2 does not support multithreaded macro calls
|
|
|
|
|
// Make sure you call HIP_CHECK_THREAD_FINALIZE after your threads join
|
|
|
|
|
// Also you can not return in threaded functions, due to how HIP_CHECK_THREAD works
|
|
|
|
|
static inline void setNumBlocksThread(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N,
|
|
|
|
|
unsigned& blocks) {
|
|
|
|
|
int device{0};
|
|
|
|
|
blocks = 0; // incase error has occured in some other thread and the next call might not execute,
|
|
|
|
|
// we set the blocks size to 0
|
|
|
|
|
HIP_CHECK_THREAD(hipGetDevice(&device));
|
|
|
|
|
hipDeviceProp_t props{};
|
|
|
|
|
HIP_CHECK_THREAD(hipGetDeviceProperties(&props, device));
|
|
|
|
|
|
|
|
|
|
blocks = props.multiProcessorCount * blocksPerCU;
|
|
|
|
|
if (blocks * threadsPerBlock > N) {
|
|
|
|
|
blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline int RAND_R(unsigned* rand_seed) {
|
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
|
|
|
srand(*rand_seed);
|
|
|
|
|
return rand();
|
|
|
|
|
#else
|
|
|
|
|
return rand_r(rand_seed);
|
|
|
|
|
#endif
|
2021-11-25 22:22:46 -08:00
|
|
|
}
|
2022-05-09 21:16:20 +05:30
|
|
|
|
|
|
|
|
inline bool isImageSupported() {
|
2022-06-20 10:37:13 +01:00
|
|
|
int imageSupport = 1;
|
2022-07-19 19:26:56 +05:30
|
|
|
#if HT_AMD
|
2022-06-20 10:37:13 +01:00
|
|
|
int device;
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
|
|
|
|
HIPCHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, device));
|
2022-05-09 21:16:20 +05:30
|
|
|
#endif
|
|
|
|
|
return imageSupport != 0;
|
2021-06-15 18:07:58 +05:30
|
|
|
}
|
2022-05-09 21:16:20 +05:30
|
|
|
|
2025-09-09 09:01:25 -07:00
|
|
|
inline bool isPcieAtomicSupported() {
|
|
|
|
|
int pcieAtomic = 1;
|
2025-06-11 11:41:25 -04:00
|
|
|
int device;
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
2025-09-09 09:01:25 -07:00
|
|
|
HIP_CHECK(hipDeviceGetAttribute(&pcieAtomic, hipDeviceAttributeHostNativeAtomicSupported, device));
|
|
|
|
|
return pcieAtomic;
|
2025-06-11 11:41:25 -04:00
|
|
|
}
|
|
|
|
|
|
2025-08-04 12:51:59 -07:00
|
|
|
inline bool isP2PSupported(int& d1, int& d2) {
|
|
|
|
|
int num_devices = HipTest::getDeviceCount();
|
|
|
|
|
int supported = 1;
|
|
|
|
|
for (auto i = 0u; i < num_devices; ++i) {
|
|
|
|
|
int canAccess = 0;
|
|
|
|
|
for (auto j = 0u; j < num_devices; ++j) {
|
|
|
|
|
if (i != j) {
|
|
|
|
|
HIP_CHECK(hipDeviceCanAccessPeer(&canAccess, i, j));
|
|
|
|
|
if (!canAccess) {
|
|
|
|
|
supported = 0;
|
|
|
|
|
d1 = i;
|
|
|
|
|
d2 = j;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return supported;
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-25 10:58:59 -04:00
|
|
|
inline bool checkConcurrentKernels(int num_devices) {
|
|
|
|
|
for (auto i = 0; i < num_devices; ++i) {
|
|
|
|
|
HIP_CHECK(hipSetDevice(i));
|
|
|
|
|
int concurrent_kernels = 0;
|
|
|
|
|
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
|
|
|
|
|
if (!concurrent_kernels) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (num_devices > 1) {
|
|
|
|
|
HIP_CHECK(hipSetDevice(0));
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline bool isXnackOn() {
|
|
|
|
|
hipDeviceProp_t prop;
|
|
|
|
|
int device = 0;
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
|
|
|
|
HIP_CHECK(hipGetDeviceProperties(&prop, device));
|
|
|
|
|
std::string gfxName(prop.gcnArchName);
|
|
|
|
|
return gfxName.find("xnack+") != std::string::npos;
|
|
|
|
|
}
|
|
|
|
|
|
2024-09-17 15:56:39 +02:00
|
|
|
inline bool areWarpMatchFunctionsSupported() {
|
|
|
|
|
int matchFunctionsSupported = 1;
|
|
|
|
|
#if HT_NVIDIA
|
|
|
|
|
int device;
|
|
|
|
|
hipDeviceProp_t prop;
|
|
|
|
|
HIP_CHECK(hipGetDevice(&device));
|
|
|
|
|
HIP_CHECK(hipGetDeviceProperties(&prop, device));
|
|
|
|
|
if (prop.major < 7) {
|
|
|
|
|
matchFunctionsSupported = 0;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
return matchFunctionsSupported != 0;
|
|
|
|
|
}
|
|
|
|
|
|
2022-05-25 11:43:18 +01:00
|
|
|
/**
|
|
|
|
|
* Causes the test to stop and be skipped at runtime.
|
|
|
|
|
* reason: Message describing the reason the test has been skipped.
|
|
|
|
|
*/
|
|
|
|
|
static inline void HIP_SKIP_TEST(char const* const reason) noexcept {
|
|
|
|
|
// ctest is setup to parse for "HIP_SKIP_THIS_TEST", at which point it will skip the test.
|
|
|
|
|
std::cout << "Skipping test. Reason: " << reason << '\n' << "HIP_SKIP_THIS_TEST" << std::endl;
|
2022-05-09 21:16:20 +05:30
|
|
|
}
|
2022-06-13 11:05:02 +01:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Helper template that returns the expected arguments of a kernel.
|
|
|
|
|
*
|
|
|
|
|
* @return constexpr std::tuple<FArgs...> the expected arguments of the kernel.
|
|
|
|
|
*/
|
|
|
|
|
template <typename... FArgs> std::tuple<FArgs...> getExpectedArgs(void(FArgs...)){};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Asserts that the types of the arguments of a function match exactly with the types in the
|
|
|
|
|
* function signature.
|
|
|
|
|
* This is necessary because HIP RTC does not do implicit casting of the kernel
|
|
|
|
|
* parameters.
|
|
|
|
|
* In order to get the kernel function signature, this function should only called when
|
|
|
|
|
* RTC is disabled.
|
|
|
|
|
*
|
|
|
|
|
* @tparam F the kernel function
|
|
|
|
|
* @tparam Args the parameters that will be passed to the kernel.
|
|
|
|
|
*/
|
|
|
|
|
template <typename F, typename... Args> void validateArguments(F f, Args...) {
|
|
|
|
|
using expectedArgsTuple = decltype(getExpectedArgs(f));
|
|
|
|
|
static_assert(std::is_same<expectedArgsTuple, std::tuple<Args...>>::value,
|
|
|
|
|
"Kernel arguments types must match exactly!");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief Launch a kernel using either HIP or HIP RTC.
|
|
|
|
|
*
|
|
|
|
|
* @tparam Typenames A list of typenames used by the kernel (unused if the kernel is not a
|
|
|
|
|
* template).
|
|
|
|
|
* @tparam K The kernel type. Expects a function or template when RTC is disabled. Expects a
|
|
|
|
|
* function pointer instead when RTC is enabled.
|
|
|
|
|
* @tparam Dim Can be either dim3 or int.
|
|
|
|
|
* @tparam Args A list of kernel arguments to be forwarded.
|
|
|
|
|
* @param kernel The kernel to be launched (defined in kernels.hh)
|
|
|
|
|
* @param numBlocks
|
|
|
|
|
* @param numThreads
|
|
|
|
|
* @param memPerBlock
|
|
|
|
|
* @param stream
|
|
|
|
|
* @param packedArgs A list of kernel arguments to be forwarded.
|
|
|
|
|
*/
|
|
|
|
|
template <typename... Typenames, typename K, typename Dim, typename... Args>
|
|
|
|
|
void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerBlock,
|
|
|
|
|
hipStream_t stream, Args&&... packedArgs) {
|
|
|
|
|
#ifndef RTC_TESTING
|
2022-06-20 10:37:13 +01:00
|
|
|
validateArguments(kernel, packedArgs...);
|
|
|
|
|
kernel<<<numBlocks, numThreads, memPerBlock, stream>>>(std::forward<Args>(packedArgs)...);
|
2022-06-13 11:05:02 +01:00
|
|
|
#else
|
|
|
|
|
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
|
|
|
|
|
std::forward<Args>(packedArgs)...);
|
|
|
|
|
#endif
|
2024-01-22 20:09:00 +05:30
|
|
|
HIP_CHECK(hipGetLastError());
|
2022-05-25 11:43:18 +01:00
|
|
|
}
|
2022-07-19 20:09:07 +05:30
|
|
|
|
|
|
|
|
//---
|
|
|
|
|
struct Pinned {
|
|
|
|
|
static const bool isPinned = true;
|
|
|
|
|
static const char* str() { return "Pinned"; };
|
|
|
|
|
|
|
|
|
|
static void* Alloc(size_t sizeBytes) {
|
|
|
|
|
void* p;
|
|
|
|
|
HIPCHECK(hipHostMalloc((void**)&p, sizeBytes));
|
|
|
|
|
return p;
|
|
|
|
|
};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//---
|
|
|
|
|
struct Unpinned {
|
2022-08-04 06:05:21 +01:00
|
|
|
static const bool isPinned = false;
|
|
|
|
|
static const char* str() { return "Unpinned"; };
|
|
|
|
|
|
|
|
|
|
static void* Alloc(size_t sizeBytes) {
|
|
|
|
|
void* p = malloc(sizeBytes);
|
|
|
|
|
HIPASSERT(p);
|
|
|
|
|
return p;
|
|
|
|
|
};
|
2022-07-19 20:09:07 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct Memcpy {
|
2022-08-04 06:05:21 +01:00
|
|
|
static const char* str() { return "Memcpy"; };
|
2022-07-19 20:09:07 +05:30
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct MemcpyAsync {
|
|
|
|
|
static const char* str() { return "MemcpyAsync"; };
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2022-08-04 06:05:21 +01:00
|
|
|
template <typename C> struct MemTraits;
|
2022-07-19 20:09:07 +05:30
|
|
|
|
|
|
|
|
|
2022-08-04 06:05:21 +01:00
|
|
|
template <> struct MemTraits<Memcpy> {
|
2022-07-19 20:09:07 +05:30
|
|
|
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
2022-08-04 06:05:21 +01:00
|
|
|
hipStream_t stream) {
|
2022-07-19 20:09:07 +05:30
|
|
|
(void)stream;
|
|
|
|
|
HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind));
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2022-08-04 06:05:21 +01:00
|
|
|
template <> struct MemTraits<MemcpyAsync> {
|
2022-07-19 20:09:07 +05:30
|
|
|
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
2022-08-04 06:05:21 +01:00
|
|
|
hipStream_t stream) {
|
2022-07-19 20:09:07 +05:30
|
|
|
HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream));
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2023-06-21 21:27:36 +05:30
|
|
|
class BlockingContext {
|
|
|
|
|
std::atomic_bool blocked{true};
|
|
|
|
|
hipStream_t stream;
|
|
|
|
|
|
|
|
|
|
public:
|
2023-08-14 12:06:14 +05:30
|
|
|
BlockingContext(hipStream_t s) : blocked(true), stream(s) {}
|
2023-06-21 21:27:36 +05:30
|
|
|
|
|
|
|
|
BlockingContext(const BlockingContext& in) {
|
|
|
|
|
blocked = in.blocked_val();
|
|
|
|
|
stream = in.stream_val();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BlockingContext(const BlockingContext&& in) {
|
|
|
|
|
blocked = in.blocked_val();
|
|
|
|
|
stream = in.stream_val();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void reset() { blocked = true; }
|
|
|
|
|
|
|
|
|
|
BlockingContext& operator=(const BlockingContext& in) {
|
|
|
|
|
blocked = in.blocked_val();
|
|
|
|
|
stream = in.stream_val();
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void block_stream() {
|
|
|
|
|
blocked = true;
|
|
|
|
|
auto blocking_callback = [](hipStream_t, hipError_t, void* data) {
|
|
|
|
|
auto blocked = reinterpret_cast<std::atomic_bool*>(data);
|
|
|
|
|
while (blocked->load()) {
|
|
|
|
|
// Yield this thread till we are waiting
|
|
|
|
|
std::this_thread::yield();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
HIP_CHECK(hipStreamAddCallback(stream, blocking_callback, (void*)&blocked, 0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void unblock_stream() {
|
|
|
|
|
blocked = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool is_blocked() const { return hipStreamQuery(stream) == hipErrorNotReady; }
|
|
|
|
|
|
|
|
|
|
bool blocked_val() const { return blocked.load(); }
|
|
|
|
|
hipStream_t stream_val() const { return stream; }
|
|
|
|
|
};
|
2022-08-04 06:05:21 +01:00
|
|
|
} // namespace HipTest
|
2022-05-09 21:16:20 +05:30
|
|
|
|
|
|
|
|
// This must be called in the beginning of image test app's main() to indicate whether image
|
|
|
|
|
// is supported.
|
2022-08-04 06:05:21 +01:00
|
|
|
#define CHECK_IMAGE_SUPPORT \
|
|
|
|
|
if (!HipTest::isImageSupported()) { \
|
|
|
|
|
INFO("Texture is not support on the device. Skipped."); \
|
|
|
|
|
return; \
|
|
|
|
|
}
|
2024-10-01 17:24:49 +02:00
|
|
|
|
2025-09-09 09:01:25 -07:00
|
|
|
#define CHECK_PCIE_ATOMIC_SUPPORT \
|
|
|
|
|
if (!HipTest::isPcieAtomicSupported()) { \
|
|
|
|
|
HipTest::HIP_SKIP_TEST("Device doesn't support pcie atomic, Skipped"); \
|
2025-06-11 11:41:25 -04:00
|
|
|
return; \
|
2025-09-09 09:01:25 -07:00
|
|
|
}
|
2025-06-11 11:41:25 -04:00
|
|
|
|
2025-08-04 12:51:59 -07:00
|
|
|
#define CHECK_P2P_SUPPORT \
|
|
|
|
|
int d1, d2; \
|
|
|
|
|
if (!HipTest::isP2PSupported(d1,d2)) { \
|
|
|
|
|
std::string msg = "P2P access check failed between dev1:" + std::to_string(d1) + ",dev2:" + \
|
|
|
|
|
std::to_string(d2); \
|
|
|
|
|
HipTest::HIP_SKIP_TEST(msg.c_str()); \
|
|
|
|
|
return; \
|
|
|
|
|
} \
|
2024-09-17 15:56:39 +02:00
|
|
|
// This must be called in the beginning of warp test app's main() to indicate warp match functions
|
|
|
|
|
// are supported.
|
|
|
|
|
#define CHECK_WARP_MATCH_FUNCTIONS_SUPPORT \
|
|
|
|
|
if (!HipTest::areWarpMatchFunctionsSupported()) { \
|
|
|
|
|
INFO("Warp Match Functions are not support on the device. Skipped."); \
|
|
|
|
|
return; \
|
|
|
|
|
}
|
|
|
|
|
|
2024-10-01 17:24:49 +02:00
|
|
|
// Call GENERATE_CAPTURE macro at the start of the test, before using BEGIN/END_CAPTURE.
|
|
|
|
|
// Use BEGIN/END_CAPTURE macros to execute APIs in both stream capturing and non-capturing modes.
|
|
|
|
|
// Place BEGIN_CAPTURE before the API call and END_CAPTURE after the call.
|
|
|
|
|
#define GENERATE_CAPTURE() bool capture = GENERATE(true, false);
|
|
|
|
|
|
|
|
|
|
#define BEGIN_CAPTURE(stream) \
|
|
|
|
|
if (capture && stream != nullptr) { \
|
|
|
|
|
hipStreamCaptureMode flags = GENERATE( \
|
|
|
|
|
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); \
|
|
|
|
|
HIP_CHECK(hipStreamBeginCapture(stream, flags)); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define END_CAPTURE(stream) \
|
|
|
|
|
if (capture && stream != nullptr) { \
|
|
|
|
|
hipGraph_t graph = nullptr; \
|
|
|
|
|
hipGraphExec_t graph_exec = nullptr; \
|
|
|
|
|
HIP_CHECK(hipStreamEndCapture(stream, &graph)); \
|
|
|
|
|
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); \
|
|
|
|
|
HIP_CHECK(hipGraphLaunch(graph_exec, stream)); \
|
|
|
|
|
HIP_CHECK(hipGraphExecDestroy(graph_exec)); \
|
|
|
|
|
HIP_CHECK(hipGraphDestroy(graph)); \
|
|
|
|
|
}
|
2024-10-01 17:19:34 +02:00
|
|
|
|
|
|
|
|
// These macros are used for testing behaviour when sync APIs are being captured. Before
|
|
|
|
|
// calling BEGIN_CAPTURE_SYNC, hipError_t variable (capture_err) should be initialized to hipSuccess
|
|
|
|
|
// and passed to this macro. The scenario with using this macro should look like this:
|
|
|
|
|
// 1. BEGIN_CAPTURE_SYNC(capture_err)
|
|
|
|
|
// 2. HIP_CHECK_ERROR(SyncAPI, capture_err)
|
|
|
|
|
// 3. END_CAPTURE_SYNC(capture_err)
|
|
|
|
|
// Some sync APIs are allowed in relaxed capture mode which is indicated with
|
|
|
|
|
// rlx_mode_allowed variable. For other two modes, those APIs return
|
|
|
|
|
// hipErrorStreamCaptureUnsupported. These macros shouldn't be used with hipStreamSync and
|
|
|
|
|
// hipDeviceSync during capture.
|
|
|
|
|
#define BEGIN_CAPTURE_SYNC(capture_err, rlx_mode_allowed) \
|
|
|
|
|
hipStream_t stream; \
|
|
|
|
|
GENERATE_CAPTURE(); \
|
|
|
|
|
if (capture) { \
|
|
|
|
|
HIP_CHECK(hipStreamCreate(&stream)); \
|
|
|
|
|
hipStreamCaptureMode mode = GENERATE( \
|
|
|
|
|
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); \
|
|
|
|
|
HIP_CHECK(hipStreamBeginCapture(stream, mode)); \
|
|
|
|
|
if (!rlx_mode_allowed) { \
|
|
|
|
|
capture_err = hipErrorStreamCaptureImplicit; \
|
|
|
|
|
} else if (mode != hipStreamCaptureModeRelaxed) { \
|
|
|
|
|
capture_err = hipErrorStreamCaptureUnsupported; \
|
|
|
|
|
} \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If test has other HIP API calls that depend on sync call that is captured and fails, the rest of
|
|
|
|
|
// the test (except freeing the memory) should be skipped after calling END_CAPTURE_SYNC() by
|
|
|
|
|
// testing if previously created hipError_t variable (capture_err) doesn't equal hipSuccess.
|
|
|
|
|
#define END_CAPTURE_SYNC(capture_err) \
|
|
|
|
|
if (capture) { \
|
|
|
|
|
hipGraph_t graph; \
|
|
|
|
|
hipError_t stream_err = hipSuccess; \
|
|
|
|
|
if (capture_err != hipSuccess) { \
|
|
|
|
|
stream_err = hipErrorStreamCaptureInvalidated; \
|
|
|
|
|
} \
|
|
|
|
|
HIP_CHECK_ERROR(hipStreamEndCapture(stream, &graph), stream_err); \
|
|
|
|
|
if (graph != nullptr) { \
|
|
|
|
|
HIP_CHECK(hipGraphDestroy(graph)); \
|
|
|
|
|
} \
|
|
|
|
|
HIP_CHECK(hipStreamDestroy(stream)); \
|
|
|
|
|
}
|
2025-09-15 15:58:41 +02:00
|
|
|
|
|
|
|
|
// Manage core dumps in specific tests which require it disabled (e.g., hipGetLastErrorOnAbort.cc)
|
|
|
|
|
#if HT_LINUX
|
|
|
|
|
#define DISABLE_CORE_DUMPS() \
|
|
|
|
|
struct rlimit core_limit; \
|
|
|
|
|
bool rlimit_saved = false; \
|
|
|
|
|
if (getrlimit(RLIMIT_CORE, &core_limit) == 0) { \
|
|
|
|
|
if (core_limit.rlim_cur != 0) { \
|
|
|
|
|
struct rlimit new_limit; \
|
|
|
|
|
new_limit.rlim_cur = 0; \
|
|
|
|
|
new_limit.rlim_max = core_limit.rlim_max; \
|
|
|
|
|
if (setrlimit(RLIMIT_CORE, &new_limit) == 0) { \
|
|
|
|
|
rlimit_saved = true; \
|
|
|
|
|
} \
|
|
|
|
|
} \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define RESTORE_CORE_DUMPS() \
|
|
|
|
|
if (rlimit_saved) { \
|
|
|
|
|
setrlimit(RLIMIT_CORE, &core_limit); \
|
|
|
|
|
rlimit_saved = false; \
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
#define DISABLE_CORE_DUMPS()
|
|
|
|
|
#define RESTORE_CORE_DUMPS()
|
|
|
|
|
#endif
|