SWDEV-306204 - remove external reference in hip_bugs.md (#2394)
Change-Id: I7425eba1cd20a2f842479d0eb050f97e21e457c1
Этот коммит содержится в:
@@ -13,75 +13,160 @@ The language specification for HIP and CUDA forbid calling a
|
||||
differences in the strictness of this restriction, with HIP exhibiting a tighter
|
||||
adherence to the specification and thus less tolerant of infringing code. The
|
||||
solution is to ensure that all functions which are called in a
|
||||
`__device__` context are correctly annotated to reflect it. An interesting case
|
||||
where these differences emerge is shown below. This relies on a the common
|
||||
[C++ Member Detector idiom][1], as it would be implemented pre C++11):
|
||||
`__device__` context are correctly annotated to reflect it.
|
||||
|
||||
```c++
|
||||
#include <cassert>
|
||||
The following is an example of codes using the specification,
|
||||
```
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <type_traits>
|
||||
#include <random>
|
||||
#include "test_common.h"
|
||||
|
||||
struct aye { bool a[1]; };
|
||||
struct nay { bool a[2]; };
|
||||
static std::random_device dev;
|
||||
static std::mt19937 rng(dev());
|
||||
|
||||
// Dual restriction is necessary in HIP if the detector is to work for
|
||||
// __device__ contexts as well as __host__ ones. NVCC is less strict.
|
||||
template<typename T>
|
||||
__host__ __device__
|
||||
const T& cref_t();
|
||||
|
||||
template<typename T>
|
||||
struct Has_call_operator {
|
||||
// Dual restriction is necessary in HIP if the detector is to work for
|
||||
// __device__ contexts as well as __host__ ones. NVCC is less strict.
|
||||
template<typename C>
|
||||
__host__ __device__
|
||||
static
|
||||
aye test(
|
||||
C const *,
|
||||
typename std::enable_if<
|
||||
(sizeof(cref_t<C>().operator()()) > 0)>::type* = nullptr);
|
||||
static
|
||||
nay test(...);
|
||||
|
||||
enum { value = sizeof(test(static_cast<T*>(0))) == sizeof(aye) };
|
||||
};
|
||||
|
||||
template<typename T, typename U, bool callable = has_call_operator<U>::value>
|
||||
struct Wrapper {
|
||||
template<typename V>
|
||||
V f() const { return T{1}; }
|
||||
};
|
||||
|
||||
|
||||
template<typename T, typename U>
|
||||
struct Wrapper<T, U, true> {
|
||||
template<typename V>
|
||||
V f() const { return T{10}; }
|
||||
};
|
||||
|
||||
// This specialisation will yield a compile-time error, if selected.
|
||||
template<typename T, typename U>
|
||||
struct Wrapper<T, U, false> {};
|
||||
|
||||
template<typename T>
|
||||
struct Functor;
|
||||
|
||||
template<> struct Functor<float> {
|
||||
__device__
|
||||
float operator()() const { return 42.0f; }
|
||||
};
|
||||
|
||||
__device__
|
||||
void this_will_not_compile_if_detector_is_not_marked_device()
|
||||
{
|
||||
float f = Wrapper<float, Functor<float>>().f<float>();
|
||||
template <typename T, typename M>
|
||||
__host__ __device__ inline constexpr int count() {
|
||||
return sizeof(T) / sizeof(M);
|
||||
}
|
||||
|
||||
__host__
|
||||
void this_will_not_compile_if_detector_is_marked_device_only()
|
||||
{
|
||||
float f = Wrapper<float, Functor<float>>().f<float>();
|
||||
inline float getRandomFloat(float min = 10, float max = 100) {
|
||||
std::uniform_real_distribution<float> gen(min, max);
|
||||
return gen(rng);
|
||||
}
|
||||
|
||||
template <typename T, typename B>
|
||||
void fillMatrix(T* a, int size) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
T t;
|
||||
t.x = getRandomFloat();
|
||||
if constexpr (count<T, B>() >= 2) t.y = getRandomFloat();
|
||||
if constexpr (count<T, B>() >= 3) t.z = getRandomFloat();
|
||||
if constexpr (count<T, B>() >= 4) t.w = getRandomFloat();
|
||||
|
||||
a[i] = t;
|
||||
}
|
||||
}
|
||||
|
||||
// Test operations
|
||||
template <typename T, typename B>
|
||||
__host__ __device__ void testOperations(T& a, T& b) {
|
||||
a.x += b.x;
|
||||
a.x++;
|
||||
b.x++;
|
||||
if constexpr (count<T, B>() >= 2) {
|
||||
a.y = b.x;
|
||||
a.x = b.y;
|
||||
}
|
||||
if constexpr (count<T, B>() >= 3) {
|
||||
if (a.x > 0) b.x /= a.x;
|
||||
a.x *= b.z;
|
||||
a.y--;
|
||||
}
|
||||
if constexpr (count<T, B>() >= 4) {
|
||||
b.w = a.x;
|
||||
a.w += (-b.y);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename B>
|
||||
__global__ void testOperationsGPU(T* d_a, T* d_b, int size) {
|
||||
int id = threadIdx.x;
|
||||
if (id > size) return;
|
||||
T &a = d_a[id];
|
||||
T &b = d_b[id];
|
||||
|
||||
testOperations<T, B>(a, b);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void dcopy(T* a, T* b, int size) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
a[i] = b[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool isEqual(T* a, T* b, int size) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (a[i] != b[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Main function that tests type
|
||||
// T = what you want to test
|
||||
// D = pack of 1 i.e. float1 int1
|
||||
template <typename T, typename D>
|
||||
void testType(int msize) {
|
||||
T *fa, *fb, *fc, *h_fa, *h_fb;
|
||||
fa = new T[msize];
|
||||
fb = new T[msize];
|
||||
fc = new T[msize];
|
||||
h_fa = new T[msize];
|
||||
h_fb = new T[msize];
|
||||
|
||||
T *d_fa, *d_fb;
|
||||
|
||||
constexpr int c = count<T, D>();
|
||||
|
||||
if (c <= 0 || c >= 5) {
|
||||
failed("Invalid Size\n");
|
||||
}
|
||||
|
||||
fillMatrix<T, D>(fa, msize);
|
||||
dcopy(fb, fa, msize);
|
||||
dcopy(h_fa, fa, msize);
|
||||
dcopy(h_fb, fa, msize);
|
||||
for (int i = 0; i < msize; i++) testOperations<T, D>(h_fa[i], h_fb[i]);
|
||||
|
||||
hipMalloc(&d_fa, sizeof(T) * msize);
|
||||
hipMalloc(&d_fb, sizeof(T) * msize);
|
||||
|
||||
hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice);
|
||||
hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice);
|
||||
|
||||
auto kernel = testOperationsGPU<T, D>;
|
||||
hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize);
|
||||
|
||||
hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost);
|
||||
|
||||
bool pass = true;
|
||||
if (!isEqual<T>(h_fa, fc, msize)) {
|
||||
pass = false;
|
||||
}
|
||||
|
||||
delete[] fa;
|
||||
delete[] fb;
|
||||
delete[] fc;
|
||||
delete[] h_fa;
|
||||
delete[] h_fb;
|
||||
hipFree(d_fa);
|
||||
hipFree(d_fb);
|
||||
|
||||
if (!pass) {
|
||||
failed("Failed");
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
const int msize = 100;
|
||||
// double
|
||||
testType<double1, double1>(msize);
|
||||
testType<double2, double1>(msize);
|
||||
testType<double3, double1>(msize);
|
||||
testType<double4, double1>(msize);
|
||||
|
||||
// floats
|
||||
testType<float1, float1>(msize);
|
||||
testType<float2, float1>(msize);
|
||||
testType<float3, float1>(msize);
|
||||
testType<float4, float1>(msize);
|
||||
...
|
||||
passed();
|
||||
}
|
||||
```
|
||||
[1]: https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Member_Detector
|
||||
For more details for the complete program, please refer to HIP test application at the link, https://github.com/ROCm-Developer-Tools/HIP/blob/main/tests/src/deviceLib/hip_floatnTM.cpp
|
||||
|
||||
Ссылка в новой задаче
Block a user