# HIP Bugs

- [HIP is more restrictive in enforcing restrictions](#hip-is-more-restrictive-in-enforcing-restrictions)

### HIP is more restrictive in enforcing restrictions

The language specifications for HIP and CUDA forbid calling a `__device__` function in a `__host__` context. In practice, you may observe differences in the strictness of this restriction, with HIP exhibiting a tighter adherence to the specification and thus being less tolerant of infringing code. The solution is to ensure that all functions which are called in a `__device__` context are correctly annotated to reflect it.

The following is an example of code that follows the specification:

```cpp
#include <hip/hip_runtime.h>
#include <iostream>
#include <random>
#include "test_common.h"

static std::random_device dev;
static std::mt19937 rng(dev());

template <typename T, typename M>
__host__ __device__ inline constexpr int count() {
    return sizeof(T) / sizeof(M);
}

inline float getRandomFloat(float min = 10, float max = 100) {
    std::uniform_real_distribution<float> gen(min, max);
    return gen(rng);
}

template <typename T, typename B>
void fillMatrix(T* a, int size) {
    for (int i = 0; i < size; i++) {
        T t;
        t.x = getRandomFloat();
        if constexpr (count<T, B>() >= 2) t.y = getRandomFloat();
        if constexpr (count<T, B>() >= 3) t.z = getRandomFloat();
        if constexpr (count<T, B>() >= 4) t.w = getRandomFloat();
        a[i] = t;
    }
}

// Test operations
template <typename T, typename B>
__host__ __device__ void testOperations(T& a, T& b) {
    a.x += b.x;
    a.x++;
    b.x++;
    if constexpr (count<T, B>() >= 2) {
        a.y = b.x;
        a.x = b.y;
    }
    if constexpr (count<T, B>() >= 3) {
        if (a.x > 0) b.x /= a.x;
        a.x *= b.z;
        a.y--;
    }
    if constexpr (count<T, B>() >= 4) {
        b.w = a.x;
        a.w += (-b.y);
    }
}

template <typename T, typename B>
__global__ void testOperationsGPU(T* d_a, T* d_b, int size) {
    int id = threadIdx.x;
    if (id > size) return;
    T &a = d_a[id];
    T &b = d_b[id];
    testOperations<T, B>(a, b);
}

template <typename T>
void dcopy(T* a, T* b, int size) {
    for (int i = 0; i < size; i++) {
        a[i] = b[i];
    }
}

template <typename T>
bool isEqual(T* a, T* b, int size) {
    for (int i = 0; i < size; i++) {
        if (a[i] != b[i]) {
            return false;
        }
    }
    return true;
}

// Main function that tests type
// T = what you want to test
// D = pack of 1 i.e. float1 int1
template <typename T, typename D>
void testType(int msize) {
    T *fa, *fb, *fc, *h_fa, *h_fb;
    fa = new T[msize];
    fb = new T[msize];
    fc = new T[msize];
    h_fa = new T[msize];
    h_fb = new T[msize];

    T *d_fa, *d_fb;

    constexpr int c = count<T, D>();
    if (c <= 0 || c >= 5) {
        failed("Invalid Size\n");
    }

    fillMatrix<T, D>(fa, msize);
    dcopy(fb, fa, msize);
    dcopy(h_fa, fa, msize);
    dcopy(h_fb, fa, msize);
    for (int i = 0; i < msize; i++) testOperations<T, D>(h_fa[i], h_fb[i]);

    hipMalloc(&d_fa, sizeof(T) * msize);
    hipMalloc(&d_fb, sizeof(T) * msize);

    hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice);
    hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice);

    auto kernel = testOperationsGPU<T, D>;
    hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize);

    hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost);

    bool pass = true;
    if (!isEqual<T>(h_fa, fc, msize)) {
        pass = false;
    }

    delete[] fa;
    delete[] fb;
    delete[] fc;
    delete[] h_fa;
    delete[] h_fb;
    hipFree(d_fa);
    hipFree(d_fb);

    if (!pass) {
        failed("Failed");
    }
}

int main() {
    const int msize = 100;
    // double
    testType<double1, double1>(msize);
    testType<double2, double1>(msize);
    testType<double3, double1>(msize);
    testType<double4, double1>(msize);

    // floats
    testType<float1, float1>(msize);
    testType<float2, float1>(msize);
    testType<float3, float1>(msize);
    testType<float4, float1>(msize);
    ...
    passed();
}
```

For the complete program, please refer to the HIP test application at the following link:
https://github.com/ROCm-Developer-Tools/HIP/blob/main/tests/src/deviceLib/hip_floatnTM.cpp