From 1d29819850051da79960ec6535c96a5f49d323e9 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Mon, 20 Aug 2018 16:46:12 +0000 Subject: [PATCH 1/5] Add bitextract and bitinsert functions [ROCm/hip commit: b445e450074d458f34e7ec048ca7e0504f3bcbb7] --- .../include/hip/hcc_detail/device_functions.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/projects/hip/include/hip/hcc_detail/device_functions.h b/projects/hip/include/hip/hcc_detail/device_functions.h index 5e1c8a4e6b..7b147cc307 100644 --- a/projects/hip/include/hip/hcc_detail/device_functions.h +++ b/projects/hip/include/hip/hcc_detail/device_functions.h @@ -63,6 +63,22 @@ __device__ static inline unsigned int __clzll(long long int input) { return input == 0 ? 64 : input > 0 ? __builtin_clzl(input) : __builtin_clzl(~input); } +__device__ static inline unsigned int __lastbit_u32_u64(unsigned long long int input) { + return input == 0 ? -1 : __builtin_ctzl(input); +} + +__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) { + return (src0 << (32 - src1 - src2)) >> (32 - src2); +} + +__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) { + return (src0 << (64 - src1 - src2)) >> (64 - src2); +} + +__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) { + return (src0 & ~(((1 << src3) - 1) << src2)) | ((src1 & ((1 << src3) - 1)) << src2); +} + __device__ static inline unsigned int __ffs(unsigned int input) { return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; } From eb2e7e0671bf33de92e35b8bca22f2c3f6214c7d Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 28 Aug 2018 18:19:48 +0000 Subject: [PATCH 2/5] Clean up and add meaningful variables to bit funcs [ROCm/hip commit: b939a3c6edede2a6fc5aa5f1680e8ad952601872] --- .../include/hip/hcc_detail/device_functions.h | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/device_functions.h b/projects/hip/include/hip/hcc_detail/device_functions.h index 7b147cc307..2514eec2d7 100644 --- a/projects/hip/include/hip/hcc_detail/device_functions.h +++ b/projects/hip/include/hip/hcc_detail/device_functions.h @@ -63,22 +63,6 @@ __device__ static inline unsigned int __clzll(long long int input) { return input == 0 ? 64 : input > 0 ? __builtin_clzl(input) : __builtin_clzl(~input); } -__device__ static inline unsigned int __lastbit_u32_u64(unsigned long long int input) { - return input == 0 ? -1 : __builtin_ctzl(input); -} - -__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) { - return (src0 << (32 - src1 - src2)) >> (32 - src2); -} - -__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) { - return (src0 << (64 - src1 - src2)) >> (64 - src2); -} - -__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) { - return (src0 & ~(((1 << src3) - 1) << src2)) | ((src1 & ((1 << src3) - 1)) << src2); -} - __device__ static inline unsigned int __ffs(unsigned int input) { return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; } @@ -95,12 +79,37 @@ __device__ static inline unsigned int __ffsll(long long int input) { return ( input == 0 ? -1 : __builtin_ctzl(input) ) + 1; } -__device__ static inline unsigned int __brev(unsigned int input) { return __llvm_bitrev_b32(input); } +__device__ static inline unsigned int __brev(unsigned int input) { + return __llvm_bitrev_b32(input); +} __device__ static inline unsigned long long int __brevll(unsigned long long int input) { return __llvm_bitrev_b64(input); } +__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) { + return input == 0 ? -1 : __builtin_ctzl(input); +} + +__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) { + uint32_t offset = src1 & 31; + uint32_t width = src2 & 31; + return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width); +} + +__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) { + uint64_t offset = src1 & 63; + uint64_t width = src2 & 63; + return width == 0 ? 0 : (src0 << (64 - offset - width)) >> (64 - width); +} + +__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) { + uint32_t offset = src2 & 31; + uint32_t width = src3 & 31; + uint32_t mask = (1 << width) - 1; + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + __device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); __device__ static unsigned int __hadd(int x, int y); __device__ static int __mul24(int x, int y); From 9619457b1fab112e90142983d24ee1ce7726d9da Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 28 Aug 2018 19:58:20 +0000 Subject: [PATCH 3/5] Add bitinsert64 to device functions [ROCm/hip commit: ba9fc6f3557a7f32b3e870f71608ff5ab9dbc04d] --- projects/hip/include/hip/hcc_detail/device_functions.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/projects/hip/include/hip/hcc_detail/device_functions.h b/projects/hip/include/hip/hcc_detail/device_functions.h index 2514eec2d7..d774a3ba33 100644 --- a/projects/hip/include/hip/hcc_detail/device_functions.h +++ b/projects/hip/include/hip/hcc_detail/device_functions.h @@ -110,6 +110,13 @@ __device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigne return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); } +__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) { + uint64_t offset = src2 & 63; + uint64_t width = src3 & 63; + uint64_t mask = (1 << width) - 1; + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + __device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); __device__ static unsigned int __hadd(int x, int y); __device__ static int __mul24(int x, int y); From 31a34f6804798ddad54909c125e54fd00ba76a6e Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Tue, 28 Aug 2018 19:58:43 +0000 Subject: [PATCH 4/5] Add tests for bitextract and bitinsert [ROCm/hip commit: 5893bbc1f4c205777ad93b1364634e9e2318144e] --- .../tests/src/deviceLib/hip_bitextract.cpp | 220 ++++++++++++++++ .../hip/tests/src/deviceLib/hip_bitinsert.cpp | 239 ++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 projects/hip/tests/src/deviceLib/hip_bitextract.cpp create mode 100644 projects/hip/tests/src/deviceLib/hip_bitinsert.cpp diff --git a/projects/hip/tests/src/deviceLib/hip_bitextract.cpp b/projects/hip/tests/src/deviceLib/hip_bitextract.cpp new file mode 100644 index 0000000000..3c3ca33d69 --- /dev/null +++ b/projects/hip/tests/src/deviceLib/hip_bitextract.cpp @@ -0,0 +1,220 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include +#include +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include + +#define HIP_ASSERT(x) (assert((x) == hipSuccess)) + +#define TEST_DEBUG (0) + + +// CPU implementation of bitextract +template +T bit_extract(T src0, unsigned int src1, unsigned int src2) { + unsigned int bits = sizeof(T) * 8; + T offset = src1 & (bits - 1); + T width = src2 & (bits - 1); + if (width == 0) { + return 0; + } else { + return (src0 << (bits - width - offset)) >> (bits - width); + } +} + +__global__ void HIP_kernel(hipLaunchParm lp, + unsigned int* out32, unsigned int* in32_0, + unsigned int* in32_1, unsigned int* in32_2, + unsigned long long int* out64, unsigned long long int* in64_0, + unsigned int* in64_1, unsigned int* in64_2) { + int x = blockDim.x * blockIdx.x + threadIdx.x; + + out32[x] = __bitextract_u32(in32_0[x], in32_1[x], in32_2[x]); + out64[x] = __bitextract_u64(in64_0[x], in64_1[x], in64_2[x]); +} + + +using namespace std; + +int main() { + unsigned int* hostOut32; + unsigned int* hostSrc032; + unsigned int* hostSrc132; + unsigned int* hostSrc232; + unsigned long long int* hostOut64; + unsigned long long int* hostSrc064; + unsigned int* hostSrc164; + unsigned int* hostSrc264; + + unsigned int* deviceOut32; + unsigned int* deviceSrc032; + unsigned int* deviceSrc132; + unsigned int* deviceSrc232; + unsigned long long int* deviceOut64; + unsigned long long int* deviceSrc064; + unsigned int* deviceSrc164; + unsigned int* deviceSrc264; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + cout << " System minor " << devProp.minor << endl; + cout << " System major " << devProp.major << endl; + cout << " agent prop name " << devProp.name << endl; + + cout << "hip Device prop succeeded " << endl; + + unsigned int wave_size = devProp.warpSize; + unsigned int num_waves_per_block = 2; + unsigned int num_threads_per_block = wave_size * num_waves_per_block; + unsigned int num_blocks = 2; + unsigned int NUM = num_threads_per_block * num_blocks; + + int i; + int errors; + + hostOut32 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc032 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc132 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc232 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + + hostOut64 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); + hostSrc064 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); + hostSrc164 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc264 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + + // initialize the input data + std::random_device rd; + std::uniform_int_distribution uint32_src0_dist; + std::uniform_int_distribution uint32_src12_dist(0,31); + std::uniform_int_distribution uint64_src0_dist; + std::uniform_int_distribution uint64_src12_dist(0,63); + for (i = 0; i < NUM; i++) { + hostOut32[i] = 0; + hostSrc032[i] = uint32_src0_dist(rd); + hostSrc132[i] = uint32_src12_dist(rd); + hostSrc232[i] = uint32_src12_dist(rd); + hostOut64[i] = 0; + hostSrc064[i] = uint64_src0_dist(rd); + hostSrc164[i] = uint64_src12_dist(rd); + hostSrc264[i] = uint64_src12_dist(rd); + } + + HIP_ASSERT(hipMalloc((void**)&deviceOut32, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc032, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc132, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc232, NUM * sizeof(unsigned int))); + + HIP_ASSERT(hipMalloc((void**)&deviceOut64, NUM * sizeof(unsigned long long int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc064, NUM * sizeof(unsigned long long int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc164, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc264, NUM * sizeof(unsigned int))); + + HIP_ASSERT(hipMemcpy(deviceSrc032, hostSrc032, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc132, hostSrc132, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc232, hostSrc232, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + + HIP_ASSERT(hipMemcpy(deviceSrc064, hostSrc064, NUM * sizeof(unsigned long long int), + hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc164, hostSrc164, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc264, hostSrc264, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + + + hipLaunchKernel(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), + 0, 0, + deviceOut32, deviceSrc032, deviceSrc132, deviceSrc232, + deviceOut64, deviceSrc064, deviceSrc164, deviceSrc264); + + + HIP_ASSERT(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); + HIP_ASSERT(hipMemcpy(hostOut64, deviceOut64, + NUM * sizeof(unsigned long long int), hipMemcpyDeviceToHost)); + + // verify the results + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostOut32[i] != bit_extract(hostSrc032[i], hostSrc132[i], hostSrc232[i])) { + errors++; +#if TEST_DEBUG + cout << "device: " << hostOut32[i] << " host: " + << bit_extract(hostSrc032[i], hostSrc132[i], hostSrc232[i]) + << " " << hostSrc032[i] << " " << hostSrc132[i] << " " << hostSrc232[i] << "\n"; +#endif + } + } + if (errors != 0) { + cout << "__bitextract_u32() FAILED\n" << endl; + return -1; + } else { + cout << "__bitextract_u32() checked!" << endl; + } + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostOut64[i] != bit_extract(hostSrc064[i], hostSrc164[i], hostSrc264[i])) { + errors++; +#if TEST_DEBUG + cout << "device: " << hostOut64[i] << " host: " + << bit_extract(hostSrc064[i], hostSrc164[i], hostSrc264[i]) + << " " << hostSrc064[i] << " " << hostSrc164[i] << " " << hostSrc264[i] << "\n"; +#endif + } + } + if (errors != 0) { + cout << "__bitextract_u64() FAILED" << endl; + return -1; + } else { + cout << "__bitextract_u64() checked!" << endl; + } + + cout << "__bitextract_u32() and __bitextract_u64() PASSED!" << endl; + + HIP_ASSERT(hipFree(deviceOut32)); + HIP_ASSERT(hipFree(deviceSrc032)); + HIP_ASSERT(hipFree(deviceSrc132)); + HIP_ASSERT(hipFree(deviceSrc232)); + HIP_ASSERT(hipFree(deviceOut64)); + HIP_ASSERT(hipFree(deviceSrc064)); + HIP_ASSERT(hipFree(deviceSrc164)); + HIP_ASSERT(hipFree(deviceSrc264)); + + free(hostOut32); + free(hostSrc032); + free(hostSrc132); + free(hostSrc232); + free(hostOut64); + free(hostSrc064); + free(hostSrc164); + free(hostSrc264); + + return errors; +} diff --git a/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp b/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp new file mode 100644 index 0000000000..135234dceb --- /dev/null +++ b/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp @@ -0,0 +1,239 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include +#include +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include + +#define HIP_ASSERT(x) (assert((x) == hipSuccess)) + +#define TEST_DEBUG (0) + + +// CPU implementation of bitinsert +template +T bit_insert(T src0, T src1, unsigned int src2, unsigned int src3) { + unsigned int bits = sizeof(T) * 8; + T offset = src2 & (bits - 1); + T width = src3 & (bits - 1); + T mask = (1 << width) - 1; + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + +__global__ void HIP_kernel(hipLaunchParm lp, unsigned int* out32, + unsigned int* in32_0, unsigned int* in32_1, + unsigned int* in32_2, unsigned int* in32_3, + unsigned long long int* out64, unsigned long long int* in64_0, + unsigned long long int* in64_1, unsigned int* in64_2, + unsigned int* in64_3) { + int x = blockDim.x * blockIdx.x + threadIdx.x; + + out32[x] = __bitinsert_u32(in32_0[x], in32_1[x], in32_2[x], in32_3[x]); + out64[x] = __bitinsert_u64(in64_0[x], in64_1[x], in64_2[x], in64_3[x]); +} + + +using namespace std; + +int main() { + unsigned int* hostOut32; + unsigned int* hostSrc032; + unsigned int* hostSrc132; + unsigned int* hostSrc232; + unsigned int* hostSrc332; + unsigned long long int* hostOut64; + unsigned long long int* hostSrc064; + unsigned long long int* hostSrc164; + unsigned int* hostSrc264; + unsigned int* hostSrc364; + + unsigned int* deviceOut32; + unsigned int* deviceSrc032; + unsigned int* deviceSrc132; + unsigned int* deviceSrc232; + unsigned int* deviceSrc332; + unsigned long long int* deviceOut64; + unsigned long long int* deviceSrc064; + unsigned long long int* deviceSrc164; + unsigned int* deviceSrc264; + unsigned int* deviceSrc364; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + cout << " System minor " << devProp.minor << endl; + cout << " System major " << devProp.major << endl; + cout << " agent prop name " << devProp.name << endl; + + cout << "hip Device prop succeeded " << endl; + + unsigned int wave_size = devProp.warpSize; + unsigned int num_waves_per_block = 2; + unsigned int num_threads_per_block = wave_size * num_waves_per_block; + unsigned int num_blocks = 2; + unsigned int NUM = num_threads_per_block * num_blocks; + + int i; + int errors; + + hostOut32 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc032 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc132 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc232 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc332 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + + hostOut64 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); + hostSrc064 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); + hostSrc164 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); + hostSrc264 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + hostSrc364 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); + + // initialize the input data + std::random_device rd; + std::uniform_int_distribution uint32_src01_dist; + std::uniform_int_distribution uint32_src23_dist(0,31); + std::uniform_int_distribution uint64_src01_dist; + std::uniform_int_distribution uint64_src23_dist(0,63); + for (i = 0; i < NUM; i++) { + hostOut32[i] = 0; + hostSrc032[i] = uint32_src01_dist(rd); + hostSrc132[i] = uint32_src01_dist(rd); + hostSrc232[i] = uint32_src23_dist(rd); + hostSrc232[i] = uint32_src23_dist(rd); + hostOut64[i] = 0; + hostSrc064[i] = uint64_src01_dist(rd); + hostSrc164[i] = uint64_src01_dist(rd); + hostSrc264[i] = uint64_src23_dist(rd); + hostSrc264[i] = uint64_src23_dist(rd); + } + + HIP_ASSERT(hipMalloc((void**)&deviceOut32, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc032, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc132, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc232, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc332, NUM * sizeof(unsigned int))); + + HIP_ASSERT(hipMalloc((void**)&deviceOut64, NUM * sizeof(unsigned long long int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc064, NUM * sizeof(unsigned long long int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc164, NUM * sizeof(unsigned long long int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc264, NUM * sizeof(unsigned int))); + HIP_ASSERT(hipMalloc((void**)&deviceSrc364, NUM * sizeof(unsigned int))); + + HIP_ASSERT(hipMemcpy(deviceSrc032, hostSrc032, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc132, hostSrc132, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc232, hostSrc232, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc332, hostSrc332, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + + HIP_ASSERT(hipMemcpy(deviceSrc064, hostSrc064, NUM * sizeof(unsigned long long int), + hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc164, hostSrc164, NUM * sizeof(unsigned long long int), + hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc264, hostSrc264, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceSrc364, hostSrc364, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); + + + hipLaunchKernel(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), + 0, 0, + deviceOut32, deviceSrc032, deviceSrc132, deviceSrc232, deviceSrc332, + deviceOut64, deviceSrc064, deviceSrc164, deviceSrc264, deviceSrc364); + + + HIP_ASSERT(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); + HIP_ASSERT(hipMemcpy(hostOut64, deviceOut64, + NUM * sizeof(unsigned long long int), hipMemcpyDeviceToHost)); + + // verify the results + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostOut32[i] != bit_insert(hostSrc032[i], hostSrc132[i], + hostSrc232[i], hostSrc332[i])) { + errors++; +#if TEST_DEBUG + cout << "device: " << hostOut32[i] << " host: " + << bit_insert(hostSrc032[i], hostSrc132[i], hostSrc232[i], hostSrc332[i]) + << " " << hostSrc032[i] << " " << hostSrc132[i] << " " << hostSrc232[i] + << " " << hostSrc332[i] << "\n"; +#endif + } + } + if (errors != 0) { + cout << "__bitinsert_u32() FAILED\n" << endl; + return -1; + } else { + cout << "__bitinsert_u32() checked!" << endl; + } + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostOut64[i] != bit_insert(hostSrc064[i], hostSrc164[i], + hostSrc264[i], hostSrc364[i])) { + errors++; +#if TEST_DEBUG + cout << "device: " << hostOut64[i] << " host: " + << bit_insert(hostSrc064[i], hostSrc164[i], hostSrc264[i], hostSrc364[i]) + << " " << hostSrc064[i] << " " << hostSrc164[i] << " " << hostSrc264[i] + << " " << hostSrc364[i] << "\n"; +#endif + } + } + if (errors != 0) { + cout << "__bitinsert_u64() FAILED" << endl; + return -1; + } else { + cout << "__bitinsert_u64() checked!" << endl; + } + + cout << "__bitinsert_u32() and __bitinsert_u64() PASSED!" << endl; + + HIP_ASSERT(hipFree(deviceOut32)); + HIP_ASSERT(hipFree(deviceSrc032)); + HIP_ASSERT(hipFree(deviceSrc132)); + HIP_ASSERT(hipFree(deviceSrc232)); + HIP_ASSERT(hipFree(deviceSrc332)); + HIP_ASSERT(hipFree(deviceOut64)); + HIP_ASSERT(hipFree(deviceSrc064)); + HIP_ASSERT(hipFree(deviceSrc164)); + HIP_ASSERT(hipFree(deviceSrc264)); + HIP_ASSERT(hipFree(deviceSrc364)); + + free(hostOut32); + free(hostSrc032); + free(hostSrc132); + free(hostSrc232); + free(hostSrc332); + free(hostOut64); + free(hostSrc064); + free(hostSrc164); + free(hostSrc264); + free(hostSrc364); + + return errors; +} From 1a34cbc1c47ae67a46e2271675ec0d00118a3afd Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 29 Aug 2018 15:29:53 +0000 Subject: [PATCH 5/5] Remove testing bitinsert/extract on nvcc path [ROCm/hip commit: bbcb3fa0c0258faa692502ba82af9a2dbb60b288] --- projects/hip/tests/src/deviceLib/hip_bitextract.cpp | 2 +- projects/hip/tests/src/deviceLib/hip_bitinsert.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/hip/tests/src/deviceLib/hip_bitextract.cpp b/projects/hip/tests/src/deviceLib/hip_bitextract.cpp index 3c3ca33d69..34e0be2b57 100644 --- a/projects/hip/tests/src/deviceLib/hip_bitextract.cpp +++ b/projects/hip/tests/src/deviceLib/hip_bitextract.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * RUN: %t * HIT_END */ diff --git a/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp b/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp index 135234dceb..063281768c 100644 --- a/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp +++ b/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * RUN: %t * HIT_END */