@@ -48,11 +48,11 @@ template<typename T>
|
||||
{
|
||||
T count = sizeof(num) * 8 - 1;
|
||||
T reverse_num = num;
|
||||
|
||||
num >>= 1;
|
||||
|
||||
num >>= 1;
|
||||
while(num)
|
||||
{
|
||||
reverse_num <<= 1;
|
||||
reverse_num <<= 1;
|
||||
reverse_num |= num & 1;
|
||||
num >>= 1;
|
||||
count--;
|
||||
@@ -60,12 +60,12 @@ template<typename T>
|
||||
reverse_num <<= count;
|
||||
return reverse_num;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height)
|
||||
|
||||
__global__ void
|
||||
HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height)
|
||||
{
|
||||
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -82,10 +82,10 @@ HIP_kernel(hipLaunchParm lp,
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
|
||||
unsigned int* hostA;
|
||||
unsigned int* hostB;
|
||||
unsigned long long int* hostC;
|
||||
unsigned long long int* hostC;
|
||||
unsigned long long int* hostD;
|
||||
|
||||
unsigned int* deviceA;
|
||||
@@ -115,17 +115,17 @@ int main() {
|
||||
hostB[i] = i;
|
||||
hostD[i] = i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned long long int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -134,32 +134,35 @@ int main() {
|
||||
|
||||
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned long long int), hipMemcpyDeviceToHost));
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_brev =%d, cpu_brev =%d \n",hostA[i],bitreverse(hostB[i]));
|
||||
if (hostA[i] != bitreverse(hostB[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "__brev() FAILED\n" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__brev() PASSED!\n");
|
||||
cout << "__brev() checked!" << endl;
|
||||
}
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_brevll =%llu, cpu_brevll =%llu \n",hostC[i],bitreverse(hostD[i]));
|
||||
if (hostC[i] != bitreverse(hostD[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "__brevll() FAILED" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__brevll() PASSED!\n");
|
||||
cout << "__brevll() checked!" << endl;
|
||||
}
|
||||
|
||||
cout << "__brev() and __brevll() PASSED!" << endl;
|
||||
|
||||
HIP_ASSERT(hipFree(deviceA));
|
||||
HIP_ASSERT(hipFree(deviceB));
|
||||
HIP_ASSERT(hipFree(deviceC));
|
||||
@@ -170,19 +173,6 @@ int main() {
|
||||
free(hostC);
|
||||
free(hostD);
|
||||
|
||||
|
||||
//hipResetDefaultAccelerator();
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -41,45 +41,45 @@ THE SOFTWARE.
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
/**
 * Host-side reference: number of zero bits above the most significant set
 * bit of a 32-bit unsigned value.
 *
 * The value is shifted left one bit at a time until its top bit is set; the
 * number of shifts performed is the result.
 *
 * @param a  value to inspect
 * @return   count of leading zero bits (0 when the top bit is already set),
 *           or the all-ones value (unsigned -1) when a == 0
 */
unsigned int firstbit_u32(unsigned int a)
{
    if (a == 0)
        return static_cast<unsigned int>(-1);  // sentinel: no bit is set

    unsigned int pos = 0;
    // Reinterpreted as signed, the value is positive exactly while the top
    // bit is clear (a is never zero here), so the loop stops once the
    // highest set bit has reached the sign position.
    while (static_cast<int>(a) > 0) {
        a <<= 1;
        pos++;
    }
    return pos;
}
|
||||
unsigned int firstbit_s32(int a)
|
||||
{
|
||||
unsigned int u = a >= 0? a: ~a; // complement negative numbers
|
||||
{
|
||||
unsigned int u = a >= 0? a: ~a; // complement negative numbers
|
||||
return firstbit_u32(u);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Host-side reference: number of zero bits above the most significant set
 * bit of a 64-bit unsigned value.
 *
 * The value is shifted left one bit at a time until its top bit is set; the
 * number of shifts performed is the result.
 *
 * @param a  value to inspect
 * @return   count of leading zero bits (0 when the top bit is already set),
 *           or the all-ones unsigned int value (unsigned -1) when a == 0
 */
unsigned int firstbit_u64(unsigned long long int a)
{
    if (a == 0)
        return static_cast<unsigned int>(-1);  // sentinel: no bit is set

    unsigned int pos = 0;
    // Reinterpreted as signed, the value is positive exactly while the top
    // bit is clear (a is never zero here), so the loop stops once the
    // highest set bit has reached the sign position.
    while (static_cast<long long int>(a) > 0) {
        a <<= 1;
        pos++;
    }
    return pos;
}
|
||||
unsigned int firstbit_s64(long long int a)
|
||||
{
|
||||
unsigned long long int u = a >= 0? a: ~a; // complement negative numbers
|
||||
{
|
||||
unsigned long long int u = a >= 0? a: ~a; // complement negative numbers
|
||||
return firstbit_u64(u);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void
|
||||
__global__ void
|
||||
HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
|
||||
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height)
|
||||
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
|
||||
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height)
|
||||
{
|
||||
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -98,14 +98,14 @@ HIP_kernel(hipLaunchParm lp,
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
|
||||
unsigned int* hostA;
|
||||
unsigned int* hostB;
|
||||
unsigned int* hostC;
|
||||
unsigned int* hostC;
|
||||
unsigned long long int* hostD;
|
||||
unsigned int* hostE;
|
||||
int* hostF;
|
||||
unsigned int* hostG;
|
||||
unsigned int* hostG;
|
||||
long long int* hostH;
|
||||
|
||||
unsigned int* deviceA;
|
||||
@@ -145,7 +145,7 @@ int main() {
|
||||
hostF[i] = -2100+i;
|
||||
hostH[i] = 1099511627776+i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
|
||||
@@ -154,13 +154,13 @@ int main() {
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceF, NUM * sizeof(int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceG, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceH, NUM * sizeof(long long int)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceF, hostF, NUM*sizeof(int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceH, hostD, NUM*sizeof(long long int), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -171,56 +171,59 @@ int main() {
|
||||
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
HIP_ASSERT(hipMemcpy(hostE, deviceE, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
HIP_ASSERT(hipMemcpy(hostG, deviceG, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_clz_u =%d, cpu_clz_u =%d \n",hostA[i],firstbit_u32(hostB[i]));
|
||||
if (hostA[i] != firstbit_u32(hostB[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED clz" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__clz_u() for unsigned PASSED!\n");
|
||||
cout << "__clz_u() for unsigned checked!" << endl;
|
||||
}
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_clzll_u =%d, cpu_clzll_u =%d \n",hostC[i],firstbit_u64(hostD[i]));
|
||||
if (hostC[i] != firstbit_u64(hostD[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED clz" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__clzll_u() for unsigned PASSED!\n");
|
||||
cout << "__clzll_u() for unsigned checked!" << endl;
|
||||
}
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_clz_s =%d, cpu_clz_s =%d \n",hostE[i],firstbit_s32(hostF[i]));
|
||||
if (hostE[i] != firstbit_s32(hostF[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED clz\n" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__clz_s() PASSED!\n");
|
||||
cout << "__clz_s() checked!" << endl;
|
||||
}
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_clzll_s =%d, cpu_clzll_s =%d \n",hostG[i],firstbit_s64(hostH[i]));
|
||||
if (hostG[i] != firstbit_s64(hostH[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED clz" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__clzll_s() PASSED!\n");
|
||||
cout << "__clzll_s() checked!" << endl;
|
||||
}
|
||||
|
||||
cout << "clz test PASSED!" << endl;
|
||||
|
||||
HIP_ASSERT(hipFree(deviceA));
|
||||
HIP_ASSERT(hipFree(deviceB));
|
||||
HIP_ASSERT(hipFree(deviceC));
|
||||
@@ -239,19 +242,6 @@ int main() {
|
||||
free(hostG);
|
||||
free(hostH);
|
||||
|
||||
|
||||
//hipResetDefaultAccelerator();
|
||||
|
||||
//return errors;
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -42,23 +42,23 @@ THE SOFTWARE.
|
||||
|
||||
/**
 * Host-side reference: 1-based position of the least significant set bit.
 *
 * The value is shifted right one bit at a time until bit 0 is set; the
 * result is one more than the number of shifts performed.
 *
 * @tparam T  integer type of the value being inspected
 * @param a   value to inspect
 * @return    0 when a == 0, otherwise the 1-based index of the lowest set
 *            bit (1 for an odd value)
 */
template<typename T>
int lastbit(T a)
{
    if (a == 0)
        return 0;  // no bit set

    int pos = 1;
    // a is non-zero here, so a set bit eventually reaches position 0.
    while ((a & 1) != 1) {
        a >>= 1;
        pos++;
    }
    return pos;
}
|
||||
|
||||
|
||||
__global__ void
|
||||
__global__ void
|
||||
HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d,
|
||||
int width, int height)
|
||||
unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d,
|
||||
int width, int height)
|
||||
{
|
||||
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -66,28 +66,26 @@ HIP_kernel(hipLaunchParm lp,
|
||||
if ( i < (width * height)) {
|
||||
a[i] = __ffs(b[i]);
|
||||
c[i] = __ffsll(d[i]);
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
|
||||
unsigned int* hostA;
|
||||
unsigned int* hostB;
|
||||
unsigned int* hostC;
|
||||
unsigned int* hostC;
|
||||
unsigned long long int* hostD;
|
||||
|
||||
|
||||
unsigned int* deviceA;
|
||||
unsigned int* deviceB;
|
||||
unsigned int* deviceC;
|
||||
unsigned long long int* deviceD;
|
||||
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
cout << " System minor " << devProp.minor << endl;
|
||||
@@ -104,23 +102,22 @@ int main() {
|
||||
hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int));
|
||||
hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int));
|
||||
hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int));
|
||||
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = i;
|
||||
hostD[i] = 1099511627776+i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
|
||||
;
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -129,57 +126,46 @@ int main() {
|
||||
|
||||
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_ffs =%d, cpu_ffs =%d \n",hostA[i],lastbit(hostB[i]));
|
||||
if (hostA[i] != lastbit(hostB[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED: ffs" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__ffs() for unsigned PASSED!\n");
|
||||
cout << "__ffs() for unsigned checked!" << endl;
|
||||
}
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_ffsll =%d, cpu_ffsll =%d \n",hostC[i],lastbit(hostD[i]));
|
||||
if (hostC[i] != lastbit(hostD[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED: ffs" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__ffsll() for unsigned PASSED!\n");
|
||||
cout << "__ffsll() for unsigned checked!" << endl;
|
||||
}
|
||||
|
||||
|
||||
cout << "ffs test PASSED!" << endl;
|
||||
|
||||
HIP_ASSERT(hipFree(deviceA));
|
||||
HIP_ASSERT(hipFree(deviceB));
|
||||
HIP_ASSERT(hipFree(deviceC));
|
||||
HIP_ASSERT(hipFree(deviceD));
|
||||
|
||||
|
||||
|
||||
free(hostA);
|
||||
free(hostB);
|
||||
free(hostC);
|
||||
free(hostD);
|
||||
|
||||
|
||||
//hipResetDefaultAccelerator();
|
||||
|
||||
//return errors;
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -52,12 +52,12 @@ unsigned int popcountCPU( T value) {
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
|
||||
|
||||
__global__ void
|
||||
HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
|
||||
{
|
||||
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
@@ -74,10 +74,10 @@ HIP_kernel(hipLaunchParm lp,
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
|
||||
unsigned int* hostA;
|
||||
unsigned int* hostB;
|
||||
unsigned int* hostC;
|
||||
unsigned int* hostC;
|
||||
unsigned long long int* hostD;
|
||||
|
||||
unsigned int* deviceA;
|
||||
@@ -107,17 +107,17 @@ int main() {
|
||||
hostB[i] = i;
|
||||
hostD[i] = 1099511627776-i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
hipLaunchKernel(HIP_kernel,
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -129,28 +129,31 @@ int main() {
|
||||
// verify the results
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_popc =%d, cpu_popc =%d \n",hostA[i],popcountCPU(hostB[i]));
|
||||
if (hostA[i] != popcountCPU(hostB[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED: popc" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__popc() PASSED!\n");
|
||||
cout << "__popc() checked!" << endl;
|
||||
}
|
||||
errors = 0;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
printf("gpu_popcll =%d, cpu_popcll =%d \n",hostC[i],popcountCPU(hostD[i]));
|
||||
if (hostC[i] != popcountCPU(hostD[i])) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors!=0) {
|
||||
printf("FAILED: %d errors\n",errors);
|
||||
cout << "FAILED:popc" << endl;
|
||||
return -1;
|
||||
} else {
|
||||
printf ("__popcll() PASSED!\n");
|
||||
cout << "__popcll() checked!" << endl;
|
||||
}
|
||||
|
||||
cout << "popc test PASSED!" << endl;
|
||||
|
||||
HIP_ASSERT(hipFree(deviceA));
|
||||
HIP_ASSERT(hipFree(deviceB));
|
||||
HIP_ASSERT(hipFree(deviceC));
|
||||
@@ -161,19 +164,6 @@ int main() {
|
||||
free(hostC);
|
||||
free(hostD);
|
||||
|
||||
|
||||
//hipResetDefaultAccelerator();
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user