Merge "fix_testcase" into amd-master

[ROCm/clr commit: 79cb250265]
This commit is contained in:
Maneesh Gupta
2016-06-10 12:44:05 -04:00
committed by Gerrit Code Review
4 changed files with 111 additions and 155 deletions
+22 -32
View File
@@ -48,11 +48,11 @@ template<typename T>
{
T count = sizeof(num) * 8 - 1;
T reverse_num = num;
num >>= 1;
num >>= 1;
while(num)
{
reverse_num <<= 1;
reverse_num <<= 1;
reverse_num |= num & 1;
num >>= 1;
count--;
@@ -60,12 +60,12 @@ template<typename T>
reverse_num <<= count;
return reverse_num;
}
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height)
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
@@ -82,10 +82,10 @@ HIP_kernel(hipLaunchParm lp,
using namespace std;
int main() {
unsigned int* hostA;
unsigned int* hostB;
unsigned long long int* hostC;
unsigned long long int* hostC;
unsigned long long int* hostD;
unsigned int* deviceA;
@@ -115,17 +115,17 @@ int main() {
hostB[i] = i;
hostD[i] = i;
}
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned long long int)));
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
hipLaunchKernel(HIP_kernel,
hipLaunchKernel(HIP_kernel,
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
0, 0,
@@ -134,32 +134,35 @@ int main() {
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned long long int), hipMemcpyDeviceToHost));
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_brev =%d, cpu_brev =%d \n",hostA[i],bitreverse(hostB[i]));
if (hostA[i] != bitreverse(hostB[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "__brev() FAILED\n" << endl;
return -1;
} else {
printf ("__brev() PASSED!\n");
cout << "__brev() checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_brevll =%llu, cpu_brevll =%llu \n",hostC[i],bitreverse(hostD[i]));
if (hostC[i] != bitreverse(hostD[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "__brevll() FAILED" << endl;
return -1;
} else {
printf ("__brevll() PASSED!\n");
cout << "__brevll() checked!" << endl;
}
cout << "__brev() and __brevll() PASSED!" << endl;
HIP_ASSERT(hipFree(deviceA));
HIP_ASSERT(hipFree(deviceB));
HIP_ASSERT(hipFree(deviceC));
@@ -170,19 +173,6 @@ int main() {
free(hostC);
free(hostD);
//hipResetDefaultAccelerator();
return errors;
}
+40 -50
View File
@@ -41,45 +41,45 @@ THE SOFTWARE.
#define THREADS_PER_BLOCK_Z 1
/**
 * CPU reference used to check the GPU leading-zero count for unsigned int.
 *
 * Shifts the value left until its most significant bit is set and reports
 * how many shifts that took.
 *
 * @param a value to inspect
 * @return number of zero bits above the highest set bit of a; all bits set
 *         (the unsigned form of -1) when a is zero.
 */
unsigned int firstbit_u32(unsigned int a)
{
    if (a == 0) {
        // Same value the original implicit conversion of -1 produced.
        return ~0u;
    }

    // Mask selecting only the most significant bit; testing it directly
    // avoids reinterpreting the unsigned value as a signed int.
    const unsigned int top_bit = ~(~0u >> 1);

    unsigned int pos = 0;
    while ((a & top_bit) == 0) {
        a <<= 1;
        ++pos;
    }
    return pos;
}
/**
 * CPU reference used to check the GPU leading-bit count for signed int.
 *
 * Non-negative inputs are passed through unchanged; negative inputs are
 * bitwise complemented first. The result is whatever firstbit_u32 reports
 * for that adjusted value.
 *
 * @param a signed value to inspect
 * @return firstbit_u32 of a (a >= 0) or of ~a (a < 0)
 */
unsigned int firstbit_s32(int a)
{
    // Complement negative numbers so the sign bits become leading zeros.
    const unsigned int magnitude_bits =
        static_cast<unsigned int>(a >= 0 ? a : ~a);
    return firstbit_u32(magnitude_bits);
}
/**
 * CPU reference used to check the GPU leading-zero count for
 * unsigned long long.
 *
 * Shifts the value left until its most significant bit is set and reports
 * how many shifts that took.
 *
 * @param a value to inspect
 * @return number of zero bits above the highest set bit of a; all bits of
 *         unsigned int set (the unsigned form of -1) when a is zero.
 */
unsigned int firstbit_u64(unsigned long long int a)
{
    if (a == 0) {
        // Same value the original implicit conversion of -1 produced.
        return ~0u;
    }

    // Mask selecting only the most significant bit; testing it directly
    // avoids reinterpreting the unsigned value as a signed long long.
    const unsigned long long int top_bit = ~(~0ULL >> 1);

    unsigned int pos = 0;
    while ((a & top_bit) == 0) {
        a <<= 1;
        ++pos;
    }
    return pos;
}
/**
 * CPU reference used to check the GPU leading-bit count for signed
 * long long.
 *
 * Non-negative inputs are passed through unchanged; negative inputs are
 * bitwise complemented first. The result is whatever firstbit_u64 reports
 * for that adjusted value.
 *
 * @param a signed value to inspect
 * @return firstbit_u64 of a (a >= 0) or of ~a (a < 0)
 */
unsigned int firstbit_s64(long long int a)
{
    // Complement negative numbers so the sign bits become leading zeros.
    const unsigned long long int magnitude_bits =
        static_cast<unsigned long long int>(a >= 0 ? a : ~a);
    return firstbit_u64(magnitude_bits);
}
__global__ void
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height)
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
@@ -98,14 +98,14 @@ HIP_kernel(hipLaunchParm lp,
using namespace std;
int main() {
unsigned int* hostA;
unsigned int* hostB;
unsigned int* hostC;
unsigned int* hostC;
unsigned long long int* hostD;
unsigned int* hostE;
int* hostF;
unsigned int* hostG;
unsigned int* hostG;
long long int* hostH;
unsigned int* deviceA;
@@ -145,7 +145,7 @@ int main() {
hostF[i] = -2100+i;
hostH[i] = 1099511627776+i;
}
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
@@ -154,13 +154,13 @@ int main() {
HIP_ASSERT(hipMalloc((void**)&deviceF, NUM * sizeof(int)));
HIP_ASSERT(hipMalloc((void**)&deviceG, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceH, NUM * sizeof(long long int)));
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceF, hostF, NUM*sizeof(int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceH, hostD, NUM*sizeof(long long int), hipMemcpyHostToDevice));
hipLaunchKernel(HIP_kernel,
hipLaunchKernel(HIP_kernel,
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
0, 0,
@@ -171,56 +171,59 @@ int main() {
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostE, deviceE, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostG, deviceG, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_clz_u =%d, cpu_clz_u =%d \n",hostA[i],firstbit_u32(hostB[i]));
if (hostA[i] != firstbit_u32(hostB[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED clz" << endl;
return -1;
} else {
printf ("__clz_u() for unsigned PASSED!\n");
cout << "__clz_u() for unsigned checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_clzll_u =%d, cpu_clzll_u =%d \n",hostC[i],firstbit_u64(hostD[i]));
if (hostC[i] != firstbit_u64(hostD[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED clz" << endl;
return -1;
} else {
printf ("__clzll_u() for unsigned PASSED!\n");
cout << "__clzll_u() for unsigned checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_clz_s =%d, cpu_clz_s =%d \n",hostE[i],firstbit_s32(hostF[i]));
if (hostE[i] != firstbit_s32(hostF[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED clz\n" << endl;
return -1;
} else {
printf ("__clz_s() PASSED!\n");
cout << "__clz_s() checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_clzll_s =%d, cpu_clzll_s =%d \n",hostG[i],firstbit_s64(hostH[i]));
if (hostG[i] != firstbit_s64(hostH[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED clz" << endl;
return -1;
} else {
printf ("__clzll_s() PASSED!\n");
cout << "__clzll_s() checked!" << endl;
}
cout << "clz test PASSED!" << endl;
HIP_ASSERT(hipFree(deviceA));
HIP_ASSERT(hipFree(deviceB));
HIP_ASSERT(hipFree(deviceC));
@@ -239,19 +242,6 @@ int main() {
free(hostG);
free(hostH);
//hipResetDefaultAccelerator();
//return errors;
return errors;
}
+30 -44
View File
@@ -42,23 +42,23 @@ THE SOFTWARE.
/**
 * CPU reference used to check the GPU find-first-set results.
 *
 * Shifts the value right until its lowest bit is set, counting positions
 * starting from 1.
 *
 * @tparam T integer type of the value
 * @param a value to inspect
 * @return 1-based position of the least significant set bit, or 0 when a
 *         is zero.
 */
template<typename T>
int lastbit(T a)
{
    if (a == 0) {
        return 0;
    }

    int pos = 1;
    while ((a & 1) != 1) {
        a >>= 1;
        ++pos;
    }
    return pos;
}
__global__ void
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d,
int width, int height)
unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d,
int width, int height)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
@@ -66,28 +66,26 @@ HIP_kernel(hipLaunchParm lp,
if ( i < (width * height)) {
a[i] = __ffs(b[i]);
c[i] = __ffsll(d[i]);
}
}
using namespace std;
int main() {
unsigned int* hostA;
unsigned int* hostB;
unsigned int* hostC;
unsigned int* hostC;
unsigned long long int* hostD;
unsigned int* deviceA;
unsigned int* deviceB;
unsigned int* deviceC;
unsigned long long int* deviceD;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
cout << " System minor " << devProp.minor << endl;
@@ -104,23 +102,22 @@ int main() {
hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int));
// initialize the input data
for (i = 0; i < NUM; i++) {
hostB[i] = i;
hostD[i] = 1099511627776+i;
}
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
;
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
hipLaunchKernel(HIP_kernel,
hipLaunchKernel(HIP_kernel,
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
0, 0,
@@ -129,57 +126,46 @@ int main() {
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_ffs =%d, cpu_ffs =%d \n",hostA[i],lastbit(hostB[i]));
if (hostA[i] != lastbit(hostB[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED: ffs" << endl;
return -1;
} else {
printf ("__ffs() for unsigned PASSED!\n");
cout << "__ffs() for unsigned checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_ffsll =%d, cpu_ffsll =%d \n",hostC[i],lastbit(hostD[i]));
if (hostC[i] != lastbit(hostD[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED: ffs" << endl;
return -1;
} else {
printf ("__ffsll() for unsigned PASSED!\n");
cout << "__ffsll() for unsigned checked!" << endl;
}
cout << "ffs test PASSED!" << endl;
HIP_ASSERT(hipFree(deviceA));
HIP_ASSERT(hipFree(deviceB));
HIP_ASSERT(hipFree(deviceC));
HIP_ASSERT(hipFree(deviceD));
free(hostA);
free(hostB);
free(hostC);
free(hostD);
//hipResetDefaultAccelerator();
//return errors;
return errors;
}
+19 -29
View File
@@ -52,12 +52,12 @@ unsigned int popcountCPU( T value) {
}
return ret;
}
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
@@ -74,10 +74,10 @@ HIP_kernel(hipLaunchParm lp,
using namespace std;
int main() {
unsigned int* hostA;
unsigned int* hostB;
unsigned int* hostC;
unsigned int* hostC;
unsigned long long int* hostD;
unsigned int* deviceA;
@@ -107,17 +107,17 @@ int main() {
hostB[i] = i;
hostD[i] = 1099511627776-i;
}
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
hipLaunchKernel(HIP_kernel,
hipLaunchKernel(HIP_kernel,
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
0, 0,
@@ -129,28 +129,31 @@ int main() {
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_popc =%d, cpu_popc =%d \n",hostA[i],popcountCPU(hostB[i]));
if (hostA[i] != popcountCPU(hostB[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED: popc" << endl;
return -1;
} else {
printf ("__popc() PASSED!\n");
cout << "__popc() checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_popcll =%d, cpu_popcll =%d \n",hostC[i],popcountCPU(hostD[i]));
if (hostC[i] != popcountCPU(hostD[i])) {
errors++;
}
}
if (errors!=0) {
printf("FAILED: %d errors\n",errors);
cout << "FAILED:popc" << endl;
return -1;
} else {
printf ("__popcll() PASSED!\n");
cout << "__popcll() checked!" << endl;
}
cout << "popc test PASSED!" << endl;
HIP_ASSERT(hipFree(deviceA));
HIP_ASSERT(hipFree(deviceB));
HIP_ASSERT(hipFree(deviceC));
@@ -161,19 +164,6 @@ int main() {
free(hostC);
free(hostD);
//hipResetDefaultAccelerator();
return errors;
}