[ROCm/hip commit: 12d9d9be82]
Этот коммит содержится в:
Evgeny Mankov
2016-02-10 17:21:53 +03:00
родитель 3a032ff317 4c861f0284
Коммит 1a9bd34ca2
3 изменённых файлов: 20 добавлений и 17 удалений
+2 -2
Просмотреть файл
@@ -216,11 +216,11 @@ __global__ void testKernel(hipLaunchParm lp,int *g_odata)
// Atomic increment (modulo 17+1)
//atomicInc((unsigned int *)&g_odata[5], 17);
atomicInc((unsigned int *)&g_odata[5]);
//atomicInc((unsigned int *)&g_odata[5]);
// Atomic decrement
// atomicDec((unsigned int *)&g_odata[6], 137);
atomicDec((unsigned int *)&g_odata[6]);
//atomicDec((unsigned int *)&g_odata[6]);
// Atomic compare-and-swap
atomicCAS(&g_odata[7], tid-1, tid);
+11 -8
Просмотреть файл
@@ -28,22 +28,25 @@ THE SOFTWARE.
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
// Warp-vote test kernel: every thread stores the result of __any() and
// __all() for the predicate (tid - 77) into a per-warp slot of device_any /
// device_all.
//
// NOTE(review): this span comes from a commit diff whose +/- markers were
// lost, so the signature and both stores appear twice: once with a hard-coded
// shift of 6 and once with the shift passed in as pshift. Presumably the
// pshift variant is the post-change code -- confirm against the repository.
//
// Parameters (as used in the body below):
//   lp                  - HIP launch-parameter handle; not referenced in the body.
//   device_any          - output array receiving the __any() result.
//   device_all          - output array receiving the __all() result.
//   Num_Warps_per_Block - not referenced in the body.
//   pshift              - right-shift applied to hipThreadIdx_x to form the
//                         output index; the host code in this diff passes 6
//                         when it selects warpSize 64 and 5 when it selects 32.
__global__ void
warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block)
warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift)
{
// Global linear thread index across the whole grid.
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
// The predicate (tid - 77) is zero only for the thread with tid == 77.
// Variant with the shift hard-coded to 6 (index = hipThreadIdx_x / 64).
device_any[hipThreadIdx_x>>6] = __any(tid -77);
device_all[hipThreadIdx_x>>6] = __all(tid -77);
// Variant with the shift supplied by the caller. The index is built from the
// block-local hipThreadIdx_x, not from tid, so it carries no block offset.
device_any[hipThreadIdx_x>>pshift] = __any(tid -77);
device_all[hipThreadIdx_x>>pshift] = __all(tid -77);
}
int main(int argc, char *argv[])
{ int warpSize;
{ int warpSize, pshift;
hipDeviceProp_t devProp;
hipDeviceGetProperties(&devProp, 0);
if(strncmp(devProp.name,"Fiji",1)==0) warpSize =64;
else warpSize =32;
if(strncmp(devProp.name,"Fiji",1)==0)
{ warpSize =64;
pshift =6;
}
else {warpSize =32; pshift=5;}
int anycount =0;
int allcount =0;
int Num_Threads_per_Block = 1024;
@@ -65,7 +68,7 @@ for (int i=0; i<Num_Warps_per_Grid; i++)
HIP_ASSERT(hipMemcpy(device_any, host_any,sizeof(int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(device_all, host_all,sizeof(int), hipMemcpyHostToDevice));
hipLaunchKernel(warpvote, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_any, device_all ,Num_Warps_per_Block);
hipLaunchKernel(warpvote, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_any, device_all ,Num_Warps_per_Block,pshift);
HIP_ASSERT(hipMemcpy(host_any, device_any, Num_Warps_per_Grid*sizeof(int), hipMemcpyDeviceToHost));
@@ -78,7 +81,7 @@ for (int i=0; i<Num_Warps_per_Grid; i++)
if (host_all[i]!=1) ++allcount;
}
if (anycount == 0 && allcount ==1) printf("PASSED"); else printf("FAILED");
if (anycount == 0 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
return EXIT_SUCCESS;
+7 -7
Просмотреть файл
@@ -4,22 +4,22 @@
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
// Ballot test kernel: each thread evaluates __ballot() on the predicate
// (tid - 245), counts the set bits with __popcll(), and atomically adds that
// count to the device_ballot slot for its warp.
//
// NOTE(review): this span comes from a commit diff whose +/- markers were
// lost, so the signature and the warp_num computation appear twice: once with
// a hard-coded shift of 6 and once with the shift passed in as pshift.
// Presumably the pshift variant is the post-change code -- confirm against
// the repository.
//
// Parameters (as used in the body below):
//   lp                  - HIP launch-parameter handle; not referenced in the body.
//   device_ballot       - accumulator array, indexed by
//                         warp_num + hipBlockIdx_x * Num_Warps_per_Block.
//   Num_Warps_per_Block - stride between consecutive blocks' slots in
//                         device_ballot.
//   pshift              - right-shift applied to hipThreadIdx_x to form
//                         warp_num; the host code in this diff passes 6 when
//                         it selects warpSize 64 and 5 when it selects 32.
__global__ void
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block)
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift)
{
// Global linear thread index across the whole grid.
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
// Variant with the shift hard-coded to 6 (warp_num = hipThreadIdx_x / 64).
const unsigned int warp_num = hipThreadIdx_x >> 6;
// Variant with the shift supplied by the caller.
const unsigned int warp_num = hipThreadIdx_x >> pshift;
// The predicate (tid - 245) is zero only for the thread with tid == 245.
// Every thread that reaches this line performs its own atomicAdd, so a slot
// receives one contribution per participating thread, not one per warp.
atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));
}
int main(int argc, char *argv[])
{ int warpSize;
{ int warpSize, pshift;
hipDeviceProp_t devProp;
hipDeviceGetProperties(&devProp, 0);
if(strncmp(devProp.name,"Fiji",1)==0) warpSize =64;
else warpSize =32;
if(strncmp(devProp.name,"Fiji",1)==0) {warpSize =64; pshift =6;}
else {warpSize =32; pshift =5;}
unsigned int Num_Threads_per_Block = 512;
unsigned int Num_Blocks_per_Grid = 1;
unsigned int Num_Warps_per_Block = Num_Threads_per_Block/warpSize;
@@ -33,7 +33,7 @@ int main(int argc, char *argv[])
HIP_ASSERT(hipMemcpy(device_ballot, host_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyHostToDevice));
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block);
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block,pshift);
HIP_ASSERT(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyDeviceToHost));
@@ -45,7 +45,7 @@ int main(int argc, char *argv[])
divergent_count++;}
}
if (divergent_count==1) printf("PASSED"); else printf("FAILED");
if (divergent_count==1) printf("PASSED\n"); else printf("FAILED\n");
return EXIT_SUCCESS;
}