Merge branch 'privatestaging' of https://github.com/AMDComputeLibraries/HIP-privatestaging into privatestaging
[ROCm/hip commit: 12d9d9be82]
This commit is contained in:
@@ -216,11 +216,11 @@ __global__ void testKernel(hipLaunchParm lp,int *g_odata)
|
||||
|
||||
// Atomic increment (modulo 17+1)
|
||||
//atomicInc((unsigned int *)&g_odata[5], 17);
|
||||
atomicInc((unsigned int *)&g_odata[5]);
|
||||
//atomicInc((unsigned int *)&g_odata[5]);
|
||||
|
||||
// Atomic decrement
|
||||
// atomicDec((unsigned int *)&g_odata[6], 137);
|
||||
atomicDec((unsigned int *)&g_odata[6]);
|
||||
//atomicDec((unsigned int *)&g_odata[6]);
|
||||
|
||||
// Atomic compare-and-swap
|
||||
atomicCAS(&g_odata[7], tid-1, tid);
|
||||
|
||||
@@ -28,22 +28,25 @@ THE SOFTWARE.
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
__global__ void
|
||||
warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block)
|
||||
warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift)
|
||||
{
|
||||
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
device_any[hipThreadIdx_x>>6] = __any(tid -77);
|
||||
device_all[hipThreadIdx_x>>6] = __all(tid -77);
|
||||
device_any[hipThreadIdx_x>>pshift] = __any(tid -77);
|
||||
device_all[hipThreadIdx_x>>pshift] = __all(tid -77);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{ int warpSize;
|
||||
{ int warpSize, pshift;
|
||||
hipDeviceProp_t devProp;
|
||||
hipDeviceGetProperties(&devProp, 0);
|
||||
if(strncmp(devProp.name,"Fiji",1)==0) warpSize =64;
|
||||
else warpSize =32;
|
||||
if(strncmp(devProp.name,"Fiji",1)==0)
|
||||
{ warpSize =64;
|
||||
pshift =6;
|
||||
}
|
||||
else {warpSize =32; pshift=5;}
|
||||
int anycount =0;
|
||||
int allcount =0;
|
||||
int Num_Threads_per_Block = 1024;
|
||||
@@ -65,7 +68,7 @@ for (int i=0; i<Num_Warps_per_Grid; i++)
|
||||
HIP_ASSERT(hipMemcpy(device_any, host_any,sizeof(int), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(device_all, host_all,sizeof(int), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(warpvote, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_any, device_all ,Num_Warps_per_Block);
|
||||
hipLaunchKernel(warpvote, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_any, device_all ,Num_Warps_per_Block,pshift);
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(host_any, device_any, Num_Warps_per_Grid*sizeof(int), hipMemcpyDeviceToHost));
|
||||
@@ -78,7 +81,7 @@ for (int i=0; i<Num_Warps_per_Grid; i++)
|
||||
if (host_all[i]!=1) ++allcount;
|
||||
|
||||
}
|
||||
if (anycount == 0 && allcount ==1) printf("PASSED"); else printf("FAILED");
|
||||
if (anycount == 0 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
|
||||
@@ -4,22 +4,22 @@
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
__global__ void
|
||||
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block)
|
||||
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift)
|
||||
{
|
||||
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
const unsigned int warp_num = hipThreadIdx_x >> 6;
|
||||
const unsigned int warp_num = hipThreadIdx_x >> pshift;
|
||||
atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{ int warpSize;
|
||||
{ int warpSize, pshift;
|
||||
hipDeviceProp_t devProp;
|
||||
hipDeviceGetProperties(&devProp, 0);
|
||||
if(strncmp(devProp.name,"Fiji",1)==0) warpSize =64;
|
||||
else warpSize =32;
|
||||
if(strncmp(devProp.name,"Fiji",1)==0) {warpSize =64; pshift =6;}
|
||||
else {warpSize =32; pshift =5;}
|
||||
unsigned int Num_Threads_per_Block = 512;
|
||||
unsigned int Num_Blocks_per_Grid = 1;
|
||||
unsigned int Num_Warps_per_Block = Num_Threads_per_Block/warpSize;
|
||||
@@ -33,7 +33,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
HIP_ASSERT(hipMemcpy(device_ballot, host_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block);
|
||||
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block,pshift);
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
@@ -45,7 +45,7 @@ int main(int argc, char *argv[])
|
||||
divergent_count++;}
|
||||
}
|
||||
|
||||
if (divergent_count==1) printf("PASSED"); else printf("FAILED");
|
||||
if (divergent_count==1) printf("PASSED\n"); else printf("FAILED\n");
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
Reference in new issue
Block a user