Added a test for ballot and removed HIP_FUNCTION from hipSampleAtomicsTest.cpp -sandeep
[ROCm/hip commit: 2b02fe082f]
Bu işleme şunda yer alıyor:
@@ -103,6 +103,7 @@ endmacro()
|
||||
#install (TARGETS hipMemset DESTINATION bin)
|
||||
#install (TARGETS hipEventRecord DESTINATION bin)
|
||||
|
||||
make_hip_executable (hip_ballot hip_ballot.cpp)
|
||||
make_hip_executable (hip_anyall hip_anyall.cpp)
|
||||
make_hip_executable (hip_popc hip_popc.cpp)
|
||||
make_hip_executable (hip_clz hip_clz.cpp)
|
||||
@@ -120,6 +121,7 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis
|
||||
make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp)
|
||||
target_link_libraries(hipMathFunctionsHost m)
|
||||
|
||||
make_test(hip_ballot " " )
|
||||
make_test(hip_anyall " " )
|
||||
make_test(hip_popc " " )
|
||||
make_test(hip_brev " " )
|
||||
|
||||
@@ -190,7 +190,7 @@ int computeGold(int *gpuData, const int len)
|
||||
return true;
|
||||
}
|
||||
|
||||
__global__ void HIP_FUNCTION(testKernel,int *g_odata)
|
||||
__global__ void testKernel(hipLaunchParm lp,int *g_odata)
|
||||
{
|
||||
// access thread id
|
||||
const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
@@ -236,7 +236,7 @@ __global__ void HIP_FUNCTION(testKernel,int *g_odata)
|
||||
// Atomic XOR
|
||||
atomicXor(&g_odata[10], tid);
|
||||
}
|
||||
HIP_FUNCTION_END
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
|
||||
@@ -32,8 +32,8 @@ __global__ void
|
||||
{
|
||||
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
device_any[hipThreadIdx_x>>6] = __any(tid >77);
|
||||
device_all[hipThreadIdx_x>>6] = __all(tid >77);
|
||||
device_any[hipThreadIdx_x>>6] = __any(tid -77);
|
||||
device_all[hipThreadIdx_x>>6] = __all(tid -77);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <hip_runtime.h>
|
||||
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
|
||||
|
||||
/**
 * Ballot test kernel.
 *
 * Every thread evaluates the predicate (tid - 245), which is non-zero for
 * every global thread id except 245, feeds it to __ballot, counts the set
 * bits of the result with __popcll, and atomically adds that count to the
 * counter slot belonging to its warp.
 *
 * @param lp                   HIP launch parameter (unused in the body).
 * @param device_ballot        Per-warp counters; indexed by
 *                             warp-in-block + block index * Num_Warps_per_Block.
 * @param Num_Warps_per_Block  Number of counter slots reserved per block.
 *
 * NOTE(review): because each thread of a warp performs the atomicAdd, a slot
 * presumably ends up holding (threads in warp) * (lanes voting true); the
 * host side divides by 64 accordingly - confirm against __ballot semantics.
 */
__global__ void
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block)
{
    // Global (grid-wide) thread index.
    const int global_id = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;

    // Warp index inside the block; the shift by 6 divides by 64.
    const unsigned int warp_in_block = hipThreadIdx_x >> 6;

    // Counter slot for this warp within the whole grid.
    const auto slot = warp_in_block + hipBlockIdx_x * Num_Warps_per_Block;

    // Number of set bits in the ballot result for this warp.
    const auto votes = __popcll(__ballot(global_id - 245));

    atomicAdd(&device_ballot[slot], votes);
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
unsigned int Num_Threads_per_Block = 512;
|
||||
unsigned int Num_Blocks_per_Grid = 1;
|
||||
unsigned int Num_Warps_per_Block = Num_Threads_per_Block/64;
|
||||
unsigned int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/64;
|
||||
unsigned int* host_ballot = (unsigned int*)malloc(Num_Warps_per_Grid*sizeof(unsigned int));
|
||||
unsigned int* device_ballot;
|
||||
HIP_ASSERT(hipMalloc((void**)&device_ballot, Num_Warps_per_Grid*sizeof(unsigned int)));
|
||||
|
||||
for (int i=0; i<Num_Warps_per_Grid; i++) host_ballot[i] = 0;
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(device_ballot, host_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block);
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
for (int i=0; i<Num_Warps_per_Grid; i++) {
|
||||
|
||||
if ((host_ballot[i] == 0)||(host_ballot[i]/64 == 64)) std::cout << "Warp " << i << " IS convergent- Predicate true for " << host_ballot[i]/64 << " threads\n";
|
||||
|
||||
else std::cout << "Warp " << i << " IS divergent - Predicate true for " << host_ballot[i]/64<< " threads\n";
|
||||
|
||||
}
|
||||
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
}
|
||||
@@ -79,21 +79,6 @@ HIP_kernel(hipLaunchParm lp,
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
__kernel__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned long long int* c, unsigned long long int* d, int width, int height) {
|
||||
|
||||
|
||||
int x = blockDimX * blockIdx.x + threadIdx.x;
|
||||
int y = blockDimY * blockIdy.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = __brev(b[i]);
|
||||
c[i] = __brevll(d[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
@@ -95,24 +95,6 @@ HIP_kernel(hipLaunchParm lp,
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
__kernel__ void HIP_kernel(unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
|
||||
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height) {
|
||||
|
||||
|
||||
int x = blockDimX * blockIdx.x + threadIdx.x;
|
||||
int y = blockDimY * blockIdy.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = __clz(b[i]);
|
||||
c[i] = __clzll(d[i]);
|
||||
e[i] = __clz(f[i]);
|
||||
g[i] = __clzll(h[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
|
||||
@@ -73,22 +73,6 @@ HIP_kernel(hipLaunchParm lp,
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
__kernel__ void HIP_kernel( unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d,
|
||||
int width, int height) {
|
||||
|
||||
|
||||
int x = blockDimX * blockIdx.x + threadIdx.x;
|
||||
int y = blockDimY * blockIdy.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = __ffs(b[i]);
|
||||
c[i] = __ffsll(d[i]);
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
@@ -71,20 +71,7 @@ HIP_kernel(hipLaunchParm lp,
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
__kernel__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d, int width, int height) {
|
||||
|
||||
|
||||
int x = blockDimX * blockIdx.x + threadIdx.x;
|
||||
int y = blockDimY * blockIdy.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = __popc(b[i]);
|
||||
c[i] = __popcll(d[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle