Added test for ballot and removed HIP_FUNCTION from hipSampleAtomicsTest.cpp -sandeep

[ROCm/hip commit: 2b02fe082f]
Bu işleme şunda yer alıyor:
streamhsa
2016-02-02 14:50:55 +05:30
ebeveyn 114874e5ee
işleme 19d0b8095f
8 değiştirilmiş dosya ile 54 ekleme ve 66 silme
+2
Dosyayı Görüntüle
@@ -103,6 +103,7 @@ endmacro()
#install (TARGETS hipMemset DESTINATION bin)
#install (TARGETS hipEventRecord DESTINATION bin)
make_hip_executable (hip_ballot hip_ballot.cpp)
make_hip_executable (hip_anyall hip_anyall.cpp)
make_hip_executable (hip_popc hip_popc.cpp)
make_hip_executable (hip_clz hip_clz.cpp)
@@ -120,6 +121,7 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis
make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp)
target_link_libraries(hipMathFunctionsHost m)
make_test(hip_ballot " " )
make_test(hip_anyall " " )
make_test(hip_popc " " )
make_test(hip_brev " " )
+2 -2
Dosyayı Görüntüle
@@ -190,7 +190,7 @@ int computeGold(int *gpuData, const int len)
return true;
}
__global__ void HIP_FUNCTION(testKernel,int *g_odata)
__global__ void testKernel(hipLaunchParm lp,int *g_odata)
{
// access thread id
const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
@@ -236,7 +236,7 @@ __global__ void HIP_FUNCTION(testKernel,int *g_odata)
// Atomic XOR
atomicXor(&g_odata[10], tid);
}
HIP_FUNCTION_END
int main(int argc, char **argv)
{
+2 -2
Dosyayı Görüntüle
@@ -32,8 +32,8 @@ __global__ void
{
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
device_any[hipThreadIdx_x>>6] = __any(tid >77);
device_all[hipThreadIdx_x>>6] = __all(tid >77);
device_any[hipThreadIdx_x>>6] = __any(tid -77);
device_all[hipThreadIdx_x>>6] = __all(tid -77);
}
+48
Dosyayı Görüntüle
@@ -0,0 +1,48 @@
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <hip_runtime.h>
// Evaluates a HIP runtime call exactly once and aborts with a diagnostic if it
// does not return hipSuccess. Unlike an assert()-based check, the wrapped call
// is still executed when NDEBUG is defined, so release builds do not silently
// drop the hipMalloc/hipMemcpy calls passed to this macro.
// Intended to be used as a statement: HIP_ASSERT(hipSomething(...));
#define HIP_ASSERT(x)                                                        \
    do {                                                                     \
        hipError_t hip_assert_status_ = (x);                                 \
        if (hip_assert_status_ != hipSuccess) {                              \
            std::cerr << "HIP call failed: " #x " (error "                   \
                      << static_cast<int>(hip_assert_status_) << ") at "     \
                      << __FILE__ << ":" << __LINE__ << std::endl;           \
            std::abort();                                                    \
        }                                                                    \
    } while (0)
// Ballot test kernel.
//
// Every thread votes with the integer predicate (global thread id - 245),
// which is zero only for the thread whose global id is 245. The population
// count of the resulting __ballot() mask is then atomically added to the
// counter belonging to the thread's 64-thread group.
//
// Parameters:
//   lp                  - HIP launch parameter object (not read here).
//   device_ballot       - one unsigned counter per 64-thread group; indexed by
//                         (threadIdx.x >> 6) + blockIdx.x * Num_Warps_per_Block.
//   Num_Warps_per_Block - number of 64-thread groups in each block.
//
// Note: every thread of a group performs the atomic add, so a counter receives
// the popcount once per thread; the host code in this file divides by 64 when
// reporting. The shift by 6 hard-codes a group width of 64 threads.
__global__ void
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block)
{
    // Flat global thread id across the 1-D grid.
    const int global_tid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;

    // Index of this thread's 64-wide group inside its block.
    const unsigned int group_in_block = hipThreadIdx_x >> 6;

    // Counter slot shared by all threads of this group.
    unsigned int* const group_counter =
        &device_ballot[group_in_block + hipBlockIdx_x * Num_Warps_per_Block];

    atomicAdd(group_counter, __popcll(__ballot(global_tid - 245)));
}
int main(int argc, char *argv[])
{
unsigned int Num_Threads_per_Block = 512;
unsigned int Num_Blocks_per_Grid = 1;
unsigned int Num_Warps_per_Block = Num_Threads_per_Block/64;
unsigned int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/64;
unsigned int* host_ballot = (unsigned int*)malloc(Num_Warps_per_Grid*sizeof(unsigned int));
unsigned int* device_ballot;
HIP_ASSERT(hipMalloc((void**)&device_ballot, Num_Warps_per_Grid*sizeof(unsigned int)));
for (int i=0; i<Num_Warps_per_Grid; i++) host_ballot[i] = 0;
HIP_ASSERT(hipMemcpy(device_ballot, host_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyHostToDevice));
hipLaunchKernel(gpu_ballot, dim3(Num_Blocks_per_Grid),dim3(Num_Threads_per_Block),0,0, device_ballot,Num_Warps_per_Block);
HIP_ASSERT(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid*sizeof(unsigned int), hipMemcpyDeviceToHost));
for (int i=0; i<Num_Warps_per_Grid; i++) {
if ((host_ballot[i] == 0)||(host_ballot[i]/64 == 64)) std::cout << "Warp " << i << " IS convergent- Predicate true for " << host_ballot[i]/64 << " threads\n";
else std::cout << "Warp " << i << " IS divergent - Predicate true for " << host_ballot[i]/64<< " threads\n";
}
return EXIT_SUCCESS;
}
-15
Dosyayı Görüntüle
@@ -79,21 +79,6 @@ HIP_kernel(hipLaunchParm lp,
#if 0
__kernel__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned long long int* c, unsigned long long int* d, int width, int height) {
int x = blockDimX * blockIdx.x + threadIdx.x;
int y = blockDimY * blockIdy.y + threadIdx.y;
int i = y * width + x;
if ( i < (width * height)) {
a[i] = __brev(b[i]);
c[i] = __brevll(d[i]);
}
}
#endif
using namespace std;
int main() {
-18
Dosyayı Görüntüle
@@ -95,24 +95,6 @@ HIP_kernel(hipLaunchParm lp,
#if 0
__kernel__ void HIP_kernel(unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height) {
int x = blockDimX * blockIdx.x + threadIdx.x;
int y = blockDimY * blockIdy.y + threadIdx.y;
int i = y * width + x;
if ( i < (width * height)) {
a[i] = __clz(b[i]);
c[i] = __clzll(d[i]);
e[i] = __clz(f[i]);
g[i] = __clzll(h[i]);
}
}
#endif
using namespace std;
int main() {
-16
Dosyayı Görüntüle
@@ -73,22 +73,6 @@ HIP_kernel(hipLaunchParm lp,
#if 0
__kernel__ void HIP_kernel( unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d,
int width, int height) {
int x = blockDimX * blockIdx.x + threadIdx.x;
int y = blockDimY * blockIdy.y + threadIdx.y;
int i = y * width + x;
if ( i < (width * height)) {
a[i] = __ffs(b[i]);
c[i] = __ffsll(d[i]);
}
}
#endif
using namespace std;
-13
Dosyayı Görüntüle
@@ -71,20 +71,7 @@ HIP_kernel(hipLaunchParm lp,
#if 0
__kernel__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned int* c, unsigned long long int* d, int width, int height) {
int x = blockDimX * blockIdx.x + threadIdx.x;
int y = blockDimY * blockIdy.y + threadIdx.y;
int i = y * width + x;
if ( i < (width * height)) {
a[i] = __popc(b[i]);
c[i] = __popcll(d[i]);
}
}
#endif
using namespace std;