Merge branch 'amd-master' into amd-develop

[ROCm/hip commit: d7b040bdba]
Этот коммит содержится в:
Maneesh Gupta
2016-07-05 21:40:22 +05:30
родитель a16c78981e 456551c15a
Коммит d12f1ef4ce
6 изменённых файлов: 1114 добавлений и 415 удалений
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+2 -3
Просмотреть файл
@@ -37,9 +37,8 @@ THE SOFTWARE.
#include <string.h>
#include <stddef.h>
// Define NVCC_COMPAT for CUDA compatibility
#define NVCC_COMPAT
#define CUDA_SUCCESS hipSuccess
#include <hip/hip_runtime_api.h>
+59 -23
Просмотреть файл
@@ -836,7 +836,7 @@ __device__ float erfcf(float x)
}
__device__ float erfcinvf(float y)
{
return __hip_erfinvf(1 - y);
return __hip_erfinvf(1 - y);
}
__device__ float erfcxf(float x)
{
@@ -1697,75 +1697,111 @@ __device__ unsigned int test__popc(unsigned int input);
__device__ unsigned int __popcll( unsigned long long int input)
{
return hc::__popcount_u32_b64(input);
return hc::__popcount_u32_b64(input);
}
__device__ unsigned int __clz(unsigned int input)
{
return hc::__firstbit_u32_u32( input);
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_u32( input);
#else
return hc::__firstbit_u32_u32( input);
#endif
}
__device__ unsigned int __clzll(unsigned long long int input)
{
return hc::__firstbit_u32_u64( input);
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_u64( input);
#else
return hc::__firstbit_u32_u64( input);
#endif
}
__device__ unsigned int __clz(int input)
__device__ unsigned int __clz( int input)
{
return hc::__firstbit_u32_s32( input);
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_s32( input);
#else
return hc::__firstbit_u32_s32( input);
#endif
}
__device__ unsigned int __clzll(long long int input)
__device__ unsigned int __clzll( long long int input)
{
return hc::__firstbit_u32_s64( input);
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_s64( input);
#else
return hc::__firstbit_u32_s64( input);
#endif
}
__device__ unsigned int __ffs(unsigned int input)
{
return hc::__lastbit_u32_u32( input)+1;
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u32( input)+1;
#else
return hc::__lastbit_u32_u32( input);
#endif
}
__device__ unsigned int __ffsll(unsigned long long int input)
{
return hc::__lastbit_u32_u64( input)+1;
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u64( input)+1;
#else
return hc::__lastbit_u32_u64( input);
#endif
}
__device__ unsigned int __ffs(int input)
__device__ unsigned int __ffs( int input)
{
return hc::__lastbit_u32_s32( input)+1;
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s32( input)+1;
#else
return hc::__lastbit_u32_s32( input);
#endif
}
__device__ unsigned int __ffsll(long long int input)
__device__ unsigned int __ffsll( long long int input)
{
return hc::__lastbit_u32_s64( input)+1;
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s64( input)+1;
#else
return hc::__lastbit_u32_s64( input);
#endif
}
__device__ unsigned int __brev( unsigned int input)
{
return hc::__bitrev_b32( input);
return hc::__bitrev_b32( input);
}
__device__ unsigned long long int __brevll( unsigned long long int input)
{
return hc::__bitrev_b64( input);
return hc::__bitrev_b64( input);
}
// warp vote function __all __any __ballot
__device__ int __all( int input)
{
return hc::__all( input);
return hc::__all( input);
}
__device__ int __any( int input)
{
if( hc::__any( input)!=0) return 1;
else return 0;
#ifdef NVCC_COMPAT
if( hc::__any( input)!=0) return 1;
else return 0;
#else
return hc::__any( input);
#endif
}
__device__ unsigned long long int __ballot( int input)
{
return hc::__ballot( input);
return hc::__ballot( input);
}
// warp shuffle functions
@@ -1809,11 +1845,11 @@ __device__ float __shfl_xor(float input, int lane_mask, int width)
return hc::__shfl_xor(input,lane_mask,width);
}
__host__ __device__ int min(int arg1, int arg2)
{
__host__ __device__ int min(int arg1, int arg2)
{
return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));
}
__host__ __device__ int max(int arg1, int arg2)
__host__ __device__ int max(int arg1, int arg2)
{
return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));
}
+17 -10
Просмотреть файл
@@ -27,7 +27,7 @@ THE SOFTWARE.
#include <hip_runtime.h>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
__global__ void
__global__ void
warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift)
{
@@ -36,13 +36,11 @@ __global__ void
device_all[hipThreadIdx_x>>pshift] = __all(tid -77);
}
int main(int argc, char *argv[])
{ int warpSize, pshift;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
if(strncmp(devProp.name,"Fiji",1)==0)
if(strncmp(devProp.name,"Fiji",1)==0)
{ warpSize =64;
pshift =6;
}
@@ -53,14 +51,14 @@ int main(int argc, char *argv[])
int Num_Blocks_per_Grid = 1;
int Num_Warps_per_Block = Num_Threads_per_Block/warpSize;
int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/warpSize;
int * host_any = ( int*)malloc(Num_Warps_per_Grid*sizeof(int));
int * host_all = ( int*)malloc(Num_Warps_per_Grid*sizeof(int));
int *device_any;
int *device_any;
int *device_all;
HIP_ASSERT(hipMalloc((void**)&device_any,Num_Warps_per_Grid*sizeof( int)));
HIP_ASSERT(hipMalloc((void**)&device_all,Num_Warps_per_Grid*sizeof(int)));
for (int i=0; i<Num_Warps_per_Grid; i++)
for (int i=0; i<Num_Warps_per_Grid; i++)
{
host_any[i] = 0;
host_all[i] = 0;
@@ -77,11 +75,20 @@ for (int i=0; i<Num_Warps_per_Grid; i++)
printf("warp no. %d __any = %d \n",i,host_any[i]);
printf("warp no. %d __all = %d \n",i,host_all[i]);
if (host_any[i]!=1) ++anycount;
if (host_all[i]!=1) ++allcount;
if (host_all[i]!=1) ++allcount;
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
if (host_any[i]!=64) ++anycount;
#else
if (host_any[i]!=1) ++anycount;
#endif
}
if (anycount == 0 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
if (anycount == 1 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
#else
if (anycount == 0 && allcount ==1) printf("PASSED\n"); else printf("FAILED\n");
#endif
return EXIT_SUCCESS;
+36 -98
Просмотреть файл
@@ -27,13 +27,9 @@ THE SOFTWARE.
#include <iostream>
#include "hip_runtime.h"
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
#define WIDTH 32
#define HEIGHT 32
#define WIDTH 8
#define HEIGHT 8
#define NUM (WIDTH*HEIGHT)
#define THREADS_PER_BLOCK_X 8
@@ -43,41 +39,41 @@ THE SOFTWARE.
unsigned int firstbit_u32(unsigned int a)
{
if (a == 0)
{
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
return -1;
#else
return 32;
#endif
}
unsigned int pos = 0;
while ((int )a > 0) {
a <<= 1; pos++;
}
return pos;
}
unsigned int firstbit_s32(int a)
{
unsigned int u = a >= 0? a: ~a; // complement negative numbers
return firstbit_u32(u);
}
unsigned int firstbit_u64(unsigned long long int a)
{
if (a == 0)
{
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
return -1;
#else
return 64;
#endif
}
unsigned int pos = 0;
while ((long long int)a > 0) {
a <<= 1; pos++;
}
return pos;
}
unsigned int firstbit_s64(long long int a)
{
unsigned long long int u = a >= 0? a: ~a; // complement negative numbers
return firstbit_u64(u);
}
__global__ void
HIP_kernel(hipLaunchParm lp,
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d,
unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height)
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
@@ -87,14 +83,9 @@ HIP_kernel(hipLaunchParm lp,
if ( i < (width * height)) {
a[i] = __clz(b[i]);
c[i] = __clzll(d[i]);
e[i] = __clz(f[i]);
g[i] = __clzll(h[i]);
}
}
using namespace std;
int main() {
@@ -103,19 +94,11 @@ int main() {
unsigned int* hostB;
unsigned int* hostC;
unsigned long long int* hostD;
unsigned int* hostE;
int* hostF;
unsigned int* hostG;
long long int* hostH;
unsigned int* deviceA;
unsigned int* deviceB;
unsigned int* deviceC;
unsigned long long int* deviceD;
unsigned int* deviceE;
int* deviceF;
unsigned int* deviceG;
long long int* deviceH;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
@@ -125,57 +108,56 @@ int main() {
cout << "hip Device prop succeeded " << endl ;
int i;
unsigned int i;
int errors;
hostA = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int));
hostE = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostF = (int*)malloc(NUM * sizeof(int));
hostG = (unsigned int*)malloc(NUM * sizeof(unsigned int));
hostH = (long long int*)malloc(NUM * sizeof(long long int));
// initialize the input data
for (i = 0; i < NUM; i++) {
hostB[i] = i;
hostD[i] = 1099511627776+i;
hostF[i] = -2100+i;
hostH[i] = 1099511627776+i;
hostB[i] = 419430*i;
hostD[i] = i;
}
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int)));
HIP_ASSERT(hipMalloc((void**)&deviceE, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceF, NUM * sizeof(int)));
HIP_ASSERT(hipMalloc((void**)&deviceG, NUM * sizeof(unsigned int)));
HIP_ASSERT(hipMalloc((void**)&deviceH, NUM * sizeof(long long int)));
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceF, hostF, NUM*sizeof(int), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemcpy(deviceH, hostD, NUM*sizeof(long long int), hipMemcpyHostToDevice));
hipLaunchKernel(HIP_kernel,
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
0, 0,
deviceA ,deviceB, deviceC,deviceD ,deviceE ,deviceF, deviceG,deviceH, WIDTH ,HEIGHT);
deviceA ,deviceB, deviceC ,deviceD , WIDTH ,HEIGHT);
HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostE, deviceE, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_ASSERT(hipMemcpy(hostG, deviceG, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost));
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
if (hostA[i] != firstbit_u32(hostB[i])) {
printf("gpu_clz =%d, cpu_clz =%d \n",hostA[i],firstbit_u32(hostB[i]));
if (hostA[i] != firstbit_u32(hostB[i])) {
errors++;
}
}
if (errors!=0) {
cout << "FAILED clz" << endl;
return -1;
} else {
cout << "__clz() checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_clzll =%d, cpu_clzll =%d \n",hostC[i],firstbit_u64(hostD[i]));
if (hostC[i] != firstbit_u64(hostD[i])) {
errors++;
}
}
@@ -183,43 +165,7 @@ int main() {
cout << "FAILED clz" << endl;
return -1;
} else {
cout << "__clz_u() for unsigned checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
if (hostC[i] != firstbit_u64(hostD[i])) {
errors++;
}
}
if (errors!=0) {
cout << "FAILED clz" << endl;
return -1;
} else {
cout << "__clzll_u() for unsigned checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
if (hostE[i] != firstbit_s32(hostF[i])) {
errors++;
}
}
if (errors!=0) {
cout << "FAILED clz\n" << endl;
return -1;
} else {
cout << "__clz_s() checked!" << endl;
}
errors = 0;
for (i = 0; i < NUM; i++) {
if (hostG[i] != firstbit_s64(hostH[i])) {
errors++;
}
}
if (errors!=0) {
cout << "FAILED clz" << endl;
return -1;
} else {
cout << "__clzll_s() checked!" << endl;
cout << "__clzll() checked!" << endl;
}
cout << "clz test PASSED!" << endl;
@@ -228,19 +174,11 @@ int main() {
HIP_ASSERT(hipFree(deviceB));
HIP_ASSERT(hipFree(deviceC));
HIP_ASSERT(hipFree(deviceD));
HIP_ASSERT(hipFree(deviceE));
HIP_ASSERT(hipFree(deviceF));
HIP_ASSERT(hipFree(deviceG));
HIP_ASSERT(hipFree(deviceH));
free(hostA);
free(hostB);
free(hostC);
free(hostD);
free(hostE);
free(hostF);
free(hostG);
free(hostH);
return errors;
}
+13 -3
Просмотреть файл
@@ -31,8 +31,8 @@ THE SOFTWARE.
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
#define WIDTH 32
#define HEIGHT 32
#define WIDTH 8
#define HEIGHT 8
#define NUM (WIDTH*HEIGHT)
@@ -44,12 +44,20 @@ template<typename T>
int lastbit( T a)
{
if (a == 0)
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
return -1;
#else
return 0;
#endif
int pos = 1;
while ((a&1) != 1) {
a >>= 1; pos++;
}
return pos;
#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT )
return pos-1;
#else
return pos;
#endif
}
@@ -130,6 +138,7 @@ int main() {
// verify the results
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_ffs =%d, cpu_ffs =%d \n",hostA[i],lastbit(hostB[i]));
if (hostA[i] != lastbit(hostB[i])) {
errors++;
}
@@ -142,6 +151,7 @@ int main() {
}
errors = 0;
for (i = 0; i < NUM; i++) {
printf("gpu_ffsll =%d, cpu_ffsll =%d \n",hostC[i],lastbit(hostD[i]));
if (hostC[i] != lastbit(hostD[i])) {
errors++;
}