Added __host__ to the complex-number functions and corrected the memset and memcpy tests

Change-Id: I9ffefb7a0025aa111a54d20d2766982df15532e7


[ROCm/clr commit: 42739c37ef]
Этот коммит содержится в:
Aditya Atluri
2017-04-06 09:29:44 -05:00
родитель 388f35c67a
Коммит d1c334df66
2 изменённых файлов: 58 добавлений и 37 удалений
+25 -25
Просмотреть файл
@@ -177,45 +177,45 @@ COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
#endif
__device__ static inline float hipCrealf(hipFloatComplex z){
__device__ __host__ static inline float hipCrealf(hipFloatComplex z){
    // The real component is stored in the .x member.
    const float realPart = z.x;
    return realPart;
}
__device__ static inline float hipCimagf(hipFloatComplex z){
__device__ __host__ static inline float hipCimagf(hipFloatComplex z){
    // The imaginary component is stored in the .y member.
    const float imagPart = z.y;
    return imagPart;
}
__device__ static inline hipFloatComplex make_hipFloatComplex(float a, float b){
__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b){
    // Build a single-precision complex value: a goes to .x, b goes to .y.
    hipFloatComplex result;
    result.x = a;
    result.y = b;
    return result;
}
__device__ static inline hipFloatComplex hipConjf(hipFloatComplex z){
__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z){
    // Complex conjugate: keep .x as is and flip the sign of .y.
    hipFloatComplex conj;
    conj.y = -z.y;
    conj.x = z.x;
    return conj;
}
__device__ static inline float hipCsqabsf(hipFloatComplex z){
__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z){
    // Squared magnitude: x^2 + y^2 (no square root taken).
    const float magnitudeSq = z.x * z.x + z.y * z.y;
    return magnitudeSq;
}
__device__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q){
__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q){
    // Component-wise sum of the two operands.
    const float sumX = p.x + q.x;
    const float sumY = p.y + q.y;
    return make_hipFloatComplex(sumX, sumY);
}
__device__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q){
__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q){
    // Component-wise difference p - q.
    const float diffX = p.x - q.x;
    const float diffY = p.y - q.y;
    return make_hipFloatComplex(diffX, diffY);
}
__device__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q){
__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q){
    // Complex product: (p.x + i*p.y) * (q.x + i*q.y).
    const float prodX = p.x * q.x - p.y * q.y;
    const float prodY = p.y * q.x + p.x * q.y;
    return make_hipFloatComplex(prodX, prodY);
}
__device__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q){
__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q){
float sqabs = hipCsqabsf(q);
hipFloatComplex ret;
ret.x = (p.x * q.x + p.y * q.y)/sqabs;
@@ -223,51 +223,51 @@ __device__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatCom
return ret;
}
__device__ static inline float hipCabsf(hipFloatComplex z){
__device__ __host__ static inline float hipCabsf(hipFloatComplex z){
    // Magnitude is the single-precision square root of the squared magnitude.
    const float magnitudeSq = hipCsqabsf(z);
    return sqrtf(magnitudeSq);
}
__device__ static inline double hipCreal(hipDoubleComplex z){
__device__ __host__ static inline double hipCreal(hipDoubleComplex z){
    // The real component is stored in the .x member.
    const double realPart = z.x;
    return realPart;
}
__device__ static inline double hipCimag(hipDoubleComplex z){
__device__ __host__ static inline double hipCimag(hipDoubleComplex z){
    // The imaginary component is stored in the .y member.
    const double imagPart = z.y;
    return imagPart;
}
__device__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b){
__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b){
    // Build a double-precision complex value: a goes to .x, b goes to .y.
    hipDoubleComplex result;
    result.x = a;
    result.y = b;
    return result;
}
__device__ static inline hipDoubleComplex hipConj(hipDoubleComplex z){
__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z){
    // Complex conjugate of a double-precision value: the real part (.x) is
    // kept and the imaginary part (.y) is negated, matching hipConjf.
    hipDoubleComplex ret;
    ret.x = z.x;
    ret.y = -z.y;  // was copied unnegated, which returned the input unchanged
    return ret;
}
__device__ static inline double hipCsqabs(hipDoubleComplex z){
__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z){
    // Squared magnitude: x^2 + y^2 (no square root taken).
    const double magnitudeSq = z.x * z.x + z.y * z.y;
    return magnitudeSq;
}
__device__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q){
__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q){
    // Component-wise sum of the two operands.
    const double sumX = p.x + q.x;
    const double sumY = p.y + q.y;
    return make_hipDoubleComplex(sumX, sumY);
}
__device__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q){
__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q){
    // Component-wise difference p - q.
    const double diffX = p.x - q.x;
    const double diffY = p.y - q.y;
    return make_hipDoubleComplex(diffX, diffY);
}
__device__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q){
__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q){
    // Complex product: (p.x + i*p.y) * (q.x + i*q.y).
    const double prodX = p.x * q.x - p.y * q.y;
    const double prodY = p.y * q.x + p.x * q.y;
    return make_hipDoubleComplex(prodX, prodY);
}
__device__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q){
__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q){
double sqabs = hipCsqabs(q);
hipDoubleComplex ret;
ret.x = (p.x * q.x + p.y * q.y)/sqabs;
@@ -275,28 +275,28 @@ __device__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleC
return ret;
}
__device__ static inline double hipCabs(hipDoubleComplex z){
__device__ __host__ static inline double hipCabs(hipDoubleComplex z){
    // Magnitude of a double-precision complex value. Uses the double-precision
    // sqrt; sqrtf would round the squared magnitude and the result to float
    // precision before widening back to double.
    return sqrt(hipCsqabs(z));
}
typedef hipFloatComplex hipComplex;
__device__ static inline hipComplex make_hipComplex(float x,
__device__ __host__ static inline hipComplex make_hipComplex(float x,
float y){
    // hipComplex is a typedef of hipFloatComplex, so delegate to its maker.
    const hipComplex value = make_hipFloatComplex(x, y);
    return value;
}
__device__ static inline hipFloatComplex hipComplexDoubleToFloat
__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat
(hipDoubleComplex z){
    // Narrow each component from double to float.
    const float narrowX = (float)z.x;
    const float narrowY = (float)z.y;
    return make_hipFloatComplex(narrowX, narrowY);
}
__device__ static inline hipDoubleComplex hipComplexFloatToDouble
__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble
(hipFloatComplex z){
    // Widen each component from float to double.
    const double wideX = (double)z.x;
    const double wideY = (double)z.y;
    return make_hipDoubleComplex(wideX, wideY);
}
__device__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r){
__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r){
float real = (p.x * q.x) + r.x;
float imag = (q.x * p.y) + r.y;
@@ -306,7 +306,7 @@ __device__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComp
return make_hipComplex(real, imag);
}
__device__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r){
__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r){
float real = (p.x * q.x) + r.x;
float imag = (q.x * p.y) + r.y;
+33 -12
Просмотреть файл
@@ -1,18 +1,29 @@
#include<iostream>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hip/hip_runtime_api.h"
#include "../test_common.h"
// Number of elements in each test buffer.
#define LEN 1030
// Buffer size in bytes (LEN elements of 4 bytes each). The parentheses are
// required: `<<` binds more loosely than `/` or `*`, so an unparenthesized
// `LEN << 2` makes `SIZE/LEN` expand to `LEN << (2/LEN)`, i.e. LEN, not 4.
#define SIZE (LEN << 2)
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In, uint32_t *Vald)
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* RUN: %t
* HIT_END
*/
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In)
{
memcpy(Out, In, SIZE, Vald);
int tx = hipThreadIdx_x;
memcpy(Out + tx, In + tx, SIZE/LEN);
}
__global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size)
{
memset(ptr, val, size);
int tx = hipThreadIdx_x;
memset(ptr + tx, val, size);
}
int main()
@@ -24,19 +35,29 @@ int main()
Val = new uint32_t;
*Val = 0;
for(int i=0;i<LEN;i++){
A[i] = i *1.0f;
B[i] = 0.0f;
A[i] = i;
B[i] = 0;
}
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
hipMalloc((void**)&Vald, sizeof(uint32_t));
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(cpy, dim3(1), dim3(LEN/4), 0, 0, Bd, Ad, Vald);
hipLaunchKernel(set, dim3(1), dim3(LEN/4), 0, 0, Bd, 0x1, SIZE);
hipLaunchKernel(cpy, dim3(1), dim3(LEN), 0, 0, Bd, Ad);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(Val, Vald, sizeof(uint32_t), hipMemcpyDeviceToHost);
for(int i=LEN-16;i<LEN;i++){
std::cout<<A[i]<<" "<<B[i]<<std::endl;
if(A[i]!=B[i]){
return 0;
}
}
std::cout<<*Val<<std::endl;
hipLaunchKernel(set, dim3(1), dim3(LEN), 0, 0, Bd, 0x1, LEN);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
for(int i=LEN-16;i<LEN;i++){
if(0x01010101!=B[i]){
return 0;
}
}
passed();
}