added __host__ for complex functions and corrected memset and memcpy test
Change-Id: I9ffefb7a0025aa111a54d20d2766982df15532e7
[ROCm/clr commit: 42739c37ef]
Этот коммит содержится в:
@@ -177,45 +177,45 @@ COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
|
||||
|
||||
#endif
|
||||
|
||||
__device__ static inline float hipCrealf(hipFloatComplex z){
|
||||
/* Returns the real component (the x field) of a single-precision complex value. */
__device__ __host__ static inline float hipCrealf(hipFloatComplex z){
    const float real_part = z.x;
    return real_part;
}
|
||||
|
||||
__device__ static inline float hipCimagf(hipFloatComplex z){
|
||||
/* Returns the imaginary component (the y field) of a single-precision complex value. */
__device__ __host__ static inline float hipCimagf(hipFloatComplex z){
    const float imag_part = z.y;
    return imag_part;
}
|
||||
|
||||
__device__ static inline hipFloatComplex make_hipFloatComplex(float a, float b){
|
||||
/*
 * Builds a single-precision complex value.
 *   a - stored in the x field (real component)
 *   b - stored in the y field (imaginary component)
 */
__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b){
    hipFloatComplex result;
    result.y = b;
    result.x = a;
    return result;
}
|
||||
|
||||
__device__ static inline hipFloatComplex hipConjf(hipFloatComplex z){
|
||||
/* Returns the complex conjugate of z: same x field, negated y field. */
__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z){
    hipFloatComplex conjugate;
    conjugate.y = -z.y;   /* imaginary component flips sign */
    conjugate.x = z.x;    /* real component is unchanged */
    return conjugate;
}
|
||||
|
||||
__device__ static inline float hipCsqabsf(hipFloatComplex z){
|
||||
/* Returns the squared magnitude of z, i.e. x*x + y*y (no square root taken). */
__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z){
    const float squared_magnitude = z.x * z.x + z.y * z.y;
    return squared_magnitude;
}
|
||||
|
||||
__device__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q){
|
||||
/* Returns the component-wise sum p + q of two single-precision complex values. */
__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q){
    const float re = p.x + q.x;
    const float im = p.y + q.y;
    return make_hipFloatComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q){
|
||||
/* Returns the component-wise difference p - q of two single-precision complex values. */
__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q){
    const float re = p.x - q.x;
    const float im = p.y - q.y;
    return make_hipFloatComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q){
|
||||
/*
 * Returns the complex product p * q:
 *   real      = p.x*q.x - p.y*q.y
 *   imaginary = p.y*q.x + p.x*q.y
 */
__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q){
    const float re = p.x * q.x - p.y * q.y;
    const float im = p.y * q.x + p.x * q.y;
    return make_hipFloatComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q){
|
||||
__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q){
|
||||
float sqabs = hipCsqabsf(q);
|
||||
hipFloatComplex ret;
|
||||
ret.x = (p.x * q.x + p.y * q.y)/sqabs;
|
||||
@@ -223,51 +223,51 @@ __device__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatCom
|
||||
return ret;
|
||||
}
|
||||
|
||||
__device__ static inline float hipCabsf(hipFloatComplex z){
|
||||
/* Returns the magnitude of z: the square root of hipCsqabsf(z). */
__device__ __host__ static inline float hipCabsf(hipFloatComplex z){
    const float squared_magnitude = hipCsqabsf(z);
    return sqrtf(squared_magnitude);
}
|
||||
|
||||
|
||||
|
||||
__device__ static inline double hipCreal(hipDoubleComplex z){
|
||||
/* Returns the real component (the x field) of a double-precision complex value. */
__device__ __host__ static inline double hipCreal(hipDoubleComplex z){
    const double real_part = z.x;
    return real_part;
}
|
||||
|
||||
__device__ static inline double hipCimag(hipDoubleComplex z){
|
||||
/* Returns the imaginary component (the y field) of a double-precision complex value. */
__device__ __host__ static inline double hipCimag(hipDoubleComplex z){
    const double imag_part = z.y;
    return imag_part;
}
|
||||
|
||||
__device__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b){
|
||||
/*
 * Builds a double-precision complex value.
 *   a - stored in the x field (real component)
 *   b - stored in the y field (imaginary component)
 */
__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b){
    hipDoubleComplex result;
    result.y = b;
    result.x = a;
    return result;
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipConj(hipDoubleComplex z){
|
||||
/*
 * Returns the complex conjugate of z: same x field, negated y field.
 *
 * The imaginary component must change sign; copying it unchanged would make
 * this function return its input rather than the conjugate.
 */
__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z){
    hipDoubleComplex ret;
    ret.x = z.x;
    ret.y = -z.y;
    return ret;
}
|
||||
|
||||
__device__ static inline double hipCsqabs(hipDoubleComplex z){
|
||||
/* Returns the squared magnitude of z, i.e. x*x + y*y (no square root taken). */
__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z){
    const double squared_magnitude = z.x * z.x + z.y * z.y;
    return squared_magnitude;
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q){
|
||||
/* Returns the component-wise sum p + q of two double-precision complex values. */
__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q){
    const double re = p.x + q.x;
    const double im = p.y + q.y;
    return make_hipDoubleComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q){
|
||||
/* Returns the component-wise difference p - q of two double-precision complex values. */
__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q){
    const double re = p.x - q.x;
    const double im = p.y - q.y;
    return make_hipDoubleComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q){
|
||||
/*
 * Returns the complex product p * q:
 *   real      = p.x*q.x - p.y*q.y
 *   imaginary = p.y*q.x + p.x*q.y
 */
__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q){
    const double re = p.x * q.x - p.y * q.y;
    const double im = p.y * q.x + p.x * q.y;
    return make_hipDoubleComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q){
|
||||
__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q){
|
||||
double sqabs = hipCsqabs(q);
|
||||
hipDoubleComplex ret;
|
||||
ret.x = (p.x * q.x + p.y * q.y)/sqabs;
|
||||
@@ -275,28 +275,28 @@ __device__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleC
|
||||
return ret;
|
||||
}
|
||||
|
||||
__device__ static inline double hipCabs(hipDoubleComplex z){
|
||||
/*
 * Returns the magnitude of z: the square root of hipCsqabs(z).
 *
 * Uses the double-precision sqrt; sqrtf would narrow the argument to float
 * and discard the extra precision this double-returning function promises.
 */
__device__ __host__ static inline double hipCabs(hipDoubleComplex z){
    return sqrt(hipCsqabs(z));
}
|
||||
|
||||
typedef hipFloatComplex hipComplex;
|
||||
|
||||
__device__ static inline hipComplex make_hipComplex(float x,
|
||||
/*
 * Builds a hipComplex (typedef of hipFloatComplex) from its two components
 * by forwarding to make_hipFloatComplex.
 *   x - real component
 *   y - imaginary component
 */
__device__ __host__ static inline hipComplex make_hipComplex(float x, float y){
    const hipComplex built = make_hipFloatComplex(x, y);
    return built;
}
|
||||
|
||||
__device__ static inline hipFloatComplex hipComplexDoubleToFloat
|
||||
/* Converts a double-precision complex value to single precision by casting each component to float. */
__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z){
    const float re = (float)z.x;
    const float im = (float)z.y;
    return make_hipFloatComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipComplexFloatToDouble
|
||||
/* Converts a single-precision complex value to double precision by casting each component to double. */
__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z){
    const double re = (double)z.x;
    const double im = (double)z.y;
    return make_hipDoubleComplex(re, im);
}
|
||||
|
||||
__device__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r){
|
||||
__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r){
|
||||
float real = (p.x * q.x) + r.x;
|
||||
float imag = (q.x * p.y) + r.y;
|
||||
|
||||
@@ -306,7 +306,7 @@ __device__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComp
|
||||
return make_hipComplex(real, imag);
|
||||
}
|
||||
|
||||
__device__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r){
|
||||
__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r){
|
||||
float real = (p.x * q.x) + r.x;
|
||||
float imag = (q.x * p.y) + r.y;
|
||||
|
||||
|
||||
@@ -1,18 +1,29 @@
|
||||
#include<iostream>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include "../test_common.h"
|
||||
|
||||
|
||||
/* Number of uint32_t elements in each test buffer. */
#define LEN 1030
/*
 * Buffer size in bytes (LEN elements * 4 bytes each).
 * Parenthesized so the shift binds before any operator at the use site;
 * unparenthesized, SIZE/LEN would expand to LEN << (2/LEN).
 */
#define SIZE (LEN << 2)
|
||||
|
||||
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In, uint32_t *Vald)
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
|
||||
/*
 * Device-side memcpy test kernel: each thread copies one uint32_t element
 * from In to Out, indexed by its x thread index within the block.
 *
 *   lp  - HIP launch parameter placeholder
 *   Out - destination buffer
 *   In  - source buffer
 *
 * The per-thread byte count is spelled sizeof(uint32_t) rather than SIZE/LEN:
 * with SIZE defined as an unparenthesized `LEN << 2`, SIZE/LEN expands to
 * LEN << (2/LEN) == LEN bytes, which would make every thread copy far past
 * its own element. Threads with an index of LEN or more do nothing.
 */
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In)
{
    const int tx = hipThreadIdx_x;
    if (tx < LEN) {
        memcpy(Out + tx, In + tx, sizeof(uint32_t));
    }
}
|
||||
|
||||
/*
 * Device-side memset test kernel: each thread fills the four bytes of one
 * uint32_t element with `val`, indexed by its x thread index within the block.
 *
 *   lp   - HIP launch parameter placeholder
 *   ptr  - buffer to fill
 *   val  - byte value written to every byte of the element
 *   size - number of uint32_t elements to fill; threads with an index of
 *          `size` or more do nothing
 *
 * NOTE(review): `size` is interpreted as an element count because the visible
 * launch passes LEN with LEN threads. Letting every thread memset `size`
 * bytes starting at ptr + tx would write past the end of the buffer for the
 * higher thread indices. Confirm no other caller passes a byte count.
 */
__global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size)
{
    const size_t tx = hipThreadIdx_x;
    if (tx < size) {
        memset(ptr + tx, val, sizeof(uint32_t));
    }
}
|
||||
|
||||
int main()
|
||||
@@ -24,19 +35,29 @@ int main()
|
||||
Val = new uint32_t;
|
||||
*Val = 0;
|
||||
for(int i=0;i<LEN;i++){
|
||||
A[i] = i *1.0f;
|
||||
B[i] = 0.0f;
|
||||
A[i] = i;
|
||||
B[i] = 0;
|
||||
}
|
||||
hipMalloc((void**)&Ad, SIZE);
|
||||
hipMalloc((void**)&Bd, SIZE);
|
||||
hipMalloc((void**)&Vald, sizeof(uint32_t));
|
||||
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
|
||||
hipLaunchKernel(cpy, dim3(1), dim3(LEN/4), 0, 0, Bd, Ad, Vald);
|
||||
hipLaunchKernel(set, dim3(1), dim3(LEN/4), 0, 0, Bd, 0x1, SIZE);
|
||||
|
||||
hipLaunchKernel(cpy, dim3(1), dim3(LEN), 0, 0, Bd, Ad);
|
||||
|
||||
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
|
||||
hipMemcpy(Val, Vald, sizeof(uint32_t), hipMemcpyDeviceToHost);
|
||||
for(int i=LEN-16;i<LEN;i++){
|
||||
std::cout<<A[i]<<" "<<B[i]<<std::endl;
|
||||
if(A[i]!=B[i]){
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
std::cout<<*Val<<std::endl;
|
||||
hipLaunchKernel(set, dim3(1), dim3(LEN), 0, 0, Bd, 0x1, LEN);
|
||||
|
||||
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
|
||||
for(int i=LEN-16;i<LEN;i++){
|
||||
if(0x01010101!=B[i]){
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user