diff --git a/projects/hip/include/hcc_detail/hip_runtime.h b/projects/hip/include/hcc_detail/hip_runtime.h index aa420e992d..9f88017215 100644 --- a/projects/hip/include/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hcc_detail/hip_runtime.h @@ -401,6 +401,13 @@ __device__ int __all( int input); __device__ int __any( int input); __device__ unsigned long long int __ballot( int input); +// __ldg function +template +__device__ __forceinline__ T __ldg( const T * addr) +{ + return *addr; +} + // warp shuffle functions #ifdef __cplusplus diff --git a/projects/hip/tests/src/CMakeLists.txt b/projects/hip/tests/src/CMakeLists.txt index 68099ae083..5b1829286d 100644 --- a/projects/hip/tests/src/CMakeLists.txt +++ b/projects/hip/tests/src/CMakeLists.txt @@ -145,6 +145,7 @@ make_hip_executable (hip_popc hip_popc.cpp) make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) +make_hip_executable (hip_ldg hip_ldg.cpp) make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) make_hip_executable (hipEnvVar hipEnvVar.cpp) make_hip_executable (hipEnvVarDriver hipEnvVarDriver.cpp) @@ -184,6 +185,7 @@ make_test(hip_popc " " ) make_test(hip_brev " " ) make_test(hip_clz " " ) make_test(hip_ffs " " ) +make_test(hip_ldg " " ) make_test(hipEventRecord --iterations 10) make_test(hipMemset " " ) make_test(hipMemset --N 10 --memsetval 0x42 ) # small copy, just 10 bytes. diff --git a/projects/hip/tests/src/hip_ldg.cpp b/projects/hip/tests/src/hip_ldg.cpp new file mode 100644 index 0000000000..2f281c5991 --- /dev/null +++ b/projects/hip/tests/src/hip_ldg.cpp @@ -0,0 +1,150 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include +#include "hip_runtime.h" + + +#define HIP_ASSERT(x) (assert((x)==hipSuccess)) + + +#define WIDTH 1024 +#define HEIGHT 1024 + +#define NUM (WIDTH*HEIGHT) + +#define THREADS_PER_BLOCK_X 16 +#define THREADS_PER_BLOCK_Y 16 +#define THREADS_PER_BLOCK_Z 1 + +__global__ void +vectoradd_float(hipLaunchParm lp, + float* a, const float* bm, const float* cm, int width, int height) + + { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + int i = y * width + x; + if ( i < (width * height)) { + a[i] = __ldg(&bm[i]) + __ldg(&cm[i]); + } + + + + } + +#if 0 +__kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) { + + + int x = blockDimX * blockIdx.x + threadIdx.x; + int y = blockDimY * blockIdy.y + threadIdx.y; + + int i = y * width + x; + if ( i < (width * height)) { + a[i] = b[i] + c[i]; + } +} +#endif + +using namespace std; + +int main() { + + float* hostA; + float* hostB; + float* hostC; + + float* deviceA; + float* deviceB; + float* deviceC; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + cout << " System minor " << devProp.minor << endl; + cout << " System major " << devProp.major << endl; + cout << " agent prop name " << devProp.name << endl; + + + + cout << "__ldg " << endl ; + + + int i; + int errors; + + hostA = (float*)malloc(NUM * sizeof(float)); + hostB = (float*)malloc(NUM * sizeof(float)); + hostC = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + hostB[i] = (float)i; + hostC[i] = (float)i*100.0f; + } + + HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(float))); + HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(float))); + HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(float))); + + HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice)); + HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(float), hipMemcpyHostToDevice)); + + + hipLaunchKernel(vectoradd_float, + dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), + 0, 0, + deviceA ,deviceB ,deviceC ,WIDTH ,HEIGHT); + + + HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(float), hipMemcpyDeviceToHost)); + + // verify the results + errors = 0; + for (i = 0; i < NUM; i++) { + if (hostA[i] != (hostB[i] + hostC[i])) { + errors++; + } + } + if (errors!=0) { + printf("FAILED: %d errors\n",errors); + } else { + printf ("PASSED!\n"); + } + + HIP_ASSERT(hipFree(deviceA)); + HIP_ASSERT(hipFree(deviceB)); + HIP_ASSERT(hipFree(deviceC)); + + free(hostA); + free(hostB); + free(hostC); + + //hipResetDefaultAccelerator(); + + return errors; +}