Adding kernels to read / write buffers
Change-Id: Icad95c084e0fcd0bd9f86154e23ac8f54c24afbe
This commit is contained in:
@@ -152,6 +152,8 @@ function(buildCodeObjects kname)
|
||||
|
||||
endfunction(buildCodeObjects)
|
||||
|
||||
buildCodeObjects("read")
|
||||
buildCodeObjects("write")
|
||||
buildCodeObjects("binary_search")
|
||||
|
||||
#
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
|
||||
/**
 * @brief OpenCL kernel that reads a buffer and accumulates the sum of its
 * values into a destination integer
 *
 * @param src Pointer to an array of uint16 elements; one element holds
 * 16 unsigned integers (32-bit) i.e. 16 * 32-bit = 64 bytes
 *
 * @param size Specifies number of uint16 elements in the array
 *
 * @param threads Number of threads running this kernel. It is also the
 * distance, in elements, between two consecutive reads of one thread
 *
 * @param dst Output parameter to which every thread atomically adds the
 * sum of the elements it has read. The kernel only adds to it and never
 * clears it
 *
 * @note: A thread starts at the element selected by its global id and
 * advances by 'threads' elements until it reaches 'size'. A thread whose
 * global id is not less than 'size' reads nothing and adds zero. If 'size'
 * is not an integral multiple of 'threads' the threads do unequal amounts
 * of work, which leads to incorrect benchmark computation
 *
 * @note: If 'threads' is zero the kernel returns without touching 'dst',
 * since a zero stride could never reach the end of the buffer
 *
 * @note: The sum is accumulated in a 32-bit unsigned integer
 *
 */

__kernel void
read_kernel(__global uint16 *src,
            ulong size, uint threads, __global uint* dst) {

  // A stride of zero would re-read the same element forever
  if (threads == 0) {
    return;
  }

  uint tmp = 0;

  // Index with the full-width global id instead of a signed int, and
  // bounds-check the index itself so that no pointer beyond the end of
  // the array is ever formed
  for (ulong idx = get_global_id(0); idx < size; idx += threads) {
    uint16 pval = src[idx];
    tmp += pval.s0 + pval.s1 + pval.s2 + pval.s3 +
           pval.s4 + pval.s5 + pval.s6 + pval.s7 +
           pval.s8 + pval.s9 + pval.sa + pval.sb +
           pval.sc + pval.sd + pval.se + pval.sf;
  }

  // One atomic update per thread keeps contention on dst low
  atomic_add(dst, tmp);
}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
|
||||
/**
 * @brief OpenCL kernel that fills a buffer with a constant list of integers
 *
 * @param dst Pointer to an array of uint16 elements; one element holds
 * 16 unsigned integers (32-bit) i.e. 16 * 32-bit = 64 bytes
 *
 * @param size Specifies number of uint16 elements in the array
 *
 * @param threads Number of threads running this kernel. It is also the
 * distance, in elements, between two consecutive writes of one thread
 *
 * @note: A thread starts at the element selected by its global id and
 * advances by 'threads' elements until it reaches 'size'. A thread whose
 * global id is not less than 'size' writes nothing, so the buffer is never
 * written out-of-bounds. If 'size' is not an integral multiple of 'threads'
 * the threads do unequal amounts of work, which leads to incorrect
 * benchmark computation
 *
 * @note: If 'threads' is zero the kernel returns without writing, since a
 * zero stride could never reach the end of the buffer
 *
 */

__kernel void
write_kernel(__global uint16 *dst,
             ulong size, uint threads) {

  // A stride of zero would re-write the same element forever
  if (threads == 0) {
    return;
  }

  // A single scalar in a vector literal is replicated to all 16 lanes
  const uint16 pval = (uint16)(0xabababab);

  // Test the index before every store (the bound is checked up front, not
  // after the first write) and use the full-width global id instead of a
  // signed int
  for (ulong idx = get_global_id(0); idx < size; idx += threads) {
    dst[idx] = pval;
  }

}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user