Files
rocm-systems/projects/aqlprofile/test/simple_convolution/simple_convolution.cl
T
Evgeny 7c4369bde4 Initial Commit
Contributors:
Ammar ELWazir <aelwazir@amd.com>
AravindanC <aravindan.cheruvally@amd.com>
Benjamin Welton <bewelton@amd.com>
Ma, Bing <Bing.Ma@amd.com>
Chun Yang <chun.yang@amd.com>
Cole Nelson <cole.nelson@amd.com>
Ethan Stewart <ethan.stewart@amd.com>
Evgeny <evgeny.shcherbakov@amd.com>
Freddy Paul <Freddy.paul@amd.com>
Giovanni Baraldi <gbaraldi@amd.com>
Gopesh Bhardwaj <Gopesh.Bhardwaj@amd.com>
Icarus Sparry <icarus.sparry@amd.com>
itrowbri <Ian.Trowbridge@amd.com>
James Edwards <JamesAdrian.Edwards@amd.com>
jatang <jatang@amd.com>
Jeremy Newton <Jeremy.Newton@amd.com>
Jonathan Kim <jonathan.kim@amd.com>
Kent Russell <kent.russell@amd.com>
Kiumars Sabeti <kiumars.sabeti@amd.com>
Lang Yu <lang.yu@amd.com>
Laurent Morichetti <laurent.morichetti@amd.com>
Mallya, Ameya Keshava <AmeyaKeshava.Mallya@amd.com>
Manjunath Jakaraddi <manjunath.jakaraddi@amd.com>
Mark Laws <markdavid.laws@amd.com>
Mohan Kumar Mithur <Mohan.KumarMithur@amd.com>
Nicholas Curtis <nicurtis@amd.com>
Nirmal Unnikrishnan <Nirmal.Unnikrishnan@amd.com>
Parag Bhandari <parag.bhandari@amd.com>
Ranjith Ramakrishnan <Ranjith.Ramakrishnan@amd.com>
Robert Gregory <Robert.Gregory@amd.com>
Saravanan Solaiyappan <saravanan.solaiyappan@amd.com>
Saurabh Verma <saurabh.verma@amd.com>
Srihari Uttanur <srihari.u@amd.com>
Srinivasan Subramanian <srinivasan.subramanian@amd.com>
Sriraksha Nagaraj <Sriraksha.Nagaraj@amd.com>
Sushma Vaddireddy <svaddire@amd.com>
Xianwei Zhang <Xianwei.Zhang@amd.com>


[ROCm/aqlprofile commit: 1ed169e30c]
2025-05-28 10:10:47 -05:00

57 خطوط
2.0 KiB
Common Lisp

/**
 * simple_convolution: each work-item produces one output pixel, computed as
 * the weighted sum of the input pixels in the neighborhood of that pixel.
 * The neighborhood extends (maskDimensions - 1) / 2 pixels on each side of
 * the pixel and is clipped at the image borders; the weight of each neighbor
 * is the mask element at the matching offset. The sum is rounded to the
 * nearest integer and clamped to the range of uint before being stored.
 *
 * Work-items whose global id is not smaller than width * height do nothing,
 * so the global work size may be rounded up past the image size.
 *
 * @param output Output matrix after performing convolution; indexed by the
 *               global id (presumably width * height elements - confirm
 *               against the host-side allocation)
 * @param input Input matrix on which convolution is to be performed,
 *              indexed row-major as input[row * width + column]
 * @param mask Mask matrix with which the convolution is performed,
 *             indexed row-major as mask[row * maskWidth + column]
 * @param inputDimensions dimensions of the input matrix (x = width, y = height)
 * @param maskDimensions dimensions of the mask matrix (x = width, y = height)
 */
__kernel void simple_convolution(__global uint * output,
                                 __global uint * input,
                                 __global float * mask,
                                 const uint2 inputDimensions,
                                 const uint2 maskDimensions) {
    const uint tid = get_global_id(0);
    const uint width = inputDimensions.x;
    const uint height = inputDimensions.y;

    // Ignore work-items that do not map to a pixel. This also covers an
    // empty image (width or height of 0), which would otherwise divide by
    // zero below. The product is formed in 64 bits so it cannot wrap.
    if ((ulong)tid >= (ulong)width * (ulong)height) {
        return;
    }

    const uint maskWidth = maskDimensions.x;
    const uint maskHeight = maskDimensions.y;

    // An empty mask has no taps, so the weighted sum is zero. Without this
    // check (maskWidth - 1) would wrap around and the loops below would
    // index far outside the mask buffer.
    if (maskWidth == 0 || maskHeight == 0) {
        output[tid] = 0;
        return;
    }

    // Pixel coordinates handled by this work-item.
    const uint x = tid % width;
    const uint y = tid / width;

    // Number of neighbors taken on each side of the pixel.
    const uint halfMaskWidth = (maskWidth - 1) / 2;
    const uint halfMaskHeight = (maskHeight - 1) / 2;

    // Find the left, right, top and bottom indices such that
    // the indices do not go beyond the image boundaries.
    const uint left = (x < halfMaskWidth) ? 0 : (x - halfMaskWidth);
    const uint right = ((x + halfMaskWidth) >= width) ? (width - 1)
                                                      : (x + halfMaskWidth);
    const uint top = (y < halfMaskHeight) ? 0 : (y - halfMaskHeight);
    const uint bottom = ((y + halfMaskHeight) >= height) ? (height - 1)
                                                         : (y + halfMaskHeight);

    // Weighted sum accumulator.
    float sumFX = 0.0f;

    // Columns are walked in the outer loop and rows in the inner loop; this
    // order is kept as-is because changing it would change the order of the
    // floating-point additions and therefore possibly the rounded result.
    for (uint i = left; i <= right; ++i) {
        // i >= x - halfMaskWidth holds for every column in the window, so
        // this offset never goes negative.
        const uint maskColumn = i + halfMaskWidth - x;

        for (uint j = top; j <= bottom; ++j) {
            const uint maskRow = j + halfMaskHeight - y;
            const uint maskIndex = maskRow * maskWidth + maskColumn;
            const uint index = j * width + i;

            sumFX += ((float)input[index] * mask[maskIndex]);
        }
    }

    // Adding 0.5 and truncating rounds to the nearest integer. The
    // saturating conversion truncates toward zero like a plain cast, but
    // clamps sums that are negative or too large for uint instead of
    // producing an unspecified value.
    output[tid] = convert_uint_sat(sumFX + 0.5f);
}