/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once #pragma clang diagnostic ignored "-Wunused-parameter" #include #include namespace cg = cooperative_groups; __host__ __device__ inline float GetCoordinate(size_t iteration, size_t N, size_t dim, size_t num_subdivisions, bool normalized_coords) { float x = (static_cast(iteration) - N / 2) / num_subdivisions; return normalized_coords ? x / dim : x; } template __global__ void tex1DfetchKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; out[tid] = tex1Dfetch(tex_obj, tid); #endif } template __global__ void tex1DKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); out[tid] = tex1D(tex_obj, x); #endif } template __global__ void tex1DLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, float level_of_detail) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); out[tid] = tex1DLod(tex_obj, x, level_of_detail); #endif } template __global__ void tex1DLayeredLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, int layer, float level_of_detail) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); out[tid] = tex1DLayeredLod(tex_obj, x, layer, level_of_detail); #endif } template __global__ void tex1DGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, float dx, float dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); out[tid] = tex1DGrad(tex_obj, x, dx, dy); #endif } template __global__ void tex1DLayeredGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, int layer, float dx, float dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); out[tid] = tex1DLayeredGrad(tex_obj, x, layer, dx, dy); #endif } template __global__ void tex2DgatherKernel(TexelType* const out, int comp, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2Dgather(tex_obj, x, y, comp); #endif } template __global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2D(tex_obj, x, y); #endif } template __global__ void tex2DGradKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords, float2 dx, float2 dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DGrad(tex_obj, x, y, dx, dy); #endif } template __global__ void tex2DLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords, float layer, float2 dx, float2 dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DLayeredGrad(tex_obj, x, y, layer, dx, dy); #endif } template __global__ void tex2DLodKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords, float level) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DLod(tex_obj, x, y, level); #endif } template __global__ void tex2DLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords, int layer, float level) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DLayeredLod(tex_obj, x, y, layer, level); #endif } template __global__ void tex3DKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3D(tex_obj, x, y, z); #endif } template __global__ void tex3DLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, float level) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DLod(tex_obj, x, y, z, level); #endif } template __global__ void tex3DGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, float4 dx, float4 dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DGrad(tex_obj, x, y, z, dx, dy); #endif } template __global__ void texCubemapKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemap(tex_obj, x, y, z); #endif } template __global__ void texCubemapLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, float level) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLod(tex_obj, x, y, z, level); #endif } template __global__ void texCubemapGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, float4 dx, float4 dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapGrad(tex_obj, x, y, z, dx, dy); #endif } template __global__ void tex1DLayeredKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, size_t layer) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); out[tid] = tex1DLayered(tex_obj, x, layer); #endif } template __global__ void tex2DLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, size_t num_subdivisions, bool normalized_coords, size_t layer) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DLayered(tex_obj, x, y, layer); #endif } template __global__ void texCubemapLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, size_t layer) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLayered(tex_obj, x, y, z, layer); #endif } template __global__ void texCubemapLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, size_t layer, float level) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLayeredLod(tex_obj, x, y, z, layer, level); #endif } template __global__ void texCubemapLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, size_t num_subdivisions, bool normalized_coords, size_t layer, float4 dx, float4 dy) { #if !__HIP_NO_IMAGE_SUPPORT const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; if (tid_x >= N_x) return; const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; if (tid_y >= N_y) return; const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; if (tid_z >= N_z) return; float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLayeredGrad(tex_obj, x, y, z, layer, dx, dy); #endif }