43b7d702ff
Change-Id: I9b647aff79ba92f07b2ca18eac49d58ae63fa859
[ROCm/hip-tests commit: 8a9ed34f6a]
461 строка
19 KiB
C++
461 строка
19 KiB
C++
/*
|
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
#pragma once
|
|
#pragma clang diagnostic ignored "-Wunused-parameter"
|
|
|
|
#include <hip/hip_runtime_api.h>
|
|
#include <hip/hip_cooperative_groups.h>
|
|
|
|
namespace cg = cooperative_groups;
|
|
|
|
__host__ __device__ inline float GetCoordinate(size_t iteration, size_t N, size_t dim,
|
|
size_t num_subdivisions, bool normalized_coords) {
|
|
float x = (static_cast<float>(iteration) - N / 2) / num_subdivisions;
|
|
return normalized_coords ? x / dim : x;
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DfetchKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
out[tid] = tex1Dfetch<TexelType>(tex_obj, tid);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj,
|
|
size_t width, size_t num_subdivisions, bool normalized_coords) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
|
|
float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords);
|
|
out[tid] = tex1D<TexelType>(tex_obj, x);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj,
|
|
size_t width, size_t num_subdivisions, bool normalized_coords,
|
|
float level_of_detail) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
|
|
float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords);
|
|
out[tid] = tex1DLod<TexelType>(tex_obj, x, level_of_detail);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DLayeredLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj,
|
|
size_t width, size_t num_subdivisions, bool normalized_coords,
|
|
int layer, float level_of_detail) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
|
|
float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords);
|
|
out[tid] = tex1DLayeredLod<TexelType>(tex_obj, x, layer, level_of_detail);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj,
|
|
size_t width, size_t num_subdivisions, bool normalized_coords,
|
|
float dx, float dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
|
|
float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords);
|
|
out[tid] = tex1DGrad<TexelType>(tex_obj, x, dx, dy);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DLayeredGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj,
|
|
size_t width, size_t num_subdivisions,
|
|
bool normalized_coords, int layer, float dx, float dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
|
|
float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords);
|
|
out[tid] = tex1DLayeredGrad<TexelType>(tex_obj, x, layer, dx, dy);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DgatherKernel(TexelType* const out, int comp, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2Dgather<TexelType>(tex_obj, x, y, comp);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2D<TexelType>(tex_obj, x, y);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DGradKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords, float2 dx,
|
|
float2 dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2DGrad<TexelType>(tex_obj, x, y, dx, dy);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords, float layer,
|
|
float2 dx, float2 dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2DLayeredGrad<TexelType>(tex_obj, x, y, layer, dx, dy);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DLodKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords, float level) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2DLod<TexelType>(tex_obj, x, y, level);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords, int layer,
|
|
float level) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2DLayeredLod<TexelType>(tex_obj, x, y, layer, level);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex3DKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth,
|
|
size_t num_subdivisions, bool normalized_coords) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3D<TexelType>(tex_obj, x, y, z);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex3DLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions, bool normalized_coords,
|
|
float level) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DLod<TexelType>(tex_obj, x, y, z, level);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex3DGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions, bool normalized_coords,
|
|
float4 dx, float4 dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DGrad<TexelType>(tex_obj, x, y, z, dx, dy);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void texCubemapKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions, bool normalized_coords) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemap<TexelType>(tex_obj, x, y, z);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void texCubemapLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions, bool normalized_coords,
|
|
float level) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLod<TexelType>(tex_obj, x, y, z, level);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void texCubemapGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions, bool normalized_coords,
|
|
float4 dx, float4 dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] =
|
|
texCubemapGrad<TexelType>(tex_obj, x, y, z, dx, dy);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex1DLayeredKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj,
|
|
size_t width, size_t num_subdivisions, bool normalized_coords,
|
|
size_t layer) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid = cg::this_grid().thread_rank();
|
|
if (tid >= N) return;
|
|
|
|
float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords);
|
|
out[tid] = tex1DLayered<TexelType>(tex_obj, x, layer);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void tex2DLayeredKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t num_subdivisions, bool normalized_coords, size_t layer) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_y * N_x + tid_x] = tex2DLayered<TexelType>(tex_obj, x, y, layer);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void texCubemapLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions,
|
|
bool normalized_coords, size_t layer) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] =
|
|
texCubemapLayered<TexelType>(tex_obj, x, y, z, layer);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void texCubemapLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z,
|
|
hipTextureObject_t tex_obj, size_t width, size_t height,
|
|
size_t depth, size_t num_subdivisions,
|
|
bool normalized_coords, size_t layer, float level) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] =
|
|
texCubemapLayeredLod<TexelType>(tex_obj, x, y, z, layer, level);
|
|
#endif
|
|
}
|
|
|
|
template <typename TexelType>
|
|
__global__ void texCubemapLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y,
|
|
size_t N_z, hipTextureObject_t tex_obj, size_t width,
|
|
size_t height, size_t depth, size_t num_subdivisions,
|
|
bool normalized_coords, size_t layer, float4 dx,
|
|
float4 dy) {
|
|
#if !__HIP_NO_IMAGE_SUPPORT
|
|
const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (tid_x >= N_x) return;
|
|
|
|
const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
if (tid_y >= N_y) return;
|
|
|
|
const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z;
|
|
if (tid_z >= N_z) return;
|
|
|
|
float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords);
|
|
float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords);
|
|
float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords);
|
|
|
|
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] =
|
|
texCubemapLayeredGrad<TexelType>(tex_obj, x, y, z, layer, dx, dy);
|
|
#endif
|
|
}
|