diff --git a/hipamd/include/hip/hcc_detail/hip_runtime.h b/hipamd/include/hip/hcc_detail/hip_runtime.h index c1ad5b2fe5..cbb3a0e99a 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -103,9 +103,12 @@ extern int HIP_TRACE_API; #include #include #include -#include #if __HCC__ #include + #include +#else + #include + #include #endif // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) diff --git a/hipamd/include/hip/hcc_detail/ockl_image.h b/hipamd/include/hip/hcc_detail/ockl_image.h new file mode 100644 index 0000000000..d9bc296791 --- /dev/null +++ b/hipamd/include/hip/hcc_detail/ockl_image.h @@ -0,0 +1,133 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +extern "C" { + +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) + +__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l); + +__device__ void __ockl_image_store_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c); + +__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +}; \ No newline at end of file diff --git a/hipamd/include/hip/hcc_detail/texture_fetch_functions.h b/hipamd/include/hip/hcc_detail/texture_fetch_functions.h new file mode 100644 index 0000000000..ad8deda400 --- /dev/null +++ b/hipamd/include/hip/hcc_detail/texture_fetch_functions.h @@ -0,0 +1,272 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#include + +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +template +struct __hip_tex_ret {}; + +template +using __hip_tex_ret_t = typename __hip_tex_ret::type; + +template +struct __hip_tex_ret { using type = T; }; + +template +struct __hip_tex_ret { using type = float; }; + +template +struct __hip_tex_ret, hipReadModeNormalizedFloat> { using type = HIP_vector_type; }; + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1Dfetch(texture t, int x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1D(texture t, float x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2D(texture t, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayered(texture t, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayered(texture t, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3D(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemap(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLod(texture t, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLod(texture t, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayeredLod(texture t, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayeredLod(texture t, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3DLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayered(texture t, float x, float y, float z, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayeredLod(texture t, float x, float y, float z, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_tex_ret_t texCubemapLayeredGrad(texture t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DGrad(texture t, float x, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DGrad(texture t, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex1DLayeredGrad(texture t, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex2DLayeredGrad(texture t, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +static __forceinline__ __device__ __hip_tex_ret_t tex3DGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return *reinterpret_cast<__hip_tex_ret_t*>(&tmp); +} + +template +struct __hip_tex2dgather_ret {}; + +template +using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret::type; + +template +struct __hip_tex2dgather_ret { using type = HIP_vector_type; }; + +template +struct __hip_tex2dgather_ret, hipReadModeElementType> { using type = HIP_vector_type; }; + +template +struct __hip_tex2dgather_ret { using type = float4; }; + +template +static __forceinline__ __device__ __hip_tex2dgather_ret_t tex2Dgather(texture t, float x, float y, int comp=0) +{ + TEXTURE_PARAMETERS_INIT; + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return *reinterpret_cast<__hip_tex2dgather_ret_t*>(&tmp); + } + } + return {}; +} + +#endif diff --git a/hipamd/include/hip/hcc_detail/texture_indirect_functions.h b/hipamd/include/hip/hcc_detail/texture_indirect_functions.h new file mode 100644 index 0000000000..56784f433a --- /dev/null +++ b/hipamd/include/hip/hcc_detail/texture_indirect_functions.h @@ -0,0 +1,382 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#define TEXTURE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +template +static __device__ T tex1Dfetch(hipTextureObject_t textureObject, int x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x) +{ + *ptr = tex1Dfetch(textureObject, x); +} + +template +static __device__ T tex1D(hipTextureObject_t textureObject, float x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1D(i, s, x); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x) +{ + *ptr = tex1D(textureObject, x); +} + +template +static __device__ T tex2D(hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y) +{ + *ptr = tex2D(textureObject, x, y); +} + +template +static __device__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = tex3D(textureObject, x, y, z); +} + +template +static __device__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer) +{ + *ptr = tex1DLayered(textureObject, x, layer); +} + +template +static __device__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer) +{ + *ptr = tex1DLayered(textureObject, x, y, layer); +} + +template +static __device__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = texCubemap(textureObject, x, y, z); +} + +template +static __device__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + *ptr = texCubemapLayered(textureObject, x, y, z, layer); +} + +template +static __device__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + case 2: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + case 3: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + default: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return *reinterpret_cast(&tmp); + break; + } + }; + return {}; +} + +template +static __device__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + *ptr = texCubemapLayered(textureObject, x, y, comp); +} + +template +static __device__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level) +{ + *ptr = tex1DLod(textureObject, x, level); +} + +template +static __device__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level) +{ + *ptr = tex2DLod(textureObject, x, y, level); +} + +template +static __device__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = tex3DLod(textureObject, x, y, z, level); +} + +template +static __device__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level) +{ + *ptr = tex1DLayeredLod(textureObject, x, layer, level); +} + +template +static __device__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + *ptr = tex2DLayeredLod(textureObject, x, y, layer, level); +} + +template +static __device__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = texCubemapLod(textureObject, x, y, z, level); +} + +template +static __device__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast(&tmp); + return {}; +} + +template +static __device__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template +static __device__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + *ptr = texCubemapLayeredLod(textureObject, x, y, z, layer, level); +} + +template +static __device__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + *ptr = tex1DGrad(textureObject, x, dPdx, dPdy); +} + +template +static __device__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DGrad(textureObject, x, y, dPdx, dPdy); +} + +template +static __device__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = tex3DGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template +static __device__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + *ptr = tex1DLayeredGrad(textureObject, x, layer, dPdx, dPdy); +} + +template +static __device__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return *reinterpret_cast(&tmp); +} + +template +static __device__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DLayeredGrad(textureObject, x, y, layer, dPdx, dPdy); +} + +template +static __device__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return *reinterpret_cast(&tmp); + return {}; +} + +template +static __device__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapLayeredGrad(textureObject, x, y, z, layer, dPdx, dPdy); +} + +#endif