diff --git a/hipamd/include/hip/amd_detail/amd_surface_functions.h b/hipamd/include/hip/amd_detail/amd_surface_functions.h index 066c1e9e6b..4a8a6a63c8 100644 --- a/hipamd/include/hip/amd_detail/amd_surface_functions.h +++ b/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -164,6 +164,57 @@ static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat return tmp; } +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp = static_cast(u.x); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 1, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 2, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + tmp.y = static_cast(u.y); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 3, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + tmp.y = static_cast(u.y); + tmp.z = static_cast(u.z); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 4, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + tmp.y = static_cast(u.y); + tmp.z = static_cast(u.z); + tmp.w = static_cast(u.w); + return tmp; +} + template < typename T, typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> @@ -172,7 +223,7 @@ static __device__ __hip_img_chk__ void surf1Dread(T* data, hipSurfaceObject_t su __HIP_SURFACE_OBJECT_PARAMETERS_INIT x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); auto tmp = __ockl_image_load_1D(i, x); - *data = mapFrom(tmp); + *data = __hipMapFromNativeFloat4(tmp); } template < @@ -194,7 +245,7 @@ static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t su __HIP_SURFACE_OBJECT_PARAMETERS_INIT x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); auto tmp = __ockl_image_load_2D(i, int2(x, y).data); - *data = mapFrom(tmp); + *data = __hipMapFromNativeFloat4(tmp); } template < @@ -216,7 +267,7 @@ static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t su __HIP_SURFACE_OBJECT_PARAMETERS_INIT x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); auto tmp = __ockl_image_load_3D(i, int4(x, y, z, 0).data); - *data = mapFrom(tmp); + *data = __hipMapFromNativeFloat4(tmp); } template < @@ -230,5 +281,93 @@ static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t su __ockl_image_store_3D(i, int4(x, y, z, 0).data, tmp); } +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int layer, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __ockl_image_load_lod_1D(i, x, layer); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int layer, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_lod_1D(i, x, layer, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int layer, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_lod_2D(i, int2(x, y).data, layer); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int layer, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_lod_2D(i, int2(x, y).data, layer, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_CM(i, int2(x, y).data, face); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_CM(i, int2(x, y).data, face, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, + int layer, int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_lod_CM(i, int2(x, y).data, face, layer); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, + int layer, int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp); +} + #endif #endif diff --git a/hipamd/include/hip/amd_detail/ockl_image.h b/hipamd/include/hip/amd_detail/ockl_image.h index f749d0899c..a3fa616cc5 100644 --- a/hipamd/include/hip/amd_detail/ockl_image.h +++ b/hipamd/include/hip/amd_detail/ockl_image.h @@ -68,9 +68,9 @@ __device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, in __device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); -__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); +__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, float4::Native_vec_ p); -__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); +__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, float4::Native_vec_ p); __device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p); @@ -82,9 +82,9 @@ __device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i __device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); -__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); +__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l, float4::Native_vec_ p); -__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); +__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l, float4::Native_vec_ p); __device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c);