diff --git a/hipamd/include/hip/amd_detail/amd_hip_vector_types.h b/hipamd/include/hip/amd_detail/amd_hip_vector_types.h index 938e85467a..c183a734e3 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_vector_types.h +++ b/hipamd/include/hip/amd_detail/amd_hip_vector_types.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -127,6 +127,8 @@ template> class basic_is template> class basic_ostream; typedef basic_istream istream; typedef basic_ostream ostream; + +template struct is_scalar : public integral_constant {}; } // Namespace std. #endif // defined(__HIPCC_RTC__) diff --git a/hipamd/include/hip/amd_detail/amd_surface_functions.h b/hipamd/include/hip/amd_detail/amd_surface_functions.h index ee6e77769a..066c1e9e6b 100644 --- a/hipamd/include/hip/amd_detail/amd_surface_functions.h +++ b/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2018 - 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,37 +23,212 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H -#include +#if defined(__cplusplus) -#define __SURFACE_FUNCTIONS_DECL__ static inline __device__ -template -__SURFACE_FUNCTIONS_DECL__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, - int boundaryMode = hipBoundaryModeZero) { - hipArray* arrayPtr = (hipArray*)surfObj; - size_t width = arrayPtr->width; - size_t height = arrayPtr->height; - int32_t xOffset = x / sizeof(T); - T* dataPtr = (T*)arrayPtr->data; - if ((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0)) { - if (boundaryMode == hipBoundaryModeZero) { - *data = 0; - } - } else { - *data = *(dataPtr + y * width + xOffset); - } +#include +#include +#include + +#define __HIP_SURFACE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObj; + +template +struct __hip_is_isurf_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_isurf_channel_type> +{ + static constexpr bool value = + __hip_is_isurf_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 3) || + (rank == 4)); +}; + +// CUDA is using byte address, need map to pixel address for HIP +static __HOST_DEVICE__ __forceinline__ int __hipGetPixelAddr(int x, int format, int order) { + /* + * use below format index to generate format LUT + typedef enum { + HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0, + HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14, + HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15 + } hsa_ext_image_channel_type_t; + */ + static const int FormatLUT[] = { 0, 1, 0, 1, 3, 1, 1, 1, 0, 1, 2, 0, 1, 2, 1, 2 }; + x = FormatLUT[format] == 3 ? x / FormatLUT[format] : x >> FormatLUT[format]; + + /* + * use below order index to generate order LUT + typedef enum { + HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0, + HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1, + HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2, + HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4, + HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8, + HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9, + HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10, + HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14, + HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15, + HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16, + HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17, + HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18, + HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19 + } hsa_ext_image_channel_order_t; + */ + static const int OrderLUT[] = { 0, 0, 1, 1, 3, 1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 2, 0, 0, 0, 0 }; + return x = OrderLUT[order] == 3 ? x / OrderLUT[order] : x >> OrderLUT[order]; } -template -__SURFACE_FUNCTIONS_DECL__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, +template < + typename T, + typename std::enable_if::value>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 1>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 2>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + tmp.y = static_cast(t.y); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 3>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + tmp.y = static_cast(t.y); + tmp.z = static_cast(t.z); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 4>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + tmp.y = static_cast(t.y); + tmp.z = static_cast(t.z); + tmp.w = static_cast(t.w); + return tmp; +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1Dread(T* data, hipSurfaceObject_t surfObj, int x, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __ockl_image_load_1D(i, x); + *data = mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1Dwrite(T data, hipSurfaceObject_t surfObj, int x, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_1D(i, x, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_2D(i, int2(x, y).data); + *data = mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int boundaryMode = hipBoundaryModeZero) { - hipArray* arrayPtr = (hipArray*)surfObj; - size_t width = arrayPtr->width; - size_t height = arrayPtr->height; - int32_t xOffset = x / sizeof(T); - T* dataPtr = (T*)arrayPtr->data; - if (!((xOffset > width) || (xOffset < 0) || (y > height) || (y < 0))) { - *(dataPtr + y * width + xOffset) = data; - } + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_2D(i, int2(x, y).data, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, int z, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); + auto tmp = __ockl_image_load_3D(i, int4(x, y, z, 0).data); + *data = mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int z, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_3D(i, int4(x, y, z, 0).data, tmp); } #endif +#endif diff --git a/hipamd/include/hip/amd_detail/ockl_image.h b/hipamd/include/hip/amd_detail/ockl_image.h index a055f6d057..f749d0899c 100644 --- a/hipamd/include/hip/amd_detail/ockl_image.h +++ b/hipamd/include/hip/amd_detail/ockl_image.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -132,4 +132,44 @@ __device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPA __device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); +__device__ int __ockl_image_channel_data_type_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_3D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_CM(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_3D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); + }; \ No newline at end of file