Files

398 rindas
13 KiB
C++

/*
* Copyright (C) Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <math.h>
#define HIP_SAMPLING_VERIFY_EPSILON 0.00001
// The internal precision varies by the GPU family and sometimes within the family.
// Thus the following threshold is subject to change.
#define HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD 0.05 // 5% for filter mode
#define HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD 0.1
#if HT_NVIDIA
typedef unsigned char uchar;
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
inline __host__ __device__ operator+(const T &a, const T &b)
{
return {a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w};
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
inline __host__ __device__ operator-(const T &a, const T &b)
{
return {a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w};
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, bool>::type
inline __host__ __device__ operator==(const T &a, const T &b)
{
return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, T>::type
inline __host__ __device__ operator*(const decltype(T::x) &a, const T &b)
{
return {a * b.x, a * b.y, a * b.z, a * b.w};
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, void>::type
inline __host__ __device__ operator*=(T &a, const decltype(T::x) &b)
{
a.x *= b;
a.y *= b;
a.z *= b;
a.w *= b;
}
#endif // HT_NVIDIA
template <typename T> struct mipmapLevelArray {
T* data; // level array data
hipExtent e; // level array size
};
// From CIE 1931 color space to sRGB
inline float hipSRGBMap(float fc) {
double c = static_cast<double>(fc);
#if !defined(_WIN32)
if (std::isnan(c))
c = 0.0;
#else
if (_isnan(c)) c = 0.0;
#endif
if (c > 1.0)
c = 1.0;
else if (c < 0.0)
c = 0.0;
else if (c < 0.0031308)
c = 12.92 * c;
else
c = 1.055 * pow(c, 5.0 / 12.0) - 0.055;
return static_cast<float>(c);
}
// From sRGB to CIE 1931 color space
inline float hipSRGBUnmap(float fc) {
double c = static_cast<double>(fc);
if (c <= 0.04045)
c = c / 12.92;
else
c = pow((c + 0.055) / 1.055, 2.4);
return static_cast<float>(c);
}
inline float4 hipSRGBMap(float4 fc) {
fc.x = hipSRGBMap(fc.x);
fc.y = hipSRGBMap(fc.y);
fc.z = hipSRGBMap(fc.z);
// Alpha channel will keep unchanged
return fc;
}
inline float4 hipSRGBUnmap(float4 fc) {
fc.x = hipSRGBUnmap(fc.x);
fc.y = hipSRGBUnmap(fc.y);
fc.z = hipSRGBUnmap(fc.z);
// Alpha channel will keep unchanged
return fc;
}
template<typename T>
typename std::enable_if<std::is_scalar<T>::value == true, double>::type
hipFabs(const T &t) {
return fabs(t);
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 1, double>::type
hipFabs(const T &t) {
return fabs(t.x);
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 2, double>::type
hipFabs(const T &t) {
double x = static_cast<double>(t.x);
double y = static_cast<double>(t.y);
double s = x * x + y * y;
return sqrt(s);
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 3, double>::type
hipFabs(const T &t) {
double x = static_cast<double>(t.x);
double y = static_cast<double>(t.y);
double z = static_cast<double>(t.z);
double s = x * x + y * y + z * z;
return sqrt(s);
}
template<typename T>
typename std::enable_if<sizeof(T) / sizeof(decltype(T::x)) == 4, double>::type
hipFabs(const T &t) {
double x = static_cast<double>(t.x);
double y = static_cast<double>(t.y);
double z = static_cast<double>(t.z);
double w = static_cast<double>(t.w);
double s = x * x + y * y + z * z + w * w;
return sqrt(s);
}
template<typename T, hipTextureFilterMode fMode = hipFilterModePoint, bool sRGB = false>
bool hipTextureSamplingVerify(const T &outputData, const T &expected) {
bool testResult = false;
if (fMode == hipFilterModePoint && !sRGB) {
testResult = outputData == expected;
} else {
double mean = (hipFabs(outputData) + hipFabs(expected)) / 2;
double diff = hipFabs(outputData - expected);
double ratio = diff / (mean + HIP_SAMPLING_VERIFY_EPSILON);
if (ratio <= HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD) {
testResult = true;
} else if (diff <= HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD) {
// Some small outputs have big ratio due to float operation difference of ALU and GPU
testResult = true;
}
}
return testResult;
}
// Simulate CTS static AddressingTable sAddressingTable
template<hipTextureAddressMode addressMode>
void hipTextureGetAddress(int &value, const int maxValue)
{
switch(addressMode)
{
case hipAddressModeClamp:
value = value < 0 ? 0
: (value > maxValue - 1 ? maxValue - 1 : value);
break;
case hipAddressModeBorder:
value = value < -1 ? -1
: (value > maxValue ? maxValue : value);
break;
default:
break;
}
}
// Simulate logics in CTS read_image_pixel_float().
// x, y and z must be returned by hipTextureGetAddress()
template<typename T, hipTextureAddressMode addressMode, bool sRGB = false>
T hipTextureGetValue(const T *data, const int x, const int width,
const int y = 0, const int height = 0, const int z = 0, const int depth = 0) {
T result;
memset(&result, 0, sizeof(result));
switch (addressMode) {
case hipAddressModeClamp:
if (width > 0) {
if (height == 0 && depth == 0) {
result = data[x]; // 1D
} else if (depth == 0) {
result = data[y * width + x]; // 2D
} else {
result = data[z * width * height + y * width + x]; // 3D
}
}
break;
case hipAddressModeBorder:
if (width > 0) {
if (height == 0 && depth == 0) {
if (x >= 0 && x < width)
result = data[x]; // 1D
} else if (depth == 0) {
if (x >= 0 && x < width && y >= 0 && y < height)
result = data[y * width + x]; // 2D
} else {
if (x >= 0 && x < width && y >= 0 && y < height && z >= 0 && z < depth)
result = data[z * width * height + y * width + x]; // 3D
}
}
break;
default:
break;
}
if constexpr (sRGB && std::is_same<T, float4>::value) {
result = hipSRGBUnmap(result);
}
return result;
}
template<typename T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode, bool sRGB = false>
T getExpectedValue(const int width, float x, const T *data) {
T result;
memset(&result, 0, sizeof(result));
switch (filterMode) {
case hipFilterModePoint: {
int i1 = static_cast<int>(floor(x));
hipTextureGetAddress < addressMode > (i1, width);
result = hipTextureGetValue < T, addressMode, sRGB > (data, i1, width);
}
break;
case hipFilterModeLinear: {
x -= 0.5;
int i1 = static_cast<int>(floor(x));
int i2 = i1 + 1;
float a = x - i1;
hipTextureGetAddress < addressMode > (i1, width);
hipTextureGetAddress < addressMode > (i2, width);
T t1 = hipTextureGetValue < T, addressMode, sRGB> (data, i1, width);
T t2 = hipTextureGetValue < T, addressMode, sRGB > (data, i2, width);
return (1 - a) * t1 + a * t2;
}
break;
}
return result;
}
template<typename T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode, bool sRGB = false>
T getExpectedValue(const int width, const int height, float x, float y, const T *data) {
T result;
memset(&result, 0, sizeof(result));
switch (filterMode) {
case hipFilterModePoint: {
int i1 = static_cast<int>(floor(x));
int j1 = static_cast<int>(floor(y));
hipTextureGetAddress < addressMode > (i1, width);
hipTextureGetAddress < addressMode > (j1, height);
result = hipTextureGetValue < T, addressMode, sRGB > (data, i1, width, j1, height);
}
break;
case hipFilterModeLinear: {
x -= 0.5;
y -= 0.5;
int i1 = static_cast<int>(floor(x));
int j1 = static_cast<int>(floor(y));
int i2 = i1 + 1;
int j2 = j1 + 1;
float a = x - i1;
float b = y - j1;
hipTextureGetAddress < addressMode > (i1, width);
hipTextureGetAddress < addressMode > (i2, width);
hipTextureGetAddress < addressMode > (j1, height);
hipTextureGetAddress < addressMode > (j2, height);
T t11 = hipTextureGetValue < T, addressMode, sRGB
> (data, i1, width, j1, height);
T t21 = hipTextureGetValue < T, addressMode, sRGB
> (data, i2, width, j1, height);
T t12 = hipTextureGetValue < T, addressMode, sRGB
> (data, i1, width, j2, height);
T t22 = hipTextureGetValue < T, addressMode, sRGB
> (data, i2, width, j2, height);
result = (1 - a) * (1 - b) * t11 + a * (1 - b) * t21 + (1 - a) * b * t12
+ a * b * t22;
}
break;
}
return result;
}
template<class T, hipTextureAddressMode addressMode, hipTextureFilterMode filterMode, bool sRGB = false>
T getExpectedValue(const int width, const int height, const int depth,
float x, float y, float z, const T *data) {
T result;
memset(&result, 0, sizeof(result));
switch (filterMode) {
case hipFilterModePoint: {
int i1 = static_cast<int>(floor(x));
int j1 = static_cast<int>(floor(y));
int k1 = static_cast<int>(floor(z));
hipTextureGetAddress < addressMode > (i1, width);
hipTextureGetAddress < addressMode > (j1, height);
hipTextureGetAddress < addressMode > (k1, depth);
result = hipTextureGetValue < T, addressMode, sRGB > (data, i1, width, j1, height, k1, depth);
}
break;
case hipFilterModeLinear: {
x -= 0.5;
y -= 0.5;
z -= 0.5;
int i1 = static_cast<int>(floor(x));
int j1 = static_cast<int>(floor(y));
int k1 = static_cast<int>(floor(z));
int i2 = i1 + 1;
int j2 = j1 + 1;
int k2 = k1 + 1;
float a = x - i1;
float b = y - j1;
float c = z - k1;
hipTextureGetAddress < addressMode > (i1, width);
hipTextureGetAddress < addressMode > (i2, width);
hipTextureGetAddress < addressMode > (j1, height);
hipTextureGetAddress < addressMode > (j2, height);
hipTextureGetAddress < addressMode > (k1, depth);
hipTextureGetAddress < addressMode > (k2, depth);
T t111 = hipTextureGetValue < T, addressMode, sRGB
> (data, i1, width, j1, height, k1, depth);
T t211 = hipTextureGetValue < T, addressMode, sRGB
> (data, i2, width, j1, height, k1, depth);
T t121 = hipTextureGetValue < T, addressMode, sRGB
> (data, i1, width, j2, height, k1, depth);
T t112 = hipTextureGetValue < T, addressMode, sRGB
> (data, i1, width, j1, height, k2, depth);
T t122 = hipTextureGetValue < T, addressMode, sRGB
> (data, i1, width, j2, height, k2, depth);
T t212 = hipTextureGetValue < T, addressMode, sRGB
> (data, i2, width, j1, height, k2, depth);
T t221 = hipTextureGetValue < T, addressMode, sRGB
> (data, i2, width, j2, height, k1, depth);
T t222 = hipTextureGetValue < T, addressMode, sRGB
> (data, i2, width, j2, height, k2, depth);
result =
(1 - a) * (1 - b) * (1 - c) * t111 + a * (1 - b) * (1 - c) * t211 +
(1 - a) * b * (1 - c) * t121 + a * b * (1 - c) * t221 +
(1 - a) * (1 - b) * c * t112 + a * (1 - b) * c * t212 +
(1 - a) * b * c * t122 + a * b * c * t222;
}
break;
}
return result;
}