diff --git a/projects/hip/bin/hipcc.pl b/projects/hip/bin/hipcc.pl index 71ec5f042a..2dd7c248ef 100755 --- a/projects/hip/bin/hipcc.pl +++ b/projects/hip/bin/hipcc.pl @@ -393,121 +393,7 @@ foreach $arg (@ARGV) $swallowArg = 1; } - ## process linker response file for hip-clang - ## extract object files from static library and pass them directly to - ## hip-clang in command line. - ## ToDo: Remove this after hip-clang switch to lto and lld is able to - ## handle clang-offload-bundler bundles. - if (($arg =~ m/^-Wl,@/ or $arg =~ m/^@/) and - $HIP_PLATFORM eq 'amd') { - my @split_arg = (split /\@/, $arg); # arg will have options type(-Wl,@ or @) and filename - my $file = $split_arg[1]; - open my $in, "<:encoding(utf8)", $file or die "$file: $!"; - my $new_arg = ""; - my $tmpdir = get_temp_dir (); - my $new_file = "$tmpdir/response_file"; - open my $out, ">", $new_file or die "$new_file: $!"; - while (my $line = <$in>) { - chomp $line; - if ($line =~ m/\.a$/ || $line =~ m/\.lo$/) { - my $libFile = $line; - my $path = abs_path($line); - my @objs = split ('\n', `cd $tmpdir; ar xv $path`); - ## Check if all files in .a are object files. - my $allIsObj = 1; - my $realObjs = ""; - foreach my $obj (@objs) { - chomp $obj; - $obj =~ s/^x - //; - $obj = "$tmpdir/$obj"; - my $fileType = `file $obj`; - my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/); - $allIsObj = ($allIsObj and $isObj); - if ($isObj) { - $realObjs = ($realObjs . " " . $obj); - } else { - push (@inputs, $obj); - $new_arg = "$new_arg $obj"; - } - } - chomp $realObjs; - if ($allIsObj) { - print $out "$line\n"; - } elsif ($realObjs) { - my($libBaseName, $libDir, $libExt) = fileparse($libFile); - $libBaseName = mktemp($libBaseName . "XXXX") . $libExt; - system("cd $tmpdir; ar rc $libBaseName $realObjs"); - print $out "$tmpdir/$libBaseName\n"; - } - } elsif ($line =~ m/\.o$/) { - my $fileType = `file $line`; - my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/); - if ($isObj) { - print $out "$line\n"; - } else { - push (@inputs, $line); - $new_arg = "$new_arg $line"; - } - } else { - print $out "$line\n"; - } - } - close $in; - close $out; - $arg = "$new_arg $split_arg[0]\@$new_file"; - $escapeArg = 0; - } elsif (($arg =~ m/\.a$/ || $arg =~ m/\.lo$/) && - $HIP_PLATFORM eq 'amd') { - ## process static library for hip-clang - ## extract object files from static library and pass them directly to - ## hip-clang. - ## ToDo: Remove this after hip-clang switch to lto and lld is able to - ## handle clang-offload-bundler bundles. - my $new_arg = ""; - my $tmpdir = get_temp_dir (); - my $libFile = $arg; - my $path = abs_path($arg); - my @objs = split ('\n', `cd $tmpdir; ar xv $path`); - ## Check if all files in .a are object files. - my $allIsObj = 1; - my $realObjs = ""; - foreach my $obj (@objs) { - chomp $obj; - $obj =~ s/^x - //; - $obj = "$tmpdir/$obj"; - my $fileType = `file $obj`; - my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/); - if ($fileType =~ m/ELF/) { - my $sections = `$HIP_CLANG_PATH/llvm-readelf -e -W $obj`; - $isObj = !($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/); - } - $allIsObj = ($allIsObj and $isObj); - if ($isObj) { - $realObjs = ($realObjs . " " . $obj); - } else { - push (@inputs, $obj); - if ($new_arg ne "") { - $new_arg .= " "; - } - $new_arg .= "$obj"; - } - } - chomp $realObjs; - if ($allIsObj) { - $new_arg = $arg; - } elsif ($realObjs) { - my($libBaseName, $libDir, $libExt) = fileparse($libFile); - $libBaseName = mktemp($libBaseName . "XXXX") . $libExt; - system("cd $tmpdir; ar rc $libBaseName $realObjs"); - $new_arg .= " $tmpdir/$libBaseName"; - } - $arg = "$new_arg"; - $escapeArg = 0; - if ($toolArgs =~ m/-Xlinker$/) { - $toolArgs = substr $toolArgs, 0, -8; - chomp $toolArgs; - } - } elsif ($arg eq '-x') { + if ($arg eq '-x') { $fileTypeFlag = 1; } elsif (($arg eq 'c' and $prevArg eq '-x') or ($arg eq '-xc')) { $fileTypeFlag = 1; diff --git a/projects/hip/docs/markdown/hip_deprecated_api_list.md b/projects/hip/docs/markdown/hip_deprecated_api_list.md index b26aceecee..22f3b2a4c9 100644 --- a/projects/hip/docs/markdown/hip_deprecated_api_list.md +++ b/projects/hip/docs/markdown/hip_deprecated_api_list.md @@ -51,32 +51,32 @@ Should use roctracer/rocTX instead ## HIP Texture Management APIs -###hipGetTextureReference -###hipTexRefSetAddressMode -###hipTexRefSetArray -###hipTexRefSetFilterMode -###hipTexRefSetFlags -###hipTexRefSetFormat -###hipBindTexture -###hipBindTexture2D -###hipBindTextureToArray -###hipGetTextureAlignmentOffset -###hipUnbindTexture -###hipTexRefGetAddress -###hipTexRefGetAddressMode -###hipTexRefGetFilterMode -###hipTexRefGetFlags -###hipTexRefGetFormat -###hipTexRefGetMaxAnisotropy -###hipTexRefGetMipmapFilterMode -###hipTexRefGetMipmapLevelBias -###hipTexRefGetMipmapLevelClamp -###hipTexRefGetMipMappedArray -###hipTexRefSetAddress -###hipTexRefSetAddress2D -###hipTexRefSetMaxAnisotropy -###hipTexRefSetBorderColor -###hipTexRefSetMipmapFilterMode -###hipTexRefSetMipmapLevelBias -###hipTexRefSetMipmapLevelClamp -###hipTexRefSetMipmappedArray \ No newline at end of file +### hipGetTextureReference +### hipTexRefSetAddressMode +### hipTexRefSetArray +### hipTexRefSetFilterMode +### hipTexRefSetFlags +### hipTexRefSetFormat +### hipBindTexture +### hipBindTexture2D +### hipBindTextureToArray +### hipGetTextureAlignmentOffset +### hipUnbindTexture +### hipTexRefGetAddress +### hipTexRefGetAddressMode +### hipTexRefGetFilterMode +### hipTexRefGetFlags +### hipTexRefGetFormat +### hipTexRefGetMaxAnisotropy +### hipTexRefGetMipmapFilterMode +### hipTexRefGetMipmapLevelBias +### hipTexRefGetMipmapLevelClamp +### hipTexRefGetMipMappedArray +### hipTexRefSetAddress +### hipTexRefSetAddress2D +### hipTexRefSetMaxAnisotropy +### hipTexRefSetBorderColor +### hipTexRefSetMipmapFilterMode +### hipTexRefSetMipmapLevelBias +### hipTexRefSetMipmapLevelClamp +### hipTexRefSetMipmappedArray diff --git a/projects/hip/include/hip/hip_bfloat16.h b/projects/hip/include/hip/hip_bfloat16.h index eb576b3b5b..b4d4f641c2 100644 --- a/projects/hip/include/hip/hip_bfloat16.h +++ b/projects/hip/include/hip/hip_bfloat16.h @@ -1,7 +1,7 @@ /** * MIT License * - * Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -29,255 +29,12 @@ #ifndef _HIP_BFLOAT16_H_ #define _HIP_BFLOAT16_H_ -#if __cplusplus < 201103L || !defined(__HIPCC__) - -// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only -// include a minimal definition of hip_bfloat16 - -#include -/*! \brief Struct to represent a 16 bit brain floating point number. */ -typedef struct -{ - uint16_t data; -} hip_bfloat16; - -#else // __cplusplus < 201103L || !defined(__HIPCC__) - -#include -#include -#include -#include -#include -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wshadow" -struct hip_bfloat16 -{ - uint16_t data; - - enum truncate_t - { - truncate - }; - - __host__ __device__ hip_bfloat16() = default; - - // round upper 16 bits of IEEE float to convert to bfloat16 - explicit __host__ __device__ hip_bfloat16(float f) - : data(float_to_bfloat16(f)) - { - } - - explicit __host__ __device__ hip_bfloat16(float f, truncate_t) - : data(truncate_float_to_bfloat16(f)) - { - } - - // zero extend lower 16 bits of bfloat16 to convert to IEEE float - __host__ __device__ operator float() const - { - union - { - uint32_t int32; - float fp32; - } u = {uint32_t(data) << 16}; - return u.fp32; - } - - static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f) - { - hip_bfloat16 output; - output.data = float_to_bfloat16(f); - return output; - } - - static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t) - { - hip_bfloat16 output; - output.data = truncate_float_to_bfloat16(f); - return output; - } - -private: - static __host__ __device__ uint16_t float_to_bfloat16(float f) - { - union - { - float fp32; - uint32_t int32; - } u = {f}; - if(~u.int32 & 0x7f800000) - { - // When the exponent bits are not all 1s, then the value is zero, normal, - // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus - // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd). - // This causes the bfloat16's mantissa to be incremented by 1 if the 16 - // least significant bits of the float mantissa are greater than 0x8000, - // or if they are equal to 0x8000 and the least significant bit of the - // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when - // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already - // has the value 0x7f, then incrementing it causes it to become 0x00 and - // the exponent is incremented by one, which is the next higher FP value - // to the unrounded bfloat16 value. When the bfloat16 value is subnormal - // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up - // to a normal value with an exponent of 0x01 and a mantissa of 0x00. - // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, - // incrementing it causes it to become an exponent of 0xFF and a mantissa - // of 0x00, which is Inf, the next higher value to the unrounded value. - u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even - } - else if(u.int32 & 0xffff) - { - // When all of the exponent bits are 1, the value is Inf or NaN. - // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero - // mantissa bit. Quiet NaN is indicated by the most significant mantissa - // bit being 1. Signaling NaN is indicated by the most significant - // mantissa bit being 0 but some other bit(s) being 1. If any of the - // lower 16 bits of the mantissa are 1, we set the least significant bit - // of the bfloat16 mantissa, in order to preserve signaling NaN in case - // the bloat16's mantissa bits are all 0. - u.int32 |= 0x10000; // Preserve signaling NaN - } - return uint16_t(u.int32 >> 16); - } - - // Truncate instead of rounding, preserving SNaN - static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f) - { - union - { - float fp32; - uint32_t int32; - } u = {f}; - return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff)); - } -}; -#pragma clang diagnostic pop - -typedef struct -{ - uint16_t data; -} hip_bfloat16_public; - -static_assert(std::is_standard_layout{}, - "hip_bfloat16 is not a standard layout type, and thus is " - "incompatible with C."); - -static_assert(std::is_trivial{}, - "hip_bfloat16 is not a trivial type, and thus is " - "incompatible with C."); - -static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public) - && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data), - "internal hip_bfloat16 does not match public hip_bfloat16"); - -inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16) -{ - return os << float(bf16); -} -inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a) -{ - return a; -} -inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a) -{ - a.data ^= 0x8000; - return a; -} -inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b) -{ - return hip_bfloat16(float(a) + float(b)); -} -inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b) -{ - return hip_bfloat16(float(a) - float(b)); -} -inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b) -{ - return hip_bfloat16(float(a) * float(b)); -} -inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b) -{ - return hip_bfloat16(float(a) / float(b)); -} -inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b) -{ - return float(a) < float(b); -} -inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b) -{ - return float(a) == float(b); -} -inline __host__ __device__ bool operator>(hip_bfloat16 a, hip_bfloat16 b) -{ - return b < a; -} -inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b) -{ - return !(a > b); -} -inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b) -{ - return !(a == b); -} -inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b) -{ - return !(a < b); -} -inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b) -{ - return a = a + b; -} -inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b) -{ - return a = a - b; -} -inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b) -{ - return a = a * b; -} -inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b) -{ - return a = a / b; -} -inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a) -{ - return a += hip_bfloat16(1.0f); -} -inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a) -{ - return a -= hip_bfloat16(1.0f); -} -inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int) -{ - hip_bfloat16 orig = a; - ++a; - return orig; -} -inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int) -{ - hip_bfloat16 orig = a; - --a; - return orig; -} - -namespace std -{ - constexpr __host__ __device__ bool isinf(hip_bfloat16 a) - { - return !(~a.data & 0x7f80) && !(a.data & 0x7f); - } - constexpr __host__ __device__ bool isnan(hip_bfloat16 a) - { - return !(~a.data & 0x7f80) && +(a.data & 0x7f); - } - constexpr __host__ __device__ bool iszero(hip_bfloat16 a) - { - return !(a.data & 0x7fff); - } -} - -#endif // __cplusplus < 201103L || !defined(__HIPCC__) +#if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) +#include +#elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) +#warning "hip_bfloat16.h is not supported on nvidia platform" +#else +#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); +#endif #endif // _HIP_BFLOAT16_H_ diff --git a/projects/hip/tests/catch/hipTestMain/config/config_amd_linux_common.json b/projects/hip/tests/catch/hipTestMain/config/config_amd_linux_common.json index ccb77bc2a4..742c45dcf1 100644 --- a/projects/hip/tests/catch/hipTestMain/config/config_amd_linux_common.json +++ b/projects/hip/tests/catch/hipTestMain/config/config_amd_linux_common.json @@ -12,6 +12,8 @@ "Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process", "Unit_hipDeviceGetPCIBusId_Negative_PartialFill", "Unit_hipInit_Negative", - "Unit_hipMemset_Negative_OutOfBoundsPtr" + "Unit_hipMemset_Negative_OutOfBoundsPtr", + "Unit_hipDeviceReset_Positive_Basic", + "Unit_hipDeviceReset_Positive_Threaded" ] } diff --git a/projects/hip/tests/catch/hipTestMain/config/config_amd_windows_common.json b/projects/hip/tests/catch/hipTestMain/config/config_amd_windows_common.json index 33da571b15..4724985967 100644 --- a/projects/hip/tests/catch/hipTestMain/config/config_amd_windows_common.json +++ b/projects/hip/tests/catch/hipTestMain/config/config_amd_windows_common.json @@ -80,6 +80,8 @@ "Unit_hipDeviceGetPCIBusId_Negative_PartialFill", "Unit_hipDeviceGetSharedMemConfig_Positive_Basic", "Unit_hipDeviceGetSharedMemConfig_Positive_Threaded", + "Unit_hipDeviceReset_Positive_Basic", + "Unit_hipDeviceReset_Positive_Threaded", "Unit_hipInit_Negative", "Unit_hipGraphMemcpyNodeSetParams_Functional", "Unit_hipGraphNodeGetDependentNodes_Functional", diff --git a/projects/hip/tests/catch/include/hip_array_common.hh b/projects/hip/tests/catch/include/hip_array_common.hh new file mode 100644 index 0000000000..fd6f094f8d --- /dev/null +++ b/projects/hip/tests/catch/include/hip_array_common.hh @@ -0,0 +1,84 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +template struct type_and_size_and_format { + using type = T; + static constexpr size_t size = N; + static constexpr hipArray_Format format = Format; +}; + +// Create a map of type to scalar type, vector size and scalar type format enum. +// This is useful for creating simpler function that depend on the vector size. +template struct vector_info; +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; + +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; + +template <> +struct vector_info : type_and_size_and_format {}; +template <> struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; +template <> +struct vector_info + : type_and_size_and_format {}; \ No newline at end of file diff --git a/projects/hip/tests/catch/include/resource_guards.hh b/projects/hip/tests/catch/include/resource_guards.hh index 7e6179c81a..a9c7512a3d 100644 --- a/projects/hip/tests/catch/include/resource_guards.hh +++ b/projects/hip/tests/catch/include/resource_guards.hh @@ -19,6 +19,7 @@ THE SOFTWARE. #pragma once +#include #include #include @@ -80,10 +81,8 @@ template class LinearAllocGuard { } } - T* ptr() { return ptr_; }; - T* const ptr() const { return ptr_; }; - T* host_ptr() { return host_ptr_; } - T* const host_ptr() const { return host_ptr(); } + T* ptr() const { return ptr_; }; + T* host_ptr() const { return host_ptr_; } private: const LinearAllocs allocation_type_; @@ -91,6 +90,112 @@ template class LinearAllocGuard { T* host_ptr_ = nullptr; }; +template class LinearAllocGuardMultiDim { + protected: + LinearAllocGuardMultiDim(hipExtent extent) : extent_{extent} {} + + ~LinearAllocGuardMultiDim() { static_cast(hipFree(pitched_ptr_.ptr)); } + + public: + T* ptr() const { return reinterpret_cast(pitched_ptr_.ptr); }; + + size_t pitch() const { return pitched_ptr_.pitch; } + + hipExtent extent() const { return extent_; } + + hipPitchedPtr pitched_ptr() const { return pitched_ptr_; } + + size_t width() const { return extent_.width; } + + size_t width_logical() const { return extent_.width / sizeof(T); } + + size_t height() const { return extent_.height; } + + public: + hipPitchedPtr pitched_ptr_; + const hipExtent extent_; +}; + +template class LinearAllocGuard2D : public LinearAllocGuardMultiDim { + public: + LinearAllocGuard2D(const size_t width_logical, const size_t height) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, 1)} { + HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch, + this->extent_.width, this->extent_.height)); + } + + LinearAllocGuard2D(const LinearAllocGuard2D&) = delete; + LinearAllocGuard2D(LinearAllocGuard2D&&) = delete; +}; + +template class LinearAllocGuard3D : public LinearAllocGuardMultiDim { + public: + LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth) + : LinearAllocGuardMultiDim{make_hipExtent(width_logical * sizeof(T), height, depth)} { + HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); + } + + LinearAllocGuard3D(const hipExtent extent) : LinearAllocGuardMultiDim(extent) { + HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_)); + } + + LinearAllocGuard3D(const LinearAllocGuard3D&) = delete; + LinearAllocGuard3D(LinearAllocGuard3D&&) = delete; + + size_t depth() const { return this->extent_.depth; } +}; + +template class ArrayAllocGuard { + public: + // extent should contain logical width + ArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} { + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HIP_CHECK(hipMalloc3DArray(&ptr_, &desc, extent_, flags)); + } + + ~ArrayAllocGuard() { static_cast(hipFreeArray(ptr_)); } + + ArrayAllocGuard(const ArrayAllocGuard&) = delete; + ArrayAllocGuard(ArrayAllocGuard&&) = delete; + + hipArray_t ptr() const { return ptr_; } + + hipExtent extent() const { return extent_; } + + private: + hipArray_t ptr_ = nullptr; + const hipExtent extent_; +}; + +template class DrvArrayAllocGuard { + public: + // extent should contain width in bytes + DrvArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} { + HIP_ARRAY3D_DESCRIPTOR desc{}; + using vec_info = vector_info; + desc.Format = vec_info::format; + desc.NumChannels = vec_info::size; + desc.Width = extent_.width / sizeof(T); + desc.Height = extent_.height; + desc.Depth = extent_.depth; + desc.Flags = flags; + HIP_CHECK(hipArray3DCreate(&ptr_, &desc)); + } + + ~DrvArrayAllocGuard() { static_cast(hipArrayDestroy(ptr_)); } + + DrvArrayAllocGuard(const DrvArrayAllocGuard&) = delete; + DrvArrayAllocGuard(DrvArrayAllocGuard&&) = delete; + + hiparray ptr() const { return ptr_; } + + hipExtent extent() const { return extent_; } + + private: + hiparray ptr_ = nullptr; + const hipExtent extent_; +}; + enum class Streams { nullstream, perThread, created }; class StreamGuard { diff --git a/projects/hip/tests/catch/include/utils.hh b/projects/hip/tests/catch/include/utils.hh index 9edffc6f7c..bbab2322fe 100644 --- a/projects/hip/tests/catch/include/utils.hh +++ b/projects/hip/tests/catch/include/utils.hh @@ -54,6 +54,37 @@ void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_ele ArrayFindIfNot(array, array + num_elements, expected_value); } +template +void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height, + const size_t depth, F expected_value_generator) { + for (size_t z = 0; z < depth; ++z) { + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const auto slice = reinterpret_cast(ptr) + pitch * height * z; + const auto row = slice + pitch * y; + if (reinterpret_cast(row)[x] != expected_value_generator(x, y, z)) { + INFO("Mismatch at indices: " << x << ", " << y << ", " << z); + REQUIRE(reinterpret_cast(row)[x] == expected_value_generator(x, y, z)); + } + } + } + } +} + +template +void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height, + const size_t depth, F expected_value_generator) { + for (size_t z = 0; z < depth; ++z) { + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const auto slice = reinterpret_cast(ptr) + pitch * height * z; + const auto row = slice + pitch * y; + reinterpret_cast(row)[x] = expected_value_generator(x, y, z); + } + } + } +} + template __global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) { size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); @@ -82,6 +113,18 @@ static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { } } +template +__global__ void Iota(T* const out, size_t pitch, size_t w, size_t h, size_t d) { + const auto x = blockIdx.x * blockDim.x + threadIdx.x; + const auto y = blockIdx.y * blockDim.y + threadIdx.y; + const auto z = blockIdx.z * blockDim.z + threadIdx.z; + if (x < w && y < h && z < d) { + char* const slice = reinterpret_cast(out) + pitch * h * z; + char* const row = slice + pitch * y; + reinterpret_cast(row)[x] = z * w * h + y * w + x; + } +} + inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) { int ticks_per_ms = 0; // Clock rate is in kHz => number of clock ticks in a millisecond diff --git a/projects/hip/tests/catch/multiproc/CMakeLists.txt b/projects/hip/tests/catch/multiproc/CMakeLists.txt index 85a57a9a71..5485ee9ca5 100644 --- a/projects/hip/tests/catch/multiproc/CMakeLists.txt +++ b/projects/hip/tests/catch/multiproc/CMakeLists.txt @@ -15,6 +15,7 @@ set(LINUX_TEST_SRC hipIpcMemAccessTest.cc deviceAllocationMproc.cc hipNoGpuTsts.cc + hipMemGetInfo.cc ) add_custom_target(dummy_kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/dummy_kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../multiproc/dummy_kernel.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include) diff --git a/projects/hip/tests/catch/multiproc/hipMemGetInfo.cc b/projects/hip/tests/catch/multiproc/hipMemGetInfo.cc new file mode 100644 index 0000000000..f764279b65 --- /dev/null +++ b/projects/hip/tests/catch/multiproc/hipMemGetInfo.cc @@ -0,0 +1,300 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#ifdef __linux__ +#include +#include +#include + +#define ReadEnd 0 +#define WriteEnd 1 +#define MAX_SIZE 32 +#define FREE_MEM_TO_HIDE 4294967296 +#define SIZE_TO_ALLOCATE 2147483648 +/* +* In main process allocate 2 GB of device memory. +* Fork() a child process and verify that 2 GB has been +* allocated in parent process. +*/ +TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario1") { + constexpr size_t size = 2147483648; // 2GB + int fd[2], fd1[2], status; + status = pipe(fd); + REQUIRE(status == 0); + status = pipe(fd1); + REQUIRE(status == 0); + pid_t child_pid; + child_pid = fork(); // Create a new child process + if (child_pid < 0) { + WARN("Fork failed!!!!"); + } else if (child_pid == 0) { // child + close(fd1[WriteEnd]); + close(fd[ReadEnd]); + int result; + size_t free = 0, total = 0; + // Wait for signal from parent + int check_child; + status = read(fd1[ReadEnd], &check_child, sizeof(check_child)); + REQUIRE(status != -1); + close(fd1[ReadEnd]); + // Check the total and free memory which is allocated in parent + HIP_CHECK(hipMemGetInfo(&free, &total)); + if ((total - free) >= size) { + result = 1; + } else { + result = 0; + } + // Write the result to parent + status = write(fd[WriteEnd], &result, sizeof(result)); + REQUIRE(status != -1); + close(fd[WriteEnd]); + exit(0); + } else { // Parent + close(fd1[ReadEnd]); + close(fd[WriteEnd]); + // Allocate memory + char* A_d = nullptr; + HIP_CHECK(hipMalloc(&A_d, size)); + // Signal the child + int check = 0; + status = write(fd1[WriteEnd], &check, sizeof(check)); + REQUIRE(status != -1); + close(fd1[WriteEnd]); + // Read the result from Child + int read_result; + status = read(fd[ReadEnd], &read_result, sizeof(read_result)); + REQUIRE(status != -1); + close(fd[ReadEnd]); + REQUIRE(read_result == 1); + HIP_CHECK(hipFree(A_d)); + // wait for child exit + wait(NULL); + } +} +/** +* From main process Fork() a child process. In the child process allocate +* 2 GB of device memory. Signal the parent process. Verify from the parent +* process that 2 GB is allocated in the child process. +*/ +TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario2") { + constexpr size_t size = 2147483648; // 2GB + int fd[2], fd2[2], status; + status = pipe(fd); + REQUIRE(status == 0); + status = pipe(fd2); + REQUIRE(status == 0); + pid_t child_pid; + child_pid = fork(); // Create a new child process + if (child_pid < 0) { + WARN("Fork failed!!!!"); + } else if (child_pid == 0) { // Child + close(fd[ReadEnd]); + close(fd2[WriteEnd]); + // Allocate memory + float* A_d = nullptr; + HIP_CHECK(hipMalloc(&A_d, size)); + // Signal the parent + int data = 0; + status = write(fd[WriteEnd], &data, sizeof(data)); + REQUIRE(status != -1); + close(fd[WriteEnd]); + int valid = 0; + // Wait for Signal from parent before freeing memory and exiting + status = read(fd2[ReadEnd], &valid, sizeof(valid)); + REQUIRE(status != -1); + close(fd2[ReadEnd]); + // Free allocated device memory + HIP_CHECK(hipFree(A_d)); + exit(0); + } else { // Parent + size_t free = 0, total = 0; + close(fd[WriteEnd]); + close(fd2[ReadEnd]); + // Wait for child signal + int data = 0; + status = read(fd[ReadEnd], &data, sizeof(data)); + REQUIRE(status != -1); + close(fd[ReadEnd]); + // Verify the memory + HIP_CHECK(hipMemGetInfo(&free , &total)); + REQUIRE((total - free) >= size); + // Signal child that validation is over and child can free memory + int valid = 0; + status = write(fd2[WriteEnd], &valid, sizeof(valid)); + REQUIRE(status != -1); + close(fd2[WriteEnd]); + // wait for child exit + wait(NULL); + } +} +/* +* From main process Fork() a child process. In the child process +* allocate 2 GB of device memory. Free the memory and exit from +* child process. Verify from the parent process that 2 GB is +* freed in the child process. +*/ +TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario3") { + constexpr size_t size = 2147483648; // 2GB + int fd[2], status; + status = pipe(fd); + REQUIRE(status == 0); + pid_t child_pid; + child_pid = fork(); // Create a new child process + if (child_pid < 0) { + WARN("Fork failed!!!!"); + } else if (child_pid == 0) { // Child + close(fd[ReadEnd]); + // Allocate the memory + void* A_d = nullptr; + HIP_CHECK(hipMalloc(&A_d, size)); + // Free the allocated memory + HIP_CHECK(hipFree(A_d)); + // Signal the parent about memory free + int check = 0; + status = write(fd[WriteEnd], &check, sizeof(check)); + REQUIRE(status != -1); + close(fd[WriteEnd]); + exit(0); + } else { // Parent + close(fd[WriteEnd]); + // Wait for the signal from child about memory free + int check_parent; + status = read(fd[ReadEnd], &check_parent, sizeof(check_parent)); + REQUIRE(status != -1); + close(fd[ReadEnd]); + size_t free = 0, total = 0; + // Verify the memory + HIP_CHECK(hipMemGetInfo(&free , &total)); + REQUIRE((total - free) >= 0); + // wait for child exit + wait(NULL); + } +} +/* +* From main process Fork() a child process. In the child process allocate +* 2 GB of device memory. Exit from child process. Verify from the parent +* process that 2 GB is freed in the child process. +*/ +TEST_CASE("Unit_hipMemGetInfo_Functional_scenario4") { + constexpr size_t size = 2147483648; // 2GB + pid_t child_pid; + child_pid = fork(); // Create a new child process + if (child_pid < 0) { + WARN("Fork failed!!!!"); + } else if (child_pid == 0) { // Child + // Allocate the memory + void* A_d = nullptr; + HIP_CHECK(hipMalloc(&A_d, size)); + exit(0); + } else { // Parent + // wait for child exit + wait(NULL); + size_t free = 0, total = 0; + // Verify the memory + HIP_CHECK(hipMemGetInfo(&free , &total)); + REQUIRE((total-free) >= 0); + } +} +/* +* Multidevice Scenario: In main process allocate 2 GB of device memory +* in every device. Verify that 2 GB is allocated using hipMemGetInfo. +* Fork() a child process and verify that 2 GB has been allocated from +* parent process in every device. +*/ +TEST_CASE("Unit_hipMemGetInfo_Functional_MultiDevice_Scenario5") { + constexpr size_t size = 2147483648; // 2GB + size_t free = 0, total = 0; + int fd1[2], fd2[2], status; + status = pipe(fd1); + REQUIRE(status == 0); + status = pipe(fd2); + REQUIRE(status == 0); + pid_t child_pid; + child_pid = fork(); // Create a new child process + if (child_pid < 0) { + WARN("Fork failed!!!!"); + } else if (child_pid == 0) { // Child + close(fd1[WriteEnd]); + close(fd2[ReadEnd]); + // Wait for the signal from parent after memory allocatoin + int check_child; + status = read(fd1[ReadEnd], &check_child, sizeof(check_child)); + REQUIRE(status != -1); + close(fd1[ReadEnd]); + int num_devices, result, count = 0; + // Get the device count + HIP_CHECK(hipGetDeviceCount(&num_devices)); + for (int i = 0; i < num_devices; i++) { + HIP_CHECK(hipSetDevice(i)); + // Check the memory + HIP_CHECK(hipMemGetInfo(&free , &total)); + if ((total - free) >= size) { + count+=1; + } + } + if ( count == num_devices ) { + result = 1; + } else { + result = 0; + } + // Write the result to Parent + status = write(fd2[WriteEnd], &result, sizeof(result)); + REQUIRE(status != -1); + close(fd2[WriteEnd]); + exit(0); + } else { // Parent + close(fd1[ReadEnd]); + close(fd2[WriteEnd]); + int num_devices; + // Get the device count + HIP_CHECK(hipGetDeviceCount(&num_devices)); + std::vectorv(num_devices, nullptr); + for (int i = 0; i < num_devices; i++) { + HIP_CHECK(hipSetDevice(i)); + // verify the memory + HIP_CHECK(hipMemGetInfo(&free , &total)); + // Allocate memory + HIP_CHECK(hipMalloc(&v[i], size)); + // Verify the memory + HIP_CHECK(hipMemGetInfo(&free , &total)); + } + // Signal the child about memory allocation + int check = 0; + status = write(fd1[WriteEnd], &check, sizeof(check)); + REQUIRE(status != -1); + close(fd1[WriteEnd]); + // Read result from child + int result_parent; + status = read(fd2[ReadEnd], &result_parent, sizeof(result_parent)); + REQUIRE(status != -1); + REQUIRE(result_parent == 1); + close(fd2[ReadEnd]); + // Free the allocated memory on each device + for (int i = 0; i < num_devices; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipFree(v[i])); + } + // wait for child exit + wait(NULL); + } +} +#endif + diff --git a/projects/hip/tests/catch/unit/device/CMakeLists.txt b/projects/hip/tests/catch/unit/device/CMakeLists.txt index 9f11be6877..1e26944d0f 100644 --- a/projects/hip/tests/catch/unit/device/CMakeLists.txt +++ b/projects/hip/tests/catch/unit/device/CMakeLists.txt @@ -23,6 +23,7 @@ set(TEST_SRC hipExtGetLinkTypeAndHopCount.cc hipDeviceSetLimit.cc hipDeviceSetGetSharedMemConfig.cc + hipDeviceReset.cc hipDeviceSetGetMemPool.cc hipInit.cc hipDriverGetVersion.cc diff --git a/projects/hip/tests/catch/unit/device/hipDeviceReset.cc b/projects/hip/tests/catch/unit/device/hipDeviceReset.cc new file mode 100644 index 0000000000..ad9e6fe0c1 --- /dev/null +++ b/projects/hip/tests/catch/unit/device/hipDeviceReset.cc @@ -0,0 +1,131 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +TEST_CASE("Unit_hipDeviceReset_Positive_Basic") { + const auto device = GENERATE(range(0, HipTest::getDeviceCount())); + HIP_CHECK(hipSetDevice(device)); + INFO("Current device is: " << device); + + unsigned int flags_before = 0u; + HIP_CHECK(hipGetDeviceFlags(&flags_before)); + hipSharedMemConfig mem_config_before; + HIP_CHECK(hipDeviceGetSharedMemConfig(&mem_config_before)); + + void* ptr = nullptr; + HIP_CHECK(hipMalloc(&ptr, 500)); + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + const auto cache_config_ret = hipDeviceSetCacheConfig(hipFuncCachePreferL1); + REQUIRE((cache_config_ret == hipSuccess || cache_config_ret == hipErrorNotSupported)); + + const auto shared_mem_config_ret = hipDeviceSetSharedMemConfig( + mem_config_before == hipSharedMemBankSizeFourByte ? hipSharedMemBankSizeEightByte + : hipSharedMemBankSizeFourByte); + REQUIRE((shared_mem_config_ret == hipSuccess || shared_mem_config_ret == hipErrorNotSupported)); + + HIP_CHECK(hipSetDeviceFlags(flags_before ^ (1u << 2))); + + HIP_CHECK(hipDeviceReset()); + + unsigned int flags_after = 0u; + CHECK(hipGetDeviceFlags(&flags_after) == hipSuccess); + CHECK(flags_after == flags_before); + + CHECK(hipFree(ptr) == hipErrorInvalidValue); + +// Inconsistent behavior in CUDA, sometimes segfaults, sometimes works +// Return value mismatch on AMD - EXSWHTEC-124 +#if 0 + CHECK(hipStreamDestroy(stream) == hipErrorInvalidHandle); +#endif + + if (cache_config_ret == hipSuccess) { + hipFuncCache_t cache_config; + CHECK(hipDeviceGetCacheConfig(&cache_config) == hipSuccess); + CHECK(cache_config == hipFuncCachePreferNone); + } + + if (shared_mem_config_ret == hipSuccess) { + hipSharedMemConfig mem_config_after; + CHECK(hipDeviceGetSharedMemConfig(&mem_config_after) == hipSuccess); + CHECK(mem_config_after == mem_config_before); + } +} + +TEST_CASE("Unit_hipDeviceReset_Positive_Threaded") { + HIP_CHECK(hipSetDevice(0)); + INFO("Current device is: " << 0); + + unsigned int flags_before = 0u; + HIP_CHECK(hipGetDeviceFlags(&flags_before)); + hipSharedMemConfig mem_config_before; + HIP_CHECK(hipDeviceGetSharedMemConfig(&mem_config_before)); + + void* ptr = nullptr; + HIP_CHECK(hipMalloc(&ptr, 500)); + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + const auto cache_config_ret = hipDeviceSetCacheConfig(hipFuncCachePreferL1); + REQUIRE((cache_config_ret == hipSuccess || cache_config_ret == hipErrorNotSupported)); + + const auto shared_mem_config_ret = hipDeviceSetSharedMemConfig( + mem_config_before == hipSharedMemBankSizeFourByte ? hipSharedMemBankSizeEightByte + : hipSharedMemBankSizeFourByte); + REQUIRE((shared_mem_config_ret == hipSuccess || shared_mem_config_ret == hipErrorNotSupported)); + + + HIP_CHECK(hipSetDeviceFlags(flags_before ^ (1u << 2))); + + std::thread([] { + HIP_CHECK_THREAD(hipSetDevice(0)); + HIP_CHECK_THREAD(hipDeviceReset()); + }).join(); + HIP_CHECK_THREAD_FINALIZE(); + + unsigned int flags_after = 0u; + CHECK(hipGetDeviceFlags(&flags_after) == hipSuccess); + CHECK(flags_after == flags_before); + + CHECK(hipFree(ptr) == hipErrorInvalidValue); + +// Inconsistent behavior in CUDA, sometimes segfaults, sometimes works +// Return value mismatch on AMD - EXSWHTEC-124 +#if 0 + CHECK(hipStreamDestroy(stream) == hipErrorInvalidHandle); +#endif + + if (cache_config_ret == hipSuccess) { + hipFuncCache_t cache_config; + CHECK(hipDeviceGetCacheConfig(&cache_config) == hipSuccess); + CHECK(cache_config == hipFuncCachePreferNone); + } + + if (shared_mem_config_ret == hipSuccess) { + hipSharedMemConfig mem_config_after; + CHECK(hipDeviceGetSharedMemConfig(&mem_config_after) == hipSuccess); + CHECK(mem_config_after == mem_config_before); + } +} diff --git a/projects/hip/tests/catch/unit/deviceLib/CMakeLists.txt b/projects/hip/tests/catch/unit/deviceLib/CMakeLists.txt index 2ee700545c..c54cd68c08 100644 --- a/projects/hip/tests/catch/unit/deviceLib/CMakeLists.txt +++ b/projects/hip/tests/catch/unit/deviceLib/CMakeLists.txt @@ -27,6 +27,7 @@ set(AMD_TEST_SRC bitExtract.cc bitInsert.cc floatTM.cc + hipMathFunctions.cc ) set(AMD_ARCH_SPEC_TEST_SRC AtomicAdd_Coherent_withunsafeflag.cc @@ -76,6 +77,7 @@ if(${ARCH_CHECK} GREATER_EQUAL 0) set_source_files_properties(unsafeAtomicAdd_NonCoherent_withunsafeflag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics") set_source_files_properties(unsafeAtomicAdd_Coherent_withnounsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics") set_source_files_properties(unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics") + set_source_files_properties(hipMathFunctions.cc PROPERTIES COMPILE_FLAGS "-Xclang -fallow-half-arguments-and-returns") file(GLOB AtomicAdd_files *AtomicAdd_*_*.cc) set_property(SOURCE ${AtomicAdd_files} PROPERTY COMPILE_FLAGS --save-temps) file(GLOB unsafeAtomicAdd_files *unsafeAtomicAdd_*_*.cc) diff --git a/projects/hip/tests/catch/unit/deviceLib/hipMathFunctions.cc b/projects/hip/tests/catch/unit/deviceLib/hipMathFunctions.cc new file mode 100644 index 0000000000..543bacc647 --- /dev/null +++ b/projects/hip/tests/catch/unit/deviceLib/hipMathFunctions.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + Test Scenarios : + 1) Verification of absolute int64 operation performed at device. + 2) Verification of __fp16 operation performed at device. + 3) Verification of pow operations performed at device. +*/ + +#include + +__global__ void kernel_abs_int64(long long *input, long long *output) { // NOLINT + int tx = threadIdx.x; + output[tx] = abs(input[tx]); +} + + +#define CHECK_ABS_INT64(IN, OUT, EXP) \ + { \ + if (OUT != EXP) { \ + INFO("check_abs_int64 failed on " << IN << ", output " << OUT << \ + ", expected " << EXP); \ + REQUIRE(false); \ + } \ + } + +template +__global__ void kernel_simple(F f, T *out) { + *out = f(); +} + +template +void check_simple(F f, T expected, const char* file, unsigned line) { + auto memsize = sizeof(T); + T *outputCPU = reinterpret_cast(malloc(memsize)); + T *outputGPU = nullptr; + REQUIRE(outputCPU != nullptr); + HIP_CHECK(hipMalloc(&outputGPU, memsize)); + hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); + HIP_CHECK(hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost)); + if (*outputCPU != expected) { + INFO("File " << file << ", line " << line << " check failed." << + " output = " << static_cast(*outputCPU) << " expected " + << static_cast(expected)); + REQUIRE(false); + } + HIP_CHECK(hipFree(outputGPU)); + free(outputCPU); +} + +#define CHECK_SIMPLE(lambda, expected) \ + check_simple(lambda, expected, __FILE__, __LINE__); + + +/** + Verification of absolute int64 operation performed at device. + */ +TEST_CASE("Unit_abs_int64_Verification") { + using datatype_t = long long; // NOLINT + + datatype_t *inputCPU{}, *outputCPU{}; + datatype_t *inputGPU{}, *outputGPU{}; + const int NUM_INPUTS = 8; + auto memsize = NUM_INPUTS * sizeof(datatype_t); + + // allocate memories + inputCPU = reinterpret_cast(malloc(memsize)); + outputCPU = reinterpret_cast(malloc(memsize)); + REQUIRE(inputCPU != nullptr); + REQUIRE(outputCPU != nullptr); + HIP_CHECK(hipMalloc(&inputGPU, memsize)); + HIP_CHECK(hipMalloc(&outputGPU, memsize)); + + // populate input with constants + inputCPU[0] = -81985529216486895ll; + inputCPU[1] = 81985529216486895ll; + inputCPU[2] = -1250999896491ll; + inputCPU[3] = 1250999896491ll; + inputCPU[4] = -19088743ll; + inputCPU[5] = 19088743ll; + inputCPU[6] = -291ll; + inputCPU[7] = 291ll; + + // copy inputs to device + HIP_CHECK(hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice)); + + // launch kernel + hipLaunchKernelGGL(kernel_abs_int64, dim3(1), dim3(NUM_INPUTS), 0, 0, + inputGPU, outputGPU); + // copy outputs from device + HIP_CHECK(hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost)); + + // check outputs + CHECK_ABS_INT64(inputCPU[0], outputCPU[0], outputCPU[1]); + CHECK_ABS_INT64(inputCPU[1], outputCPU[1], outputCPU[1]); + CHECK_ABS_INT64(inputCPU[2], outputCPU[2], outputCPU[3]); + CHECK_ABS_INT64(inputCPU[3], outputCPU[3], outputCPU[3]); + CHECK_ABS_INT64(inputCPU[4], outputCPU[4], outputCPU[5]); + CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]); + CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]); + CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]); + + // free memories + HIP_CHECK(hipFree(inputGPU)); + HIP_CHECK(hipFree(outputGPU)); + free(inputCPU); + free(outputCPU); +} + +/** + Verification of pow operations performed at device. + */ +TEST_CASE("Unit_pown_Verification") { + CHECK_SIMPLE([]__device__(){ return powif(2.0f, 2); }, 4.0f); + CHECK_SIMPLE([]__device__(){ return powi(2.0, 2); }, 4.0); + CHECK_SIMPLE([]__device__(){ return pow(2.0f, 2); }, 4.0f); + CHECK_SIMPLE([]__device__(){ return pow(2.0, 2); }, 4.0); + CHECK_SIMPLE([]__device__(){ return pow(2.0f16, 2); }, 4.0f16); +} diff --git a/projects/hip/tests/catch/unit/memory/CMakeLists.txt b/projects/hip/tests/catch/unit/memory/CMakeLists.txt index 4d2d74c033..1b4cef3698 100644 --- a/projects/hip/tests/catch/unit/memory/CMakeLists.txt +++ b/projects/hip/tests/catch/unit/memory/CMakeLists.txt @@ -172,6 +172,7 @@ set(TEST_SRC hipMemsetAsync.cc hipMemAdvise.cc hipMemRangeGetAttributes.cc + hipGetSymbolSizeAddress.cc ) endif() diff --git a/projects/hip/tests/catch/unit/memory/hipArray3DCreate.cc b/projects/hip/tests/catch/unit/memory/hipArray3DCreate.cc index 973868eded..4cf189611b 100644 --- a/projects/hip/tests/catch/unit/memory/hipArray3DCreate.cc +++ b/projects/hip/tests/catch/unit/memory/hipArray3DCreate.cc @@ -20,6 +20,7 @@ THE SOFTWARE. #include #include "DriverContext.hh" #include "hipArrayCommon.hh" +#include "hip_array_common.hh" #include "hip_test_common.hh" namespace { diff --git a/projects/hip/tests/catch/unit/memory/hipArrayCommon.hh b/projects/hip/tests/catch/unit/memory/hipArrayCommon.hh index b40014b490..b0beeb3126 100644 --- a/projects/hip/tests/catch/unit/memory/hipArrayCommon.hh +++ b/projects/hip/tests/catch/unit/memory/hipArrayCommon.hh @@ -26,66 +26,6 @@ THE SOFTWARE. constexpr size_t BlockSize = 16; -template struct type_and_size_and_format { - using type = T; - static constexpr size_t size = N; - static constexpr hipArray_Format format = Format; -}; - -// Create a map of type to scalar type, vector size and scalar type format enum. -// This is useful for creating simpler function that depend on the vector size. -template struct vector_info; -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - -template <> -struct vector_info : type_and_size_and_format {}; -template <> struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; -template <> -struct vector_info - : type_and_size_and_format {}; - // read from a texture using normalized coordinates constexpr size_t ChannelToRead = 1; template diff --git a/projects/hip/tests/catch/unit/memory/hipArrayCreate.cc b/projects/hip/tests/catch/unit/memory/hipArrayCreate.cc index 6cc535593a..70a8636922 100644 --- a/projects/hip/tests/catch/unit/memory/hipArrayCreate.cc +++ b/projects/hip/tests/catch/unit/memory/hipArrayCreate.cc @@ -27,6 +27,7 @@ hipArrayCreate API test scenarios #include #include #include +#include #include "hipArrayCommon.hh" #include "DriverContext.hh" diff --git a/projects/hip/tests/catch/unit/memory/hipFree.cc b/projects/hip/tests/catch/unit/memory/hipFree.cc index 1248deebc1..b29854271c 100644 --- a/projects/hip/tests/catch/unit/memory/hipFree.cc +++ b/projects/hip/tests/catch/unit/memory/hipFree.cc @@ -22,6 +22,7 @@ THE SOFTWARE. #include +#include #include "hipArrayCommon.hh" #include "DriverContext.hh" diff --git a/projects/hip/tests/catch/unit/memory/hipGetSymbolSizeAddress.cc b/projects/hip/tests/catch/unit/memory/hipGetSymbolSizeAddress.cc new file mode 100644 index 0000000000..5c011c7e81 --- /dev/null +++ b/projects/hip/tests/catch/unit/memory/hipGetSymbolSizeAddress.cc @@ -0,0 +1,123 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include +#include +#include +#include + +namespace { +constexpr size_t kArraySize = 5; +} // anonymous namespace + +#define HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(type) \ + __device__ type type##_var = 0; \ + __device__ type type##_arr[kArraySize] = {}; \ + __global__ void type##_var_address_validation_kernel(void* ptr, bool* out) { \ + *out = static_cast(&type##_var) == ptr; \ + } \ + __global__ void type##_arr_address_validation_kernel(void* ptr, bool* out) { \ + *out = static_cast(type##_arr) == ptr; \ + } + +HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(int) +HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(float) +HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(char) +HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(double) + +template +static void HipGetSymbolSizeAddressTest(const void* symbol) { + constexpr auto size = N * sizeof(T); + + T* symbol_ptr = nullptr; + size_t symbol_size = 0; + HIP_CHECK(hipGetSymbolAddress(reinterpret_cast(&symbol_ptr), symbol)); + HIP_CHECK(hipGetSymbolSize(&symbol_size, symbol)); + REQUIRE(symbol_size == size); + REQUIRE(symbol_ptr != nullptr); + + LinearAllocGuard equal_addresses(LinearAllocs::hipMalloc, sizeof(bool)); + HIP_CHECK(hipMemset(equal_addresses.ptr(), false, sizeof(*equal_addresses.ptr()))) + validation_kernel<<<1, 1>>>(symbol_ptr, equal_addresses.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipStreamSynchronize(nullptr)); + bool ok = false; + HIP_CHECK(hipMemcpy(&ok, equal_addresses.ptr(), sizeof(ok), hipMemcpyDeviceToHost)); + REQUIRE(ok); + + constexpr T expected_value = 42; + std::array fill_buffer; + std::fill_n(fill_buffer.begin(), N, expected_value); + HIP_CHECK(hipMemcpy(symbol_ptr, fill_buffer.data(), symbol_size, hipMemcpyHostToDevice)); + + + std::array read_buffer; + HIP_CHECK(hipMemcpy(read_buffer.data(), symbol_ptr, symbol_size, hipMemcpyDeviceToHost)); + ArrayFindIfNot(read_buffer.data(), expected_value, read_buffer.size()); +} + +#if HT_AMD +#define SYMBOL(expr) &HIP_SYMBOL(expr) +#else +#define SYMBOL(expr) HIP_SYMBOL(expr) +#endif + +#define HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(type) \ + HipGetSymbolSizeAddressTest(SYMBOL(type##_var)); \ + HipGetSymbolSizeAddressTest( \ + SYMBOL(type##_arr)); + +TEST_CASE("Unit_hipGetSymbolSizeAddress_Positive_Basic") { + SECTION("int") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(int); } + SECTION("float") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(float); } + SECTION("char") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(char); } + SECTION("double") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(double); } +} + +TEST_CASE("Unit_hipGetSymbolAddress_Negative_Parameters") { +// Causes a segfault in CUDA +#if HT_AMD + SECTION("devPtr == nullptr") { + HIP_CHECK_ERROR(hipGetSymbolAddress(nullptr, SYMBOL(int_var)), hipErrorInvalidValue); + } +#endif + + SECTION("symbolName == nullptr") { + void* ptr = nullptr; + HIP_CHECK_ERROR(hipGetSymbolAddress(&ptr, nullptr), hipErrorInvalidSymbol); + } +} + +TEST_CASE("Unit_hipGetSymbolSize_Negative_Parameters") { +// Causes a segfault in CUDA +#if HT_AMD + SECTION("size == nullptr") { + HIP_CHECK_ERROR(hipGetSymbolSize(nullptr, SYMBOL(int_var)), hipErrorInvalidValue); + } +#endif + + SECTION("symbolName == nullptr") { + size_t size = 0; + HIP_CHECK_ERROR(hipGetSymbolSize(&size, nullptr), hipErrorInvalidSymbol); + } +} \ No newline at end of file diff --git a/projects/hip/tests/catch/unit/memory/hipMallocArray.cc b/projects/hip/tests/catch/unit/memory/hipMallocArray.cc index b6c4939b1e..530eb11077 100644 --- a/projects/hip/tests/catch/unit/memory/hipMallocArray.cc +++ b/projects/hip/tests/catch/unit/memory/hipMallocArray.cc @@ -26,6 +26,7 @@ hipMallocArray API test scenarios */ #include +#include #include #include #include "hipArrayCommon.hh" diff --git a/projects/hip/tests/src/deviceLib/hipBfloat16.cpp b/projects/hip/tests/src/deviceLib/hipBfloat16.cpp index 55de397b40..d5f3ddfc20 100644 --- a/projects/hip/tests/src/deviceLib/hipBfloat16.cpp +++ b/projects/hip/tests/src/deviceLib/hipBfloat16.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM nvidia * TEST: %t * HIT_END */