SWDEV-299127 - Merge 'develop' into 'amd-staging'
Change-Id: Id62d80246bf513c1cd64ba85c60cfde6a816091d
[ROCm/hip commit: 118d79ad14]
This commit is contained in:
+1
-115
@@ -393,121 +393,7 @@ foreach $arg (@ARGV)
|
||||
$swallowArg = 1;
|
||||
}
|
||||
|
||||
## process linker response file for hip-clang
|
||||
## extract object files from static library and pass them directly to
|
||||
## hip-clang in command line.
|
||||
## ToDo: Remove this after hip-clang switch to lto and lld is able to
|
||||
## handle clang-offload-bundler bundles.
|
||||
if (($arg =~ m/^-Wl,@/ or $arg =~ m/^@/) and
|
||||
$HIP_PLATFORM eq 'amd') {
|
||||
my @split_arg = (split /\@/, $arg); # arg will have options type(-Wl,@ or @) and filename
|
||||
my $file = $split_arg[1];
|
||||
open my $in, "<:encoding(utf8)", $file or die "$file: $!";
|
||||
my $new_arg = "";
|
||||
my $tmpdir = get_temp_dir ();
|
||||
my $new_file = "$tmpdir/response_file";
|
||||
open my $out, ">", $new_file or die "$new_file: $!";
|
||||
while (my $line = <$in>) {
|
||||
chomp $line;
|
||||
if ($line =~ m/\.a$/ || $line =~ m/\.lo$/) {
|
||||
my $libFile = $line;
|
||||
my $path = abs_path($line);
|
||||
my @objs = split ('\n', `cd $tmpdir; ar xv $path`);
|
||||
## Check if all files in .a are object files.
|
||||
my $allIsObj = 1;
|
||||
my $realObjs = "";
|
||||
foreach my $obj (@objs) {
|
||||
chomp $obj;
|
||||
$obj =~ s/^x - //;
|
||||
$obj = "$tmpdir/$obj";
|
||||
my $fileType = `file $obj`;
|
||||
my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/);
|
||||
$allIsObj = ($allIsObj and $isObj);
|
||||
if ($isObj) {
|
||||
$realObjs = ($realObjs . " " . $obj);
|
||||
} else {
|
||||
push (@inputs, $obj);
|
||||
$new_arg = "$new_arg $obj";
|
||||
}
|
||||
}
|
||||
chomp $realObjs;
|
||||
if ($allIsObj) {
|
||||
print $out "$line\n";
|
||||
} elsif ($realObjs) {
|
||||
my($libBaseName, $libDir, $libExt) = fileparse($libFile);
|
||||
$libBaseName = mktemp($libBaseName . "XXXX") . $libExt;
|
||||
system("cd $tmpdir; ar rc $libBaseName $realObjs");
|
||||
print $out "$tmpdir/$libBaseName\n";
|
||||
}
|
||||
} elsif ($line =~ m/\.o$/) {
|
||||
my $fileType = `file $line`;
|
||||
my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/);
|
||||
if ($isObj) {
|
||||
print $out "$line\n";
|
||||
} else {
|
||||
push (@inputs, $line);
|
||||
$new_arg = "$new_arg $line";
|
||||
}
|
||||
} else {
|
||||
print $out "$line\n";
|
||||
}
|
||||
}
|
||||
close $in;
|
||||
close $out;
|
||||
$arg = "$new_arg $split_arg[0]\@$new_file";
|
||||
$escapeArg = 0;
|
||||
} elsif (($arg =~ m/\.a$/ || $arg =~ m/\.lo$/) &&
|
||||
$HIP_PLATFORM eq 'amd') {
|
||||
## process static library for hip-clang
|
||||
## extract object files from static library and pass them directly to
|
||||
## hip-clang.
|
||||
## ToDo: Remove this after hip-clang switch to lto and lld is able to
|
||||
## handle clang-offload-bundler bundles.
|
||||
my $new_arg = "";
|
||||
my $tmpdir = get_temp_dir ();
|
||||
my $libFile = $arg;
|
||||
my $path = abs_path($arg);
|
||||
my @objs = split ('\n', `cd $tmpdir; ar xv $path`);
|
||||
## Check if all files in .a are object files.
|
||||
my $allIsObj = 1;
|
||||
my $realObjs = "";
|
||||
foreach my $obj (@objs) {
|
||||
chomp $obj;
|
||||
$obj =~ s/^x - //;
|
||||
$obj = "$tmpdir/$obj";
|
||||
my $fileType = `file $obj`;
|
||||
my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/);
|
||||
if ($fileType =~ m/ELF/) {
|
||||
my $sections = `$HIP_CLANG_PATH/llvm-readelf -e -W $obj`;
|
||||
$isObj = !($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/);
|
||||
}
|
||||
$allIsObj = ($allIsObj and $isObj);
|
||||
if ($isObj) {
|
||||
$realObjs = ($realObjs . " " . $obj);
|
||||
} else {
|
||||
push (@inputs, $obj);
|
||||
if ($new_arg ne "") {
|
||||
$new_arg .= " ";
|
||||
}
|
||||
$new_arg .= "$obj";
|
||||
}
|
||||
}
|
||||
chomp $realObjs;
|
||||
if ($allIsObj) {
|
||||
$new_arg = $arg;
|
||||
} elsif ($realObjs) {
|
||||
my($libBaseName, $libDir, $libExt) = fileparse($libFile);
|
||||
$libBaseName = mktemp($libBaseName . "XXXX") . $libExt;
|
||||
system("cd $tmpdir; ar rc $libBaseName $realObjs");
|
||||
$new_arg .= " $tmpdir/$libBaseName";
|
||||
}
|
||||
$arg = "$new_arg";
|
||||
$escapeArg = 0;
|
||||
if ($toolArgs =~ m/-Xlinker$/) {
|
||||
$toolArgs = substr $toolArgs, 0, -8;
|
||||
chomp $toolArgs;
|
||||
}
|
||||
} elsif ($arg eq '-x') {
|
||||
if ($arg eq '-x') {
|
||||
$fileTypeFlag = 1;
|
||||
} elsif (($arg eq 'c' and $prevArg eq '-x') or ($arg eq '-xc')) {
|
||||
$fileTypeFlag = 1;
|
||||
|
||||
@@ -51,32 +51,32 @@ Should use roctracer/rocTX instead
|
||||
|
||||
## HIP Texture Management APIs
|
||||
|
||||
###hipGetTextureReference
|
||||
###hipTexRefSetAddressMode
|
||||
###hipTexRefSetArray
|
||||
###hipTexRefSetFilterMode
|
||||
###hipTexRefSetFlags
|
||||
###hipTexRefSetFormat
|
||||
###hipBindTexture
|
||||
###hipBindTexture2D
|
||||
###hipBindTextureToArray
|
||||
###hipGetTextureAlignmentOffset
|
||||
###hipUnbindTexture
|
||||
###hipTexRefGetAddress
|
||||
###hipTexRefGetAddressMode
|
||||
###hipTexRefGetFilterMode
|
||||
###hipTexRefGetFlags
|
||||
###hipTexRefGetFormat
|
||||
###hipTexRefGetMaxAnisotropy
|
||||
###hipTexRefGetMipmapFilterMode
|
||||
###hipTexRefGetMipmapLevelBias
|
||||
###hipTexRefGetMipmapLevelClamp
|
||||
###hipTexRefGetMipMappedArray
|
||||
###hipTexRefSetAddress
|
||||
###hipTexRefSetAddress2D
|
||||
###hipTexRefSetMaxAnisotropy
|
||||
###hipTexRefSetBorderColor
|
||||
###hipTexRefSetMipmapFilterMode
|
||||
###hipTexRefSetMipmapLevelBias
|
||||
###hipTexRefSetMipmapLevelClamp
|
||||
###hipTexRefSetMipmappedArray
|
||||
### hipGetTextureReference
|
||||
### hipTexRefSetAddressMode
|
||||
### hipTexRefSetArray
|
||||
### hipTexRefSetFilterMode
|
||||
### hipTexRefSetFlags
|
||||
### hipTexRefSetFormat
|
||||
### hipBindTexture
|
||||
### hipBindTexture2D
|
||||
### hipBindTextureToArray
|
||||
### hipGetTextureAlignmentOffset
|
||||
### hipUnbindTexture
|
||||
### hipTexRefGetAddress
|
||||
### hipTexRefGetAddressMode
|
||||
### hipTexRefGetFilterMode
|
||||
### hipTexRefGetFlags
|
||||
### hipTexRefGetFormat
|
||||
### hipTexRefGetMaxAnisotropy
|
||||
### hipTexRefGetMipmapFilterMode
|
||||
### hipTexRefGetMipmapLevelBias
|
||||
### hipTexRefGetMipmapLevelClamp
|
||||
### hipTexRefGetMipMappedArray
|
||||
### hipTexRefSetAddress
|
||||
### hipTexRefSetAddress2D
|
||||
### hipTexRefSetMaxAnisotropy
|
||||
### hipTexRefSetBorderColor
|
||||
### hipTexRefSetMipmapFilterMode
|
||||
### hipTexRefSetMipmapLevelBias
|
||||
### hipTexRefSetMipmapLevelClamp
|
||||
### hipTexRefSetMipmappedArray
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -29,255 +29,12 @@
|
||||
#ifndef _HIP_BFLOAT16_H_
|
||||
#define _HIP_BFLOAT16_H_
|
||||
|
||||
#if __cplusplus < 201103L || !defined(__HIPCC__)
|
||||
|
||||
// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only
|
||||
// include a minimal definition of hip_bfloat16
|
||||
|
||||
#include <stdint.h>
|
||||
/*! \brief Struct to represent a 16 bit brain floating point number. */
|
||||
typedef struct
|
||||
{
|
||||
uint16_t data;
|
||||
} hip_bfloat16;
|
||||
|
||||
#else // __cplusplus < 201103L || !defined(__HIPCC__)
|
||||
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <ostream>
|
||||
#include <type_traits>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wshadow"
|
||||
struct hip_bfloat16
|
||||
{
|
||||
uint16_t data;
|
||||
|
||||
enum truncate_t
|
||||
{
|
||||
truncate
|
||||
};
|
||||
|
||||
__host__ __device__ hip_bfloat16() = default;
|
||||
|
||||
// round upper 16 bits of IEEE float to convert to bfloat16
|
||||
explicit __host__ __device__ hip_bfloat16(float f)
|
||||
: data(float_to_bfloat16(f))
|
||||
{
|
||||
}
|
||||
|
||||
explicit __host__ __device__ hip_bfloat16(float f, truncate_t)
|
||||
: data(truncate_float_to_bfloat16(f))
|
||||
{
|
||||
}
|
||||
|
||||
// zero extend lower 16 bits of bfloat16 to convert to IEEE float
|
||||
__host__ __device__ operator float() const
|
||||
{
|
||||
union
|
||||
{
|
||||
uint32_t int32;
|
||||
float fp32;
|
||||
} u = {uint32_t(data) << 16};
|
||||
return u.fp32;
|
||||
}
|
||||
|
||||
static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f)
|
||||
{
|
||||
hip_bfloat16 output;
|
||||
output.data = float_to_bfloat16(f);
|
||||
return output;
|
||||
}
|
||||
|
||||
static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t)
|
||||
{
|
||||
hip_bfloat16 output;
|
||||
output.data = truncate_float_to_bfloat16(f);
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
static __host__ __device__ uint16_t float_to_bfloat16(float f)
|
||||
{
|
||||
union
|
||||
{
|
||||
float fp32;
|
||||
uint32_t int32;
|
||||
} u = {f};
|
||||
if(~u.int32 & 0x7f800000)
|
||||
{
|
||||
// When the exponent bits are not all 1s, then the value is zero, normal,
|
||||
// or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
|
||||
// 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
|
||||
// This causes the bfloat16's mantissa to be incremented by 1 if the 16
|
||||
// least significant bits of the float mantissa are greater than 0x8000,
|
||||
// or if they are equal to 0x8000 and the least significant bit of the
|
||||
// bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
|
||||
// the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
|
||||
// has the value 0x7f, then incrementing it causes it to become 0x00 and
|
||||
// the exponent is incremented by one, which is the next higher FP value
|
||||
// to the unrounded bfloat16 value. When the bfloat16 value is subnormal
|
||||
// with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
|
||||
// to a normal value with an exponent of 0x01 and a mantissa of 0x00.
|
||||
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
|
||||
// incrementing it causes it to become an exponent of 0xFF and a mantissa
|
||||
// of 0x00, which is Inf, the next higher value to the unrounded value.
|
||||
u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even
|
||||
}
|
||||
else if(u.int32 & 0xffff)
|
||||
{
|
||||
// When all of the exponent bits are 1, the value is Inf or NaN.
|
||||
// Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
|
||||
// mantissa bit. Quiet NaN is indicated by the most significant mantissa
|
||||
// bit being 1. Signaling NaN is indicated by the most significant
|
||||
// mantissa bit being 0 but some other bit(s) being 1. If any of the
|
||||
// lower 16 bits of the mantissa are 1, we set the least significant bit
|
||||
// of the bfloat16 mantissa, in order to preserve signaling NaN in case
|
||||
// the bloat16's mantissa bits are all 0.
|
||||
u.int32 |= 0x10000; // Preserve signaling NaN
|
||||
}
|
||||
return uint16_t(u.int32 >> 16);
|
||||
}
|
||||
|
||||
// Truncate instead of rounding, preserving SNaN
|
||||
static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f)
|
||||
{
|
||||
union
|
||||
{
|
||||
float fp32;
|
||||
uint32_t int32;
|
||||
} u = {f};
|
||||
return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff));
|
||||
}
|
||||
};
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint16_t data;
|
||||
} hip_bfloat16_public;
|
||||
|
||||
static_assert(std::is_standard_layout<hip_bfloat16>{},
|
||||
"hip_bfloat16 is not a standard layout type, and thus is "
|
||||
"incompatible with C.");
|
||||
|
||||
static_assert(std::is_trivial<hip_bfloat16>{},
|
||||
"hip_bfloat16 is not a trivial type, and thus is "
|
||||
"incompatible with C.");
|
||||
|
||||
static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public)
|
||||
&& offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data),
|
||||
"internal hip_bfloat16 does not match public hip_bfloat16");
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16)
|
||||
{
|
||||
return os << float(bf16);
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a)
|
||||
{
|
||||
return a;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a)
|
||||
{
|
||||
a.data ^= 0x8000;
|
||||
return a;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return hip_bfloat16(float(a) + float(b));
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return hip_bfloat16(float(a) - float(b));
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return hip_bfloat16(float(a) * float(b));
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return hip_bfloat16(float(a) / float(b));
|
||||
}
|
||||
inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return float(a) < float(b);
|
||||
}
|
||||
inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return float(a) == float(b);
|
||||
}
|
||||
inline __host__ __device__ bool operator>(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return b < a;
|
||||
}
|
||||
inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return !(a > b);
|
||||
}
|
||||
inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return !(a == b);
|
||||
}
|
||||
inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b)
|
||||
{
|
||||
return !(a < b);
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b)
|
||||
{
|
||||
return a = a + b;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b)
|
||||
{
|
||||
return a = a - b;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b)
|
||||
{
|
||||
return a = a * b;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b)
|
||||
{
|
||||
return a = a / b;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a)
|
||||
{
|
||||
return a += hip_bfloat16(1.0f);
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a)
|
||||
{
|
||||
return a -= hip_bfloat16(1.0f);
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int)
|
||||
{
|
||||
hip_bfloat16 orig = a;
|
||||
++a;
|
||||
return orig;
|
||||
}
|
||||
inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int)
|
||||
{
|
||||
hip_bfloat16 orig = a;
|
||||
--a;
|
||||
return orig;
|
||||
}
|
||||
|
||||
namespace std
|
||||
{
|
||||
constexpr __host__ __device__ bool isinf(hip_bfloat16 a)
|
||||
{
|
||||
return !(~a.data & 0x7f80) && !(a.data & 0x7f);
|
||||
}
|
||||
constexpr __host__ __device__ bool isnan(hip_bfloat16 a)
|
||||
{
|
||||
return !(~a.data & 0x7f80) && +(a.data & 0x7f);
|
||||
}
|
||||
constexpr __host__ __device__ bool iszero(hip_bfloat16 a)
|
||||
{
|
||||
return !(a.data & 0x7fff);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __cplusplus < 201103L || !defined(__HIPCC__)
|
||||
#if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__))
|
||||
#include <hip/amd_detail/amd_hip_bfloat16.h>
|
||||
#elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__))
|
||||
#warning "hip_bfloat16.h is not supported on nvidia platform"
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
|
||||
#endif
|
||||
|
||||
#endif // _HIP_BFLOAT16_H_
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Creating_Process",
|
||||
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipMemset_Negative_OutOfBoundsPtr"
|
||||
"Unit_hipMemset_Negative_OutOfBoundsPtr",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -80,6 +80,8 @@
|
||||
"Unit_hipDeviceGetPCIBusId_Negative_PartialFill",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
|
||||
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
|
||||
"Unit_hipDeviceReset_Positive_Basic",
|
||||
"Unit_hipDeviceReset_Positive_Threaded",
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipGraphMemcpyNodeSetParams_Functional",
|
||||
"Unit_hipGraphNodeGetDependentNodes_Functional",
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
template <class T, size_t N, hipArray_Format Format> struct type_and_size_and_format {
|
||||
using type = T;
|
||||
static constexpr size_t size = N;
|
||||
static constexpr hipArray_Format format = Format;
|
||||
};
|
||||
|
||||
// Create a map of type to scalar type, vector size and scalar type format enum.
|
||||
// This is useful for creating simpler function that depend on the vector size.
|
||||
template <typename T> struct vector_info;
|
||||
template <>
|
||||
struct vector_info<int> : type_and_size_and_format<int, 1, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float> : type_and_size_and_format<float, 1, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short> : type_and_size_and_format<short, 1, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char> : type_and_size_and_format<char, 1, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<unsigned int>
|
||||
: type_and_size_and_format<unsigned int, 1, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<unsigned short>
|
||||
: type_and_size_and_format<unsigned short, 1, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<unsigned char>
|
||||
: type_and_size_and_format<unsigned char, 1, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <>
|
||||
struct vector_info<int2> : type_and_size_and_format<int, 2, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float2> : type_and_size_and_format<float, 2, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short2> : type_and_size_and_format<short, 2, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char2> : type_and_size_and_format<char, 2, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<uint2>
|
||||
: type_and_size_and_format<unsigned int, 2, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<ushort2>
|
||||
: type_and_size_and_format<unsigned short, 2, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<uchar2>
|
||||
: type_and_size_and_format<unsigned char, 2, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <>
|
||||
struct vector_info<int4> : type_and_size_and_format<int, 4, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float4> : type_and_size_and_format<float, 4, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short4> : type_and_size_and_format<short, 4, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char4> : type_and_size_and_format<char, 4, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<uint4>
|
||||
: type_and_size_and_format<unsigned int, 4, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<ushort4>
|
||||
: type_and_size_and_format<unsigned short, 4, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<uchar4>
|
||||
: type_and_size_and_format<unsigned char, 4, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
@@ -19,6 +19,7 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_array_common.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
@@ -80,10 +81,8 @@ template <typename T> class LinearAllocGuard {
|
||||
}
|
||||
}
|
||||
|
||||
T* ptr() { return ptr_; };
|
||||
T* const ptr() const { return ptr_; };
|
||||
T* host_ptr() { return host_ptr_; }
|
||||
T* const host_ptr() const { return host_ptr(); }
|
||||
T* ptr() const { return ptr_; };
|
||||
T* host_ptr() const { return host_ptr_; }
|
||||
|
||||
private:
|
||||
const LinearAllocs allocation_type_;
|
||||
@@ -91,6 +90,112 @@ template <typename T> class LinearAllocGuard {
|
||||
T* host_ptr_ = nullptr;
|
||||
};
|
||||
|
||||
template <typename T> class LinearAllocGuardMultiDim {
|
||||
protected:
|
||||
LinearAllocGuardMultiDim(hipExtent extent) : extent_{extent} {}
|
||||
|
||||
~LinearAllocGuardMultiDim() { static_cast<void>(hipFree(pitched_ptr_.ptr)); }
|
||||
|
||||
public:
|
||||
T* ptr() const { return reinterpret_cast<T*>(pitched_ptr_.ptr); };
|
||||
|
||||
size_t pitch() const { return pitched_ptr_.pitch; }
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
hipPitchedPtr pitched_ptr() const { return pitched_ptr_; }
|
||||
|
||||
size_t width() const { return extent_.width; }
|
||||
|
||||
size_t width_logical() const { return extent_.width / sizeof(T); }
|
||||
|
||||
size_t height() const { return extent_.height; }
|
||||
|
||||
public:
|
||||
hipPitchedPtr pitched_ptr_;
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
template <typename T> class LinearAllocGuard2D : public LinearAllocGuardMultiDim<T> {
|
||||
public:
|
||||
LinearAllocGuard2D(const size_t width_logical, const size_t height)
|
||||
: LinearAllocGuardMultiDim<T>{make_hipExtent(width_logical * sizeof(T), height, 1)} {
|
||||
HIP_CHECK(hipMallocPitch(&this->pitched_ptr_.ptr, &this->pitched_ptr_.pitch,
|
||||
this->extent_.width, this->extent_.height));
|
||||
}
|
||||
|
||||
LinearAllocGuard2D(const LinearAllocGuard2D&) = delete;
|
||||
LinearAllocGuard2D(LinearAllocGuard2D&&) = delete;
|
||||
};
|
||||
|
||||
template <typename T> class LinearAllocGuard3D : public LinearAllocGuardMultiDim<T> {
|
||||
public:
|
||||
LinearAllocGuard3D(const size_t width_logical, const size_t height, const size_t depth)
|
||||
: LinearAllocGuardMultiDim<T>{make_hipExtent(width_logical * sizeof(T), height, depth)} {
|
||||
HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_));
|
||||
}
|
||||
|
||||
LinearAllocGuard3D(const hipExtent extent) : LinearAllocGuardMultiDim<T>(extent) {
|
||||
HIP_CHECK(hipMalloc3D(&this->pitched_ptr_, this->extent_));
|
||||
}
|
||||
|
||||
LinearAllocGuard3D(const LinearAllocGuard3D&) = delete;
|
||||
LinearAllocGuard3D(LinearAllocGuard3D&&) = delete;
|
||||
|
||||
size_t depth() const { return this->extent_.depth; }
|
||||
};
|
||||
|
||||
template <typename T> class ArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain logical width
|
||||
ArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} {
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HIP_CHECK(hipMalloc3DArray(&ptr_, &desc, extent_, flags));
|
||||
}
|
||||
|
||||
~ArrayAllocGuard() { static_cast<void>(hipFreeArray(ptr_)); }
|
||||
|
||||
ArrayAllocGuard(const ArrayAllocGuard&) = delete;
|
||||
ArrayAllocGuard(ArrayAllocGuard&&) = delete;
|
||||
|
||||
hipArray_t ptr() const { return ptr_; }
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
private:
|
||||
hipArray_t ptr_ = nullptr;
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
template <typename T> class DrvArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain width in bytes
|
||||
DrvArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) : extent_{extent} {
|
||||
HIP_ARRAY3D_DESCRIPTOR desc{};
|
||||
using vec_info = vector_info<T>;
|
||||
desc.Format = vec_info::format;
|
||||
desc.NumChannels = vec_info::size;
|
||||
desc.Width = extent_.width / sizeof(T);
|
||||
desc.Height = extent_.height;
|
||||
desc.Depth = extent_.depth;
|
||||
desc.Flags = flags;
|
||||
HIP_CHECK(hipArray3DCreate(&ptr_, &desc));
|
||||
}
|
||||
|
||||
~DrvArrayAllocGuard() { static_cast<void>(hipArrayDestroy(ptr_)); }
|
||||
|
||||
DrvArrayAllocGuard(const DrvArrayAllocGuard&) = delete;
|
||||
DrvArrayAllocGuard(DrvArrayAllocGuard&&) = delete;
|
||||
|
||||
hiparray ptr() const { return ptr_; }
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
private:
|
||||
hiparray ptr_ = nullptr;
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
enum class Streams { nullstream, perThread, created };
|
||||
|
||||
class StreamGuard {
|
||||
|
||||
@@ -54,6 +54,37 @@ void ArrayFindIfNot(T* const array, const T expected_value, const size_t num_ele
|
||||
ArrayFindIfNot(array, array + num_elements, expected_value);
|
||||
}
|
||||
|
||||
template <typename T, typename F>
|
||||
void PitchedMemoryVerify(T* const ptr, const size_t pitch, const size_t width, const size_t height,
|
||||
const size_t depth, F expected_value_generator) {
|
||||
for (size_t z = 0; z < depth; ++z) {
|
||||
for (size_t y = 0; y < height; ++y) {
|
||||
for (size_t x = 0; x < width; ++x) {
|
||||
const auto slice = reinterpret_cast<uint8_t*>(ptr) + pitch * height * z;
|
||||
const auto row = slice + pitch * y;
|
||||
if (reinterpret_cast<T*>(row)[x] != expected_value_generator(x, y, z)) {
|
||||
INFO("Mismatch at indices: " << x << ", " << y << ", " << z);
|
||||
REQUIRE(reinterpret_cast<T*>(row)[x] == expected_value_generator(x, y, z));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename F>
|
||||
void PitchedMemorySet(T* const ptr, const size_t pitch, const size_t width, const size_t height,
|
||||
const size_t depth, F expected_value_generator) {
|
||||
for (size_t z = 0; z < depth; ++z) {
|
||||
for (size_t y = 0; y < height; ++y) {
|
||||
for (size_t x = 0; x < width; ++x) {
|
||||
const auto slice = reinterpret_cast<uint8_t*>(ptr) + pitch * height * z;
|
||||
const auto row = slice + pitch * y;
|
||||
reinterpret_cast<T*>(row)[x] = expected_value_generator(x, y, z);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void VectorIncrement(T* const vec, const T increment_value, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
@@ -82,6 +113,18 @@ static __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void Iota(T* const out, size_t pitch, size_t w, size_t h, size_t d) {
|
||||
const auto x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const auto y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const auto z = blockIdx.z * blockDim.z + threadIdx.z;
|
||||
if (x < w && y < h && z < d) {
|
||||
char* const slice = reinterpret_cast<char*>(out) + pitch * h * z;
|
||||
char* const row = slice + pitch * y;
|
||||
reinterpret_cast<T*>(row)[x] = z * w * h + y * w + x;
|
||||
}
|
||||
}
|
||||
|
||||
inline void LaunchDelayKernel(const std::chrono::milliseconds interval, const hipStream_t stream) {
|
||||
int ticks_per_ms = 0;
|
||||
// Clock rate is in kHz => number of clock ticks in a millisecond
|
||||
|
||||
@@ -15,6 +15,7 @@ set(LINUX_TEST_SRC
|
||||
hipIpcMemAccessTest.cc
|
||||
deviceAllocationMproc.cc
|
||||
hipNoGpuTsts.cc
|
||||
hipMemGetInfo.cc
|
||||
)
|
||||
|
||||
add_custom_target(dummy_kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/dummy_kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../multiproc/dummy_kernel.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define ReadEnd 0
|
||||
#define WriteEnd 1
|
||||
#define MAX_SIZE 32
|
||||
#define FREE_MEM_TO_HIDE 4294967296
|
||||
#define SIZE_TO_ALLOCATE 2147483648
|
||||
/*
|
||||
* In main process allocate 2 GB of device memory.
|
||||
* Fork() a child process and verify that 2 GB has been
|
||||
* allocated in parent process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario1") {
|
||||
constexpr size_t size = 2147483648; // 2GB
|
||||
int fd[2], fd1[2], status;
|
||||
status = pipe(fd);
|
||||
REQUIRE(status == 0);
|
||||
status = pipe(fd1);
|
||||
REQUIRE(status == 0);
|
||||
pid_t child_pid;
|
||||
child_pid = fork(); // Create a new child process
|
||||
if (child_pid < 0) {
|
||||
WARN("Fork failed!!!!");
|
||||
} else if (child_pid == 0) { // child
|
||||
close(fd1[WriteEnd]);
|
||||
close(fd[ReadEnd]);
|
||||
int result;
|
||||
size_t free = 0, total = 0;
|
||||
// Wait for signal from parent
|
||||
int check_child;
|
||||
status = read(fd1[ReadEnd], &check_child, sizeof(check_child));
|
||||
REQUIRE(status != -1);
|
||||
close(fd1[ReadEnd]);
|
||||
// Check the total and free memory which is allocated in parent
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
if ((total - free) >= size) {
|
||||
result = 1;
|
||||
} else {
|
||||
result = 0;
|
||||
}
|
||||
// Write the result to parent
|
||||
status = write(fd[WriteEnd], &result, sizeof(result));
|
||||
REQUIRE(status != -1);
|
||||
close(fd[WriteEnd]);
|
||||
exit(0);
|
||||
} else { // Parent
|
||||
close(fd1[ReadEnd]);
|
||||
close(fd[WriteEnd]);
|
||||
// Allocate memory
|
||||
char* A_d = nullptr;
|
||||
HIP_CHECK(hipMalloc(&A_d, size));
|
||||
// Signal the child
|
||||
int check = 0;
|
||||
status = write(fd1[WriteEnd], &check, sizeof(check));
|
||||
REQUIRE(status != -1);
|
||||
close(fd1[WriteEnd]);
|
||||
// Read the result from Child
|
||||
int read_result;
|
||||
status = read(fd[ReadEnd], &read_result, sizeof(read_result));
|
||||
REQUIRE(status != -1);
|
||||
close(fd[ReadEnd]);
|
||||
REQUIRE(read_result == 1);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* From main process Fork() a child process. In the child process allocate
|
||||
* 2 GB of device memory. Signal the parent process. Verify from the parent
|
||||
* process that 2 GB is allocated in the child process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario2") {
|
||||
constexpr size_t size = 2147483648; // 2GB
|
||||
int fd[2], fd2[2], status;
|
||||
status = pipe(fd);
|
||||
REQUIRE(status == 0);
|
||||
status = pipe(fd2);
|
||||
REQUIRE(status == 0);
|
||||
pid_t child_pid;
|
||||
child_pid = fork(); // Create a new child process
|
||||
if (child_pid < 0) {
|
||||
WARN("Fork failed!!!!");
|
||||
} else if (child_pid == 0) { // Child
|
||||
close(fd[ReadEnd]);
|
||||
close(fd2[WriteEnd]);
|
||||
// Allocate memory
|
||||
float* A_d = nullptr;
|
||||
HIP_CHECK(hipMalloc(&A_d, size));
|
||||
// Signal the parent
|
||||
int data = 0;
|
||||
status = write(fd[WriteEnd], &data, sizeof(data));
|
||||
REQUIRE(status != -1);
|
||||
close(fd[WriteEnd]);
|
||||
int valid = 0;
|
||||
// Wait for Signal from parent before freeing memory and exiting
|
||||
status = read(fd2[ReadEnd], &valid, sizeof(valid));
|
||||
REQUIRE(status != -1);
|
||||
close(fd2[ReadEnd]);
|
||||
// Free allocated device memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
exit(0);
|
||||
} else { // Parent
|
||||
size_t free = 0, total = 0;
|
||||
close(fd[WriteEnd]);
|
||||
close(fd2[ReadEnd]);
|
||||
// Wait for child signal
|
||||
int data = 0;
|
||||
status = read(fd[ReadEnd], &data, sizeof(data));
|
||||
REQUIRE(status != -1);
|
||||
close(fd[ReadEnd]);
|
||||
// Verify the memory
|
||||
HIP_CHECK(hipMemGetInfo(&free , &total));
|
||||
REQUIRE((total - free) >= size);
|
||||
// Signal child that validation is over and child can free memory
|
||||
int valid = 0;
|
||||
status = write(fd2[WriteEnd], &valid, sizeof(valid));
|
||||
REQUIRE(status != -1);
|
||||
close(fd2[WriteEnd]);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* From main process Fork() a child process. In the child process
|
||||
* allocate 2 GB of device memory. Free the memory and exit from
|
||||
* child process. Verify from the parent process that 2 GB is
|
||||
* freed in the child process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_Scenario3") {
|
||||
constexpr size_t size = 2147483648; // 2GB
|
||||
int fd[2], status;
|
||||
status = pipe(fd);
|
||||
REQUIRE(status == 0);
|
||||
pid_t child_pid;
|
||||
child_pid = fork(); // Create a new child process
|
||||
if (child_pid < 0) {
|
||||
WARN("Fork failed!!!!");
|
||||
} else if (child_pid == 0) { // Child
|
||||
close(fd[ReadEnd]);
|
||||
// Allocate the memory
|
||||
void* A_d = nullptr;
|
||||
HIP_CHECK(hipMalloc(&A_d, size));
|
||||
// Free the allocated memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
// Signal the parent about memory free
|
||||
int check = 0;
|
||||
status = write(fd[WriteEnd], &check, sizeof(check));
|
||||
REQUIRE(status != -1);
|
||||
close(fd[WriteEnd]);
|
||||
exit(0);
|
||||
} else { // Parent
|
||||
close(fd[WriteEnd]);
|
||||
// Wait for the signal from child about memory free
|
||||
int check_parent;
|
||||
status = read(fd[ReadEnd], &check_parent, sizeof(check_parent));
|
||||
REQUIRE(status != -1);
|
||||
close(fd[ReadEnd]);
|
||||
size_t free = 0, total = 0;
|
||||
// Verify the memory
|
||||
HIP_CHECK(hipMemGetInfo(&free , &total));
|
||||
REQUIRE((total - free) >= 0);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* From main process Fork() a child process. In the child process allocate
|
||||
* 2 GB of device memory. Exit from child process. Verify from the parent
|
||||
* process that 2 GB is freed in the child process.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_scenario4") {
|
||||
constexpr size_t size = 2147483648; // 2GB
|
||||
pid_t child_pid;
|
||||
child_pid = fork(); // Create a new child process
|
||||
if (child_pid < 0) {
|
||||
WARN("Fork failed!!!!");
|
||||
} else if (child_pid == 0) { // Child
|
||||
// Allocate the memory
|
||||
void* A_d = nullptr;
|
||||
HIP_CHECK(hipMalloc(&A_d, size));
|
||||
exit(0);
|
||||
} else { // Parent
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
size_t free = 0, total = 0;
|
||||
// Verify the memory
|
||||
HIP_CHECK(hipMemGetInfo(&free , &total));
|
||||
REQUIRE((total-free) >= 0);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Multidevice Scenario: In main process allocate 2 GB of device memory
|
||||
* in every device. Verify that 2 GB is allocated using hipMemGetInfo.
|
||||
* Fork() a child process and verify that 2 GB has been allocated from
|
||||
* parent process in every device.
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetInfo_Functional_MultiDevice_Scenario5") {
|
||||
constexpr size_t size = 2147483648; // 2GB
|
||||
size_t free = 0, total = 0;
|
||||
int fd1[2], fd2[2], status;
|
||||
status = pipe(fd1);
|
||||
REQUIRE(status == 0);
|
||||
status = pipe(fd2);
|
||||
REQUIRE(status == 0);
|
||||
pid_t child_pid;
|
||||
child_pid = fork(); // Create a new child process
|
||||
if (child_pid < 0) {
|
||||
WARN("Fork failed!!!!");
|
||||
} else if (child_pid == 0) { // Child
|
||||
close(fd1[WriteEnd]);
|
||||
close(fd2[ReadEnd]);
|
||||
// Wait for the signal from parent after memory allocatoin
|
||||
int check_child;
|
||||
status = read(fd1[ReadEnd], &check_child, sizeof(check_child));
|
||||
REQUIRE(status != -1);
|
||||
close(fd1[ReadEnd]);
|
||||
int num_devices, result, count = 0;
|
||||
// Get the device count
|
||||
HIP_CHECK(hipGetDeviceCount(&num_devices));
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
// Check the memory
|
||||
HIP_CHECK(hipMemGetInfo(&free , &total));
|
||||
if ((total - free) >= size) {
|
||||
count+=1;
|
||||
}
|
||||
}
|
||||
if ( count == num_devices ) {
|
||||
result = 1;
|
||||
} else {
|
||||
result = 0;
|
||||
}
|
||||
// Write the result to Parent
|
||||
status = write(fd2[WriteEnd], &result, sizeof(result));
|
||||
REQUIRE(status != -1);
|
||||
close(fd2[WriteEnd]);
|
||||
exit(0);
|
||||
} else { // Parent
|
||||
close(fd1[ReadEnd]);
|
||||
close(fd2[WriteEnd]);
|
||||
int num_devices;
|
||||
// Get the device count
|
||||
HIP_CHECK(hipGetDeviceCount(&num_devices));
|
||||
std::vector<void*>v(num_devices, nullptr);
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
// verify the memory
|
||||
HIP_CHECK(hipMemGetInfo(&free , &total));
|
||||
// Allocate memory
|
||||
HIP_CHECK(hipMalloc(&v[i], size));
|
||||
// Verify the memory
|
||||
HIP_CHECK(hipMemGetInfo(&free , &total));
|
||||
}
|
||||
// Signal the child about memory allocation
|
||||
int check = 0;
|
||||
status = write(fd1[WriteEnd], &check, sizeof(check));
|
||||
REQUIRE(status != -1);
|
||||
close(fd1[WriteEnd]);
|
||||
// Read result from child
|
||||
int result_parent;
|
||||
status = read(fd2[ReadEnd], &result_parent, sizeof(result_parent));
|
||||
REQUIRE(status != -1);
|
||||
REQUIRE(result_parent == 1);
|
||||
close(fd2[ReadEnd]);
|
||||
// Free the allocated memory on each device
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipFree(v[i]));
|
||||
}
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -23,6 +23,7 @@ set(TEST_SRC
|
||||
hipExtGetLinkTypeAndHopCount.cc
|
||||
hipDeviceSetLimit.cc
|
||||
hipDeviceSetGetSharedMemConfig.cc
|
||||
hipDeviceReset.cc
|
||||
hipDeviceSetGetMemPool.cc
|
||||
hipInit.cc
|
||||
hipDriverGetVersion.cc
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
TEST_CASE("Unit_hipDeviceReset_Positive_Basic") {
|
||||
const auto device = GENERATE(range(0, HipTest::getDeviceCount()));
|
||||
HIP_CHECK(hipSetDevice(device));
|
||||
INFO("Current device is: " << device);
|
||||
|
||||
unsigned int flags_before = 0u;
|
||||
HIP_CHECK(hipGetDeviceFlags(&flags_before));
|
||||
hipSharedMemConfig mem_config_before;
|
||||
HIP_CHECK(hipDeviceGetSharedMemConfig(&mem_config_before));
|
||||
|
||||
void* ptr = nullptr;
|
||||
HIP_CHECK(hipMalloc(&ptr, 500));
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
const auto cache_config_ret = hipDeviceSetCacheConfig(hipFuncCachePreferL1);
|
||||
REQUIRE((cache_config_ret == hipSuccess || cache_config_ret == hipErrorNotSupported));
|
||||
|
||||
const auto shared_mem_config_ret = hipDeviceSetSharedMemConfig(
|
||||
mem_config_before == hipSharedMemBankSizeFourByte ? hipSharedMemBankSizeEightByte
|
||||
: hipSharedMemBankSizeFourByte);
|
||||
REQUIRE((shared_mem_config_ret == hipSuccess || shared_mem_config_ret == hipErrorNotSupported));
|
||||
|
||||
HIP_CHECK(hipSetDeviceFlags(flags_before ^ (1u << 2)));
|
||||
|
||||
HIP_CHECK(hipDeviceReset());
|
||||
|
||||
unsigned int flags_after = 0u;
|
||||
CHECK(hipGetDeviceFlags(&flags_after) == hipSuccess);
|
||||
CHECK(flags_after == flags_before);
|
||||
|
||||
CHECK(hipFree(ptr) == hipErrorInvalidValue);
|
||||
|
||||
// Inconsistent behavior in CUDA, sometimes segfaults, sometimes works
|
||||
// Return value mismatch on AMD - EXSWHTEC-124
|
||||
#if 0
|
||||
CHECK(hipStreamDestroy(stream) == hipErrorInvalidHandle);
|
||||
#endif
|
||||
|
||||
if (cache_config_ret == hipSuccess) {
|
||||
hipFuncCache_t cache_config;
|
||||
CHECK(hipDeviceGetCacheConfig(&cache_config) == hipSuccess);
|
||||
CHECK(cache_config == hipFuncCachePreferNone);
|
||||
}
|
||||
|
||||
if (shared_mem_config_ret == hipSuccess) {
|
||||
hipSharedMemConfig mem_config_after;
|
||||
CHECK(hipDeviceGetSharedMemConfig(&mem_config_after) == hipSuccess);
|
||||
CHECK(mem_config_after == mem_config_before);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipDeviceReset_Positive_Threaded") {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
INFO("Current device is: " << 0);
|
||||
|
||||
unsigned int flags_before = 0u;
|
||||
HIP_CHECK(hipGetDeviceFlags(&flags_before));
|
||||
hipSharedMemConfig mem_config_before;
|
||||
HIP_CHECK(hipDeviceGetSharedMemConfig(&mem_config_before));
|
||||
|
||||
void* ptr = nullptr;
|
||||
HIP_CHECK(hipMalloc(&ptr, 500));
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
const auto cache_config_ret = hipDeviceSetCacheConfig(hipFuncCachePreferL1);
|
||||
REQUIRE((cache_config_ret == hipSuccess || cache_config_ret == hipErrorNotSupported));
|
||||
|
||||
const auto shared_mem_config_ret = hipDeviceSetSharedMemConfig(
|
||||
mem_config_before == hipSharedMemBankSizeFourByte ? hipSharedMemBankSizeEightByte
|
||||
: hipSharedMemBankSizeFourByte);
|
||||
REQUIRE((shared_mem_config_ret == hipSuccess || shared_mem_config_ret == hipErrorNotSupported));
|
||||
|
||||
|
||||
HIP_CHECK(hipSetDeviceFlags(flags_before ^ (1u << 2)));
|
||||
|
||||
std::thread([] {
|
||||
HIP_CHECK_THREAD(hipSetDevice(0));
|
||||
HIP_CHECK_THREAD(hipDeviceReset());
|
||||
}).join();
|
||||
HIP_CHECK_THREAD_FINALIZE();
|
||||
|
||||
unsigned int flags_after = 0u;
|
||||
CHECK(hipGetDeviceFlags(&flags_after) == hipSuccess);
|
||||
CHECK(flags_after == flags_before);
|
||||
|
||||
CHECK(hipFree(ptr) == hipErrorInvalidValue);
|
||||
|
||||
// Inconsistent behavior in CUDA, sometimes segfaults, sometimes works
|
||||
// Return value mismatch on AMD - EXSWHTEC-124
|
||||
#if 0
|
||||
CHECK(hipStreamDestroy(stream) == hipErrorInvalidHandle);
|
||||
#endif
|
||||
|
||||
if (cache_config_ret == hipSuccess) {
|
||||
hipFuncCache_t cache_config;
|
||||
CHECK(hipDeviceGetCacheConfig(&cache_config) == hipSuccess);
|
||||
CHECK(cache_config == hipFuncCachePreferNone);
|
||||
}
|
||||
|
||||
if (shared_mem_config_ret == hipSuccess) {
|
||||
hipSharedMemConfig mem_config_after;
|
||||
CHECK(hipDeviceGetSharedMemConfig(&mem_config_after) == hipSuccess);
|
||||
CHECK(mem_config_after == mem_config_before);
|
||||
}
|
||||
}
|
||||
@@ -27,6 +27,7 @@ set(AMD_TEST_SRC
|
||||
bitExtract.cc
|
||||
bitInsert.cc
|
||||
floatTM.cc
|
||||
hipMathFunctions.cc
|
||||
)
|
||||
set(AMD_ARCH_SPEC_TEST_SRC
|
||||
AtomicAdd_Coherent_withunsafeflag.cc
|
||||
@@ -76,6 +77,7 @@ if(${ARCH_CHECK} GREATER_EQUAL 0)
|
||||
set_source_files_properties(unsafeAtomicAdd_NonCoherent_withunsafeflag.cc PROPERTIES COMPILE_OPTIONS "-munsafe-fp-atomics")
|
||||
set_source_files_properties(unsafeAtomicAdd_Coherent_withnounsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics")
|
||||
set_source_files_properties(unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc PROPERTIES COMPILE_OPTIONS "-mno-unsafe-fp-atomics")
|
||||
set_source_files_properties(hipMathFunctions.cc PROPERTIES COMPILE_FLAGS "-Xclang -fallow-half-arguments-and-returns")
|
||||
file(GLOB AtomicAdd_files *AtomicAdd_*_*.cc)
|
||||
set_property(SOURCE ${AtomicAdd_files} PROPERTY COMPILE_FLAGS --save-temps)
|
||||
file(GLOB unsafeAtomicAdd_files *unsafeAtomicAdd_*_*.cc)
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Test Scenarios :
|
||||
1) Verification of absolute int64 operation performed at device.
|
||||
2) Verification of __fp16 operation performed at device.
|
||||
3) Verification of pow operations performed at device.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
__global__ void kernel_abs_int64(long long *input, long long *output) { // NOLINT
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = abs(input[tx]);
|
||||
}
|
||||
|
||||
|
||||
#define CHECK_ABS_INT64(IN, OUT, EXP) \
|
||||
{ \
|
||||
if (OUT != EXP) { \
|
||||
INFO("check_abs_int64 failed on " << IN << ", output " << OUT << \
|
||||
", expected " << EXP); \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
|
||||
template<class T, class F>
|
||||
__global__ void kernel_simple(F f, T *out) {
|
||||
*out = f();
|
||||
}
|
||||
|
||||
template<class T, class F>
|
||||
void check_simple(F f, T expected, const char* file, unsigned line) {
|
||||
auto memsize = sizeof(T);
|
||||
T *outputCPU = reinterpret_cast<T *>(malloc(memsize));
|
||||
T *outputGPU = nullptr;
|
||||
REQUIRE(outputCPU != nullptr);
|
||||
HIP_CHECK(hipMalloc(&outputGPU, memsize));
|
||||
hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU);
|
||||
HIP_CHECK(hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost));
|
||||
if (*outputCPU != expected) {
|
||||
INFO("File " << file << ", line " << line << " check failed." <<
|
||||
" output = " << static_cast<double>(*outputCPU) << " expected "
|
||||
<< static_cast<double>(expected));
|
||||
REQUIRE(false);
|
||||
}
|
||||
HIP_CHECK(hipFree(outputGPU));
|
||||
free(outputCPU);
|
||||
}
|
||||
|
||||
#define CHECK_SIMPLE(lambda, expected) \
|
||||
check_simple(lambda, expected, __FILE__, __LINE__);
|
||||
|
||||
|
||||
/**
|
||||
Verification of absolute int64 operation performed at device.
|
||||
*/
|
||||
TEST_CASE("Unit_abs_int64_Verification") {
|
||||
using datatype_t = long long; // NOLINT
|
||||
|
||||
datatype_t *inputCPU{}, *outputCPU{};
|
||||
datatype_t *inputGPU{}, *outputGPU{};
|
||||
const int NUM_INPUTS = 8;
|
||||
auto memsize = NUM_INPUTS * sizeof(datatype_t);
|
||||
|
||||
// allocate memories
|
||||
inputCPU = reinterpret_cast<datatype_t *>(malloc(memsize));
|
||||
outputCPU = reinterpret_cast<datatype_t *>(malloc(memsize));
|
||||
REQUIRE(inputCPU != nullptr);
|
||||
REQUIRE(outputCPU != nullptr);
|
||||
HIP_CHECK(hipMalloc(&inputGPU, memsize));
|
||||
HIP_CHECK(hipMalloc(&outputGPU, memsize));
|
||||
|
||||
// populate input with constants
|
||||
inputCPU[0] = -81985529216486895ll;
|
||||
inputCPU[1] = 81985529216486895ll;
|
||||
inputCPU[2] = -1250999896491ll;
|
||||
inputCPU[3] = 1250999896491ll;
|
||||
inputCPU[4] = -19088743ll;
|
||||
inputCPU[5] = 19088743ll;
|
||||
inputCPU[6] = -291ll;
|
||||
inputCPU[7] = 291ll;
|
||||
|
||||
// copy inputs to device
|
||||
HIP_CHECK(hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice));
|
||||
|
||||
// launch kernel
|
||||
hipLaunchKernelGGL(kernel_abs_int64, dim3(1), dim3(NUM_INPUTS), 0, 0,
|
||||
inputGPU, outputGPU);
|
||||
// copy outputs from device
|
||||
HIP_CHECK(hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost));
|
||||
|
||||
// check outputs
|
||||
CHECK_ABS_INT64(inputCPU[0], outputCPU[0], outputCPU[1]);
|
||||
CHECK_ABS_INT64(inputCPU[1], outputCPU[1], outputCPU[1]);
|
||||
CHECK_ABS_INT64(inputCPU[2], outputCPU[2], outputCPU[3]);
|
||||
CHECK_ABS_INT64(inputCPU[3], outputCPU[3], outputCPU[3]);
|
||||
CHECK_ABS_INT64(inputCPU[4], outputCPU[4], outputCPU[5]);
|
||||
CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]);
|
||||
CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]);
|
||||
CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]);
|
||||
|
||||
// free memories
|
||||
HIP_CHECK(hipFree(inputGPU));
|
||||
HIP_CHECK(hipFree(outputGPU));
|
||||
free(inputCPU);
|
||||
free(outputCPU);
|
||||
}
|
||||
|
||||
/**
|
||||
Verification of pow operations performed at device.
|
||||
*/
|
||||
TEST_CASE("Unit_pown_Verification") {
|
||||
CHECK_SIMPLE([]__device__(){ return powif(2.0f, 2); }, 4.0f);
|
||||
CHECK_SIMPLE([]__device__(){ return powi(2.0, 2); }, 4.0);
|
||||
CHECK_SIMPLE([]__device__(){ return pow(2.0f, 2); }, 4.0f);
|
||||
CHECK_SIMPLE([]__device__(){ return pow(2.0, 2); }, 4.0);
|
||||
CHECK_SIMPLE([]__device__(){ return pow(2.0f16, 2); }, 4.0f16);
|
||||
}
|
||||
@@ -172,6 +172,7 @@ set(TEST_SRC
|
||||
hipMemsetAsync.cc
|
||||
hipMemAdvise.cc
|
||||
hipMemRangeGetAttributes.cc
|
||||
hipGetSymbolSizeAddress.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ THE SOFTWARE.
|
||||
#include <limits>
|
||||
#include "DriverContext.hh"
|
||||
#include "hipArrayCommon.hh"
|
||||
#include "hip_array_common.hh"
|
||||
#include "hip_test_common.hh"
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -26,66 +26,6 @@ THE SOFTWARE.
|
||||
|
||||
constexpr size_t BlockSize = 16;
|
||||
|
||||
template <class T, size_t N, hipArray_Format Format> struct type_and_size_and_format {
|
||||
using type = T;
|
||||
static constexpr size_t size = N;
|
||||
static constexpr hipArray_Format format = Format;
|
||||
};
|
||||
|
||||
// Create a map of type to scalar type, vector size and scalar type format enum.
|
||||
// This is useful for creating simpler function that depend on the vector size.
|
||||
template <typename T> struct vector_info;
|
||||
template <>
|
||||
struct vector_info<int> : type_and_size_and_format<int, 1, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float> : type_and_size_and_format<float, 1, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short> : type_and_size_and_format<short, 1, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char> : type_and_size_and_format<char, 1, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<unsigned int>
|
||||
: type_and_size_and_format<unsigned int, 1, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<unsigned short>
|
||||
: type_and_size_and_format<unsigned short, 1, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<unsigned char>
|
||||
: type_and_size_and_format<unsigned char, 1, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <>
|
||||
struct vector_info<int2> : type_and_size_and_format<int, 2, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float2> : type_and_size_and_format<float, 2, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short2> : type_and_size_and_format<short, 2, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char2> : type_and_size_and_format<char, 2, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<uint2>
|
||||
: type_and_size_and_format<unsigned int, 2, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<ushort2>
|
||||
: type_and_size_and_format<unsigned short, 2, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<uchar2>
|
||||
: type_and_size_and_format<unsigned char, 2, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
template <>
|
||||
struct vector_info<int4> : type_and_size_and_format<int, 4, HIP_AD_FORMAT_SIGNED_INT32> {};
|
||||
template <> struct vector_info<float4> : type_and_size_and_format<float, 4, HIP_AD_FORMAT_FLOAT> {};
|
||||
template <>
|
||||
struct vector_info<short4> : type_and_size_and_format<short, 4, HIP_AD_FORMAT_SIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<char4> : type_and_size_and_format<char, 4, HIP_AD_FORMAT_SIGNED_INT8> {};
|
||||
template <>
|
||||
struct vector_info<uint4>
|
||||
: type_and_size_and_format<unsigned int, 4, HIP_AD_FORMAT_UNSIGNED_INT32> {};
|
||||
template <>
|
||||
struct vector_info<ushort4>
|
||||
: type_and_size_and_format<unsigned short, 4, HIP_AD_FORMAT_UNSIGNED_INT16> {};
|
||||
template <>
|
||||
struct vector_info<uchar4>
|
||||
: type_and_size_and_format<unsigned char, 4, HIP_AD_FORMAT_UNSIGNED_INT8> {};
|
||||
|
||||
// read from a texture using normalized coordinates
|
||||
constexpr size_t ChannelToRead = 1;
|
||||
template <typename T>
|
||||
|
||||
@@ -27,6 +27,7 @@ hipArrayCreate API test scenarios
|
||||
#include <array>
|
||||
#include <numeric>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_array_common.hh>
|
||||
#include "hipArrayCommon.hh"
|
||||
#include "DriverContext.hh"
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_array_common.hh>
|
||||
#include "hipArrayCommon.hh"
|
||||
#include "DriverContext.hh"
|
||||
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
namespace {
|
||||
constexpr size_t kArraySize = 5;
|
||||
} // anonymous namespace
|
||||
|
||||
#define HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(type) \
|
||||
__device__ type type##_var = 0; \
|
||||
__device__ type type##_arr[kArraySize] = {}; \
|
||||
__global__ void type##_var_address_validation_kernel(void* ptr, bool* out) { \
|
||||
*out = static_cast<void*>(&type##_var) == ptr; \
|
||||
} \
|
||||
__global__ void type##_arr_address_validation_kernel(void* ptr, bool* out) { \
|
||||
*out = static_cast<void*>(type##_arr) == ptr; \
|
||||
}
|
||||
|
||||
HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(int)
|
||||
HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(float)
|
||||
HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(char)
|
||||
HIP_GET_SYMBOL_SIZE_ADDRESS_DEFINE_GLOBALS(double)
|
||||
|
||||
template <typename T, size_t N, void (*validation_kernel)(void*, bool*)>
|
||||
static void HipGetSymbolSizeAddressTest(const void* symbol) {
|
||||
constexpr auto size = N * sizeof(T);
|
||||
|
||||
T* symbol_ptr = nullptr;
|
||||
size_t symbol_size = 0;
|
||||
HIP_CHECK(hipGetSymbolAddress(reinterpret_cast<void**>(&symbol_ptr), symbol));
|
||||
HIP_CHECK(hipGetSymbolSize(&symbol_size, symbol));
|
||||
REQUIRE(symbol_size == size);
|
||||
REQUIRE(symbol_ptr != nullptr);
|
||||
|
||||
LinearAllocGuard<bool> equal_addresses(LinearAllocs::hipMalloc, sizeof(bool));
|
||||
HIP_CHECK(hipMemset(equal_addresses.ptr(), false, sizeof(*equal_addresses.ptr())))
|
||||
validation_kernel<<<1, 1>>>(symbol_ptr, equal_addresses.ptr());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(nullptr));
|
||||
bool ok = false;
|
||||
HIP_CHECK(hipMemcpy(&ok, equal_addresses.ptr(), sizeof(ok), hipMemcpyDeviceToHost));
|
||||
REQUIRE(ok);
|
||||
|
||||
constexpr T expected_value = 42;
|
||||
std::array<T, N> fill_buffer;
|
||||
std::fill_n(fill_buffer.begin(), N, expected_value);
|
||||
HIP_CHECK(hipMemcpy(symbol_ptr, fill_buffer.data(), symbol_size, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
std::array<T, N> read_buffer;
|
||||
HIP_CHECK(hipMemcpy(read_buffer.data(), symbol_ptr, symbol_size, hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(read_buffer.data(), expected_value, read_buffer.size());
|
||||
}
|
||||
|
||||
#if HT_AMD
|
||||
#define SYMBOL(expr) &HIP_SYMBOL(expr)
|
||||
#else
|
||||
#define SYMBOL(expr) HIP_SYMBOL(expr)
|
||||
#endif
|
||||
|
||||
#define HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(type) \
|
||||
HipGetSymbolSizeAddressTest<type, 1, type##_var_address_validation_kernel>(SYMBOL(type##_var)); \
|
||||
HipGetSymbolSizeAddressTest<type, kArraySize, type##_arr_address_validation_kernel>( \
|
||||
SYMBOL(type##_arr));
|
||||
|
||||
TEST_CASE("Unit_hipGetSymbolSizeAddress_Positive_Basic") {
|
||||
SECTION("int") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(int); }
|
||||
SECTION("float") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(float); }
|
||||
SECTION("char") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(char); }
|
||||
SECTION("double") { HIP_GET_SYMBOL_SIZE_ADDRESS_TEST(double); }
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipGetSymbolAddress_Negative_Parameters") {
|
||||
// Causes a segfault in CUDA
|
||||
#if HT_AMD
|
||||
SECTION("devPtr == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGetSymbolAddress(nullptr, SYMBOL(int_var)), hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("symbolName == nullptr") {
|
||||
void* ptr = nullptr;
|
||||
HIP_CHECK_ERROR(hipGetSymbolAddress(&ptr, nullptr), hipErrorInvalidSymbol);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipGetSymbolSize_Negative_Parameters") {
|
||||
// Causes a segfault in CUDA
|
||||
#if HT_AMD
|
||||
SECTION("size == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGetSymbolSize(nullptr, SYMBOL(int_var)), hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("symbolName == nullptr") {
|
||||
size_t size = 0;
|
||||
HIP_CHECK_ERROR(hipGetSymbolSize(&size, nullptr), hipErrorInvalidSymbol);
|
||||
}
|
||||
}
|
||||
@@ -26,6 +26,7 @@ hipMallocArray API test scenarios
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_array_common.hh>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include "hipArrayCommon.hh"
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11
|
||||
* BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM nvidia
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user