Merge branch 'master' into move-memcpy

This commit is contained in:
Maneesh Gupta
2018-07-17 10:51:42 +05:30
کامیت شده توسط GitHub
کامیت 1fc3ef3cd4
35فایلهای تغییر یافته به همراه2155 افزوده شده و 16857 حذف شده
+1 -4
مشاهده پرونده
@@ -193,9 +193,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
src/program_state.cpp)
set(SOURCE_FILES_DEVICE
src/device_util.cpp
src/hip_ldg.cpp
src/device_functions.cpp)
src/device_util.cpp)
execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic")
@@ -238,7 +236,6 @@ endif()
# Install hip_hcc if platform is hcc
if(HIP_PLATFORM STREQUAL "hcc")
install(TARGETS hip_hcc_static hip_hcc hip_device DESTINATION lib)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll DESTINATION lib)
# Install .hipInfo
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
+2 -3
مشاهده پرونده
@@ -259,8 +259,7 @@ if($HIP_PLATFORM eq "hcc"){
}
if(($HIP_PLATFORM eq "hcc")){
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
$ENV{HIP_HC_IR_FILE}="";
$ENV{HCC_EXTRA_LIBRARIES}="\n";
}
if($HIP_PLATFORM eq "nvcc"){
@@ -533,7 +532,7 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
print "No valid AMD GPU target was either specified or found. Please specify a valid target using --amdgpu-target=" and die();
}
$ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
$ENV{HCC_EXTRA_LIBRARIES}="\n";
if($HIP_PLATFORM eq "hcc") {
$GPU_ARCH_OPT = " --amdgpu-target=";
@@ -231,6 +231,11 @@
| 0x02 |*`CU_MEMORYTYPE_DEVICE`* | |
| 0x03 |*`CU_MEMORYTYPE_ARRAY`* | |
| 0x04 |*`CU_MEMORYTYPE_UNIFIED`* | |
| enum |***`CUcomputemode`*** |***`hipComputeMode`*** |
| 0 |*`CU_COMPUTEMODE_DEFAULT`* |*`hipComputeModeDefault`* |
| 1 |*`CU_COMPUTEMODE_EXCLUSIVE`* |*`hipComputeModeExclusive`* |
| 2 |*`CU_COMPUTEMODE_PROHIBITED`* |*`hipComputeModeProhibited`* |
| 3 |*`CU_COMPUTEMODE_EXCLUSIVE_PROCESS`* |*`hipComputeModeExclusiveProcess`* |
| enum |***`CUoccupancy_flags`*** | |
| 0x00 |*`CU_OCCUPANCY_DEFAULT`* | |
| 0x01 |*`CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE`* | |
@@ -243,7 +248,7 @@
| 6 |*`CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`* | |
| 7 |*`CU_POINTER_ATTRIBUTE_BUFFER_ID`* | |
| 8 |*`CU_POINTER_ATTRIBUTE_IS_MANAGED`* | |
| enum |***`CUmemorytype`*** | |
| enum |***`CUresourcetype`*** | |
| 0x00 |*`CU_RESOURCE_TYPE_ARRAY`* | |
| 0x01 |*`CU_RESOURCE_TYPE_MIPMAPPED_ARRAY`* | |
| 0x02 |*`CU_RESOURCE_TYPE_LINEAR`* | |
@@ -418,11 +418,11 @@
| 1 |*`cudaChannelFormatKindUnsigned`* |*`hipChannelFormatKindUnsigned`* |
| 2 |*`cudaChannelFormatKindFloat`* |*`hipChannelFormatKindFloat`* |
| 3 |*`cudaChannelFormatKindNone`* |*`hipChannelFormatKindNone`* |
| enum |***`cudaComputeMode`*** | |
| 0 |*`cudaComputeModeDefault`* | |
| 1 |*`cudaComputeModeExclusive`* | |
| 2 |*`cudaComputeModeProhibited`* | |
| 3 |*`cudaComputeModeExclusiveProcess`* | |
| enum |***`cudaComputeMode`*** |***`hipComputeMode`*** |
| 0 |*`cudaComputeModeDefault`* |*`hipComputeModeDefault`* |
| 1 |*`cudaComputeModeExclusive`* |*`hipComputeModeExclusive`* |
| 2 |*`cudaComputeModeProhibited`* |*`hipComputeModeProhibited`* |
| 3 |*`cudaComputeModeExclusiveProcess`* |*`hipComputeModeExclusiveProcess`* |
| enum |***`cudaDeviceAttr`*** |***`hipDeviceAttribute_t`*** |
| 1 |*`cudaDevAttrMaxThreadsPerBlock`* |*`hipDeviceAttributeMaxThreadsPerBlock`* |
| 2 |*`cudaDevAttrMaxBlockDimX`* |*`hipDeviceAttributeMaxBlockDimX`* |
@@ -1,22 +1,22 @@
# HIP Deprecated API List
# HIP Deprecated APIs
## HIP Context API
CUDA supports cuCtx API, the Driver API that defines "Context" and "Devices" as separate entities. Contexts contain a single device, and a device can theoretically have multiple contexts. HIP initially added limited support for these API to facilitate easy porting from existing driver codes. These API are marked as deprecated now since there are better alternate interface (such as hipSetDevice or the stream API) to achieve the required functions.
###hipCtxCreate
###hipCtxDestroy
###hipCtxPopCurrent
###hipCtxPushCurrent
###hipCtxSetCurrent
###hipCtxGetCurrent
###hipCtxGetDevice
###hipCtxGetApiVersion
###hipCtxGetCacheConfig
###hipCtxSetCacheConfig
###hipCtxSetSharedMemConfig
###hipCtxGetSharedMemConfig
###hipCtxSynchronize
###hipCtxGetFlags
###hipCtxEnablePeerAccess
###hipCtxDisablePeerAccess
### hipCtxCreate
### hipCtxDestroy
### hipCtxPopCurrent
### hipCtxPushCurrent
### hipCtxSetCurrent
### hipCtxGetCurrent
### hipCtxGetDevice
### hipCtxGetApiVersion
### hipCtxGetCacheConfig
### hipCtxSetCacheConfig
### hipCtxSetSharedMemConfig
### hipCtxGetSharedMemConfig
### hipCtxSynchronize
### hipCtxGetFlags
### hipCtxEnablePeerAccess
### hipCtxDisablePeerAccess
@@ -51,6 +51,10 @@ if(WIN32)
target_link_libraries(hipify-clang version)
endif()
if ((LLVM_PACKAGE_VERSION VERSION_EQUAL "7") OR (LLVM_PACKAGE_VERSION VERSION_GREATER "7"))
target_link_libraries(hipify-clang clangToolingInclusions)
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS}")
if(MSVC)
@@ -95,7 +99,7 @@ if (HIPIFY_CLANG_TESTS)
message(STATUS "Please install clang 4.0 or higher.")
elseif (CUDA_VERSION VERSION_EQUAL "9.0")
message(STATUS "Please install clang 6.0 or higher.")
elseif (CUDA_VERSION VERSION_EQUAL "9.1")
elseif ((CUDA_VERSION VERSION_EQUAL "9.1") OR (CUDA_VERSION VERSION_EQUAL "9.2"))
message(STATUS "Please install clang 7.0 or higher.")
endif()
endif()
@@ -24,7 +24,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_TYPE_NAME_MAP{
{"CUaddress_mode", {"hipAddress_mode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
{"CUarray_cubemap_face", {"hipArray_cubemap_face", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
{"CUarray_format", {"hipArray_format", CONV_TYPE, API_DRIVER}},
{"CUcomputemode", {"hipComputemode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaComputeMode)
{"CUcomputemode", {"hipComputeMode", CONV_TYPE, API_DRIVER}}, // API_RUNTIME ANALOGUE (cudaComputeMode)
{"CUmem_advise", {"hipMemAdvise", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaComputeMode)
{"CUmem_range_attribute", {"hipMemRangeAttribute", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaMemRangeAttribute)
{"CUctx_flags", {"hipCctx_flags", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
@@ -236,7 +236,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_TYPE_NAME_MAP{
{"cudaDeviceAttr", {"hipDeviceAttribute_t", CONV_TYPE, API_RUNTIME}}, // API_DRIVER ANALOGUE (CUdevice_attribute)
{"cudaDeviceProp", {"hipDeviceProp_t", CONV_TYPE, API_RUNTIME}},
{"cudaDeviceP2PAttr", {"hipDeviceP2PAttribute", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_DRIVER ANALOGUE (CUdevice_P2PAttribute)
{"cudaComputeMode", {"hipComputeMode", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_DRIVER ANALOGUE (CUcomputemode)
{"cudaComputeMode", {"hipComputeMode", CONV_TYPE, API_RUNTIME}}, // API_DRIVER ANALOGUE (CUcomputemode)
{"cudaFuncCache", {"hipFuncCache_t", CONV_CACHE, API_RUNTIME}}, // API_Driver ANALOGUE (CUfunc_cache)
{"cudaFuncAttributes", {"hipFuncAttributes", CONV_EXEC, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaSharedMemConfig", {"hipSharedMemConfig", CONV_TYPE, API_RUNTIME}},
@@ -628,10 +628,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
{"CU_AD_FORMAT_FLOAT", {"HIP_AD_FORMAT_FLOAT", CONV_TYPE, API_DRIVER}}, // 0x20
// CUcomputemode enum
{"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0 // API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0)
{"CU_COMPUTEMODE_EXCLUSIVE", {"hipComputeModeExclusive", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 1 // API_RUNTIME ANALOGUE (cudaComputeModeExclusive = 1)
{"CU_COMPUTEMODE_PROHIBITED", {"hipComputeModeProhibited", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 2 // API_RUNTIME ANALOGUE (cudaComputeModeProhibited = 2)
{"CU_COMPUTEMODE_EXCLUSIVE_PROCESS", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 3 // API_RUNTIME ANALOGUE (cudaComputeModeExclusiveProcess = 3)
{"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", CONV_TYPE, API_DRIVER}}, // 0 // API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0)
{"CU_COMPUTEMODE_EXCLUSIVE", {"hipComputeModeExclusive", CONV_TYPE, API_DRIVER}}, // 1 // API_RUNTIME ANALOGUE (cudaComputeModeExclusive = 1)
{"CU_COMPUTEMODE_PROHIBITED", {"hipComputeModeProhibited", CONV_TYPE, API_DRIVER}}, // 2 // API_RUNTIME ANALOGUE (cudaComputeModeProhibited = 2)
{"CU_COMPUTEMODE_EXCLUSIVE_PROCESS", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_DRIVER}}, // 3 // API_RUNTIME ANALOGUE (cudaComputeModeExclusiveProcess = 3)
// Memory advise values
// {"CUmem_advise_enum", {"hipMemAdvise", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
@@ -1698,10 +1698,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
{"cudaDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_DRIVER ANALOGUE (cuDeviceGetP2PAttribute)
// enum cudaComputeMode
{"cudaComputeModeDefault", {"hipComputeModeDefault", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_DEFAULT = 0)
{"cudaComputeModeExclusive", {"hipComputeModeExclusive", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE = 1)
{"cudaComputeModeProhibited", {"hipComputeModeProhibited", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_PROHIBITED = 2)
{"cudaComputeModeExclusiveProcess", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3)
{"cudaComputeModeDefault", {"hipComputeModeDefault", CONV_TYPE, API_RUNTIME}}, // 0 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_DEFAULT = 0)
{"cudaComputeModeExclusive", {"hipComputeModeExclusive", CONV_TYPE, API_RUNTIME}}, // 1 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE = 1)
{"cudaComputeModeProhibited", {"hipComputeModeProhibited", CONV_TYPE, API_RUNTIME}}, // 2 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_PROHIBITED = 2)
{"cudaComputeModeExclusiveProcess", {"hipComputeModeExclusiveProcess", CONV_TYPE, API_RUNTIME}}, // 3 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3)
// Device Flags
{"cudaGetDeviceFlags", {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
@@ -428,7 +428,11 @@ public:
void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token& include_token,
StringRef file_name, bool is_angled, clang::CharSourceRange filename_range,
const clang::FileEntry* file, StringRef search_path, StringRef relative_path,
const clang::Module* imported) override {
const clang::Module* imported
#if LLVM_VERSION_MAJOR > 6
, clang::SrcMgr::CharacteristicKind FileType
#endif
) override {
hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported);
}
@@ -23,6 +23,10 @@ namespace llcompat {
#define GET_NUM_ARGS() getNumArgs()
#endif
#if LLVM_VERSION_MAJOR < 7
#define LLVM_DEBUG(X) DEBUG(X)
#endif
void PrintStackTraceOnErrorSignal();
/**
@@ -132,7 +132,7 @@ int main(int argc, const char **argv) {
// Hipify _all_ the things!
if (Tool.runAndSave(&actionFactory)) {
DEBUG(llvm::dbgs() << "Skipped some replacements.\n");
LLVM_DEBUG(llvm::dbgs() << "Skipped some replacements.\n");
}
// Either move the tmpfile to the output, or remove it.
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,80 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @file hcc_detail/device_library_decls.h
* @brief Contains declarations for types and functions in device library.
*/
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_LIBRARY_DECLS_H
#include "hip/hcc_detail/host_defines.h"
extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int);
extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int);
extern "C" __device__ uint __ockl_activelane_u32(void);
extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint);
extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int);
extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint);
extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int);
extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint);
extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float);
extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float);
extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float);
extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float);
extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
// Introduce local address space
#define __local __attribute__((address_space(3)))
#ifdef __HIP_DEVICE_COMPILE__
__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
#endif //__HIP_DEVICE_COMPILE__
// __llvm_fence* functions from device-libs/irif/src/fence.ll
extern "C" __device__ void __llvm_fence_acq_sg(void);
extern "C" __device__ void __llvm_fence_acq_wg(void);
extern "C" __device__ void __llvm_fence_acq_dev(void);
extern "C" __device__ void __llvm_fence_acq_sys(void);
extern "C" __device__ void __llvm_fence_rel_sg(void);
extern "C" __device__ void __llvm_fence_rel_wg(void);
extern "C" __device__ void __llvm_fence_rel_dev(void);
extern "C" __device__ void __llvm_fence_rel_sys(void);
extern "C" __device__ void __llvm_fence_ar_sg(void);
extern "C" __device__ void __llvm_fence_ar_wg(void);
extern "C" __device__ void __llvm_fence_ar_dev(void);
extern "C" __device__ void __llvm_fence_ar_sys(void);
extern "C" __device__ void __llvm_fence_sc_sg(void);
extern "C" __device__ void __llvm_fence_sc_wg(void);
extern "C" __device__ void __llvm_fence_sc_dev(void);
extern "C" __device__ void __llvm_fence_sc_sys(void);
#endif
@@ -23,54 +23,81 @@ THE SOFTWARE.
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_LDG_H
#if defined __HCC__
#if __hcc_workweek__ >= 16164
#if defined(__HCC_OR_HIP_CLANG__)
#if __hcc_workweek__ >= 16164 || defined(__HIP_CLANG_ONLY__)
#include "hip_vector_types.h"
#include "host_defines.h"
__device__ char __ldg(const char*);
__device__ char2 __ldg(const char2*);
__device__ char4 __ldg(const char4*);
__device__ signed char __ldg(const signed char*);
__device__ unsigned char __ldg(const unsigned char*);
__device__ inline static char __ldg(const char* ptr) { return *ptr; }
__device__ short __ldg(const short*);
__device__ short2 __ldg(const short2*);
__device__ short4 __ldg(const short4*);
__device__ unsigned short __ldg(const unsigned short*);
__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; }
__device__ int __ldg(const int*);
__device__ int2 __ldg(const int2*);
__device__ int4 __ldg(const int4*);
__device__ unsigned int __ldg(const unsigned int*);
__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; }
__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; }
__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; }
__device__ long __ldg(const long*);
__device__ unsigned long __ldg(const unsigned long*);
__device__ inline static short __ldg(const short* ptr) { return ptr[0]; }
__device__ long long __ldg(const long long*);
__device__ longlong2 __ldg(const longlong2*);
__device__ unsigned long long __ldg(const unsigned long long*);
__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; }
__device__ uchar2 __ldg(const uchar2*);
__device__ uchar4 __ldg(const uchar4*);
__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; }
__device__ ushort2 __ldg(const ushort2*);
__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; }
__device__ uint2 __ldg(const uint2*);
__device__ uint4 __ldg(const uint4*);
__device__ ulonglong2 __ldg(const ulonglong2*);
__device__ inline static int __ldg(const int* ptr) { return ptr[0]; }
__device__ float __ldg(const float*);
__device__ float2 __ldg(const float2*);
__device__ float4 __ldg(const float4*);
__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; }
__device__ double __ldg(const double*);
__device__ double2 __ldg(const double2*);
__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; }
#endif // __hcc_workweek__
__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; }
#endif // __HCC__
__device__ inline static long __ldg(const long* ptr) { return ptr[0]; }
__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; }
__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; }
__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; }
__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; }
__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; }
__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; }
__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; }
__device__ inline static uint2 __ldg(const uint2* ptr) { return ptr[0]; }
__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; }
__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; }
__device__ inline static float __ldg(const float* ptr) { return ptr[0]; }
__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; }
__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; }
__device__ inline static double __ldg(const double* ptr) { return ptr[0]; }
__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; }
#endif // __hcc_workweek__ || defined(__HIP_CLANG_ONLY__)
#endif // defined(__HCC_OR_HIP_CLANG__)
#endif // HIP_LDG_H
@@ -110,9 +110,9 @@ extern int HIP_TRACE_API;
#include <hip/hcc_detail/host_defines.h>
#include <hip/hcc_detail/math_functions.h>
#include <hip/hcc_detail/device_functions.h>
#include <hip/hcc_detail/surface_functions.h>
#if __HCC__
#include <hip/hcc_detail/texture_functions.h>
#include <hip/hcc_detail/surface_functions.h>
#endif // __HCC__
// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
@@ -184,45 +184,11 @@ extern int HIP_TRACE_API;
#define __HCC_C__
#endif
// TODO - hipify-clang - change to use the function call.
//#define warpSize hc::__wavesize()
static constexpr int warpSize = 64;
#define clock_t long long int
__device__ long long int clock64();
__device__ clock_t clock();
// abort
__device__ void abort();
// warp vote function __all __any __ballot
__device__ int __all(int input);
__device__ int __any(int input);
__device__ unsigned long long int __ballot(int input);
#if __HIP_ARCH_GFX701__ == 0
// warp shuffle functions
#ifdef __cplusplus
__device__ int __shfl(int input, int lane, int width = warpSize);
__device__ int __shfl_up(int input, unsigned int lane_delta, int width = warpSize);
__device__ int __shfl_down(int input, unsigned int lane_delta, int width = warpSize);
__device__ int __shfl_xor(int input, int lane_mask, int width = warpSize);
__device__ float __shfl(float input, int lane, int width = warpSize);
__device__ float __shfl_up(float input, unsigned int lane_delta, int width = warpSize);
__device__ float __shfl_down(float input, unsigned int lane_delta, int width = warpSize);
__device__ float __shfl_xor(float input, int lane_mask, int width = warpSize);
#else
__device__ int __shfl(int input, int lane, int width);
__device__ int __shfl_up(int input, unsigned int lane_delta, int width);
__device__ int __shfl_down(int input, unsigned int lane_delta, int width);
__device__ int __shfl_xor(int input, int lane_mask, int width);
__device__ float __shfl(float input, int lane, int width);
__device__ float __shfl_up(float input, unsigned int lane_delta, int width);
__device__ float __shfl_down(float input, unsigned int lane_delta, int width);
__device__ float __shfl_xor(float input, int lane_mask, int width);
#endif //__cplusplus
__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
__device__ float __hip_ds_bpermutef(int index, float src);
__device__ unsigned __hip_ds_permute(int index, unsigned src);
@@ -235,85 +201,15 @@ __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask
#endif //__HIP_ARCH_GFX803__ == 1
__host__ __device__ int min(int arg1, int arg2);
__host__ __device__ int max(int arg1, int arg2);
__device__ inline static int min(int arg1, int arg2) {
return (arg1 < arg2) ? arg1 : arg2;
}
__device__ inline static int max(int arg1, int arg2) {
return (arg1 > arg2) ? arg1 : arg2;
}
__device__ void* __get_dynamicgroupbaseptr();
/**
* CUDA 8 device function features
*/
/**
* Kernel launching
*/
/**
*-------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
* @defgroup Fence Fence Functions
* @{
*
*
* @warning The HIP memory fence functions are currently not supported yet.
* If any of those threadfence stubs are reached by the application, you should set "export
*HSA_DISABLE_CACHE=1" to disable L1 and L2 caches.
*
*
* On AMD platforms, the threadfence* routines are currently empty stubs.
*/
extern __attribute__((const)) __device__ void __hip_hc_threadfence() __asm("__llvm_fence_sc_dev");
extern __attribute__((const)) __device__ void __hip_hc_threadfence_block() __asm(
"__llvm_fence_sc_wg");
/**
* @brief threadfence_block makes writes visible to threads running in same block.
*
* @Returns void
*
* @param void
*
* @warning __threadfence_block is a stub and map to no-op.
*/
// __device__ void __threadfence_block(void);
__device__ static inline void __threadfence_block(void) { return __hip_hc_threadfence_block(); }
/**
* @brief threadfence makes wirtes visible to other threads running on same GPU.
*
* @Returns void
*
* @param void
*
* @warning __threadfence is a stub and map to no-op, application should set "export
* HSA_DISABLE_CACHE=1" to disable both L1 and L2 caches.
*/
// __device__ void __threadfence(void) __attribute__((deprecated("Provided for compile-time
// compatibility, not yet functional")));
__device__ static inline void __threadfence(void) { return __hip_hc_threadfence(); }
/**
* @brief threadfence_system makes writes to pinned system memory visible on host CPU.
*
* @Returns void
*
* @param void
*
* @warning __threadfence_system is a stub and map to no-op.
*/
//__device__ void __threadfence_system(void) __attribute__((deprecated("Provided with workaround
//configuration, see hip_kernel_language.md for details")));
__device__ void __threadfence_system(void);
// doxygen end Fence Fence
/**
* @}
*/
__host__ inline static int min(int arg1, int arg2) { return std::min(arg1, arg2); }
__host__ inline static int max(int arg1, int arg2) { return std::max(arg1, arg2); }
#endif // __HCC_OR_HIP_CLANG__
@@ -424,17 +320,6 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
#endif //__HCC_CPP__
/**
* extern __shared__
*/
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr();
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
/**
* @defgroup HIP-ENV HIP Environment Variables
* @{
@@ -553,33 +438,6 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
#define hipGridDim_y gridDim.y
#define hipGridDim_z gridDim.z
#pragma push_macro("__DEVICE__")
#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \
__attribute__((weak))
__DEVICE__ void __device_trap() __asm("llvm.trap");
__DEVICE__ void inline __assert_fail(const char * __assertion,
const char *__file,
unsigned int __line,
const char *__function)
{
// Ignore all the args for now.
__device_trap();
}
extern "C" __device__ __attribute__((noduplicate)) void __syncthreads();
extern "C" __device__ void *__amdgcn_get_dynamicgroupbaseptr();
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) \
type* var = (type*)__amdgcn_get_dynamicgroupbaseptr();
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#pragma push_macro("__DEVICE__")
#include <hip/hcc_detail/math_functions.h>
#endif
@@ -2621,6 +2621,13 @@ hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray
return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex);
}
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
hipArray_const_t array,
const struct hipChannelFormatDesc* desc) {
return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex);
}
// C API
hipError_t hipBindTextureToMipmappedArray(const textureReference* tex,
hipMipmappedArray_const_t mipmappedArray,
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,70 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @file hcc_detail/llvm_intrinsics.h
* @brief Contains declarations for wrapper functions for llvm intrinsics
* like llvm.amdgcn.s.barrier.
*/
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_LLVM_INTRINSICS_H
#include "hip/hcc_detail/host_defines.h"
__device__
__attribute__((convergent))
ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32");
__device__
unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize");
__device__
unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32");
__device__
uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64");
extern
__device__
__attribute__((const))
unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo");
extern
__device__
__attribute__((const))
unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi");
__device__
int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute");
__device__
int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute");
__device__
int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle");
__device__
int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask,
bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32");
#endif
@@ -23,8 +23,6 @@ THE SOFTWARE.
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_SURFACE_FUNCTIONS_H
#include <hc.hpp>
#include <hc_short_vector.hpp>
#include <hip/hcc_detail/hip_surface_types.h>
#define __SURFACE_FUNCTIONS_DECL__ static __inline__ __device__
@@ -110,47 +110,47 @@ union TData {
#define TEXTURE_RETURN_UNSIGNED return texel.u.x;
#define TEXTURE_RETURN_CHAR_X return char1(texel.i.x);
#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
#define TEXTURE_RETURN_UCHAR_X return uchar1(texel.u.x);
#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
#define TEXTURE_RETURN_SHORT_X return short1(texel.i.x);
#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
#define TEXTURE_RETURN_USHORT_X return ushort1(texel.u.x);
#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
#define TEXTURE_RETURN_INT_X return int1(texel.i.x);
#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
#define TEXTURE_RETURN_UINT_X return uint1(texel.u.x);
#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
#define TEXTURE_RETURN_FLOAT_X return float1(texel.f.x);
#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);
#define TEXTURE_RETURN_CHAR_XY return char2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UCHAR_XY return uchar2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_SHORT_XY return short2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_USHORT_XY return ushort2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_INT_XY return int2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
#define TEXTURE_RETURN_UINT_XY return uint2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
#define TEXTURE_RETURN_FLOAT_XY return float2(texel.f.x, texel.f.y);
#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);
#define TEXTURE_RETURN_CHAR_XYZW return char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UCHAR_XYZW return uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_SHORT_XYZW return short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_USHORT_XYZW return ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_INT_XYZW return int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
#define TEXTURE_RETURN_UINT_XYZW return uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
#define TEXTURE_RETURN_FLOAT_XYZW return float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
extern "C" {
hc::short_vector::float4::vector_value_type __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i,
@@ -297,7 +297,7 @@ enum hipComputeMode {
hipComputeModeDefault = 0,
hipComputeModeExclusive = 1,
hipComputeModeProhibited = 2,
hipComputeModeExcusiveProcess = 3
hipComputeModeExclusiveProcess = 3
};
/**
@@ -163,6 +163,7 @@ typedef cudaSurfaceObject_t hipSurfaceObject_t;
#define hipTextureType1D cudaTextureType1D
#define hipTextureType1DLayered cudaTextureType1DLayered
#define hipTextureType2D cudaTextureType2D
#define hipTextureType2DLayered cudaTextureType2DLayered
#define hipTextureType3D cudaTextureType3D
#define hipDeviceMapHost cudaDeviceMapHost
@@ -1168,20 +1169,20 @@ inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int
return hipCUDAErrorTohipError(cerror);
}
template <class T, int dim, enum cudaTextureReadMode readMode>
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
const void* devPtr, size_t size = UINT_MAX) {
return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size));
}
template <class T, int dim, enum cudaTextureReadMode readMode>
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
const void* devPtr, const struct hipChannelFormatDesc& desc,
size_t size = UINT_MAX) {
return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
}
template <class T, int dim, enum cudaTextureReadMode readMode>
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex) {
return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
}
@@ -1198,7 +1199,14 @@ inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>&
return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
}
template <class T, int dim, enum cudaTextureReadMode readMode>
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
hipArray_const_t array,
const struct hipChannelFormatDesc* desc) {
return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc));
}
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex,
hipArray_const_t array) {
return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array));
@@ -1239,6 +1247,16 @@ inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDe
hipTextureObject_t textureObject) {
return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject));
}
inline static hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref)
{
return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref));
}
inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array)
{
return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array));
}
#endif //__CUDACC__
#endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H
@@ -5,7 +5,6 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip)
install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip)
@@ -38,7 +38,7 @@ std::string isa_name(std::string triple)
hsa_isa_from_name(triple.c_str(), &tmp) != HSA_STATUS_SUCCESS};
if (is_old_rocr) {
auto tmp{triple.substr(triple.rfind('x') + 1)};
std::string tmp{triple.substr(triple.rfind('x') + 1)};
triple.replace(0, std::string::npos, "AMD:AMDGPU");
for (auto&& x : tmp) {
@@ -51,7 +51,7 @@ std::string isa_name(std::string triple)
}
hsa_isa_t hip_impl::triple_to_hsa_isa(const std::string& triple) {
const auto isa{isa_name(std::move(triple))};
const std::string isa{isa_name(std::move(triple))};
if (isa.empty()) return hsa_isa_t({});
@@ -1,387 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/device_functions.h>
#include <hc.hpp>
#include <grid_launch.h>
#include <hc_math.hpp>
#include "device_util.h"
__device__ float __double2float_rd(double x) { return (double)x; }
__device__ float __double2float_rn(double x) { return (double)x; }
__device__ float __double2float_ru(double x) { return (double)x; }
__device__ float __double2float_rz(double x) { return (double)x; }
__device__ int __double2hiint(double x) {
static_assert(sizeof(double) == 2 * sizeof(int), "");
int tmp[2];
__builtin_memcpy(tmp, &x, sizeof(tmp));
return tmp[1];
}
__device__ int __double2loint(double x) {
static_assert(sizeof(double) == 2 * sizeof(int), "");
int tmp[2];
__builtin_memcpy(tmp, &x, sizeof(tmp));
return tmp[0];
}
__device__ int __double2int_rd(double x) { return (int)x; }
__device__ int __double2int_rn(double x) { return (int)x; }
__device__ int __double2int_ru(double x) { return (int)x; }
__device__ int __double2int_rz(double x) { return (int)x; }
__device__ long long int __double2ll_rd(double x) { return (long long int)x; }
__device__ long long int __double2ll_rn(double x) { return (long long int)x; }
__device__ long long int __double2ll_ru(double x) { return (long long int)x; }
__device__ long long int __double2ll_rz(double x) { return (long long int)x; }
__device__ unsigned int __double2uint_rd(double x) { return (unsigned int)x; }
__device__ unsigned int __double2uint_rn(double x) { return (unsigned int)x; }
__device__ unsigned int __double2uint_ru(double x) { return (unsigned int)x; }
__device__ unsigned int __double2uint_rz(double x) { return (unsigned int)x; }
__device__ unsigned long long int __double2ull_rd(double x) { return (unsigned long long int)x; }
__device__ unsigned long long int __double2ull_rn(double x) { return (unsigned long long int)x; }
__device__ unsigned long long int __double2ull_ru(double x) { return (unsigned long long int)x; }
__device__ unsigned long long int __double2ull_rz(double x) { return (unsigned long long int)x; }
__device__ long long int __double_as_longlong(double x) {
static_assert(sizeof(long long) == sizeof(double), "");
long long tmp;
__builtin_memcpy(&tmp, &x, sizeof(tmp));
return tmp;
}
__device__ int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); }
__device__ int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); }
__device__ int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); }
__device__ int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); }
__device__ long long int __float2ll_rd(float x) { return (long long int)x; }
__device__ long long int __float2ll_rn(float x) { return (long long int)x; }
__device__ long long int __float2ll_ru(float x) { return (long long int)x; }
__device__ long long int __float2ll_rz(float x) { return (long long int)x; }
__device__ unsigned int __float2uint_rd(float x) { return (unsigned int)x; }
__device__ unsigned int __float2uint_rn(float x) { return (unsigned int)x; }
__device__ unsigned int __float2uint_ru(float x) { return (unsigned int)x; }
__device__ unsigned int __float2uint_rz(float x) { return (unsigned int)x; }
__device__ unsigned long long int __float2ull_rd(float x) { return (unsigned long long int)x; }
__device__ unsigned long long int __float2ull_rn(float x) { return (unsigned long long int)x; }
__device__ unsigned long long int __float2ull_ru(float x) { return (unsigned long long int)x; }
__device__ unsigned long long int __float2ull_rz(float x) { return (unsigned long long int)x; }
__device__ int __float_as_int(float x) {
static_assert(sizeof(int) == sizeof(float), "");
int tmp;
__builtin_memcpy(&tmp, &x, sizeof(tmp));
return tmp;
}
__device__ unsigned int __float_as_uint(float x) {
static_assert(sizeof(unsigned int) == sizeof(float), "");
unsigned int tmp;
__builtin_memcpy(&tmp, &x, sizeof(tmp));
return tmp;
}
__device__ double __hiloint2double(int32_t hi, int32_t lo) {
static_assert(sizeof(double) == sizeof(uint64_t), "");
uint64_t tmp0 = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
double tmp1;
__builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
return tmp1;
}
__device__ double __int2double_rn(int x) { return (double)x; }
__device__ float __int2float_rd(int x) { return (float)x; }
__device__ float __int2float_rn(int x) { return (float)x; }
__device__ float __int2float_ru(int x) { return (float)x; }
__device__ float __int2float_rz(int x) { return (float)x; }
__device__ float __int_as_float(int x) {
static_assert(sizeof(float) == sizeof(int), "");
float tmp;
__builtin_memcpy(&tmp, &x, sizeof(tmp));
return tmp;
}
__device__ double __ll2double_rd(long long int x) { return (double)x; }
__device__ double __ll2double_rn(long long int x) { return (double)x; }
__device__ double __ll2double_ru(long long int x) { return (double)x; }
__device__ double __ll2double_rz(long long int x) { return (double)x; }
__device__ float __ll2float_rd(long long int x) { return (float)x; }
__device__ float __ll2float_rn(long long int x) { return (float)x; }
__device__ float __ll2float_ru(long long int x) { return (float)x; }
__device__ float __ll2float_rz(long long int x) { return (float)x; }
__device__ double __longlong_as_double(long long int x) {
static_assert(sizeof(double) == sizeof(long long), "");
double tmp;
__builtin_memcpy(&tmp, &x, sizeof(tmp));
return x;
}
__device__ double __uint2double_rn(int x) { return (double)x; }
__device__ float __uint2float_rd(unsigned int x) { return (float)x; }
__device__ float __uint2float_rn(unsigned int x) { return (float)x; }
__device__ float __uint2float_ru(unsigned int x) { return (float)x; }
__device__ float __uint2float_rz(unsigned int x) { return (float)x; }
__device__ float __uint_as_float(unsigned int x) {
static_assert(sizeof(float) == sizeof(unsigned int), "");
float tmp;
__builtin_memcpy(&tmp, &x, sizeof(tmp));
return tmp;
}
__device__ double __ull2double_rd(unsigned long long int x) { return (double)x; }
__device__ double __ull2double_rn(unsigned long long int x) { return (double)x; }
__device__ double __ull2double_ru(unsigned long long int x) { return (double)x; }
__device__ double __ull2double_rz(unsigned long long int x) { return (double)x; }
__device__ float __ull2float_rd(unsigned long long int x) { return (float)x; }
__device__ float __ull2float_rn(unsigned long long int x) { return (float)x; }
__device__ float __ull2float_ru(unsigned long long int x) { return (float)x; }
__device__ float __ull2float_rz(unsigned long long int x) { return (float)x; }
/*
Integer Intrinsics
*/
// integer intrinsic function __poc __clz __ffs __brev
__device__ unsigned int __popc(unsigned int input) { return hc::__popcount_u32_b32(input); }
__device__ unsigned int __popcll(unsigned long long int input) {
return hc::__popcount_u32_b64(input);
}
__device__ unsigned int __clz(unsigned int input) {
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_u32(input);
#else
return hc::__firstbit_u32_u32(input);
#endif
}
__device__ unsigned int __clzll(unsigned long long int input) {
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_u64(input);
#else
return hc::__firstbit_u32_u64(input);
#endif
}
__device__ unsigned int __clz(int input) {
#ifdef NVCC_COMPAT
return input == 0 ? 32 : hc::__firstbit_u32_s32(input);
#else
return hc::__firstbit_u32_s32(input);
#endif
}
__device__ unsigned int __clzll(long long int input) {
#ifdef NVCC_COMPAT
return input == 0 ? 64 : hc::__firstbit_u32_s64(input);
#else
return hc::__firstbit_u32_s64(input);
#endif
}
__device__ unsigned int __ffs(unsigned int input) {
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u32(input) + 1;
#else
return hc::__lastbit_u32_u32(input);
#endif
}
__device__ unsigned int __ffsll(unsigned long long int input) {
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_u64(input) + 1;
#else
return hc::__lastbit_u32_u64(input);
#endif
}
__device__ unsigned int __ffs(int input) {
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s32(input) + 1;
#else
return hc::__lastbit_u32_s32(input);
#endif
}
__device__ unsigned int __ffsll(long long int input) {
#ifdef NVCC_COMPAT
return hc::__lastbit_u32_s64(input) + 1;
#else
return hc::__lastbit_u32_s64(input);
#endif
}
__device__ unsigned int __brev(unsigned int input) { return hc::__bitrev_b32(input); }
__device__ unsigned long long int __brevll(unsigned long long int input) {
return hc::__bitrev_b64(input);
}
struct ucharHolder {
union {
unsigned char c[4];
unsigned int ui;
};
} __attribute__((aligned(4)));
struct uchar2Holder {
union {
unsigned int ui[2];
unsigned char c[8];
};
} __attribute__((aligned(8)));
__device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) {
struct uchar2Holder cHoldVal;
struct ucharHolder cHoldKey;
struct ucharHolder cHoldOut;
cHoldKey.ui = s;
cHoldVal.ui[0] = x;
cHoldVal.ui[1] = y;
cHoldOut.c[0] = cHoldVal.c[cHoldKey.c[0]];
cHoldOut.c[1] = cHoldVal.c[cHoldKey.c[1]];
cHoldOut.c[2] = cHoldVal.c[cHoldKey.c[2]];
cHoldOut.c[3] = cHoldVal.c[cHoldKey.c[3]];
return cHoldOut.ui;
}
__device__ long long __mul64hi(long long int x, long long int y) {
ulong x0 = (ulong)x & 0xffffffffUL;
long x1 = x >> 32;
ulong y0 = (ulong)y & 0xffffffffUL;
long y1 = y >> 32;
ulong z0 = x0*y0;
long t = x1*y0 + (z0 >> 32);
long z1 = t & 0xffffffffL;
long z2 = t >> 32;
z1 = x0*y1 + z1;
return x1*y1 + z2 + (z1 >> 32);
}
__device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) {
ulong x0 = x & 0xffffffffUL;
ulong x1 = x >> 32;
ulong y0 = y & 0xffffffffUL;
ulong y1 = y >> 32;
ulong z0 = x0*y0;
ulong t = x1*y0 + (z0 >> 32);
ulong z1 = t & 0xffffffffUL;
ulong z2 = t >> 32;
z1 = x0*y1 + z1;
return x1*y1 + z2 + (z1 >> 32);
}
/*
HIP specific device functions
*/
__device__ unsigned __hip_ds_bpermute(int index, unsigned src) {
return hc::__amdgcn_ds_bpermute(index, src);
}
__device__ float __hip_ds_bpermutef(int index, float src) {
return hc::__amdgcn_ds_bpermute(index, src);
}
__device__ unsigned __hip_ds_permute(int index, unsigned src) {
return hc::__amdgcn_ds_permute(index, src);
}
__device__ float __hip_ds_permutef(int index, float src) {
return hc::__amdgcn_ds_permute(index, src);
}
__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern) {
return hc::__amdgcn_ds_swizzle(src, pattern);
}
__device__ float __hip_ds_swizzlef(float src, int pattern) {
return hc::__amdgcn_ds_swizzle(src, pattern);
}
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl) {
return hc::__amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
}
#define MASK1 0x00ff00ff
#define MASK2 0xff00ff00
__device__ char4 __hip_hc_add8pk(char4 in1, char4 in2) {
char4 out;
unsigned one1 = in1.a & MASK1;
unsigned one2 = in2.a & MASK1;
out.a = (one1 + one2) & MASK1;
one1 = in1.a & MASK2;
one2 = in2.a & MASK2;
out.a = out.a | ((one1 + one2) & MASK2);
return out;
}
__device__ char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
char4 out;
unsigned one1 = in1.a & MASK1;
unsigned one2 = in2.a & MASK1;
out.a = (one1 - one2) & MASK1;
one1 = in1.a & MASK2;
one2 = in2.a & MASK2;
out.a = out.a | ((one1 - one2) & MASK2);
return out;
}
__device__ char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
char4 out;
unsigned one1 = in1.a & MASK1;
unsigned one2 = in2.a & MASK1;
out.a = (one1 * one2) & MASK1;
one1 = in1.a & MASK2;
one2 = in2.a & MASK2;
out.a = out.a | ((one1 * one2) & MASK2);
return out;
}
@@ -92,70 +92,7 @@ __device__ void* __hip_hc_free(void* ptr) {
return nullptr;
}
__device__ long long int clock64() { return (long long int)hc::__cycle_u64(); };
__device__ clock_t clock() { return (clock_t)hc::__cycle_u64(); };
// abort
__device__ void abort() { return hc::abort(); }
// warp vote function __all __any __ballot
__device__ int __all(int input) { return hc::__all(input); }
__device__ int __any(int input) {
#ifdef NVCC_COMPAT
if (hc::__any(input) != 0)
return 1;
else
return 0;
#else
return hc::__any(input);
#endif
}
__device__ unsigned long long int __ballot(int input) { return hc::__ballot(input); }
// warp shuffle functions
__device__ int __shfl(int input, int lane, int width) { return hc::__shfl(input, lane, width); }
__device__ int __shfl_up(int input, unsigned int lane_delta, int width) {
return hc::__shfl_up(input, lane_delta, width);
}
__device__ int __shfl_down(int input, unsigned int lane_delta, int width) {
return hc::__shfl_down(input, lane_delta, width);
}
__device__ int __shfl_xor(int input, int lane_mask, int width) {
return hc::__shfl_xor(input, lane_mask, width);
}
__device__ float __shfl(float input, int lane, int width) { return hc::__shfl(input, lane, width); }
__device__ float __shfl_up(float input, unsigned int lane_delta, int width) {
return hc::__shfl_up(input, lane_delta, width);
}
__device__ float __shfl_down(float input, unsigned int lane_delta, int width) {
return hc::__shfl_down(input, lane_delta, width);
}
__device__ float __shfl_xor(float input, int lane_mask, int width) {
return hc::__shfl_xor(input, lane_mask, width);
}
__host__ __device__ int min(int arg1, int arg2) {
return (int)(hc::precise_math::fmin((float)arg1, (float)arg2));
}
__host__ __device__ int max(int arg1, int arg2) {
return (int)(hc::precise_math::fmax((float)arg1, (float)arg2));
}
__device__ void* __get_dynamicgroupbaseptr() {
return hc::get_dynamic_group_segment_base_pointer();
}
__host__ void* __get_dynamicgroupbaseptr() { return nullptr; }
__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); }
-1
مشاهده پرونده
@@ -125,7 +125,6 @@ __device__ double __hip_fast_dsqrt_rd(double x);
__device__ double __hip_fast_dsqrt_rn(double x);
__device__ double __hip_fast_dsqrt_ru(double x);
__device__ double __hip_fast_dsqrt_rz(double x);
__device__ void __threadfence_system(void);
float __hip_host_j0f(float x);
double __hip_host_j0(double x);
-30
مشاهده پرونده
@@ -1,30 +0,0 @@
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--amdhsa"
define i32 @__hip_hc_ir_mul24_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_i32_i24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_umul24_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_u32_u24 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_mulhi_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_hi_i32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_umulhi_int(i32 %a, i32 %b) #1 {
%1 = tail call i32 asm sideeffect "v_mul_hi_u32 $0, $1, $2","=v,v,v"(i32 %a, i32 %b)
ret i32 %1
}
define i32 @__hip_hc_ir_usad_int(i32 %a, i32 %b, i32 %c) #1 {
%1 = tail call i32 asm sideeffect "v_sad_u32 $0, $1, $2, $3","=v,v,v,v"(i32 %a, i32 %b, i32 %c)
ret i32 %1
}
attributes #1 = { alwaysinline nounwind }
-83
مشاهده پرونده
@@ -1,83 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hcc_detail/hip_ldg.h"
#include "hip/hcc_detail/hip_vector_types.h"
__device__ char __ldg(const char* ptr) { return *ptr; }
__device__ char2 __ldg(const char2* ptr) { return *ptr; }
__device__ char4 __ldg(const char4* ptr) { return *ptr; }
__device__ signed char __ldg(const signed char* ptr) { return ptr[0]; }
__device__ unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; }
__device__ short __ldg(const short* ptr) { return ptr[0]; }
__device__ short2 __ldg(const short2* ptr) { return ptr[0]; }
__device__ short4 __ldg(const short4* ptr) { return ptr[0]; }
__device__ unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; }
__device__ int __ldg(const int* ptr) { return ptr[0]; }
__device__ int2 __ldg(const int2* ptr) { return ptr[0]; }
__device__ int4 __ldg(const int4* ptr) { return ptr[0]; }
__device__ unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; }
__device__ long __ldg(const long* ptr) { return ptr[0]; }
__device__ unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; }
__device__ long long __ldg(const long long* ptr) { return ptr[0]; }
__device__ longlong2 __ldg(const longlong2* ptr) { return ptr[0]; }
__device__ unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; }
__device__ uchar2 __ldg(const uchar2* ptr) { return ptr[0]; }
__device__ uchar4 __ldg(const uchar4* ptr) { return ptr[0]; }
__device__ ushort2 __ldg(const ushort2* ptr) { return ptr[0]; }
__device__ uint2 __ldg(const uint2* ptr) { return ptr[0]; }
__device__ uint4 __ldg(const uint4* ptr) { return ptr[0]; }
__device__ ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; }
__device__ float __ldg(const float* ptr) { return ptr[0]; }
__device__ float2 __ldg(const float2* ptr) { return ptr[0]; }
__device__ float4 __ldg(const float4* ptr) { return ptr[0]; }
__device__ double __ldg(const double* ptr) { return ptr[0]; }
__device__ double2 __ldg(const double2* ptr) { return ptr[0]; }
+1 -4
مشاهده پرونده
@@ -1680,12 +1680,9 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
actualDest = pinnedPtr;
}
}
#if 0
if((width == dpitch) && (width == spitch)) {
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
} else
#endif
{
} else {
try {
if(!isLocked){
for (int i = 0; i < height; ++i)
+4 -2
مشاهده پرونده
@@ -389,7 +389,8 @@ hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t
enum hipTextureFilterMode filterMode = tex->filterMode;
int normalizedCoords = tex->normalized;
hipTextureObject_t& textureObject = tex->textureObject;
*offset = 0;
if(offset != nullptr)
*offset = 0;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
@@ -459,7 +460,8 @@ hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size
enum hipTextureFilterMode filterMode = tex->filterMode;
int normalizedCoords = tex->normalized;
hipTextureObject_t& textureObject = tex->textureObject;
*offset = 0;
if(offset != nullptr)
*offset = 0;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -88,7 +88,7 @@ int main() {
for (unsigned int i = 0; i < num_threads; i++) {
unsigned int this_lane_id = i % wave_size;
unsigned int this_mbcnt_lo = this_lane_id >= 32 ? 32 : this_lane_id;
unsigned int this_mbcnt_hi = this_lane_id < 32 ? 0 : (this_lane_id - 22);
unsigned int this_mbcnt_hi = this_lane_id < 32 ? 0 : (this_lane_id - 32);
if (host_mbcnt_lo[i] != this_mbcnt_lo)
mbcnt_lo_errors++;
@@ -35,7 +35,7 @@ THE SOFTWARE.
#include "hip/hip_vector_types.h"
#include "test_common.h"
#if (__hcc_workweek__ >= 16164) || defined(__HIP_PLATFORM_NVCC__)
#if (__hcc_workweek__ >= 16164) || defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_CLANG_ONLY__)
#define HIP_ASSERT(x) (assert((x) == hipSuccess))
@@ -0,0 +1,69 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <type_traits>
template<bool b, typename T = void>
using Enable_if_t = typename std::enable_if<b, T>::type;
__host__ __device__
std::false_type is_vec4(...);
__host__ __device__
std::false_type is_vec3(...);
__host__ __device__
std::false_type is_vec2(...);
__host__ __device__
std::false_type is_vec1(...);
template<typename T>
__host__ __device__
auto is_vec4(const T&) -> decltype(std::declval<T>().xyzw, std::true_type{});
template<
typename T, Enable_if_t<decltype(!is_vec4(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec3(const T&) -> decltype(std::declval<T>().xyz, std::true_type{});
template<
typename T,
Enable_if_t<
!decltype(is_vec4(std::declval<T>())){} &&
!decltype(is_vec3(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec2(const T&) -> decltype(std::declval<T>().xy, std::true_type{});
template<
typename T,
Enable_if_t<
!decltype(is_vec4(std::declval<T>())){} &&
!decltype(is_vec3(std::declval<T>())){} &&
!decltype(is_vec2(std::declval<T>())){}>* = nullptr>
__host__ __device__
auto is_vec1(const T&) -> decltype(std::declval<T>().x, std::true_type{});
template<typename T, int dimension>
__host__ __device__
constexpr
bool is_vec() {
return (dimension == 1) ? decltype(is_vec1(std::declval<T>())){} :
((dimension == 2) ? decltype(is_vec2(std::declval<T>())){} :
((dimension == 3) ? decltype(is_vec3(std::declval<T>())){} :
decltype(is_vec4(std::declval<T>())){}));
}