Merge branch 'master' into tex_unbind_issue_fix

[ROCm/clr commit: 9332a39838]
This commit is contained in:
Anusha Godavarthy Surya
2019-10-25 15:54:25 +05:30
5 changed files with 170 additions and 31 deletions
@@ -58,15 +58,15 @@ inline
float atomicAdd(float* address, float val)
{
unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
unsigned int r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
unsigned int r;
unsigned int old;
do {
r = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
if (r != old) { r = old; continue; }
old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
r = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
if (r == old) break;
} while (true);
@@ -78,15 +78,15 @@ inline
double atomicAdd(double* address, double val)
{
unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
unsigned long long r;
unsigned long long old;
do {
r = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
if (r != old) { r = old; continue; }
old = atomicCAS(
r = atomicCAS(
uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
if (r == old) break;
@@ -140,13 +140,13 @@ __device__
inline
int atomicMin(int* address, int val)
{
return __sync_fetch_and_min(address, val);
return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val)
{
return __sync_fetch_and_umin(address, val);
return __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
}
__device__
inline
@@ -169,13 +169,13 @@ __device__
inline
int atomicMax(int* address, int val)
{
return __sync_fetch_and_max(address, val);
return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val)
{
return __sync_fetch_and_umax(address, val);
return __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
}
__device__
inline
@@ -2063,6 +2063,45 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const
hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset,
size_t count, hipMemcpyKind kind);
/**
* @brief Copies data between host and device.
*
* @param[in] dst Destination memory address
* @param[in] dpitch Pitch of destination memory
* @param[in] src Source memory address
* @param[in] wOffset Source starting X offset
* @param[in] hOffset Source starting Y offset
* @param[in] width Width of matrix transfer (columns in bytes)
* @param[in] height Height of matrix transfer (rows)
* @param[in] kind Type of transfer
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
* #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
*
* @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
* hipMemcpyAsync
*/
hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind);
/**
* @brief Copies data between host and device asynchronously.
*
* @param[in] dst Destination memory address
* @param[in] dpitch Pitch of destination memory
* @param[in] src Source memory address
* @param[in] wOffset Source starting X offset
* @param[in] hOffset Source starting Y offset
* @param[in] width Width of matrix transfer (columns in bytes)
* @param[in] height Height of matrix transfer (rows)
* @param[in] kind Type of transfer
* @param[in] stream Accelerator view which the copy is being enqueued
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue,
* #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection
*
* @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol,
* hipMemcpyAsync
*/
hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0));
/**
* @brief Copies data between host and device.
*
@@ -44,18 +44,35 @@ THE SOFTWARE.
__attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T))))
#endif
#if defined(__cplusplus)
#if defined(__cplusplus) && defined(__clang__)
#include <type_traits>
namespace hip_impl {
template<typename T, typename Vector, unsigned int idx>
struct Scalar_accessor {
struct Address {
const Scalar_accessor* p;
__host__ __device__
operator const T*() const noexcept {
return &reinterpret_cast<const T*>(p)[idx];
}
__host__ __device__
operator T*() noexcept {
return &reinterpret_cast<T*>(
const_cast<Scalar_accessor*>(p))[idx];
}
};
// Idea from https://t0rakka.silvrback.com/simd-scalar-accessor
Vector data;
__host__ __device__
operator T() const noexcept { return data[idx]; }
__host__ __device__
Address operator&() const noexcept { return Address{this}; }
__host__ __device__
Scalar_accessor& operator=(T x) noexcept {
data[idx] = x;
@@ -63,6 +80,29 @@ THE SOFTWARE.
return *this;
}
__host__ __device__
Scalar_accessor& operator++() noexcept {
++data[idx];
return *this;
}
__host__ __device__
T operator++(int) noexcept {
auto r{data[idx]};
++data[idx];
return *this;
}
__host__ __device__
Scalar_accessor& operator--() noexcept {
--data[idx];
return *this;
}
__host__ __device__
T operator--(int) noexcept {
auto r{data[idx]};
--data[idx];
return *this;
}
__host__ __device__
Scalar_accessor& operator+=(T x) noexcept {
data[idx] += x;
+76 -12
View File
@@ -1827,6 +1827,24 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
return ihipLogStatus(e);
}
hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, size_t srcXOffsetInBytes, size_t srcYOffset,
size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind,
hipStream_t stream, bool isAsync) {
if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){
return hipErrorInvalidValue;
} else if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){
return hipErrorInvalidValue;
}
src = (void*)((char*)src+ srcYOffset*spitch + srcXOffsetInBytes);
dst = (void*)((char*)dst+ dstYOffset*dpitch + dstXOffsetInBytes);
if(isAsync){
return ihipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, hipMemcpyDefault, stream);
} else{
return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyDefault);
}
}
hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) {
if (pCopy == nullptr) {
return hipErrorInvalidValue;
@@ -1864,18 +1882,10 @@ hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool
default:
return hipErrorInvalidValue;
}
if(pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes || pCopy->srcY >= pCopy->Height){
return hipErrorInvalidValue;
} else if(pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes || pCopy->dstY >= pCopy->Height){
return hipErrorInvalidValue;
}
src = (void*)((char*)src+pCopy->srcY*pCopy->srcPitch + pCopy->srcXInBytes);
dst = (void*)((char*)dst+pCopy->dstY*pCopy->dstPitch + pCopy->dstXInBytes);
if(isAsync){
return ihipMemcpy2DAsync(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream);
} else{
return ihipMemcpy2D(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault);
}
return ihip2dOffsetMemcpy(dst, dpitch, src, spitch, pCopy->WidthInBytes,
pCopy->Height, pCopy->srcXInBytes, pCopy->srcY,
pCopy->dstXInBytes, pCopy->dstY, hipMemcpyDefault,
stream, isAsync);
}
hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
@@ -1888,6 +1898,60 @@ hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream)
return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true));
}
hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind ){
HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind);
size_t byteSize;
if(src) {
switch (src->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, hipStreamNull, false));
}
hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){
HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind);
size_t byteSize;
if(src) {
switch (src->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, stream, true));
}
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipMemsetAsync, (TRACE_MCMD), dst, value, sizeBytes, stream);
@@ -45,7 +45,7 @@ bool testhipMemset2D(int memsetval,int p_gpuDevice)
char *A_d;
char *A_h;
bool testResult = true;
HIPCHECK(hipMemAllocPitch((hipDeviceptr_t*)&A_d, &pitch_A, width , numH,16));
HIPCHECK (hipMallocPitch((void**)&A_d, &pitch_A, width , numH));
A_h = (char*)malloc(sizeElements);
HIPASSERT(A_h != NULL);
for (size_t i=0; i<elements; i++) {
@@ -81,7 +81,7 @@ bool testhipMemset2DAsync(int memsetval,int p_gpuDevice)
char *A_h;
bool testResult = true;
HIPCHECK ( hipMallocPitch((void**)&A_d, &pitch_A, width , numH) );
HIPCHECK (hipMallocPitch((void**)&A_d, &pitch_A, width , numH));
A_h = (char*)malloc(sizeElements);
HIPASSERT(A_h != NULL);
for (size_t i=0; i<elements; i++) {
@@ -110,13 +110,9 @@ int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
HIPCHECK(hipSetDevice(p_gpuDevice));
hipCtx_t context;
hipCtxCreate(&context, 0, p_gpuDevice);
bool testResult = true;
testResult &= testhipMemset2D(memsetval, p_gpuDevice);
testResult &= testhipMemset2DAsync(memsetval, p_gpuDevice);
hipCtxDestroy(context);
if(testResult){
passed();
}