diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_atomic.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_atomic.h index 2c13411319..263f639e96 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_atomic.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_atomic.h @@ -58,15 +58,15 @@ inline float atomicAdd(float* address, float val) { unsigned int* uaddr{reinterpret_cast(address)}; - unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + unsigned int r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; - unsigned int r; + unsigned int old; do { - r = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + old = __atomic_load_n(uaddr, __ATOMIC_RELAXED); if (r != old) { r = old; continue; } - old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r))); + r = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r))); if (r == old) break; } while (true); @@ -78,15 +78,15 @@ inline double atomicAdd(double* address, double val) { unsigned long long* uaddr{reinterpret_cast(address)}; - unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + unsigned long long r{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; - unsigned long long r; + unsigned long long old; do { - r = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + old = __atomic_load_n(uaddr, __ATOMIC_RELAXED); if (r != old) { r = old; continue; } - old = atomicCAS( + r = atomicCAS( uaddr, r, __double_as_longlong(val + __longlong_as_double(r))); if (r == old) break; @@ -140,13 +140,13 @@ __device__ inline int atomicMin(int* address, int val) { - return __sync_fetch_and_min(address, val); + return __atomic_fetch_min(address, val, __ATOMIC_RELAXED); } __device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val) { - return __sync_fetch_and_umin(address, val); + return __atomic_fetch_min(address, val, __ATOMIC_RELAXED); } __device__ inline @@ -169,13 +169,13 @@ __device__ inline int atomicMax(int* address, int val) { - return __sync_fetch_and_max(address, val); + return __atomic_fetch_max(address, val, __ATOMIC_RELAXED); } __device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val) { - return __sync_fetch_and_umax(address, val); + return __atomic_fetch_max(address, val, __ATOMIC_RELAXED); } __device__ inline diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 3a81305ba3..624f3615e1 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -2063,6 +2063,45 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind); + +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); + /** * @brief Copies data between host and device. * diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h index f80745038a..b203d942a8 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_vector_types.h @@ -44,18 +44,35 @@ THE SOFTWARE. __attribute__((vector_size(__ROUND_UP_TO_NEXT_POT__(n) * sizeof(T)))) #endif -#if defined(__cplusplus) +#if defined(__cplusplus) && defined(__clang__) #include namespace hip_impl { template struct Scalar_accessor { + struct Address { + const Scalar_accessor* p; + + __host__ __device__ + operator const T*() const noexcept { + return &reinterpret_cast(p)[idx]; + } + __host__ __device__ + operator T*() noexcept { + return &reinterpret_cast( + const_cast(p))[idx]; + } + }; + // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor Vector data; __host__ __device__ operator T() const noexcept { return data[idx]; } + __host__ __device__ + Address operator&() const noexcept { return Address{this}; } + __host__ __device__ Scalar_accessor& operator=(T x) noexcept { data[idx] = x; @@ -63,6 +80,29 @@ THE SOFTWARE. return *this; } + __host__ __device__ + Scalar_accessor& operator++() noexcept { + ++data[idx]; + return *this; + } + __host__ __device__ + T operator++(int) noexcept { + auto r{data[idx]}; + ++data[idx]; + return *this; + } + __host__ __device__ + Scalar_accessor& operator--() noexcept { + --data[idx]; + return *this; + } + __host__ __device__ + T operator--(int) noexcept { + auto r{data[idx]}; + --data[idx]; + return *this; + } + __host__ __device__ Scalar_accessor& operator+=(T x) noexcept { data[idx] += x; diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index c8369685ec..140e5bb319 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -1827,6 +1827,24 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp return ihipLogStatus(e); } +hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, size_t srcXOffsetInBytes, size_t srcYOffset, + size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind, + hipStream_t stream, bool isAsync) { + if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){ + return hipErrorInvalidValue; + } else if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){ + return hipErrorInvalidValue; + } + src = (void*)((char*)src+ srcYOffset*spitch + srcXOffsetInBytes); + dst = (void*)((char*)dst+ dstYOffset*dpitch + dstXOffsetInBytes); + if(isAsync){ + return ihipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, hipMemcpyDefault, stream); + } else{ + return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyDefault); + } +} + hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) { if (pCopy == nullptr) { return hipErrorInvalidValue; @@ -1864,18 +1882,10 @@ hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool default: return hipErrorInvalidValue; } - if(pCopy->srcPitch < pCopy->WidthInBytes + pCopy->srcXInBytes || pCopy->srcY >= pCopy->Height){ - return hipErrorInvalidValue; - } else if(pCopy->dstPitch < pCopy->WidthInBytes + pCopy->dstXInBytes || pCopy->dstY >= pCopy->Height){ - return hipErrorInvalidValue; - } - src = (void*)((char*)src+pCopy->srcY*pCopy->srcPitch + pCopy->srcXInBytes); - dst = (void*)((char*)dst+pCopy->dstY*pCopy->dstPitch + pCopy->dstXInBytes); - if(isAsync){ - return ihipMemcpy2DAsync(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault, stream); - } else{ - return ihipMemcpy2D(dst, dpitch, src, spitch, pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault); - } + return ihip2dOffsetMemcpy(dst, dpitch, src, spitch, pCopy->WidthInBytes, + pCopy->Height, pCopy->srcXInBytes, pCopy->srcY, + pCopy->dstXInBytes, pCopy->dstY, hipMemcpyDefault, + stream, isAsync); } hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { @@ -1888,6 +1898,60 @@ hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true)); } +hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind ){ + HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind); + size_t byteSize; + if(src) { + switch (src->desc.f) { + case hipChannelFormatKindSigned: + byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + byteSize = sizeof(size_t); + break; + default: + byteSize = 0; + break; + } + } else { + return ihipLogStatus(hipErrorInvalidValue); + } + return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, hipStreamNull, false)); +} + +hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){ + HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind); + size_t byteSize; + if(src) { + switch (src->desc.f) { + case hipChannelFormatKindSigned: + byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + byteSize = sizeof(size_t); + break; + default: + byteSize = 0; + break; + } + } else { + return ihipLogStatus(hipErrorInvalidValue); + } + return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, stream, true)); +} + // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { HIP_INIT_SPECIAL_API(hipMemsetAsync, (TRACE_MCMD), dst, value, sizeBytes, stream); diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset2D.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset2D.cpp index 2eb62a859f..e7fe932b3d 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset2D.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipMemset2D.cpp @@ -45,7 +45,7 @@ bool testhipMemset2D(int memsetval,int p_gpuDevice) char *A_d; char *A_h; bool testResult = true; - HIPCHECK(hipMemAllocPitch((hipDeviceptr_t*)&A_d, &pitch_A, width , numH,16)); + HIPCHECK (hipMallocPitch((void**)&A_d, &pitch_A, width , numH)); A_h = (char*)malloc(sizeElements); HIPASSERT(A_h != NULL); for (size_t i=0; i