Files
rocm-systems/src/hip_memory.cpp
T

2405 строки
86 KiB
C++
Исходник Обычный вид История

2016-03-24 07:04:01 -05:00
/*
2017-03-31 12:11:34 -05:00
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
2016-10-15 22:55:22 +05:30
2016-10-12 19:14:17 -05:00
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
2016-10-15 22:55:22 +05:30
2016-10-12 19:14:17 -05:00
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2016-10-15 22:55:22 +05:30
2016-10-12 19:14:17 -05:00
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hc_am.hpp>
2016-10-04 22:17:18 +05:30
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hip/hip_runtime.h"
2017-03-31 12:11:34 -05:00
#include "hip_hcc_internal.h"
#include "trace_helper.h"
2017-03-10 15:04:46 -06:00
2019-10-01 12:40:36 +05:30
#include <fstream>
2018-11-01 16:20:35 -04:00
__device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
__device__ uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
2017-03-10 15:04:46 -06:00
// Internal HIP APIS:
namespace hip_internal {
2018-03-12 11:29:03 +05:30
hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hipStream_t stream) {
2017-03-10 15:04:46 -06:00
hipError_t e = hipSuccess;
// Return success if number of bytes to copy is 0
if (sizeBytes == 0) return e;
2017-03-10 15:04:46 -06:00
stream = ihipSyncAndResolveStream(stream);
2017-03-10 15:04:46 -06:00
if ((dst == NULL) || (src == NULL)) {
2018-03-12 11:29:03 +05:30
e = hipErrorInvalidValue;
2017-03-10 15:04:46 -06:00
} else if (stream) {
try {
stream->locked_copyAsync(dst, src, sizeBytes, kind);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2017-03-10 15:04:46 -06:00
e = ex._code;
}
} else {
e = hipErrorInvalidValue;
}
return e;
}
// return 0 on success or -1 on error:
2018-03-12 11:29:03 +05:30
int sharePtr(void* ptr, ihipCtx_t* ctx, bool shareWithAll, unsigned hipFlags) {
2017-03-10 15:04:46 -06:00
int ret = 0;
auto device = ctx->getWriteableDevice();
if (shareWithAll) {
// shareWithAll memory is not mapped to any device
hc::am_memtracker_update(ptr, -1, hipFlags);
2018-03-12 11:29:03 +05:30
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt + 1, g_allAgents, NULL, ptr);
tprintf(DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
if (s != HSA_STATUS_SUCCESS) {
ret = -1;
}
} else {
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
#else
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
#endif
2018-03-12 11:29:03 +05:30
int peerCnt = 0;
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
// the peerCnt always stores self so make sure the trace actually
peerCnt = crit->peerCnt();
2018-03-12 11:29:03 +05:30
tprintf(DB_MEM, " allow access to %d other peer(s)\n", peerCnt - 1);
if (peerCnt > 1) {
2018-03-12 11:29:03 +05:30
// printf ("peer self access\n");
// TODOD - remove me:
2018-03-12 11:29:03 +05:30
for (auto iter = crit->_peers.begin(); iter != crit->_peers.end(); iter++) {
tprintf(DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(),
(iter == crit->_peers.begin()) ? " (self)" : "");
};
2018-03-12 11:29:03 +05:30
hsa_status_t s =
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, ptr);
if (s != HSA_STATUS_SUCCESS) {
ret = -1;
}
2017-03-10 15:04:46 -06:00
}
}
}
return ret;
}
// Allocate a new pointer with am_alloc and share with all valid peers.
// Returns null-ptr if a memory error occurs (either allocation or sharing)
2018-03-12 11:29:03 +05:30
void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool shareWithAll,
unsigned amFlags, unsigned hipFlags, size_t alignment) {
void* ptr = nullptr;
2017-03-10 15:04:46 -06:00
auto device = ctx->getWriteableDevice();
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
if (alignment != 0) {
ptr = hc::am_aligned_alloc(sizeBytes, device->_acc, amFlags, alignment);
2017-08-15 15:51:38 +05:30
} else
#endif
{
ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags);
}
2018-03-12 11:29:03 +05:30
tprintf(DB_MEM, " alloc %s ptr:%p-%p size:%zu on dev:%d\n", msg, ptr,
static_cast<char*>(ptr) + sizeBytes, sizeBytes, device->_deviceId);
2017-03-10 15:04:46 -06:00
if (HIP_INIT_ALLOC != -1) {
// TODO , dont' call HIP API directly here:
hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes);
}
2017-03-10 15:04:46 -06:00
if (ptr != nullptr) {
int r = sharePtr(ptr, ctx, shareWithAll, hipFlags);
2017-03-10 15:04:46 -06:00
if (r != 0) {
ptr = nullptr;
}
}
return ptr;
}
hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) {
hipError_t hip_status = hipSuccess;
if (HIP_SYNC_HOST_ALLOC) {
hipDeviceSynchronize();
}
auto ctx = ihipGetTlsDefaultCtx();
if ((ctx == nullptr) || (ptr == nullptr)) {
hip_status = hipErrorInvalidValue;
}
else if (sizeBytes == 0) {
hip_status = hipSuccess;
// TODO - should size of 0 return err or be siliently ignored?
} else {
unsigned trueFlags = flags;
if (flags == hipHostMallocDefault) {
// HCC/ROCM provide a modern system with unified memory and should set both of these
// flags by default:
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
}
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped |
hipHostMallocWriteCombined | hipHostMallocCoherent |
hipHostMallocNonCoherent;
const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
if ((flags & ~supportedFlags) || ((flags & coherencyFlags) == coherencyFlags)) {
*ptr = nullptr;
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
hip_status = hipErrorInvalidValue;
} else {
auto device = ctx->getWriteableDevice();
#if (__hcc_workweek__ >= 19115)
//Avoid mapping host pinned memory to all devices by HCC
unsigned amFlags = amHostUnmapped;
#else
unsigned amFlags = 0;
#endif
if (flags & hipHostMallocCoherent) {
amFlags |= amHostCoherent;
} else if (flags & hipHostMallocNonCoherent) {
amFlags |= amHostNonCoherent;
} else {
// depends on env variables:
amFlags |= HIP_HOST_COHERENT ? amHostCoherent : amHostNonCoherent;
}
*ptr = hip_internal::allocAndSharePtr(
(amFlags & amHostCoherent) ? "finegrained_host" : "pinned_host", sizeBytes, ctx,
true /*shareWithAll*/, amFlags, flags, 0);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
}
}
if (HIP_SYNC_HOST_ALLOC) {
hipDeviceSynchronize();
}
return hip_status;
}
hipError_t ihipHostFree(TlsData *tls, void* ptr) {
// Synchronize to ensure all work has finished.
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
// for all activity to finish.
hipError_t hipStatus = hipErrorInvalidValue;
if (ptr) {
hc::accelerator acc;
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
if (status == AM_SUCCESS) {
if (amPointerInfo._hostPointer == ptr) {
hc::am_free(ptr);
hipStatus = hipSuccess;
}
}
} else {
// free NULL pointer succeeds and is common technique to initialize runtime
hipStatus = hipSuccess;
}
return hipStatus;
}
2017-03-10 15:04:46 -06:00
2018-03-12 11:29:03 +05:30
} // end namespace hip_internal
2017-03-10 15:04:46 -06:00
2016-03-24 09:28:46 -05:00
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
// Memory
//
//
//
2018-03-12 11:29:03 +05:30
// HIP uses several "app*" fields HC memory tracker to track state necessary for the HIP API.
//_appId : DeviceID. For device mem, this is device where the memory is physically allocated.
2018-03-12 11:29:03 +05:30
// For host or registered mem, this is the current device when the memory is allocated or
// registered. This device will have a GPUVM mapping for the host mem.
//
2018-03-12 11:29:03 +05:30
//_appAllocationFlags : These are flags provided by the user when allocation is performed. They are
//returned to user in hipHostGetFlags and other APIs.
// TODO - add more info here when available.
//
2018-03-12 11:29:03 +05:30
hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipPointerGetAttributes, attributes, ptr);
2016-03-24 09:28:46 -05:00
hipError_t e = hipSuccess;
2018-03-12 11:29:03 +05:30
if ((attributes == nullptr) || (ptr == nullptr)) {
2017-08-25 08:46:34 +05:30
e = hipErrorInvalidValue;
} else {
hc::accelerator acc;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
2017-08-25 08:46:34 +05:30
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
2017-08-25 08:46:34 +05:30
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#endif
2017-08-25 08:46:34 +05:30
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
if (status == AM_SUCCESS) {
2018-03-12 11:29:03 +05:30
attributes->memoryType =
amPointerInfo._isInDeviceMem ? hipMemoryTypeDevice : hipMemoryTypeHost;
attributes->hostPointer = amPointerInfo._hostPointer;
2017-08-25 08:46:34 +05:30
attributes->devicePointer = amPointerInfo._devicePointer;
2018-03-12 11:29:03 +05:30
attributes->isManaged = 0;
if (attributes->memoryType == hipMemoryTypeHost) {
2017-08-25 08:46:34 +05:30
attributes->hostPointer = (void*)ptr;
}
2018-03-12 11:29:03 +05:30
if (attributes->memoryType == hipMemoryTypeDevice) {
2017-08-25 08:46:34 +05:30
attributes->devicePointer = (void*)ptr;
}
attributes->allocationFlags = amPointerInfo._appAllocationFlags;
2018-03-12 11:29:03 +05:30
attributes->device = amPointerInfo._appId;
2016-03-24 09:28:46 -05:00
if (attributes->device < -1) {
2017-08-25 08:46:34 +05:30
e = hipErrorInvalidDevice;
}
} else {
2018-03-12 11:29:03 +05:30
attributes->memoryType = hipMemoryTypeDevice;
attributes->hostPointer = 0;
2017-08-25 08:46:34 +05:30
attributes->devicePointer = 0;
attributes->device = -2;
2018-03-12 11:29:03 +05:30
attributes->isManaged = 0;
2017-08-25 08:46:34 +05:30
attributes->allocationFlags = 0;
e = hipErrorInvalidValue;
2017-08-25 08:46:34 +05:30
}
2016-03-24 09:28:46 -05:00
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags);
2016-03-24 09:28:46 -05:00
hipError_t e = hipSuccess;
// Flags must be 0:
2018-03-12 11:29:03 +05:30
if ((flags != 0) || (devicePointer == nullptr) || (hostPointer == nullptr)) {
2016-03-24 09:28:46 -05:00
e = hipErrorInvalidValue;
} else {
hc::accelerator acc;
2017-08-25 08:46:34 +05:30
*devicePointer = NULL;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
2016-03-24 09:28:46 -05:00
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPointer);
if (status == AM_SUCCESS) {
2018-03-12 11:29:03 +05:30
*devicePointer =
static_cast<char*>(amPointerInfo._devicePointer) +
(static_cast<char*>(hostPointer) - static_cast<char*>(amPointerInfo._hostPointer));
tprintf(DB_MEM, " host_ptr=%p returned device_pointer=%p\n", hostPointer,
*devicePointer);
2016-03-24 09:28:46 -05:00
} else {
e = hipErrorMemoryAllocation;
}
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMalloc(void** ptr, size_t sizeBytes) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMalloc, (TRACE_MEM), ptr, sizeBytes);
2017-01-09 23:54:01 +05:30
HIP_SET_DEVICE();
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
2016-10-11 12:09:58 -05:00
// return NULL pointer when malloc size is 0
if ( nullptr == ctx || nullptr == ptr) {
hip_status = hipErrorInvalidValue;
}
else if (sizeBytes == 0) {
*ptr = NULL;
hip_status = hipSuccess;
} else {
auto device = ctx->getWriteableDevice();
2018-03-12 11:29:03 +05:30
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/,
0 /*amFlags*/, 0 /*hipFlags*/, 0);
2016-03-24 09:28:46 -05:00
2018-03-12 11:29:03 +05:30
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
}
2016-03-24 09:28:46 -05:00
2016-09-01 18:00:31 -05:00
2016-03-24 09:28:46 -05:00
return ihipLogStatus(hip_status);
}
2019-03-19 11:59:22 +05:30
hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) {
HIP_INIT_SPECIAL_API(hipExtMallocWithFlags, (TRACE_MEM), ptr, sizeBytes, flags);
HIP_SET_DEVICE();
#if (__hcc_workweek__ >= 19115)
2019-03-19 11:59:22 +05:30
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
// return NULL pointer when malloc size is 0
if (sizeBytes == 0) {
*ptr = NULL;
hip_status = hipSuccess;
} else if ((ctx == nullptr) || (ptr == nullptr)) {
hip_status = hipErrorInvalidValue;
} else {
unsigned amFlags = 0;
if (flags & hipDeviceMallocFinegrained) {
amFlags = amDeviceFinegrained;
} else if (flags != hipDeviceMallocDefault) {
hip_status = hipErrorInvalidValue;
return ihipLogStatus(hip_status);
}
auto device = ctx->getWriteableDevice();
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/,
amFlags /*amFlags*/, 0 /*hipFlags*/, 0);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
}
#else
hipError_t hip_status = hipErrorMemoryAllocation;
#endif
2019-03-19 11:59:22 +05:30
return ihipLogStatus(hip_status);
}
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
hip_status = hip_internal::ihipHostMalloc(tls, ptr, sizeBytes, flags);
return ihipLogStatus(hip_status);
}
hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
HIP_INIT_SPECIAL_API(hipMallocManaged, (TRACE_MEM), devPtr, size, flags);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
if(flags != hipMemAttachGlobal)
hip_status = hipErrorInvalidValue;
else
hip_status = hip_internal::ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault);
2016-03-24 09:28:46 -05:00
return ihipLogStatus(hip_status);
}
// Deprecated function:
2018-03-12 11:29:03 +05:30
hipError_t hipMallocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
2019-10-04 13:36:31 +05:30
// Deprecated function:
hipError_t hipMemAllocHost(void** ptr, size_t sizeBytes) { return hipHostMalloc(ptr, sizeBytes, 0); }
// Deprecated function:
2018-03-12 11:29:03 +05:30
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
return hipHostMalloc(ptr, sizeBytes, flags);
};
2016-07-21 12:29:56 +05:30
// width in bytes
2019-08-05 02:51:02 -07:00
hipError_t ihipMallocPitch(TlsData* tls, void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) {
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
if(ptr==NULL || pitch == NULL)
{
hip_status=hipErrorInvalidValue;
return hip_status;
}
// hardcoded 128 bytes
2018-03-12 11:29:03 +05:30
*pitch = ((((int)width - 1) / 128) + 1) * 128;
2019-02-28 22:42:46 +00:00
const size_t sizeBytes = (*pitch) * height * ((depth==0) ? 1 : depth);
2016-07-21 12:29:56 +05:30
auto ctx = ihipGetTlsDefaultCtx();
2016-07-21 12:29:56 +05:30
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
2018-03-12 11:29:03 +05:30
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
size_t allocGranularity = 0;
2018-03-12 11:29:03 +05:30
hsa_amd_memory_pool_t* allocRegion =
static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
&allocGranularity);
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = *pitch;
imageDescriptor.height = height;
2018-06-04 18:00:22 +05:30
imageDescriptor.depth = depth;
imageDescriptor.array_size = 0;
2018-03-12 11:29:03 +05:30
if (depth == 0)
2017-12-05 14:11:13 +05:30
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
else
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
imageDescriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
imageDescriptor.format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
hsa_ext_image_data_info_t imageInfo;
2018-03-12 11:29:03 +05:30
hsa_status_t status =
hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
2016-07-21 12:29:56 +05:30
const unsigned am_flags = 0;
2018-03-12 11:29:03 +05:30
*ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx,
false /*shareWithAll*/, am_flags, 0, alignment);
2016-07-21 12:29:56 +05:30
if (sizeBytes && (*ptr == NULL)) {
2016-07-21 12:29:56 +05:30
hip_status = hipErrorMemoryAllocation;
2017-03-31 12:11:34 -05:00
}
} else {
hip_status = hipErrorMemoryAllocation;
2016-07-21 12:29:56 +05:30
}
2017-12-05 14:11:13 +05:30
return hip_status;
}
// width in bytes
2018-03-12 11:29:03 +05:30
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMallocPitch, (TRACE_MEM), ptr, pitch, width, height);
2017-12-05 14:11:13 +05:30
HIP_SET_DEVICE();
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
2017-12-05 14:11:13 +05:30
2018-03-12 11:29:03 +05:30
if (width == 0 || height == 0) return ihipLogStatus(hipErrorUnknown);
2017-12-05 14:11:13 +05:30
2019-08-05 02:51:02 -07:00
hip_status = ihipMallocPitch(tls, ptr, pitch, width, height, 0);
2017-12-05 14:11:13 +05:30
return ihipLogStatus(hip_status);
}
2019-10-04 13:36:31 +05:30
hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes){
HIP_INIT_SPECIAL_API(hipMemAllocPitch, (TRACE_MEM), dptr, pitch, widthInBytes, height,elementSizeBytes);
HIP_SET_DEVICE();
if (widthInBytes == 0 || height == 0) return ihipLogStatus(hipErrorInvalidValue);
return ihipLogStatus(ihipMallocPitch(tls, dptr, pitch, widthInBytes, height, 0));
}
2018-03-12 11:29:03 +05:30
hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipMalloc3D, pitchedDevPtr, &extent);
2017-12-05 14:11:13 +05:30
HIP_SET_DEVICE();
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
2017-12-05 14:11:13 +05:30
2018-03-12 11:29:03 +05:30
if (extent.width == 0 || extent.height == 0) return ihipLogStatus(hipErrorUnknown);
if (!pitchedDevPtr) return ihipLogStatus(hipErrorInvalidValue);
2017-12-05 14:11:13 +05:30
void* ptr;
size_t pitch;
2018-03-12 11:29:03 +05:30
hip_status =
2019-08-05 02:51:02 -07:00
ihipMallocPitch(tls, &pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
2018-03-12 11:29:03 +05:30
if (hip_status == hipSuccess) {
2017-12-05 14:11:13 +05:30
pitchedDevPtr->pitch = pitch;
pitchedDevPtr->xsize = extent.width;
pitchedDevPtr->ysize = extent.height;
}
return ihipLogStatus(hip_status);
2016-07-21 12:29:56 +05:30
}
2018-03-12 11:29:03 +05:30
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
hipChannelFormatDesc cd;
2018-03-12 11:29:03 +05:30
cd.x = x;
cd.y = y;
cd.z = z;
cd.w = w;
cd.f = f;
return cd;
2016-07-21 12:29:56 +05:30
}
extern void getChannelOrderAndType(const hipChannelFormatDesc& desc,
2018-03-12 11:29:03 +05:30
enum hipTextureReadMode readMode,
hsa_ext_image_channel_order_t* channelOrder,
hsa_ext_image_channel_type_t* channelType);
2018-03-12 11:29:03 +05:30
hipError_t hipArrayCreate(hipArray** array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipArrayCreate, (TRACE_MEM), array, pAllocateArray);
2017-11-09 22:10:55 +05:30
HIP_SET_DEVICE();
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
if (pAllocateArray->Width > 0) {
2018-03-12 11:29:03 +05:30
auto ctx = ihipGetTlsDefaultCtx();
2017-11-21 21:19:06 +05:30
*array = (hipArray*)malloc(sizeof(hipArray));
HIP_ARRAY3D_DESCRIPTOR array3D;
array3D.Width = pAllocateArray->Width;
array3D.Height = pAllocateArray->Height;
array3D.Format = pAllocateArray->Format;
array3D.NumChannels = pAllocateArray->NumChannels;
array[0]->width = pAllocateArray->Width;
array[0]->height = pAllocateArray->Height;
array[0]->Format = pAllocateArray->Format;
array[0]->NumChannels = pAllocateArray->NumChannels;
2017-11-09 22:10:55 +05:30
array[0]->isDrv = true;
array[0]->textureType = hipTextureType2D;
2018-03-12 11:29:03 +05:30
void** ptr = &array[0]->data;
if (ctx) {
2017-11-21 21:19:06 +05:30
const unsigned am_flags = 0;
size_t size = pAllocateArray->Width;
if (pAllocateArray->Height > 0) {
size = size * pAllocateArray->Height;
2017-11-21 21:19:06 +05:30
}
2017-11-09 22:10:55 +05:30
hsa_ext_image_channel_type_t channelType;
2017-11-21 21:19:06 +05:30
size_t allocSize = 0;
switch (pAllocateArray->Format) {
2017-11-21 21:19:06 +05:30
case HIP_AD_FORMAT_UNSIGNED_INT8:
allocSize = size * sizeof(uint8_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
break;
case HIP_AD_FORMAT_UNSIGNED_INT16:
allocSize = size * sizeof(uint16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
break;
case HIP_AD_FORMAT_UNSIGNED_INT32:
allocSize = size * sizeof(uint32_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
break;
case HIP_AD_FORMAT_SIGNED_INT8:
allocSize = size * sizeof(int8_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
break;
case HIP_AD_FORMAT_SIGNED_INT16:
allocSize = size * sizeof(int16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
break;
case HIP_AD_FORMAT_SIGNED_INT32:
allocSize = size * sizeof(int32_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
break;
case HIP_AD_FORMAT_HALF:
allocSize = size * sizeof(int16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
break;
case HIP_AD_FORMAT_FLOAT:
allocSize = size * sizeof(float);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
break;
default:
hip_status = hipErrorUnknown;
break;
}
hc::accelerator acc = ctx->getDevice()->_acc;
2018-03-12 11:29:03 +05:30
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
2017-11-21 21:19:06 +05:30
size_t allocGranularity = 0;
2018-03-12 11:29:03 +05:30
hsa_amd_memory_pool_t* allocRegion =
static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
hsa_amd_memory_pool_get_info(
*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);
2017-11-21 21:19:06 +05:30
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = pAllocateArray->Width;
imageDescriptor.height = pAllocateArray->Height;
2017-11-21 21:19:06 +05:30
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
hsa_ext_image_channel_order_t channelOrder;
if (pAllocateArray->NumChannels == 4) {
2017-11-21 21:19:06 +05:30
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
} else if (pAllocateArray->NumChannels == 2) {
2017-11-21 21:19:06 +05:30
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
} else if (pAllocateArray->NumChannels == 1) {
2017-11-21 21:19:06 +05:30
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
hsa_ext_image_data_info_t imageInfo;
2018-03-12 11:29:03 +05:30
hsa_status_t status =
hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
2017-11-21 21:19:06 +05:30
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
2018-03-12 11:29:03 +05:30
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx,
false /*shareWithAll*/, am_flags, 0, alignment);
2017-11-21 21:19:06 +05:30
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
} else {
hip_status = hipErrorMemoryAllocation;
}
} else {
hip_status = hipErrorInvalidValue;
2018-03-12 11:29:03 +05:30
}
2017-11-09 22:10:55 +05:30
return ihipLogStatus(hip_status);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width,
size_t height, unsigned int flags) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMallocArray, (TRACE_MEM), array, desc, width, height, flags);
2017-01-09 23:54:01 +05:30
HIP_SET_DEVICE();
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
if (width > 0) {
2017-09-12 21:52:11 +05:30
auto ctx = ihipGetTlsDefaultCtx();
*array = (hipArray*)malloc(sizeof(hipArray));
2018-03-12 11:29:03 +05:30
array[0]->type = flags;
2017-09-12 21:52:11 +05:30
array[0]->width = width;
array[0]->height = height;
array[0]->depth = 1;
array[0]->desc = *desc;
2017-11-09 22:10:55 +05:30
array[0]->isDrv = false;
2017-12-05 14:11:13 +05:30
array[0]->textureType = hipTextureType2D;
2018-03-12 11:29:03 +05:30
void** ptr = &array[0]->data;
2017-09-12 21:52:11 +05:30
if (ctx) {
const unsigned am_flags = 0;
size_t size = width;
2018-03-12 11:29:03 +05:30
if (height > 0) {
2017-09-12 21:52:11 +05:30
size = size * height;
}
2016-07-21 12:29:56 +05:30
const size_t allocSize = size * ((desc->x + desc->y + desc->z + desc->w) / 8);
2017-09-12 21:52:11 +05:30
hc::accelerator acc = ctx->getDevice()->_acc;
2018-03-12 11:29:03 +05:30
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
2017-09-12 21:52:11 +05:30
size_t allocGranularity = 0;
2018-03-12 11:29:03 +05:30
hsa_amd_memory_pool_t* allocRegion =
static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
hsa_amd_memory_pool_get_info(
*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);
2017-09-12 21:52:11 +05:30
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = width;
imageDescriptor.height = height;
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
switch (flags) {
2018-03-12 11:29:03 +05:30
case hipArrayLayered:
case hipArrayCubemap:
case hipArraySurfaceLoadStore:
case hipArrayTextureGather:
assert(0);
break;
case hipArrayDefault:
default:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
break;
2017-09-12 21:52:11 +05:30
}
hsa_ext_image_channel_order_t channelOrder;
hsa_ext_image_channel_type_t channelType;
2017-11-21 21:19:06 +05:30
getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType);
2017-09-12 21:52:11 +05:30
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
hsa_ext_image_data_info_t imageInfo;
2018-03-12 11:29:03 +05:30
hsa_status_t status =
hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
2017-09-12 21:52:11 +05:30
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
2018-03-12 11:29:03 +05:30
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx,
false /*shareWithAll*/, am_flags, 0, alignment);
2017-09-12 21:52:11 +05:30
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
2017-09-12 21:52:11 +05:30
} else {
hip_status = hipErrorMemoryAllocation;
2017-03-31 12:11:34 -05:00
}
2016-07-21 12:29:56 +05:30
} else {
2018-03-12 11:29:03 +05:30
hip_status = hipErrorInvalidValue;
2016-07-21 12:29:56 +05:30
}
return ihipLogStatus(hip_status);
}
hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipArray3DCreate, (TRACE_MEM), array, pAllocateArray);
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
2017-12-05 14:11:13 +05:30
auto ctx = ihipGetTlsDefaultCtx();
*array = (hipArray*)malloc(sizeof(hipArray));
array[0]->type = pAllocateArray->Flags;
array[0]->width = pAllocateArray->Width;
array[0]->height = pAllocateArray->Height;
array[0]->depth = pAllocateArray->Depth;
array[0]->Format = pAllocateArray->Format;
array[0]->NumChannels = pAllocateArray->NumChannels;
2017-12-05 14:11:13 +05:30
array[0]->isDrv = true;
array[0]->textureType = hipTextureType3D;
2018-03-12 11:29:03 +05:30
void** ptr = &array[0]->data;
2017-12-05 14:11:13 +05:30
if (ctx) {
const unsigned am_flags = 0;
const size_t size = pAllocateArray->Width * pAllocateArray->Height * pAllocateArray->Depth;
2017-12-05 14:11:13 +05:30
size_t allocSize = 0;
hsa_ext_image_channel_type_t channelType;
switch (pAllocateArray->Format) {
2017-12-05 14:11:13 +05:30
case HIP_AD_FORMAT_UNSIGNED_INT8:
allocSize = size * sizeof(uint8_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
break;
case HIP_AD_FORMAT_UNSIGNED_INT16:
allocSize = size * sizeof(uint16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
break;
case HIP_AD_FORMAT_UNSIGNED_INT32:
allocSize = size * sizeof(uint32_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
break;
case HIP_AD_FORMAT_SIGNED_INT8:
allocSize = size * sizeof(int8_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
break;
case HIP_AD_FORMAT_SIGNED_INT16:
allocSize = size * sizeof(int16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
break;
case HIP_AD_FORMAT_SIGNED_INT32:
allocSize = size * sizeof(int32_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
break;
case HIP_AD_FORMAT_HALF:
allocSize = size * sizeof(int16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
break;
case HIP_AD_FORMAT_FLOAT:
allocSize = size * sizeof(float);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
break;
default:
hip_status = hipErrorUnknown;
break;
}
hc::accelerator acc = ctx->getDevice()->_acc;
2018-03-12 11:29:03 +05:30
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
2017-12-05 14:11:13 +05:30
size_t allocGranularity = 0;
2018-03-12 11:29:03 +05:30
hsa_amd_memory_pool_t* allocRegion =
static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
&allocGranularity);
2017-12-05 14:11:13 +05:30
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = pAllocateArray->Width;
imageDescriptor.height = pAllocateArray->Height;
2017-12-05 14:11:13 +05:30
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
switch (pAllocateArray->Flags) {
2018-03-12 11:29:03 +05:30
case hipArrayLayered:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
imageDescriptor.array_size = pAllocateArray->Depth;
2018-03-12 11:29:03 +05:30
break;
case hipArraySurfaceLoadStore:
case hipArrayTextureGather:
case hipArrayDefault:
assert(0);
break;
case hipArrayCubemap:
default:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
imageDescriptor.depth = pAllocateArray->Depth;
2018-03-12 11:29:03 +05:30
break;
2017-12-05 14:11:13 +05:30
}
hsa_ext_image_channel_order_t channelOrder;
2018-03-12 11:29:03 +05:30
// getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType);
if (pAllocateArray->NumChannels == 4) {
2017-12-05 14:11:13 +05:30
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
} else if (pAllocateArray->NumChannels == 2) {
2017-12-05 14:11:13 +05:30
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
} else if (pAllocateArray->NumChannels == 1) {
2017-12-05 14:11:13 +05:30
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
hsa_ext_image_data_info_t imageInfo;
2018-03-12 11:29:03 +05:30
hsa_status_t status =
hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
2017-12-05 14:11:13 +05:30
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
2018-03-12 11:29:03 +05:30
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0,
alignment);
2017-12-05 14:11:13 +05:30
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
} else {
hip_status = hipErrorMemoryAllocation;
}
return ihipLogStatus(hip_status);
}
2018-05-02 11:56:37 +05:30
hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc,
2018-03-12 11:29:03 +05:30
struct hipExtent extent, unsigned int flags) {
2018-04-20 17:40:00 +05:30
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipMalloc3DArray, array, desc, &extent, flags);
2017-07-17 15:16:12 -04:00
HIP_SET_DEVICE();
2018-03-12 11:29:03 +05:30
hipError_t hip_status = hipSuccess;
2017-07-17 15:16:12 -04:00
2018-06-04 18:00:22 +05:30
if(array==NULL )
{
2018-04-20 17:40:00 +05:30
hip_status=hipErrorInvalidValue;
return ihipLogStatus(hip_status);
2018-06-04 18:00:22 +05:30
}
2017-07-17 15:16:12 -04:00
auto ctx = ihipGetTlsDefaultCtx();
*array = (hipArray*)malloc(sizeof(hipArray));
2018-03-12 11:29:03 +05:30
array[0]->type = flags;
2017-07-17 15:16:12 -04:00
array[0]->width = extent.width;
array[0]->height = extent.height;
array[0]->depth = extent.depth;
array[0]->desc = *desc;
2017-12-05 14:11:13 +05:30
array[0]->isDrv = false;
array[0]->textureType = hipTextureType3D;
2018-03-12 11:29:03 +05:30
void** ptr = &array[0]->data;
2017-07-17 15:16:12 -04:00
if (ctx) {
const unsigned am_flags = 0;
2018-03-12 11:29:03 +05:30
const size_t size = extent.width * extent.height * extent.depth;
2017-07-17 15:16:12 -04:00
const size_t allocSize = size * ((desc->x + desc->y + desc->z + desc->w) / 8);
hc::accelerator acc = ctx->getDevice()->_acc;
2018-03-12 11:29:03 +05:30
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
size_t allocGranularity = 0;
2018-03-12 11:29:03 +05:30
hsa_amd_memory_pool_t* allocRegion =
static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
&allocGranularity);
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = extent.width;
imageDescriptor.height = extent.height;
2018-06-04 18:00:22 +05:30
imageDescriptor.depth = extent.depth;
imageDescriptor.array_size = 0;
switch (flags) {
2018-03-12 11:29:03 +05:30
case hipArrayLayered:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
imageDescriptor.array_size = extent.depth;
break;
case hipArraySurfaceLoadStore:
case hipArrayTextureGather:
case hipArrayDefault:
assert(0);
break;
case hipArrayCubemap:
default:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
imageDescriptor.depth = extent.depth;
break;
}
hsa_ext_image_channel_order_t channelOrder;
hsa_ext_image_channel_type_t channelType;
2017-11-21 21:19:06 +05:30
getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType);
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
hsa_ext_image_data_info_t imageInfo;
2018-03-12 11:29:03 +05:30
hsa_status_t status =
hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
2018-03-12 11:29:03 +05:30
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0,
alignment);
2017-07-17 15:16:12 -04:00
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
} else {
hip_status = hipErrorMemoryAllocation;
}
2017-12-05 14:11:13 +05:30
return ihipLogStatus(hip_status);
2017-07-17 15:16:12 -04:00
}
2018-03-12 11:29:03 +05:30
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr);
2016-03-24 09:28:46 -05:00
hipError_t hip_status = hipSuccess;
hc::accelerator acc;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
2018-03-12 11:29:03 +05:30
if (status == AM_SUCCESS) {
*flagsPtr = amPointerInfo._appAllocationFlags;
//0 is valid flag hipHostMallocDefault, and during hipHostMalloc if unsupported flags are passed as parameter it throws error
hip_status = hipSuccess;
tprintf(DB_MEM, " %s: host ptr=%p\n", __func__, hostPtr);
2018-03-12 11:29:03 +05:30
} else {
hip_status = hipErrorInvalidValue;
}
return ihipLogStatus(hip_status);
2016-03-24 09:28:46 -05:00
}
// TODO - need to fix several issues here related to P2P access, host memory fallback.
2018-03-12 11:29:03 +05:30
hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags);
2016-03-24 09:28:46 -05:00
2016-04-15 10:32:01 -05:00
hipError_t hip_status = hipSuccess;
2016-03-24 09:28:46 -05:00
2016-08-07 21:46:51 -05:00
auto ctx = ihipGetTlsDefaultCtx();
2018-03-12 11:29:03 +05:30
if (hostPtr == NULL) {
2016-04-15 10:32:01 -05:00
return ihipLogStatus(hipErrorInvalidValue);
}
2016-04-15 10:08:10 -05:00
2016-04-15 10:32:01 -05:00
hc::accelerator acc;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
2016-04-15 10:32:01 -05:00
am_status_t am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
2018-03-12 11:29:03 +05:30
if (am_status == AM_SUCCESS) {
2016-04-15 10:32:01 -05:00
hip_status = hipErrorHostMemoryAlreadyRegistered;
} else {
2016-08-07 21:46:51 -05:00
auto ctx = ihipGetTlsDefaultCtx();
2017-03-10 15:04:46 -06:00
if (hostPtr == NULL) {
2016-04-15 10:32:01 -05:00
return ihipLogStatus(hipErrorInvalidValue);
}
2018-03-12 11:29:03 +05:30
// TODO-test : multi-gpu access to registered host memory.
if (ctx) {
if ((flags == hipHostRegisterDefault) || (flags & hipHostRegisterPortable) ||
(flags & hipHostRegisterMapped) || (flags == hipExtHostRegisterCoarseGrained)) {
2017-03-10 15:04:46 -06:00
auto device = ctx->getWriteableDevice();
2018-03-12 11:29:03 +05:30
std::vector<hc::accelerator> vecAcc;
for (int i = 0; i < g_deviceCnt; i++) {
2016-08-07 21:46:51 -05:00
vecAcc.push_back(ihipGetDevice(i)->_acc);
2016-04-15 10:32:01 -05:00
}
#if (__hcc_workweek__ >= 19183)
if(flags & hipExtHostRegisterCoarseGrained) {
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0],
vecAcc.size());
} else {
am_status = hc::am_memory_host_lock_with_flag(device->_acc, hostPtr, sizeBytes, &vecAcc[0],
vecAcc.size());
}
#else
2018-03-12 11:29:03 +05:30
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0],
vecAcc.size());
#endif
2018-11-17 05:38:35 +05:30
if ( am_status == AM_SUCCESS ) {
am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
if ( am_status == AM_SUCCESS ) {
void *devPtr = amPointerInfo._devicePointer;
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx);
hc::am_memtracker_update(devPtr, device->_deviceId, flags, ctx);
#else
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
hc::am_memtracker_update(devPtr, device->_deviceId, flags);
#endif
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__,
hostPtr, vecAcc.size());
};
};
2018-03-12 11:29:03 +05:30
if (am_status == AM_SUCCESS) {
2016-04-15 10:32:01 -05:00
hip_status = hipSuccess;
} else {
2016-04-15 10:32:01 -05:00
hip_status = hipErrorMemoryAllocation;
}
} else {
2016-04-15 10:32:01 -05:00
hip_status = hipErrorInvalidValue;
}
}
}
return ihipLogStatus(hip_status);
}
2016-03-24 09:28:46 -05:00
2018-03-12 11:29:03 +05:30
hipError_t hipHostUnregister(void* hostPtr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipHostUnregister, hostPtr);
2016-08-07 21:46:51 -05:00
auto ctx = ihipGetTlsDefaultCtx();
2016-04-15 10:32:01 -05:00
hipError_t hip_status = hipSuccess;
2018-03-12 11:29:03 +05:30
if (hostPtr == NULL) {
2016-04-15 10:32:01 -05:00
hip_status = hipErrorInvalidValue;
2018-03-12 11:29:03 +05:30
} else {
auto device = ctx->getWriteableDevice();
am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr);
tprintf(DB_MEM, " %s unregistered ptr=%p\n", __func__, hostPtr);
2018-03-12 11:29:03 +05:30
if (am_status != AM_SUCCESS) {
hip_status = hipErrorHostMemoryNotRegistered;
2016-04-15 10:32:01 -05:00
}
}
return ihipLogStatus(hip_status);
2016-03-24 09:28:46 -05:00
}
2019-03-06 14:01:44 +02:00
namespace hip_impl {
hipError_t hipMemcpyToSymbol(void* dst, const void* src, size_t count,
size_t offset, hipMemcpyKind kind,
const char* symbol_name) {
HIP_INIT_SPECIAL_API(hipMemcpyToSymbol, (TRACE_MCMD), symbol_name, src,
count, offset, kind);
2018-03-12 11:29:03 +05:30
2019-03-06 14:01:44 +02:00
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst);
2016-10-11 13:29:46 -05:00
2018-03-12 11:29:03 +05:30
if (dst == nullptr) {
2016-10-11 13:29:46 -05:00
return ihipLogStatus(hipErrorInvalidSymbol);
}
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDefault ||
2018-03-12 11:29:03 +05:30
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
stream->locked_copySync((char*)dst+offset, (void*)src, count, kind, false);
2017-02-01 17:54:59 -06:00
} else {
2018-03-12 11:29:03 +05:30
return ihipLogStatus(hipErrorInvalidValue);
2017-02-01 17:54:59 -06:00
}
2016-10-11 13:29:46 -05:00
2016-03-24 09:28:46 -05:00
return ihipLogStatus(hipSuccess);
}
2019-03-06 14:01:44 +02:00
hipError_t hipMemcpyFromSymbol(void* dst, const void* src, size_t count,
size_t offset, hipMemcpyKind kind,
const char* symbol_name) {
HIP_INIT_SPECIAL_API(hipMemcpyFromSymbol, (TRACE_MCMD), symbol_name, dst,
count, offset, kind);
2019-03-06 14:01:44 +02:00
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst);
2018-03-12 11:29:03 +05:30
if (dst == nullptr) {
return ihipLogStatus(hipErrorInvalidSymbol);
}
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
if (kind == hipMemcpyDefault || kind == hipMemcpyDeviceToHost ||
2018-03-12 11:29:03 +05:30
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
stream->locked_copySync((void*)dst, (char*)src+offset, count, kind, false);
2018-03-12 11:29:03 +05:30
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
return ihipLogStatus(hipSuccess);
}
2019-03-06 14:01:44 +02:00
hipError_t hipMemcpyToSymbolAsync(void* dst, const void* src, size_t count,
size_t offset, hipMemcpyKind kind,
hipStream_t stream, const char* symbol_name) {
HIP_INIT_SPECIAL_API(hipMemcpyToSymbolAsync, (TRACE_MCMD), symbol_name, src,
count, offset, kind, stream);
2016-10-11 13:29:46 -05:00
2019-03-06 14:01:44 +02:00
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst);
2016-10-11 13:29:46 -05:00
2018-03-12 11:29:03 +05:30
if (dst == nullptr) {
2016-10-11 13:29:46 -05:00
return ihipLogStatus(hipErrorInvalidSymbol);
}
2019-03-06 14:01:44 +02:00
hipError_t e = hipSuccess;
2016-10-11 13:29:46 -05:00
if (stream) {
try {
hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2016-10-11 13:29:46 -05:00
e = ex._code;
}
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
2017-02-22 19:16:35 -06:00
}
2019-03-06 14:01:44 +02:00
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* src, size_t count,
size_t offset, hipMemcpyKind kind,
hipStream_t stream, const char* symbol_name) {
HIP_INIT_SPECIAL_API(hipMemcpyFromSymbolAsync, (TRACE_MCMD), symbol_name,
dst, count, offset, kind, stream);
2017-02-22 19:16:35 -06:00
2019-03-06 14:01:44 +02:00
tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, src);
2016-10-11 13:29:46 -05:00
2018-03-12 11:29:03 +05:30
if (src == nullptr || dst == nullptr) {
2017-02-22 19:16:35 -06:00
return ihipLogStatus(hipErrorInvalidSymbol);
}
2019-03-06 14:01:44 +02:00
hipError_t e = hipSuccess;
2017-03-24 10:30:33 +05:30
stream = ihipSyncAndResolveStream(stream);
2017-02-22 19:16:35 -06:00
if (stream) {
try {
hip_internal::memcpyAsync(dst, (char*)src+offset, count, kind, stream);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2017-02-22 19:16:35 -06:00
e = ex._code;
}
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
2016-10-11 13:29:46 -05:00
}
2019-03-06 14:01:44 +02:00
} // Namespace hip_impl.
2016-03-24 09:28:46 -05:00
//---
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpy, (TRACE_MCMD), dst, src, sizeBytes, kind);
2016-03-24 09:28:46 -05:00
hipError_t e = hipSuccess;
// Return success if number of bytes to copy is 0
if (sizeBytes == 0) return ihipLogStatus(e);
2016-03-24 09:28:46 -05:00
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
if(dst==NULL || src==NULL)
{
e=hipErrorInvalidValue;
return ihipLogStatus(e);
}
2016-03-24 09:28:46 -05:00
try {
2016-03-28 09:46:40 -05:00
stream->locked_copySync(dst, src, sizeBytes, kind);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2016-03-24 09:28:46 -05:00
e = ex._code;
}
2016-08-26 13:11:01 -05:00
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyHtoD, (TRACE_MCMD), dst, src, sizeBytes);
2016-08-26 13:11:01 -05:00
hipError_t e = hipSuccess;
if (sizeBytes == 0) return ihipLogStatus(e);
if(dst==NULL || src==NULL){
return ihipLogStatus(hipErrorInvalidValue);
}
2016-08-26 13:11:01 -05:00
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
try {
stream->locked_copySync((void*)dst, (void*)src, sizeBytes, hipMemcpyHostToDevice, false);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2016-08-26 13:11:01 -05:00
e = ex._code;
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyDtoH, (TRACE_MCMD), dst, src, sizeBytes);
2016-08-26 13:11:01 -05:00
hipError_t e = hipSuccess;
if (sizeBytes == 0) return ihipLogStatus(e);
if(dst==NULL || src==NULL){
return ihipLogStatus(hipErrorInvalidValue);
}
2016-08-26 13:11:01 -05:00
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
try {
stream->locked_copySync((void*)dst, (void*)src, sizeBytes, hipMemcpyDeviceToHost, false);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2016-08-26 13:11:01 -05:00
e = ex._code;
}
2016-03-24 09:28:46 -05:00
2016-08-26 13:11:01 -05:00
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyDtoD, (TRACE_MCMD), dst, src, sizeBytes);
2016-08-26 13:11:01 -05:00
hipError_t e = hipSuccess;
if (sizeBytes == 0) return ihipLogStatus(e);
if(dst==NULL || src==NULL){
return ihipLogStatus(hipErrorInvalidValue);
}
2016-08-26 13:11:01 -05:00
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
try {
stream->locked_copySync((void*)dst, (void*)src, sizeBytes, hipMemcpyDeviceToDevice, false);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2016-08-26 13:11:01 -05:00
e = ex._code;
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyHtoH, (TRACE_MCMD), dst, src, sizeBytes);
2016-08-26 13:11:01 -05:00
hipError_t e = hipSuccess;
if (sizeBytes == 0) return ihipLogStatus(e);
if(dst==NULL || src==NULL){
return ihipLogStatus(hipErrorInvalidValue);
}
2016-08-26 13:11:01 -05:00
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
try {
stream->locked_copySync((void*)dst, (void*)src, sizeBytes, hipMemcpyHostToHost, false);
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
2016-08-26 13:11:01 -05:00
e = ex._code;
}
2016-03-24 09:28:46 -05:00
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hipStream_t stream) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyAsync, (TRACE_MCMD), dst, src, sizeBytes, kind, stream);
2016-09-09 10:21:52 -05:00
2016-11-10 10:49:44 -06:00
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream));
}
2016-09-09 10:21:52 -05:00
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyHtoDAsync, (TRACE_MCMD), dst, src, sizeBytes, stream);
2016-09-09 10:21:52 -05:00
2018-03-12 11:29:03 +05:30
return ihipLogStatus(
hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyHostToDevice, stream));
2016-09-09 10:21:52 -05:00
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes,
hipStream_t stream) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyDtoDAsync, (TRACE_MCMD), dst, src, sizeBytes, stream);
2016-09-09 10:21:52 -05:00
2018-03-12 11:29:03 +05:30
return ihipLogStatus(
hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
2016-09-09 10:21:52 -05:00
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyDtoHAsync, (TRACE_MCMD), dst, src, sizeBytes, stream);
2016-09-09 10:21:52 -05:00
2018-03-12 11:29:03 +05:30
return ihipLogStatus(
hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream));
2016-09-09 10:21:52 -05:00
}
2016-07-21 12:29:56 +05:30
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
2018-03-12 11:29:03 +05:30
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpy2DToArray, (TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind);
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
size_t byteSize;
2018-03-12 11:29:03 +05:30
if (dst) {
switch (dst[0].desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
} else {
return ihipLogStatus(hipErrorUnknown);
2016-07-21 12:29:56 +05:30
}
2018-03-12 11:29:03 +05:30
if ((wOffset + width > (dst->width * byteSize)) || width > spitch) {
return ihipLogStatus(hipErrorUnknown);
}
2016-07-21 12:29:56 +05:30
size_t src_w = spitch;
2018-03-12 11:29:03 +05:30
size_t dst_w = (dst->width) * byteSize;
2016-07-21 12:29:56 +05:30
try {
2018-03-12 11:29:03 +05:30
for (int i = 0; i < height; ++i) {
stream->locked_copySync((unsigned char*)dst->data + i * dst_w,
(unsigned char*)src + i * src_w, width, kind);
}
2018-03-12 11:29:03 +05:30
} catch (ihipException& ex) {
e = ex._code;
2016-07-21 12:29:56 +05:30
}
return ihipLogStatus(e);
2016-07-21 12:29:56 +05:30
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
size_t count, hipMemcpyKind kind) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyToArray, (TRACE_MCMD), dst, wOffset, hOffset, src, count, kind);
2016-07-21 12:29:56 +05:30
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
2016-07-21 12:29:56 +05:30
hc::completion_future marker;
2016-07-21 12:29:56 +05:30
hipError_t e = hipSuccess;
2016-07-21 12:29:56 +05:30
try {
2018-03-12 11:29:03 +05:30
stream->locked_copySync((char*)dst->data + wOffset, src, count, kind);
} catch (ihipException& ex) {
e = ex._code;
}
2016-07-21 12:29:56 +05:30
return ihipLogStatus(e);
2016-07-21 12:29:56 +05:30
}
2018-01-16 11:44:19 +05:30
hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset,
2018-03-12 11:29:03 +05:30
size_t count, hipMemcpyKind kind) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyFromArray, (TRACE_MCMD), dst, srcArray, wOffset, hOffset, count, kind);
2018-01-16 11:44:19 +05:30
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
try {
2018-03-12 11:29:03 +05:30
stream->locked_copySync((char*)dst, (char*)srcArray->data + wOffset, count, kind);
} catch (ihipException& ex) {
2018-01-16 11:44:19 +05:30
e = ex._code;
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyHtoA, (TRACE_MCMD), dstArray, dstOffset, srcHost, count);
2018-01-16 11:44:19 +05:30
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
try {
2018-03-12 11:29:03 +05:30
stream->locked_copySync((char*)dstArray->data + dstOffset, srcHost, count,
hipMemcpyHostToDevice);
} catch (ihipException& ex) {
2018-01-16 11:44:19 +05:30
e = ex._code;
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpyAtoH, (TRACE_MCMD), dst, srcArray, srcOffset, count);
2018-01-16 11:44:19 +05:30
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
try {
2018-03-12 11:29:03 +05:30
stream->locked_copySync((char*)dst, (char*)srcArray->data + srcOffset, count,
hipMemcpyDeviceToHost);
} catch (ihipException& ex) {
2018-01-16 11:44:19 +05:30
e = ex._code;
}
return ihipLogStatus(e);
}
2019-08-15 19:13:16 -07:00
hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, bool isAsync) {
2017-07-17 15:16:12 -04:00
hipError_t e = hipSuccess;
2019-08-15 19:13:16 -07:00
if(p) {
2017-12-05 14:11:13 +05:30
size_t byteSize;
size_t depth;
size_t height;
size_t widthInBytes;
size_t srcPitch;
size_t dstPitch;
2018-03-12 11:29:03 +05:30
void* srcPtr;
void* dstPtr;
2017-12-05 14:11:13 +05:30
size_t ySize;
2018-03-12 11:29:03 +05:30
if (p->dstArray != nullptr) {
if (p->dstArray->isDrv == false) {
switch (p->dstArray->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
2017-12-05 14:11:13 +05:30
}
2018-03-12 11:29:03 +05:30
depth = p->extent.depth;
height = p->extent.height;
widthInBytes = p->extent.width * byteSize;
srcPitch = p->srcPtr.pitch;
srcPtr = p->srcPtr.ptr;
ySize = p->srcPtr.ysize;
2018-06-05 18:54:33 +05:30
dstPitch = p->dstArray->width * byteSize;
2018-03-12 11:29:03 +05:30
dstPtr = p->dstArray->data;
} else {
depth = p->Depth;
height = p->Height;
widthInBytes = p->WidthInBytes;
2018-06-05 18:54:33 +05:30
dstPitch = p->dstArray->width * 4;
2018-03-12 11:29:03 +05:30
srcPitch = p->srcPitch;
srcPtr = (void*)p->srcHost;
ySize = p->srcHeight;
dstPtr = p->dstArray->data;
}
2017-12-05 14:11:13 +05:30
} else {
2018-03-12 11:29:03 +05:30
// Non array destination
2017-12-05 14:11:13 +05:30
depth = p->extent.depth;
height = p->extent.height;
widthInBytes = p->extent.width;
srcPitch = p->srcPtr.pitch;
srcPtr = p->srcPtr.ptr;
dstPtr = p->dstPtr.ptr;
ySize = p->srcPtr.ysize;
2018-06-05 18:54:33 +05:30
dstPitch = p->dstPtr.pitch;
2017-07-17 15:16:12 -04:00
}
2019-08-15 19:13:16 -07:00
stream = ihipSyncAndResolveStream(stream);
2017-12-05 14:11:13 +05:30
hc::completion_future marker;
try {
2018-06-05 18:54:33 +05:30
if((widthInBytes == dstPitch) && (widthInBytes == srcPitch)) {
2019-08-15 19:13:16 -07:00
if(isAsync)
stream->locked_copyAsync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind);
else
stream->locked_copySync((void*)dstPtr, (void*)srcPtr, widthInBytes*height*depth, p->kind, false);
2018-06-05 18:54:33 +05:30
} else {
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
// TODO: p->srcPos or p->dstPos are not 0.
unsigned char* src =
(unsigned char*)srcPtr + i * ySize * srcPitch + j * srcPitch;
unsigned char* dst =
(unsigned char*)dstPtr + i * height * dstPitch + j * dstPitch;
2019-08-15 19:13:16 -07:00
if(isAsync)
stream->locked_copyAsync(dst, src, widthInBytes, p->kind);
else
stream->locked_copySync(dst, src, widthInBytes, p->kind);
2018-06-05 18:54:33 +05:30
}
2018-03-12 11:29:03 +05:30
}
2018-06-05 18:54:33 +05:30
}
2018-03-12 11:29:03 +05:30
} catch (ihipException ex) {
e = ex._code;
}
2017-07-17 15:16:12 -04:00
} else {
2017-12-05 14:11:13 +05:30
e = hipErrorInvalidValue;
2017-07-17 15:16:12 -04:00
}
2019-08-15 19:13:16 -07:00
return e;
}
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p) {
HIP_INIT_SPECIAL_API(hipMemcpy3D, (TRACE_MCMD), p);
hipError_t e = hipSuccess;
e = ihipMemcpy3D(p, hipStreamNull, false);
return ihipLogStatus(e);
}
hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipMemcpy3DAsync, (TRACE_MCMD), p, stream);
hipError_t e = hipSuccess;
e = ihipMemcpy3D(p, stream, true);
2017-07-17 15:16:12 -04:00
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
namespace {
template <uint32_t block_dim, typename RandomAccessIterator, typename N, typename T>
__global__ void hip_fill_n(RandomAccessIterator f, N n, T value) {
const uint32_t grid_dim = gridDim.x * blockDim.x;
size_t idx = blockIdx.x * block_dim + threadIdx.x;
while (idx < n) {
__builtin_memcpy(reinterpret_cast<void*>(&f[idx]), reinterpret_cast<const void*>(&value),
sizeof(T));
idx += grid_dim;
}
2018-03-12 11:29:03 +05:30
}
2018-03-12 11:29:03 +05:30
template <typename T, typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
inline const T& clamp_integer(const T& x, const T& lower, const T& upper) {
assert(!(upper < lower));
2018-03-12 11:29:03 +05:30
return std::min(upper, std::max(x, lower));
}
template <typename T>
__global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) {
size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
size_t idy = blockIdx.y * blockDim.y + threadIdx.y;
2018-05-23 14:43:47 +05:30
size_t floorWidth = (width/sizeof(T));
2018-05-24 23:30:27 +05:30
T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch);
T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch);
if((idx < floorWidth) && (idy < height)){
dstPtr[idx] = srcPtr[idx];
} else if((idx < width) && (idy < height)){
size_t bytesToCopy = width - (floorWidth * sizeof(T));
dstPtr += floorWidth;
srcPtr += floorWidth;
__builtin_memcpy(reinterpret_cast<uint8_t*>(dstPtr), reinterpret_cast<const uint8_t*>(srcPtr),bytesToCopy);
}
}
2018-03-12 11:29:03 +05:30
} // namespace
//Get the allocated size
hipError_t ihipMemPtrGetInfo(void* ptr, size_t* size) {
hipError_t e = hipSuccess;
if (ptr != nullptr && size != nullptr) {
*size = 0;
hc::accelerator acc;
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
if (status == AM_SUCCESS) {
*size = amPointerInfo._sizeBytes;
} else {
e = hipErrorInvalidValue;
}
} else {
e = hipErrorInvalidValue;
}
return e;
}
template <typename T>
2019-02-27 15:42:54 +00:00
void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t count) {
static constexpr uint32_t block_dim = 256;
2019-02-27 15:42:54 +00:00
const uint32_t grid_dim = clamp_integer<size_t>(count / block_dim, 1, UINT32_MAX);
2018-03-12 11:29:03 +05:30
hipLaunchKernelGGL(hip_fill_n<block_dim>, dim3(grid_dim), dim3{block_dim}, 0u, stream, ptr,
2019-02-27 15:42:54 +00:00
count, std::move(val));
}
template <typename T>
void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) {
2018-05-24 23:51:52 +05:30
size_t threadsPerBlock_x = 64;
size_t threadsPerBlock_y = 4;
uint32_t grid_dim_x = clamp_integer<size_t>( (width+(threadsPerBlock_x*sizeof(T)-1)) / (threadsPerBlock_x*sizeof(T)), 1, UINT32_MAX);
uint32_t grid_dim_y = clamp_integer<size_t>( (height+(threadsPerBlock_y-1)) / threadsPerBlock_y, 1, UINT32_MAX);
hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock_x,threadsPerBlock_y), 0u, stream, dst, src,
width, height, destPitch, srcPitch);
}
typedef enum ihipMemsetDataType {
ihipMemsetDataTypeChar = 0,
ihipMemsetDataTypeShort = 1,
ihipMemsetDataTypeInt = 2
}ihipMemsetDataType;
2018-04-11 15:58:48 +05:30
hipError_t ihipMemset(void* dst, int value, size_t count, hipStream_t stream, enum ihipMemsetDataType copyDataType)
2018-04-11 15:58:48 +05:30
{
2016-03-24 09:28:46 -05:00
hipError_t e = hipSuccess;
2019-02-27 15:42:54 +00:00
if (count == 0) return e;
size_t allocSize = 0;
bool isInbound = (ihipMemPtrGetInfo(dst, &allocSize) == hipSuccess);
isInbound &= (allocSize >= count);
if (stream && (dst != NULL) && isInbound) {
if(copyDataType == ihipMemsetDataTypeChar){
2019-02-27 15:42:54 +00:00
if ((count & 0x3) == 0) {
2018-04-11 15:58:48 +05:30
// use a faster dword-per-workitem copy:
try {
value = value & 0xff;
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ;
2019-02-27 15:42:54 +00:00
ihipMemsetKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), value32, count/sizeof(uint32_t));
2018-04-11 15:58:48 +05:30
}
catch (std::exception &ex) {
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
2019-02-27 15:42:54 +00:00
ihipMemsetKernel<char> (stream, static_cast<char*> (dst), value, count);
2018-04-11 15:58:48 +05:30
}
catch (std::exception &ex) {
e = hipErrorInvalidValue;
}
2016-03-24 09:28:46 -05:00
}
} else {
if(copyDataType == ihipMemsetDataTypeInt) { // 4 Bytes value
2018-04-11 15:58:48 +05:30
try {
2019-02-27 15:42:54 +00:00
ihipMemsetKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), value, count);
2018-04-11 15:58:48 +05:30
} catch (std::exception &ex) {
e = hipErrorInvalidValue;
}
} else if(copyDataType == ihipMemsetDataTypeShort) {
2018-04-11 15:58:48 +05:30
try {
value = value & 0xffff;
2019-02-27 15:42:54 +00:00
ihipMemsetKernel<uint16_t> (stream, static_cast<uint16_t*> (dst), value, count);
2018-04-11 15:58:48 +05:30
} catch (std::exception &ex) {
e = hipErrorInvalidValue;
}
2016-03-24 09:28:46 -05:00
}
}
if (HIP_API_BLOCKING) {
tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", ToString(stream).c_str());
stream->locked_wait();
}
2016-03-24 09:28:46 -05:00
} else {
e = hipErrorInvalidValue;
}
2018-04-11 15:58:48 +05:30
return e;
};
2018-06-13 23:10:05 +05:30
hipError_t getLockedPointer(void *hostPtr, size_t dataLen, void **devicePtrPtr)
{
2018-05-31 13:14:27 +05:30
hc::accelerator acc;
2018-05-31 13:14:27 +05:30
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
if (status == AM_SUCCESS) {
2018-06-13 23:10:05 +05:30
*devicePtrPtr = static_cast<char*>(amPointerInfo._devicePointer) +
(static_cast<char*>(hostPtr) - static_cast<char*>(amPointerInfo._hostPointer));
2018-05-31 13:14:27 +05:30
return(hipSuccess);
};
return(hipErrorHostMemoryNotRegistered);
};
// TODO - review and optimize
hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind) {
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue;
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
2019-03-28 02:21:45 +05:30
int isLockedOrD2D = 0;
2018-05-31 13:14:27 +05:30
void *pinnedPtr=NULL;
void *actualSrc = (void*)src;
void *actualDest = dst;
if(kind == hipMemcpyHostToDevice ) {
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
2019-03-28 02:21:45 +05:30
isLockedOrD2D = 1;
2018-05-31 13:14:27 +05:30
actualSrc = pinnedPtr;
}
} else if(kind == hipMemcpyDeviceToHost) {
2018-05-31 13:14:27 +05:30
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
2019-03-28 02:21:45 +05:30
isLockedOrD2D = 1;
2018-05-31 13:14:27 +05:30
actualDest = pinnedPtr;
}
2019-03-28 02:21:45 +05:30
} else if(kind == hipMemcpyDeviceToDevice) {
isLockedOrD2D = 1;
}
2019-03-28 02:21:45 +05:30
hc::completion_future marker;
hipError_t e = hipSuccess;
if((width == dpitch) && (width == spitch)) {
stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false);
} else {
try {
2019-03-28 02:21:45 +05:30
if(!isLockedOrD2D) {
for (int i = 0; i < height; ++i)
stream->locked_copySync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind);
} else {
2019-03-14 13:03:06 +05:30
if(!stream->locked_copy2DSync(dst, src, width, height, spitch, dpitch, kind)){
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
stream->locked_wait();
}
}
} catch (ihipException& ex) {
e = ex._code;
}
}
return e;
}
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemcpy2D, (TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind);
hipError_t e = hipSuccess;
e = ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind);
return ihipLogStatus(e);
}
2019-08-09 04:50:37 -07:00
hipError_t ihipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream) {
2019-08-09 04:50:37 -07:00
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue;
hipError_t e = hipSuccess;
2019-03-28 02:21:45 +05:30
int isLockedOrD2D = 0;
2018-05-31 13:14:27 +05:30
void *pinnedPtr=NULL;
void *actualSrc = (void*)src;
void *actualDest = dst;
2018-06-14 11:58:56 +05:30
stream = ihipSyncAndResolveStream(stream);
2018-05-31 13:14:27 +05:30
if(kind == hipMemcpyHostToDevice ) {
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
2019-03-28 02:21:45 +05:30
isLockedOrD2D = 1;
2018-05-31 13:14:27 +05:30
actualSrc = pinnedPtr;
}
} else if(kind == hipMemcpyDeviceToHost) {
2018-05-31 13:14:27 +05:30
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
2019-03-28 02:21:45 +05:30
isLockedOrD2D = 1;
2018-05-31 13:14:27 +05:30
actualDest = pinnedPtr;
}
2019-03-28 02:21:45 +05:30
} else if(kind == hipMemcpyDeviceToDevice) {
isLockedOrD2D = 1;
}
2019-03-28 02:21:45 +05:30
if((width == dpitch) && (width == spitch)) {
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
} else {
try {
2019-03-28 02:21:45 +05:30
if(!isLockedOrD2D){
for (int i = 0; i < height; ++i)
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
(unsigned char*)src + i * spitch, width, kind, stream);
} else{
2019-03-14 13:03:06 +05:30
if(!stream->locked_copy2DAsync(dst, src, width, height, spitch, dpitch, kind)){
ihipMemcpy2dKernel<uint8_t> (stream, static_cast<uint8_t*> (dst), static_cast<const uint8_t*> (src), width, height, dpitch, spitch);
}
}
} catch (ihipException& ex) {
e = ex._code;
}
}
2019-08-09 04:50:37 -07:00
return e;
}
hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipMemcpy2DAsync, (TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
hipError_t e = hipSuccess;
e = ihipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
return ihipLogStatus(e);
}
2018-04-11 15:58:48 +05:30
2019-10-25 03:13:33 -07:00
hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
size_t height, size_t srcXOffsetInBytes, size_t srcYOffset,
size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind,
hipStream_t stream, bool isAsync) {
if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){
return hipErrorInvalidValue;
} else if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){
return hipErrorInvalidValue;
}
src = (void*)((char*)src+ srcYOffset*spitch + srcXOffsetInBytes);
dst = (void*)((char*)dst+ dstYOffset*dpitch + dstXOffsetInBytes);
if(isAsync){
return ihipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, hipMemcpyDefault, stream);
} else{
return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyDefault);
}
}
2019-10-04 13:36:31 +05:30
hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool isAsync) {
if (pCopy == nullptr) {
2019-10-04 13:36:31 +05:30
return hipErrorInvalidValue;
}
void* dst; const void* src;
size_t spitch = pCopy->srcPitch;
size_t dpitch = pCopy->dstPitch;
switch(pCopy->srcMemoryType){
case hipMemoryTypeHost:
src = pCopy->srcHost;
break;
case hipMemoryTypeArray:
src = pCopy->srcArray->data;
spitch = pCopy->WidthInBytes;
break;
case hipMemoryTypeUnified:
case hipMemoryTypeDevice:
src = pCopy->srcDevice;
break;
default:
return hipErrorInvalidValue;
}
switch(pCopy->dstMemoryType){
case hipMemoryTypeHost:
dst = pCopy->dstHost;
break;
case hipMemoryTypeArray:
dst = pCopy->dstArray->data;
dpitch = pCopy->WidthInBytes;
break;
case hipMemoryTypeUnified:
case hipMemoryTypeDevice:
dst = pCopy->dstDevice;
break;
default:
return hipErrorInvalidValue;
}
2019-10-25 03:13:33 -07:00
return ihip2dOffsetMemcpy(dst, dpitch, src, spitch, pCopy->WidthInBytes,
pCopy->Height, pCopy->srcXInBytes, pCopy->srcY,
pCopy->dstXInBytes, pCopy->dstY, hipMemcpyDefault,
stream, isAsync);
2019-10-04 13:36:31 +05:30
}
hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
HIP_INIT_SPECIAL_API(hipMemcpyParam2D, (TRACE_MCMD), pCopy);
return ihipLogStatus(ihipMemcpyParam2D(pCopy, hipStreamNull, false));
2019-08-09 04:50:37 -07:00
}
hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipMemcpyParam2DAsync, (TRACE_MCMD), pCopy, stream);
2019-10-04 13:36:31 +05:30
return ihipLogStatus(ihipMemcpyParam2D(pCopy, stream, true));
}
2019-10-25 03:13:33 -07:00
hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind ){
2019-10-31 06:16:10 +05:30
HIP_INIT_SPECIAL_API(hipMemcpy2DFromArray, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind);
2019-10-25 03:13:33 -07:00
size_t byteSize;
if(src) {
switch (src->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, hipStreamNull, false));
}
hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){
2019-10-31 06:27:06 +05:30
HIP_INIT_SPECIAL_API(hipMemcpy2DFromArrayAsync, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind, stream);
2019-10-25 03:13:33 -07:00
size_t byteSize;
if(src) {
switch (src->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
return ihipLogStatus(ihip2dOffsetMemcpy(dst, dpitch, src->data, src->width*byteSize, width, height, wOffset, hOffset, 0, 0, kind, stream, true));
}
2018-04-11 15:58:48 +05:30
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemsetAsync, (TRACE_MCMD), dst, value, sizeBytes, stream);
2018-04-11 15:58:48 +05:30
hipError_t e = hipSuccess;
stream = ihipSyncAndResolveStream(stream);
2016-03-24 09:28:46 -05:00
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar);
2016-03-24 09:28:46 -05:00
return ihipLogStatus(e);
};
hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, hipStream_t stream) {
2019-02-27 15:42:54 +00:00
HIP_INIT_SPECIAL_API(hipMemsetD32Async, (TRACE_MCMD), dst, value, count, stream);
hipError_t e = hipSuccess;
stream = ihipSyncAndResolveStream(stream);
e = ihipMemset(dst, value, count, stream, ihipMemsetDataTypeInt);
return ihipLogStatus(e);
};
2018-03-12 11:29:03 +05:30
hipError_t hipMemset(void* dst, int value, size_t sizeBytes) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemset, (TRACE_MCMD), dst, value, sizeBytes);
2016-04-25 11:05:30 -05:00
hipError_t e = hipSuccess;
2017-01-20 14:48:29 -06:00
hipStream_t stream = hipStreamNull;
2018-04-11 19:01:53 +05:30
stream = ihipSyncAndResolveStream(stream);
2016-04-25 11:05:30 -05:00
if (stream) {
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar);
stream->locked_wait();
2016-04-25 11:05:30 -05:00
} else {
e = hipErrorInvalidValue;
}
2016-04-25 11:05:30 -05:00
return ihipLogStatus(e);
2016-03-24 09:28:46 -05:00
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemset2D, (TRACE_MCMD), dst, pitch, value, width, height);
2017-07-17 15:16:12 -04:00
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
2018-03-12 11:29:03 +05:30
stream = ihipSyncAndResolveStream(stream);
2017-07-17 15:16:12 -04:00
if (stream) {
size_t sizeBytes = pitch * height;
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar);
stream->locked_wait();
2017-07-17 15:16:12 -04:00
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
2018-04-17 18:27:27 +05:30
hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream )
{
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemset2DAsync, (TRACE_MCMD), dst, pitch, value, width, height, stream);
2018-04-17 18:27:27 +05:30
hipError_t e = hipSuccess;
stream = ihipSyncAndResolveStream(stream);
if (stream) {
size_t sizeBytes = pitch * height;
e = ihipMemset(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar);
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
};
hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) {
HIP_INIT_SPECIAL_API(hipMemsetD8, (TRACE_MCMD), dst, value, count);
2017-03-14 22:11:34 +05:30
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
2018-03-12 11:29:03 +05:30
stream = ihipSyncAndResolveStream(stream);
2017-03-14 22:11:34 +05:30
if (stream) {
e = ihipMemset(dst, value, count, stream, ihipMemsetDataTypeChar);
stream->locked_wait();
2017-03-14 22:11:34 +05:30
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
2017-03-10 15:04:46 -06:00
hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count , hipStream_t stream ) {
HIP_INIT_SPECIAL_API(hipMemsetD8Async, (TRACE_MCMD), dst, value, count, stream);
2019-10-04 13:36:31 +05:30
stream = ihipSyncAndResolveStream(stream);
if (stream) {
return ihipLogStatus(ihipMemset(dst, value, count, stream, ihipMemsetDataTypeChar));
2019-10-04 13:36:31 +05:30
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
}
hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count){
HIP_INIT_SPECIAL_API(hipMemsetD16, (TRACE_MCMD), dst, value, count);
2019-10-04 13:36:31 +05:30
hipError_t e = hipSuccess;
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
if (stream) {
e = ihipMemset(dst, value, count, stream, ihipMemsetDataTypeShort);
2019-10-04 13:36:31 +05:30
if(hipSuccess == e)
stream->locked_wait();
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
hipError_t hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, hipStream_t stream ){
HIP_INIT_SPECIAL_API(hipMemsetD16Async, (TRACE_MCMD), dst, value, count, stream);
2019-10-04 13:36:31 +05:30
stream = ihipSyncAndResolveStream(stream);
if (stream) {
return ihipLogStatus(ihipMemset(dst, value, count, stream, ihipMemsetDataTypeShort));
2019-10-04 13:36:31 +05:30
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
}
2019-02-27 15:42:54 +00:00
hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) {
HIP_INIT_SPECIAL_API(hipMemsetD32, (TRACE_MCMD), dst, value, count);
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
stream = ihipSyncAndResolveStream(stream);
if (stream) {
e = ihipMemset(dst, value, count, stream, ihipMemsetDataTypeInt);
stream->locked_wait();
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
2018-05-07 10:24:30 +05:30
hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent )
{
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemset3D, (TRACE_MCMD), &pitchedDevPtr, value, &extent);
2018-05-07 10:24:30 +05:30
hipError_t e = hipSuccess;
hipStream_t stream = hipStreamNull;
// TODO - call an ihip memset so HIP_TRACE is correct.
stream = ihipSyncAndResolveStream(stream);
if (stream) {
size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth;
e = ihipMemset(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar);
stream->locked_wait();
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream )
{
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipMemset3DAsync, (TRACE_MCMD), &pitchedDevPtr, value, &extent);
2018-05-07 10:24:30 +05:30
hipError_t e = hipSuccess;
// TODO - call an ihip memset so HIP_TRACE is correct.
stream = ihipSyncAndResolveStream(stream);
if (stream) {
size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth;
e = ihipMemset(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar);
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemGetInfo(size_t* free, size_t* total) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipMemGetInfo, free, total);
2016-03-24 09:28:46 -05:00
hipError_t e = hipSuccess;
2018-03-12 11:29:03 +05:30
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
2016-08-07 21:46:51 -05:00
if (ctx) {
auto device = ctx->getWriteableDevice();
2016-03-24 09:28:46 -05:00
if (total) {
*total = device->_props.totalGlobalMem;
2018-03-12 11:29:03 +05:30
} else {
e = hipErrorInvalidValue;
}
2019-10-01 12:40:36 +05:30
2016-03-24 09:28:46 -05:00
if (free) {
2019-10-01 12:40:36 +05:30
if (!device->_driver_node_id) return ihipLogStatus(hipErrorInvalidDevice);
std::string fileName = std::string("/sys/class/kfd/kfd/topology/nodes/") + std::to_string(device->_driver_node_id) + std::string("/mem_banks/0/used_memory");
std::ifstream file;
file.open(fileName);
if (!file) return ihipLogStatus(hipErrorFileNotFound);
std::string deviceSize;
size_t deviceMemSize;
file >> deviceSize;
file.close();
if ((deviceMemSize=strtol(deviceSize.c_str(),NULL,10))){
*free = device->_props.totalGlobalMem - deviceMemSize;
// Deduct the amount of memory from the free memory reported from the system
if (HIP_HIDDEN_FREE_MEM) *free -= (size_t)HIP_HIDDEN_FREE_MEM * 1024 * 1024;
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
2018-03-12 11:29:03 +05:30
} else {
e = hipErrorInvalidValue;
}
2016-03-24 09:28:46 -05:00
} else {
e = hipErrorInvalidDevice;
}
return ihipLogStatus(e);
}
2018-03-12 11:29:03 +05:30
hipError_t hipMemPtrGetInfo(void* ptr, size_t* size) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipMemPtrGetInfo, ptr, size);
2017-03-07 13:46:29 -06:00
return ihipLogStatus(ihipMemPtrGetInfo(ptr, size));
2017-03-07 13:46:29 -06:00
}
2017-03-10 15:04:46 -06:00
2018-03-12 11:29:03 +05:30
hipError_t hipFree(void* ptr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipFree, (TRACE_MEM), ptr);
2016-03-24 09:28:46 -05:00
hipError_t hipStatus = hipErrorInvalidDevicePointer;
if (ptr) {
hc::accelerator acc;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
2016-03-24 09:28:46 -05:00
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
2018-03-12 11:29:03 +05:30
if (status == AM_SUCCESS) {
2019-09-13 03:09:01 +05:30
/*if (amPointerInfo._hostPointer == NULL) */ //TODO: Fix it when there is proper managed memory support
{
if (HIP_SYNC_FREE) {
// Synchronize all devices, all streams
// to ensure all work has finished on all devices.
// This is disabled by default.
for (unsigned i = 0; i < g_deviceCnt; i++) {
ihipGetPrimaryCtx(i)->locked_waitAllStreams();
}
}
else {
ihipCtx_t* ctx;
if (amPointerInfo._appId != -1) {
#if USE_APP_PTR_FOR_CTX
ctx = static_cast<ihipCtx_t*>(amPointerInfo._appPtr);
#else
ctx = ihipGetPrimaryCtx(amPointerInfo._appId);
#endif
} else {
ctx = ihipGetTlsDefaultCtx();
}
// Synchronize to ensure all work has finished on device owning the memory.
ctx->locked_waitAllStreams(); // ignores non-blocking streams, this waits
// for all activity to finish.
}
2016-03-24 09:28:46 -05:00
hc::am_free(ptr);
hipStatus = hipSuccess;
}
}
2016-06-10 20:12:46 -05:00
} else {
// free NULL pointer succeeds and is common technique to initialize runtime
hipStatus = hipSuccess;
2016-03-24 09:28:46 -05:00
}
return ihipLogStatus(hipStatus);
}
2017-03-10 15:04:46 -06:00
2018-03-12 11:29:03 +05:30
hipError_t hipHostFree(void* ptr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipHostFree, (TRACE_MEM), ptr);
2016-03-24 09:28:46 -05:00
hipError_t hipStatus = hipSuccess;
hipStatus = hip_internal::ihipHostFree(tls, ptr);
2016-03-24 09:28:46 -05:00
return ihipLogStatus(hipStatus);
};
2017-01-19 23:22:06 -06:00
// Deprecated:
2018-03-12 11:29:03 +05:30
hipError_t hipFreeHost(void* ptr) { return hipHostFree(ptr); }
2018-03-12 11:29:03 +05:30
hipError_t hipFreeArray(hipArray* array) {
2018-11-08 08:36:50 -06:00
HIP_INIT_SPECIAL_API(hipFreeArray, (TRACE_MEM), array);
2016-07-21 12:29:56 +05:30
hipError_t hipStatus = hipErrorInvalidDevicePointer;
2016-07-21 12:29:56 +05:30
// Synchronize to ensure all work has finished.
2018-03-12 11:29:03 +05:30
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
// for all activity to finish.
2016-07-21 12:29:56 +05:30
2018-03-12 11:29:03 +05:30
if (array->data) {
hc::accelerator acc;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
#endif
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data);
2018-03-12 11:29:03 +05:30
if (status == AM_SUCCESS) {
if (amPointerInfo._hostPointer == NULL) {
hc::am_free(array->data);
hipStatus = hipSuccess;
}
}
2016-07-21 12:29:56 +05:30
}
return ihipLogStatus(hipStatus);
2016-07-21 12:29:56 +05:30
}
2016-03-24 09:28:46 -05:00
2018-03-12 11:29:03 +05:30
hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipMemGetAddressRange, pbase, psize, dptr);
2016-11-29 22:04:09 +05:30
hipError_t hipStatus = hipSuccess;
hc::accelerator acc;
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
2018-03-12 11:29:03 +05:30
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
2018-03-12 11:29:03 +05:30
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#endif
2018-03-12 11:29:03 +05:30
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, dptr);
2016-11-29 22:04:09 +05:30
if (status == AM_SUCCESS) {
*pbase = amPointerInfo._devicePointer;
*psize = amPointerInfo._sizeBytes;
2018-03-12 11:29:03 +05:30
} else
2016-12-07 15:31:23 -06:00
hipStatus = hipErrorInvalidDevicePointer;
2017-03-10 23:45:28 +05:30
return ihipLogStatus(hipStatus);
2016-11-29 22:04:09 +05:30
}
2016-12-01 13:51:58 -06:00
2018-03-12 11:29:03 +05:30
// TODO: IPC implementaiton:
2016-12-07 15:31:23 -06:00
2018-03-12 11:29:03 +05:30
hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipIpcGetMemHandle, handle, devPtr);
hipError_t hipStatus = hipSuccess;
2016-12-07 15:31:23 -06:00
// Get the size of allocated pointer
size_t psize = 0u;
2016-12-07 15:31:23 -06:00
hc::accelerator acc;
2018-03-12 11:29:03 +05:30
if ((handle == NULL) || (devPtr == NULL)) {
2016-12-07 15:31:23 -06:00
hipStatus = hipErrorInvalidResourceHandle;
} else {
2017-08-15 15:51:38 +05:30
#if (__hcc_workweek__ >= 17332)
2018-03-12 11:29:03 +05:30
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#else
2018-03-12 11:29:03 +05:30
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
2017-08-15 15:51:38 +05:30
#endif
2018-03-12 11:29:03 +05:30
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, devPtr);
if (status == AM_SUCCESS) {
psize = (size_t)amPointerInfo._sizeBytes;
2017-11-21 15:41:15 -06:00
} else {
hipStatus = hipErrorInvalidResourceHandle;
2017-11-21 15:41:15 -06:00
}
2018-03-12 11:29:03 +05:30
ihipIpcMemHandle_t* iHandle = (ihipIpcMemHandle_t*)handle;
// Save the size of the pointer to hipIpcMemHandle
iHandle->psize = psize;
2016-12-07 15:31:23 -06:00
#if USE_IPC
// Create HSA ipc memory
hsa_status_t hsa_status =
2018-03-12 11:29:03 +05:30
hsa_amd_ipc_memory_create(devPtr, psize, (hsa_amd_ipc_memory_t*)&(iHandle->ipc_handle));
if (hsa_status != HSA_STATUS_SUCCESS) hipStatus = hipErrorMemoryAllocation;
#else
hipStatus = hipErrorRuntimeOther;
#endif
}
2017-03-10 23:45:28 +05:30
return ihipLogStatus(hipStatus);
}
2018-03-12 11:29:03 +05:30
hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipIpcOpenMemHandle, devPtr, &handle, flags);
hipError_t hipStatus = hipSuccess;
2019-08-23 02:19:18 -07:00
if (devPtr == NULL)
return ihipLogStatus(hipErrorInvalidValue);
#if USE_IPC
2019-08-23 02:19:18 -07:00
// Get the current device agent.
hc::accelerator acc;
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
if (!agent)
return ihipLogStatus(hipErrorInvalidResourceHandle);
2016-12-07 15:31:23 -06:00
2019-08-23 02:19:18 -07:00
ihipIpcMemHandle_t* iHandle = (ihipIpcMemHandle_t*)&handle;
// Attach ipc memory
auto ctx = ihipGetTlsDefaultCtx();
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
auto device = ctx->getWriteableDevice();
// the peerCnt always stores self so make sure the trace actually
if(hsa_amd_ipc_memory_attach(
(hsa_amd_ipc_memory_t*)&(iHandle->ipc_handle), iHandle->psize, crit->peerCnt(),
crit->peerAgents(), devPtr) != HSA_STATUS_SUCCESS)
return ihipLogStatus(hipErrorRuntimeOther);
hc::AmPointerInfo ampi(NULL, *devPtr, *devPtr, sizeof(*devPtr), acc, true, true);
am_status_t am_status = hc::am_memtracker_add(*devPtr,ampi);
if (am_status != AM_SUCCESS)
return ihipLogStatus(hipErrorMapBufferObjectFailed);
#if USE_APP_PTR_FOR_CTX
am_status = hc::am_memtracker_update(*devPtr, device->_deviceId, 0, ctx);
#else
2019-08-23 02:19:18 -07:00
am_status = hc::am_memtracker_update(*devPtr, device->_deviceId, 0);
#endif
2019-08-23 02:19:18 -07:00
if(am_status != AM_SUCCESS)
return ihipLogStatus(hipErrorMapBufferObjectFailed);
}
2019-08-23 02:19:18 -07:00
#else
hipStatus = hipErrorRuntimeOther;
#endif
2017-03-10 23:45:28 +05:30
return ihipLogStatus(hipStatus);
2016-12-01 13:51:58 -06:00
}
2018-03-12 11:29:03 +05:30
hipError_t hipIpcCloseMemHandle(void* devPtr) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipIpcCloseMemHandle, devPtr);
hipError_t hipStatus = hipSuccess;
2019-08-23 02:19:18 -07:00
if (devPtr == NULL)
return ihipLogStatus(hipErrorInvalidValue);
#if USE_IPC
2019-08-23 02:19:18 -07:00
if(hc::am_memtracker_remove(devPtr) != AM_SUCCESS)
return ihipLogStatus(hipErrorInvalidValue);
if (hsa_amd_ipc_memory_detach(devPtr) != HSA_STATUS_SUCCESS)
return ihipLogStatus(hipErrorInvalidResourceHandle);
#else
2019-08-23 02:19:18 -07:00
hipStatus = hipErrorRuntimeOther;
#endif
2019-08-23 02:19:18 -07:00
2017-03-10 23:45:28 +05:30
return ihipLogStatus(hipStatus);
2016-12-01 13:51:58 -06:00
}
// hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle){
// return hipSuccess;
// }