2
0

Refactor registered memory calls.

[ROCm/clr commit: b7acb85fa8]
Este cometimento está contido em:
Ben Sander
2017-03-10 15:04:46 -06:00
ascendente e1c95b083d
cometimento 49d7ea94f5
3 ficheiros modificados com 176 adições e 120 eliminações
@@ -858,6 +858,8 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr)
* @param[out] ptr Pointer to the allocated memory
* @param[in] size Requested memory size
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return #hipSuccess
*
* @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, hipHostFree, hipHostMalloc
@@ -870,6 +872,8 @@ hipError_t hipMalloc(void** ptr, size_t size) ;
* @param[out] ptr Pointer to the allocated host pinned memory
* @param[in] size Requested memory size
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return #hipSuccess, #hipErrorMemoryAllocation
*
* @deprecated use hipHostMalloc() instead
@@ -883,6 +887,8 @@ hipError_t hipMallocHost(void** ptr, size_t size) __attribute__((deprecated("use
* @param[in] size Requested memory size
* @param[in] flags Type of host memory allocation
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return #hipSuccess, #hipErrorMemoryAllocation
*
* @see hipSetDeviceFlags, hipHostFree
@@ -896,6 +902,8 @@ hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) ;
* @param[in] size Requested memory size
* @param[in] flags Type of host memory allocation
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return #hipSuccess, #hipErrorMemoryAllocation
*
* @deprecated use hipHostMalloc() instead
@@ -980,6 +988,9 @@ hipError_t hipHostUnregister(void* hostPtr) ;
* @param[out] pitch Pitch for allocation (in bytes)
* @param[in] width Requested pitched allocation width (in bytes)
* @param[in] height Requested pitched allocation height
*
* If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned.
*
* @return Error code
*
* @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, hipMalloc3DArray, hipHostMalloc
@@ -24,7 +24,7 @@ bool p_h2d = true;
bool p_d2h = true;
bool p_bidir = true;
#define NO_CHECK
//#define NO_CHECK
#define CHECK_HIP_ERROR() \
@@ -151,6 +151,10 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
hipHostRegister(hostMem, numMaxFloats * sizeof(float), 0);
CHECK_HIP_ERROR();
}
else
{
assert(0);
}
for (int i = 0; i < numMaxFloats; i++)
{
@@ -323,6 +327,22 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
hostMem1 = new float[numMaxFloats];
hostMem2 = new float[numMaxFloats];
}
else if (p_malloc_mode == MallocRegistered)
{
if (p_numa_ctl == -1) {
hostMem1 = (float*)malloc(numMaxFloats*sizeof(float));
hostMem2 = (float*)malloc(numMaxFloats*sizeof(float));
}
hipHostRegister(hostMem1, numMaxFloats * sizeof(float), 0);
CHECK_HIP_ERROR();
hipHostRegister(hostMem2, numMaxFloats * sizeof(float), 0);
CHECK_HIP_ERROR();
}
else
{
assert(0);
}
for (int i=0; i<numMaxFloats; i++)
@@ -429,6 +449,13 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
delete[] hostMem1;
delete[] hostMem2;
break;
case MallocRegistered:
hipHostUnregister(hostMem1);
CHECK_HIP_ERROR();
free(hostMem1);
hipHostUnregister(hostMem2);
free(hostMem2);
break;
default:
assert(0);
}
@@ -476,6 +503,22 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
hostMem[0] = new float[numMaxFloats];
hostMem[1] = new float[numMaxFloats];
}
else if (p_malloc_mode == MallocRegistered)
{
if (p_numa_ctl == -1) {
hostMem[0] = (float*)malloc(numMaxFloats*sizeof(float));
hostMem[1] = (float*)malloc(numMaxFloats*sizeof(float));
}
hipHostRegister(hostMem[0], numMaxFloats * sizeof(float), 0);
CHECK_HIP_ERROR();
hipHostRegister(hostMem[1], numMaxFloats * sizeof(float), 0);
CHECK_HIP_ERROR();
}
else
{
assert(0);
}
for (int i = 0; i < numMaxFloats; i++)
{
@@ -571,6 +614,13 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
delete[] hostMem[0];
delete[] hostMem[1];
break;
case MallocRegistered:
for (int i=0; i<2; i++) {
hipHostUnregister(hostMem[i]);
CHECK_HIP_ERROR();
free(hostMem[i]);
}
break;
default:
assert(0);
};
+114 -119
Ver ficheiro
@@ -30,6 +30,96 @@ THE SOFTWARE.
#include "hip/hcc_detail/hip_texture.h"
#include <hc_am.hpp>
// Internal HIP APIS:
namespace hip_internal {
hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
{
hipError_t e = hipSuccess;
stream = ihipSyncAndResolveStream(stream);
if ((dst == NULL) || (src == NULL)) {
e= hipErrorInvalidValue;
} else if (stream) {
try {
stream->locked_copyAsync(dst, src, sizeBytes, kind);
}
catch (ihipException ex) {
e = ex._code;
}
} else {
e = hipErrorInvalidValue;
}
return e;
}
// return 0 on success or -1 on error:
int sharePtr(void *ptr, ihipCtx_t *ctx, unsigned hipFlags)
{
int ret = 0;
auto device = ctx->getWriteableDevice();
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
int peerCnt=0;
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
// the peerCnt always stores self so make sure the trace actually
peerCnt = crit->peerCnt();
tprintf(DB_MEM, " allow access to %d other peer(s)\n", peerCnt-1);
if (peerCnt > 1) {
//printf ("peer self access\n");
// TODOD - remove me:
for (auto iter = crit->_peers.begin(); iter!=crit->_peers.end(); iter++) {
tprintf (DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(), (iter == crit->_peers.begin()) ? " (self)":"");
};
hsa_status_t s = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, ptr);
if (s != HSA_STATUS_SUCCESS) {
ret = -1;
}
}
}
return ret;
}
// Allocate a new pointer with am_alloc and share with all valid peers.
// Returns null-ptr if a memory error occurs (either allocation or sharing)
void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, unsigned amFlags, unsigned hipFlags)
{
void *ptr = nullptr;
auto device = ctx->getWriteableDevice();
ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags);
tprintf(DB_MEM, " alloc %s ptr:%p size:%zu on dev:%d\n",
msg, ptr, sizeBytes, device->_deviceId);
if (ptr != nullptr) {
int r = sharePtr(ptr, ctx, hipFlags);
if (r != 0) {
ptr = nullptr;
}
}
return ptr;
}
} // end namespace hip_internal
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
// Memory
@@ -128,37 +218,8 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
if (ctx) {
auto device = ctx->getWriteableDevice();
const unsigned am_flags = 0;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
*ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, 0/*amFlags*/, 0/*hipFlags*/);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_deviceId, 0);
int peerCnt=0;
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
// the peerCnt always stores self so make sure the trace actually
peerCnt = crit->peerCnt();
tprintf(DB_MEM, " allocated device_mem ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n",
*ptr, sizeBytes, device->_deviceId, peerCnt-1);
if (peerCnt > 1) {
//printf ("peer self access\n");
// TODOD - remove me:
for (auto iter = crit->_peers.begin(); iter!=crit->_peers.end(); iter++) {
tprintf (DB_MEM, " allow access to peer: %s%s\n", (*iter)->toString().c_str(), (iter == crit->_peers.begin()) ? " (self)":"");
};
hsa_status_t e = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (e != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
}
}
}
} else {
hip_status = hipErrorMemoryAllocation;
}
@@ -198,39 +259,16 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
}
else {
auto device = ctx->getWriteableDevice();
if(HIP_COHERENT_HOST_ALLOC){
// Force to allocate finedgrained system memory
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
if(sizeBytes < 1 && (*ptr == NULL)){
hip_status = hipErrorMemoryAllocation;
} else {
// TODO - should OR in flags here?
hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent);
}
tprintf(DB_MEM, " %s: finegrained system memory ptr=%p\n", __func__, *ptr);
}
else{
// TODO - am_alloc requires writeable __acc, perhaps could be refactored?
// TODO - hipHostMallocMapped is be ignored on ROCM - all memory is mapped to host address space as WC.
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
if (*ptr == NULL) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_deviceId, flags);
// TODO-hipHostMallocPortable should map the host memory into all contexts, regardless of peer status.
int peerCnt=0;
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
peerCnt = crit->peerCnt();
if (peerCnt > 1) {
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
}
}
tprintf(DB_MEM, "allocated pinned_host ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1);
}
}
unsigned amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostPinned;
*ptr = hip_internal::allocAndSharePtr(HIP_COHERENT_HOST_ALLOC ? "finegrained_host":"pinned_host",
sizeBytes, ctx, amFlags, flags);
if(sizeBytes && (*ptr == NULL)){
hip_status = hipErrorMemoryAllocation;
}
}
}
if (HIP_SYNC_HOST_ALLOC) {
hipDeviceSynchronize();
}
@@ -272,22 +310,11 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
auto device = ctx->getWriteableDevice();
const unsigned am_flags = 0;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
*ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx, am_flags, 0);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_deviceId, 0);
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
}
}
}
}
} else {
hip_status = hipErrorMemoryAllocation;
}
@@ -321,41 +348,31 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
void ** ptr = &array[0]->data;
if (ctx) {
auto device = ctx->getWriteableDevice();
const unsigned am_flags = 0;
const size_t size = width*height;
size_t allocSize = 0;
switch(desc->f) {
case hipChannelFormatKindSigned:
*ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags);
allocSize = size * sizeof(int);
break;
case hipChannelFormatKindUnsigned:
*ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags);
allocSize = size * sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
*ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags);
allocSize = size * sizeof(float);
break;
case hipChannelFormatKindNone:
*ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags);
allocSize = size * sizeof(size_t);
break;
default:
hip_status = hipErrorUnknown;
break;
}
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, am_flags, 0);
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_deviceId, 0);
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
hip_status = hipErrorMemoryAllocation;
}
}
}
}
}
} else {
hip_status = hipErrorMemoryAllocation;
@@ -409,12 +426,13 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
hip_status = hipErrorHostMemoryAlreadyRegistered;
} else {
auto ctx = ihipGetTlsDefaultCtx();
if(hostPtr == NULL){
if (hostPtr == NULL) {
return ihipLogStatus(hipErrorInvalidValue);
}
//TODO-test : multi-gpu access to registered host memory.
if (ctx) {
auto device = ctx->getWriteableDevice();
if(flags == hipHostRegisterDefault || flags == hipHostRegisterPortable || flags == hipHostRegisterMapped){
auto device = ctx->getWriteableDevice();
std::vector<hc::accelerator>vecAcc;
for(int i=0;i<g_deviceCnt;i++){
vecAcc.push_back(ihipGetDevice(i)->_acc);
@@ -711,32 +729,6 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes)
// Internal copy sync:
namespace hip_internal {
hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
{
hipError_t e = hipSuccess;
stream = ihipSyncAndResolveStream(stream);
if ((dst == NULL) || (src == NULL)) {
e= hipErrorInvalidValue;
} else if (stream) {
try {
stream->locked_copyAsync(dst, src, sizeBytes, kind);
}
catch (ihipException ex) {
e = ex._code;
}
} else {
e = hipErrorInvalidValue;
}
return e;
}
} // end namespace hip_internal
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
@@ -1012,6 +1004,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
return ihipLogStatus(e);
}
hipError_t hipMemGetInfo (size_t *free, size_t *total)
{
HIP_INIT_API(free, total);
@@ -1067,6 +1060,7 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size)
return ihipLogStatus(e);
}
hipError_t hipFree(void* ptr)
{
HIP_INIT_API(ptr);
@@ -1094,6 +1088,7 @@ hipError_t hipFree(void* ptr)
return ihipLogStatus(hipStatus);
}
hipError_t hipHostFree(void* ptr)
{
HIP_INIT_API(ptr);