[hip] add support for implicit kernel argument for multi-grid sync (#1456)
* [hip] add support for implicit kernel argument for multi-grid sync * modified code for calculating the prev_sum * change the impCoopArg type to size_t * add memory clean up * launch init_gws and main kernels into two separate loops
Šī revīzija ir iekļauta:
revīziju iesūtīja
Maneesh Gupta
vecāks
fe5f7d4245
revīzija
359dc79101
+101
-93
@@ -141,6 +141,103 @@ void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool s
|
||||
return ptr;
|
||||
}
|
||||
|
||||
hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if ((ctx == nullptr) || (ptr == nullptr)) {
|
||||
hip_status = hipErrorInvalidValue;
|
||||
}
|
||||
else if (sizeBytes == 0) {
|
||||
hip_status = hipSuccess;
|
||||
// TODO - should size of 0 return err or be siliently ignored?
|
||||
} else {
|
||||
unsigned trueFlags = flags;
|
||||
if (flags == hipHostMallocDefault) {
|
||||
// HCC/ROCM provide a modern system with unified memory and should set both of these
|
||||
// flags by default:
|
||||
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
|
||||
}
|
||||
|
||||
|
||||
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped |
|
||||
hipHostMallocWriteCombined | hipHostMallocCoherent |
|
||||
hipHostMallocNonCoherent;
|
||||
|
||||
|
||||
const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
|
||||
|
||||
if ((flags & ~supportedFlags) || ((flags & coherencyFlags) == coherencyFlags)) {
|
||||
*ptr = nullptr;
|
||||
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
|
||||
hip_status = hipErrorInvalidValue;
|
||||
} else {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
#if (__hcc_workweek__ >= 19115)
|
||||
//Avoid mapping host pinned memory to all devices by HCC
|
||||
unsigned amFlags = amHostUnmapped;
|
||||
#else
|
||||
unsigned amFlags = 0;
|
||||
#endif
|
||||
if (flags & hipHostMallocCoherent) {
|
||||
amFlags |= amHostCoherent;
|
||||
} else if (flags & hipHostMallocNonCoherent) {
|
||||
amFlags |= amHostNonCoherent;
|
||||
} else {
|
||||
// depends on env variables:
|
||||
amFlags |= HIP_HOST_COHERENT ? amHostCoherent : amHostNonCoherent;
|
||||
}
|
||||
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr(
|
||||
(amFlags & amHostCoherent) ? "finegrained_host" : "pinned_host", sizeBytes, ctx,
|
||||
true /*shareWithAll*/, amFlags, flags, 0);
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t ihipHostFree(TlsData *tls, void* ptr) {
|
||||
|
||||
// Synchronize to ensure all work has finished.
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
|
||||
// for all activity to finish.
|
||||
|
||||
hipError_t hipStatus = hipErrorInvalidValue;
|
||||
if (ptr) {
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if (status == AM_SUCCESS) {
|
||||
if (amPointerInfo._hostPointer == ptr) {
|
||||
hc::am_free(ptr);
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// free NULL pointer succeeds and is common technique to initialize runtime
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
|
||||
return hipStatus;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace hip_internal
|
||||
|
||||
@@ -301,79 +398,12 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if ((ctx == nullptr) || (ptr == nullptr)) {
|
||||
hip_status = hipErrorInvalidValue;
|
||||
}
|
||||
else if (sizeBytes == 0) {
|
||||
hip_status = hipSuccess;
|
||||
// TODO - should size of 0 return err or be siliently ignored?
|
||||
} else {
|
||||
unsigned trueFlags = flags;
|
||||
if (flags == hipHostMallocDefault) {
|
||||
// HCC/ROCM provide a modern system with unified memory and should set both of these
|
||||
// flags by default:
|
||||
trueFlags = hipHostMallocMapped | hipHostMallocPortable;
|
||||
}
|
||||
|
||||
|
||||
const unsigned supportedFlags = hipHostMallocPortable | hipHostMallocMapped |
|
||||
hipHostMallocWriteCombined | hipHostMallocCoherent |
|
||||
hipHostMallocNonCoherent;
|
||||
|
||||
|
||||
const unsigned coherencyFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
|
||||
|
||||
if ((flags & ~supportedFlags) || ((flags & coherencyFlags) == coherencyFlags)) {
|
||||
*ptr = nullptr;
|
||||
// can't specify unsupported flags, can't specify both Coherent + NonCoherent
|
||||
hip_status = hipErrorInvalidValue;
|
||||
} else {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
#if (__hcc_workweek__ >= 19115)
|
||||
//Avoid mapping host pinned memory to all devices by HCC
|
||||
unsigned amFlags = amHostUnmapped;
|
||||
#else
|
||||
unsigned amFlags = 0;
|
||||
#endif
|
||||
if (flags & hipHostMallocCoherent) {
|
||||
amFlags |= amHostCoherent;
|
||||
} else if (flags & hipHostMallocNonCoherent) {
|
||||
amFlags |= amHostNonCoherent;
|
||||
} else {
|
||||
// depends on env variables:
|
||||
amFlags |= HIP_HOST_COHERENT ? amHostCoherent : amHostNonCoherent;
|
||||
}
|
||||
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr(
|
||||
(amFlags & amHostCoherent) ? "finegrained_host" : "pinned_host", sizeBytes, ctx,
|
||||
true /*shareWithAll*/, amFlags, flags, 0);
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags);
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = ihipHostMalloc(tls, ptr, sizeBytes, flags);
|
||||
hip_status = hip_internal::ihipHostMalloc(tls, ptr, sizeBytes, flags);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -384,7 +414,7 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
|
||||
if(flags != hipMemAttachGlobal)
|
||||
hip_status = hipErrorInvalidValue;
|
||||
else
|
||||
hip_status = ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault);
|
||||
hip_status = hip_internal::ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -2146,30 +2176,8 @@ hipError_t hipFree(void* ptr) {
|
||||
hipError_t hipHostFree(void* ptr) {
|
||||
HIP_INIT_SPECIAL_API(hipHostFree, (TRACE_MEM), ptr);
|
||||
|
||||
// Synchronize to ensure all work has finished.
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
|
||||
// for all activity to finish.
|
||||
|
||||
|
||||
hipError_t hipStatus = hipErrorInvalidValue;
|
||||
if (ptr) {
|
||||
hc::accelerator acc;
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr);
|
||||
if (status == AM_SUCCESS) {
|
||||
if (amPointerInfo._hostPointer == ptr) {
|
||||
hc::am_free(ptr);
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// free NULL pointer succeeds and is common technique to initialize runtime
|
||||
hipStatus = hipSuccess;
|
||||
}
|
||||
hipError_t hipStatus = hipSuccess;
|
||||
hipStatus = hip_internal::ihipHostFree(tls, ptr);
|
||||
|
||||
return ihipLogStatus(hipStatus);
|
||||
};
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user