From 2c84211b58a5f94cd7ae1499d27a45cf28f2a4e2 Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras Date: Thu, 1 Aug 2024 00:54:31 +0100 Subject: [PATCH] SWDEV-470372 - Added hipExtHostAlloc API This change adds a new HIP API `hipExtHostAlloc` which preserves the functionality of `hipHostMalloc`. Change-Id: I13504c6fc13465ddd7aed329795bb4f2fef1baff --- CHANGELOG.md | 1 + .../include/hip/amd_detail/hip_api_trace.hpp | 6 +++- hipamd/include/hip/amd_detail/hip_prof_str.h | 29 ++++++++++++++++++- hipamd/src/amdhip.def | 1 + hipamd/src/hip_api_trace.cpp | 7 +++-- hipamd/src/hip_hcc.map.in | 7 +++++ hipamd/src/hip_memory.cpp | 28 +++++++++++++----- hipamd/src/hip_table_interface.cpp | 3 ++ rocclr/utils/flags.hpp | 2 +- 9 files changed, 71 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94a8dea658..b0f0cf9fef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs - `hipDrvGraphAddMemFreeNode` creates a memory free node and adds it to a graph. - `hipDrvGraphExecMemcpyNodeSetParams` sets the parameters for a memcpy node in the given graphExec. - `hipDrvGraphExecMemsetNodeSetParams` sets the parameters for a memset node in the given graphExec. + - `hipExtHostAlloc` preserves the functionality of `hipHostMalloc`. ### Optimizations diff --git a/hipamd/include/hip/amd_detail/hip_api_trace.hpp b/hipamd/include/hip/amd_detail/hip_api_trace.hpp index 62443460c7..50ac4a234d 100644 --- a/hipamd/include/hip/amd_detail/hip_api_trace.hpp +++ b/hipamd/include/hip/amd_detail/hip_api_trace.hpp @@ -61,7 +61,7 @@ // - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases #define HIP_API_TABLE_STEP_VERSION 0 #define HIP_COMPILER_API_TABLE_STEP_VERSION 0 -#define HIP_RUNTIME_API_TABLE_STEP_VERSION 4 +#define HIP_RUNTIME_API_TABLE_STEP_VERSION 5 // HIP API interface typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem, @@ -998,6 +998,9 @@ typedef hipError_t (*t_hipDrvGraphMemcpyNodeGetParams)(hipGraphNode_t hNode, typedef hipError_t (*t_hipDrvGraphMemcpyNodeSetParams)(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams); +typedef hipError_t (*t_hipExtHostAlloc)(void **ptr, size_t size, + unsigned int flags); + // HIP Compiler dispatch table struct HipCompilerDispatchTable { size_t size; @@ -1480,4 +1483,5 @@ struct HipDispatchTable { t_hipExternalMemoryGetMappedMipmappedArray hipExternalMemoryGetMappedMipmappedArray_fn; t_hipDrvGraphMemcpyNodeGetParams hipDrvGraphMemcpyNodeGetParams_fn; t_hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeSetParams_fn; + t_hipExtHostAlloc hipExtHostAlloc_fn; }; diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index 3cf8291215..e7c67e5c54 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -424,7 +424,8 @@ enum hip_api_id_t { HIP_API_ID_hipMemcpyDtoA = 404, HIP_API_ID_hipMemcpyHtoAAsync = 405, HIP_API_ID_hipSetValidDevices = 406, - HIP_API_ID_LAST = 406, + HIP_API_ID_hipExtHostAlloc = 407, + HIP_API_ID_LAST = 407, HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice), HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties), @@ -671,6 +672,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer"; case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags"; case HIP_API_ID_hipHostMalloc: return "hipHostMalloc"; + case HIP_API_ID_hipExtHostAlloc: return "hipExtHostAlloc"; case HIP_API_ID_hipHostRegister: return "hipHostRegister"; case HIP_API_ID_hipHostUnregister: return "hipHostUnregister"; case HIP_API_ID_hipImportExternalMemory: return "hipImportExternalMemory"; @@ -1073,6 +1075,7 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipHostGetDevicePointer", name) == 0) return HIP_API_ID_hipHostGetDevicePointer; if (strcmp("hipHostGetFlags", name) == 0) return HIP_API_ID_hipHostGetFlags; if (strcmp("hipHostMalloc", name) == 0) return HIP_API_ID_hipHostMalloc; + if (strcmp("hipExtHostAlloc", name) == 0) return HIP_API_ID_hipExtHostAlloc; if (strcmp("hipHostRegister", name) == 0) return HIP_API_ID_hipHostRegister; if (strcmp("hipHostUnregister", name) == 0) return HIP_API_ID_hipHostUnregister; if (strcmp("hipImportExternalMemory", name) == 0) return HIP_API_ID_hipImportExternalMemory; @@ -2462,6 +2465,12 @@ typedef struct hip_api_data_s { size_t size; unsigned int flags; } hipHostMalloc; + struct { + void** ptr; + void* ptr__val; + size_t size; + unsigned int flags; + } hipExtHostAlloc; struct { void* hostPtr; size_t sizeBytes; @@ -4809,6 +4818,12 @@ typedef struct hip_api_data_s { cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \ cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \ }; +// hipExtHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipExtHostAlloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtHostAlloc.ptr = (void**)ptr; \ + cb_data.args.hipExtHostAlloc.size = (size_t)sizeBytes; \ + cb_data.args.hipExtHostAlloc.flags = (unsigned int)flags; \ +}; // hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] #define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \ @@ -6897,6 +6912,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { case HIP_API_ID_hipHostMalloc: if (data->args.hipHostMalloc.ptr) data->args.hipHostMalloc.ptr__val = *(data->args.hipHostMalloc.ptr); break; +// hipExtHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtHostAlloc: + if (data->args.hipExtHostAlloc.ptr) data->args.hipExtHostAlloc.ptr__val = *(data->args.hipExtHostAlloc.ptr); + break; // hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] case HIP_API_ID_hipHostRegister: break; @@ -9242,6 +9261,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.flags); oss << ")"; break; + case HIP_API_ID_hipExtHostAlloc: + oss << "hipExtHostAlloc("; + if (data->args.hipExtHostAlloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtHostAlloc.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtHostAlloc.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtHostAlloc.flags); + oss << ")"; + break; case HIP_API_ID_hipHostRegister: oss << "hipHostRegister("; oss << "hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.hostPtr); diff --git a/hipamd/src/amdhip.def b/hipamd/src/amdhip.def index 3b4836484f..a58fa31547 100644 --- a/hipamd/src/amdhip.def +++ b/hipamd/src/amdhip.def @@ -478,3 +478,4 @@ hipGraphNodeSetParams hipGraphExecNodeSetParams hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeGetParams +hipExtHostAlloc diff --git a/hipamd/src/hip_api_trace.cpp b/hipamd/src/hip_api_trace.cpp index 6bd03cf8ac..800fd804d2 100644 --- a/hipamd/src/hip_api_trace.cpp +++ b/hipamd/src/hip_api_trace.cpp @@ -347,6 +347,7 @@ hipError_t hipHostFree(void* ptr); hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); +hipError_t hipExtHostAlloc(void** ptr, size_t size, unsigned int flags); hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); hipError_t hipHostUnregister(void* hostPtr); hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, @@ -1026,6 +1027,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) { ptrDispatchTable->hipHostGetDevicePointer_fn = hip::hipHostGetDevicePointer; ptrDispatchTable->hipHostGetFlags_fn = hip::hipHostGetFlags; ptrDispatchTable->hipHostMalloc_fn = hip::hipHostMalloc; + ptrDispatchTable->hipExtHostAlloc_fn = hip::hipExtHostAlloc; ptrDispatchTable->hipHostRegister_fn = hip::hipHostRegister; ptrDispatchTable->hipHostUnregister_fn = hip::hipHostUnregister; ptrDispatchTable->hipImportExternalMemory_fn = hip::hipImportExternalMemory; @@ -1870,6 +1872,7 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipGraphExecNodeSetParams_fn, 457); HIP_ENFORCE_ABI(HipDispatchTable, hipExternalMemoryGetMappedMipmappedArray_fn, 458) HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeGetParams_fn, 459) HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460) +HIP_ENFORCE_ABI(HipDispatchTable, hipExtHostAlloc_fn, 461) // if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below // will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.: @@ -1877,9 +1880,9 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460) // HIP_ENFORCE_ABI(, , 8) // // HIP_ENFORCE_ABI_VERSIONING(
, 9) <- 8 + 1 = 9 -HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 461) +HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 462) -static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 4, +static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 5, "If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function " "pointers and then update this check so it is true"); #endif diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index 31ab71a7d7..a44d445bb9 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -578,3 +578,10 @@ global: local: *; } hip_6.1; + +hip_6.3 { +global: + hipExtHostAlloc; +local: + *; +} hip_6.2; diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 78ea87259f..7b02d61ac5 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -352,7 +352,7 @@ hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } *ptr = nullptr; - const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + const unsigned int coherentFlags = hipExtHostAllocCoherent | hipExtHostAllocNonCoherent; // can't have both Coherent and NonCoherent flags set at the same time if ((flags & coherentFlags) == coherentFlags) { @@ -365,16 +365,16 @@ hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER; if (flags == 0 || - flags & (hipHostMallocCoherent | hipHostMallocMapped | hipHostMallocNumaUser) || - (!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) { + flags & (hipExtHostAllocCoherent | hipHostAllocMapped | hipExtHostAllocNumaUser) || + (!(flags & hipExtHostAllocNonCoherent) && HIP_HOST_COHERENT)) { ihipFlags |= CL_MEM_SVM_ATOMICS; } - if (flags & hipHostMallocNumaUser) { + if (flags & hipExtHostAllocNumaUser) { ihipFlags |= CL_MEM_FOLLOW_USER_NUMA_POLICY; } - if (flags & hipHostMallocNonCoherent) { + if (flags & hipExtHostAllocNonCoherent) { ihipFlags &= ~CL_MEM_SVM_ATOMICS; } @@ -679,6 +679,16 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_RETURN_DURATION(status, *ptr); } +hipError_t hipExtHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipExtHostAlloc, ptr, sizeBytes, flags); + CHECK_STREAM_CAPTURE_SUPPORTED(); + if (ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hipError_t status = ihipHostMalloc(ptr, sizeBytes, flags); + HIP_RETURN_DURATION(status, *ptr); +} + hipError_t hipFree(void* ptr) { HIP_INIT_API(hipFree, ptr); CHECK_STREAM_CAPTURE_SUPPORTED(); @@ -1222,7 +1232,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_RETURN(hipErrorInvalidValue); } - // To match with Nvidia behaviour validate that hostPtr passed was allocated using hipHostMalloc(), and not hipMalloc() + // To match with Nvidia behaviour validate that hostPtr passed + // was allocated using hipHostAlloc(), and not hipMalloc() if (!(svmMem->getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1314,11 +1325,12 @@ hipError_t hipHostUnregister(void* hostPtr) { hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hipHostAlloc, ptr, sizeBytes, flags); CHECK_STREAM_CAPTURE_SUPPORTED(); + if (ptr == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - if (flags > (hipHostMallocPortable | hipHostMallocMapped | - hipHostMallocWriteCombined)) { + if (flags > (hipHostAllocPortable | hipHostAllocMapped | + hipHostAllocWriteCombined)) { HIP_RETURN(hipErrorInvalidValue); } diff --git a/hipamd/src/hip_table_interface.cpp b/hipamd/src/hip_table_interface.cpp index 2553038271..12f6ae14a7 100644 --- a/hipamd/src/hip_table_interface.cpp +++ b/hipamd/src/hip_table_interface.cpp @@ -1805,3 +1805,6 @@ hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* no hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams) { return hip::GetHipDispatchTable()->hipDrvGraphMemcpyNodeSetParams_fn(hNode, nodeParams); } +hipError_t hipExtHostAlloc(void** ptr, size_t size, unsigned int flags) { + return hip::GetHipDispatchTable()->hipExtHostAlloc_fn(ptr, size, flags); +} diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index fa7c8ee2cb..5fb7430044 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -172,7 +172,7 @@ release(uint, HIP_LAUNCH_BLOCKING, 0, \ release(bool, PAL_ALWAYS_RESIDENT, false, \ "Force memory resources to become resident at allocation time") \ release(uint, HIP_HOST_COHERENT, 0, \ - "Coherent memory in hipHostMalloc, 0x1 = memory is coherent with host"\ + "Coherent memory in hipExtHostAlloc, 0x1 = memory is coherent with host"\ "0x0 = memory is not coherent between host and GPU") \ release(uint, AMD_OPT_FLUSH, 1, \ "Kernel flush option , 0x0 = Use system-scope fence operations." \