SWDEV-470372 - Added hipExtHostAlloc API
This change adds a new HIP API `hipExtHostAlloc` which preserves the functionality of `hipHostMalloc`. Change-Id: I13504c6fc13465ddd7aed329795bb4f2fef1baff
This commit is contained in:
@@ -19,6 +19,7 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
|
||||
- `hipDrvGraphAddMemFreeNode` creates a memory free node and adds it to a graph.
|
||||
- `hipDrvGraphExecMemcpyNodeSetParams` sets the parameters for a memcpy node in the given graphExec.
|
||||
- `hipDrvGraphExecMemsetNodeSetParams` sets the parameters for a memset node in the given graphExec.
|
||||
- `hipExtHostAlloc` preserves the functionality of `hipHostMalloc`.
|
||||
|
||||
### Optimizations
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
// - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases
|
||||
#define HIP_API_TABLE_STEP_VERSION 0
|
||||
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
|
||||
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 4
|
||||
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 5
|
||||
|
||||
// HIP API interface
|
||||
typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
|
||||
@@ -998,6 +998,9 @@ typedef hipError_t (*t_hipDrvGraphMemcpyNodeGetParams)(hipGraphNode_t hNode,
|
||||
typedef hipError_t (*t_hipDrvGraphMemcpyNodeSetParams)(hipGraphNode_t hNode,
|
||||
const HIP_MEMCPY3D* nodeParams);
|
||||
|
||||
typedef hipError_t (*t_hipExtHostAlloc)(void **ptr, size_t size,
|
||||
unsigned int flags);
|
||||
|
||||
// HIP Compiler dispatch table
|
||||
struct HipCompilerDispatchTable {
|
||||
size_t size;
|
||||
@@ -1480,4 +1483,5 @@ struct HipDispatchTable {
|
||||
t_hipExternalMemoryGetMappedMipmappedArray hipExternalMemoryGetMappedMipmappedArray_fn;
|
||||
t_hipDrvGraphMemcpyNodeGetParams hipDrvGraphMemcpyNodeGetParams_fn;
|
||||
t_hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeSetParams_fn;
|
||||
t_hipExtHostAlloc hipExtHostAlloc_fn;
|
||||
};
|
||||
|
||||
@@ -424,7 +424,8 @@ enum hip_api_id_t {
|
||||
HIP_API_ID_hipMemcpyDtoA = 404,
|
||||
HIP_API_ID_hipMemcpyHtoAAsync = 405,
|
||||
HIP_API_ID_hipSetValidDevices = 406,
|
||||
HIP_API_ID_LAST = 406,
|
||||
HIP_API_ID_hipExtHostAlloc = 407,
|
||||
HIP_API_ID_LAST = 407,
|
||||
|
||||
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
|
||||
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
|
||||
@@ -671,6 +672,7 @@ static inline const char* hip_api_name(const uint32_t id) {
|
||||
case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer";
|
||||
case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags";
|
||||
case HIP_API_ID_hipHostMalloc: return "hipHostMalloc";
|
||||
case HIP_API_ID_hipExtHostAlloc: return "hipExtHostAlloc";
|
||||
case HIP_API_ID_hipHostRegister: return "hipHostRegister";
|
||||
case HIP_API_ID_hipHostUnregister: return "hipHostUnregister";
|
||||
case HIP_API_ID_hipImportExternalMemory: return "hipImportExternalMemory";
|
||||
@@ -1073,6 +1075,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
|
||||
if (strcmp("hipHostGetDevicePointer", name) == 0) return HIP_API_ID_hipHostGetDevicePointer;
|
||||
if (strcmp("hipHostGetFlags", name) == 0) return HIP_API_ID_hipHostGetFlags;
|
||||
if (strcmp("hipHostMalloc", name) == 0) return HIP_API_ID_hipHostMalloc;
|
||||
if (strcmp("hipExtHostAlloc", name) == 0) return HIP_API_ID_hipExtHostAlloc;
|
||||
if (strcmp("hipHostRegister", name) == 0) return HIP_API_ID_hipHostRegister;
|
||||
if (strcmp("hipHostUnregister", name) == 0) return HIP_API_ID_hipHostUnregister;
|
||||
if (strcmp("hipImportExternalMemory", name) == 0) return HIP_API_ID_hipImportExternalMemory;
|
||||
@@ -2462,6 +2465,12 @@ typedef struct hip_api_data_s {
|
||||
size_t size;
|
||||
unsigned int flags;
|
||||
} hipHostMalloc;
|
||||
struct {
|
||||
void** ptr;
|
||||
void* ptr__val;
|
||||
size_t size;
|
||||
unsigned int flags;
|
||||
} hipExtHostAlloc;
|
||||
struct {
|
||||
void* hostPtr;
|
||||
size_t sizeBytes;
|
||||
@@ -4809,6 +4818,12 @@ typedef struct hip_api_data_s {
|
||||
cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \
|
||||
cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \
|
||||
};
|
||||
// hipExtHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
|
||||
#define INIT_hipExtHostAlloc_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipExtHostAlloc.ptr = (void**)ptr; \
|
||||
cb_data.args.hipExtHostAlloc.size = (size_t)sizeBytes; \
|
||||
cb_data.args.hipExtHostAlloc.flags = (unsigned int)flags; \
|
||||
};
|
||||
// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
|
||||
#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \
|
||||
@@ -6897,6 +6912,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
case HIP_API_ID_hipHostMalloc:
|
||||
if (data->args.hipHostMalloc.ptr) data->args.hipHostMalloc.ptr__val = *(data->args.hipHostMalloc.ptr);
|
||||
break;
|
||||
// hipExtHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')]
|
||||
case HIP_API_ID_hipExtHostAlloc:
|
||||
if (data->args.hipExtHostAlloc.ptr) data->args.hipExtHostAlloc.ptr__val = *(data->args.hipExtHostAlloc.ptr);
|
||||
break;
|
||||
// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')]
|
||||
case HIP_API_ID_hipHostRegister:
|
||||
break;
|
||||
@@ -9242,6 +9261,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
||||
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.flags);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipExtHostAlloc:
|
||||
oss << "hipExtHostAlloc(";
|
||||
if (data->args.hipExtHostAlloc.ptr == NULL) oss << "ptr=NULL";
|
||||
else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtHostAlloc.ptr__val); }
|
||||
oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtHostAlloc.size);
|
||||
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtHostAlloc.flags);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipHostRegister:
|
||||
oss << "hipHostRegister(";
|
||||
oss << "hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.hostPtr);
|
||||
|
||||
@@ -478,3 +478,4 @@ hipGraphNodeSetParams
|
||||
hipGraphExecNodeSetParams
|
||||
hipDrvGraphMemcpyNodeSetParams
|
||||
hipDrvGraphMemcpyNodeGetParams
|
||||
hipExtHostAlloc
|
||||
|
||||
@@ -347,6 +347,7 @@ hipError_t hipHostFree(void* ptr);
|
||||
hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags);
|
||||
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr);
|
||||
hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags);
|
||||
hipError_t hipExtHostAlloc(void** ptr, size_t size, unsigned int flags);
|
||||
hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags);
|
||||
hipError_t hipHostUnregister(void* hostPtr);
|
||||
hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out,
|
||||
@@ -1026,6 +1027,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
|
||||
ptrDispatchTable->hipHostGetDevicePointer_fn = hip::hipHostGetDevicePointer;
|
||||
ptrDispatchTable->hipHostGetFlags_fn = hip::hipHostGetFlags;
|
||||
ptrDispatchTable->hipHostMalloc_fn = hip::hipHostMalloc;
|
||||
ptrDispatchTable->hipExtHostAlloc_fn = hip::hipExtHostAlloc;
|
||||
ptrDispatchTable->hipHostRegister_fn = hip::hipHostRegister;
|
||||
ptrDispatchTable->hipHostUnregister_fn = hip::hipHostUnregister;
|
||||
ptrDispatchTable->hipImportExternalMemory_fn = hip::hipImportExternalMemory;
|
||||
@@ -1870,6 +1872,7 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipGraphExecNodeSetParams_fn, 457);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipExternalMemoryGetMappedMipmappedArray_fn, 458)
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeGetParams_fn, 459)
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460)
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipExtHostAlloc_fn, 461)
|
||||
|
||||
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
|
||||
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
|
||||
@@ -1877,9 +1880,9 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460)
|
||||
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
|
||||
//
|
||||
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
|
||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 461)
|
||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 462)
|
||||
|
||||
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 4,
|
||||
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 5,
|
||||
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
|
||||
"pointers and then update this check so it is true");
|
||||
#endif
|
||||
|
||||
@@ -578,3 +578,10 @@ global:
|
||||
local:
|
||||
*;
|
||||
} hip_6.1;
|
||||
|
||||
hip_6.3 {
|
||||
global:
|
||||
hipExtHostAlloc;
|
||||
local:
|
||||
*;
|
||||
} hip_6.2;
|
||||
|
||||
@@ -352,7 +352,7 @@ hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
}
|
||||
|
||||
*ptr = nullptr;
|
||||
const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent;
|
||||
const unsigned int coherentFlags = hipExtHostAllocCoherent | hipExtHostAllocNonCoherent;
|
||||
|
||||
// can't have both Coherent and NonCoherent flags set at the same time
|
||||
if ((flags & coherentFlags) == coherentFlags) {
|
||||
@@ -365,16 +365,16 @@ hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
|
||||
unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER;
|
||||
if (flags == 0 ||
|
||||
flags & (hipHostMallocCoherent | hipHostMallocMapped | hipHostMallocNumaUser) ||
|
||||
(!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) {
|
||||
flags & (hipExtHostAllocCoherent | hipHostAllocMapped | hipExtHostAllocNumaUser) ||
|
||||
(!(flags & hipExtHostAllocNonCoherent) && HIP_HOST_COHERENT)) {
|
||||
ihipFlags |= CL_MEM_SVM_ATOMICS;
|
||||
}
|
||||
|
||||
if (flags & hipHostMallocNumaUser) {
|
||||
if (flags & hipExtHostAllocNumaUser) {
|
||||
ihipFlags |= CL_MEM_FOLLOW_USER_NUMA_POLICY;
|
||||
}
|
||||
|
||||
if (flags & hipHostMallocNonCoherent) {
|
||||
if (flags & hipExtHostAllocNonCoherent) {
|
||||
ihipFlags &= ~CL_MEM_SVM_ATOMICS;
|
||||
}
|
||||
|
||||
@@ -679,6 +679,16 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
HIP_RETURN_DURATION(status, *ptr);
|
||||
}
|
||||
|
||||
hipError_t hipExtHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
HIP_INIT_API(hipExtHostAlloc, ptr, sizeBytes, flags);
|
||||
CHECK_STREAM_CAPTURE_SUPPORTED();
|
||||
if (ptr == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
hipError_t status = ihipHostMalloc(ptr, sizeBytes, flags);
|
||||
HIP_RETURN_DURATION(status, *ptr);
|
||||
}
|
||||
|
||||
hipError_t hipFree(void* ptr) {
|
||||
HIP_INIT_API(hipFree, ptr);
|
||||
CHECK_STREAM_CAPTURE_SUPPORTED();
|
||||
@@ -1222,7 +1232,8 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// To match with Nvidia behaviour validate that hostPtr passed was allocated using hipHostMalloc(), and not hipMalloc()
|
||||
// To match with Nvidia behaviour validate that hostPtr passed
|
||||
// was allocated using hipHostAlloc(), and not hipMalloc()
|
||||
if (!(svmMem->getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1314,11 +1325,12 @@ hipError_t hipHostUnregister(void* hostPtr) {
|
||||
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
HIP_INIT_API(hipHostAlloc, ptr, sizeBytes, flags);
|
||||
CHECK_STREAM_CAPTURE_SUPPORTED();
|
||||
|
||||
if (ptr == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
if (flags > (hipHostMallocPortable | hipHostMallocMapped |
|
||||
hipHostMallocWriteCombined)) {
|
||||
if (flags > (hipHostAllocPortable | hipHostAllocMapped |
|
||||
hipHostAllocWriteCombined)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
|
||||
@@ -1805,3 +1805,6 @@ hipError_t hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* no
|
||||
hipError_t hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams) {
|
||||
return hip::GetHipDispatchTable()->hipDrvGraphMemcpyNodeSetParams_fn(hNode, nodeParams);
|
||||
}
|
||||
hipError_t hipExtHostAlloc(void** ptr, size_t size, unsigned int flags) {
|
||||
return hip::GetHipDispatchTable()->hipExtHostAlloc_fn(ptr, size, flags);
|
||||
}
|
||||
|
||||
@@ -172,7 +172,7 @@ release(uint, HIP_LAUNCH_BLOCKING, 0, \
|
||||
release(bool, PAL_ALWAYS_RESIDENT, false, \
|
||||
"Force memory resources to become resident at allocation time") \
|
||||
release(uint, HIP_HOST_COHERENT, 0, \
|
||||
"Coherent memory in hipHostMalloc, 0x1 = memory is coherent with host"\
|
||||
"Coherent memory in hipExtHostAlloc, 0x1 = memory is coherent with host"\
|
||||
"0x0 = memory is not coherent between host and GPU") \
|
||||
release(uint, AMD_OPT_FLUSH, 1, \
|
||||
"Kernel flush option , 0x0 = Use system-scope fence operations." \
|
||||
|
||||
Reference in New Issue
Block a user