diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp b/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp index 797677115b..850252b400 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp @@ -63,7 +63,7 @@ #define HIP_API_TABLE_STEP_VERSION 0 #define HIP_COMPILER_API_TABLE_STEP_VERSION 0 #define HIP_TOOLS_API_TABLE_STEP_VERSION 0 -#define HIP_RUNTIME_API_TABLE_STEP_VERSION 11 +#define HIP_RUNTIME_API_TABLE_STEP_VERSION 12 // HIP API interface // HIP compiler dispatch functions @@ -1041,6 +1041,11 @@ typedef hipError_t (*t_hipLaunchKernelExC)(const hipLaunchConfig_t* config, cons typedef hipError_t (*t_hipDrvLaunchKernelEx)(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params, void** extra); +typedef hipError_t (*t_hipMemGetHandleForAddressRange)(void* handle, hipDeviceptr_t dptr, + size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags); + // HIP Compiler dispatch table struct HipCompilerDispatchTable { // HIP_COMPILER_API_TABLE_STEP_VERSION == 0 @@ -1579,8 +1584,10 @@ struct HipDispatchTable { t_hipLaunchKernelExC hipLaunchKernelExC_fn; t_hipDrvLaunchKernelEx hipDrvLaunchKernelEx_fn; + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 12 + t_hipMemGetHandleForAddressRange hipMemGetHandleForAddressRange_fn; // DO NOT EDIT ABOVE!
- // HIP_RUNTIME_API_TABLE_STEP_VERSION == 11 + // HIP_RUNTIME_API_TABLE_STEP_VERSION == 12 // ******************************************************************************************* // // diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h index 8357ef3c48..3994975ba3 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -456,6 +456,7 @@ enum hip_api_id_t { HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NONE, HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NONE, HIP_API_ID_hipGetTextureReference = HIP_API_ID_NONE, + HIP_API_ID_hipMemGetHandleForAddressRange = HIP_API_ID_NONE, HIP_API_ID_hipTexObjectCreate = HIP_API_ID_NONE, HIP_API_ID_hipTexObjectDestroy = HIP_API_ID_NONE, HIP_API_ID_hipTexObjectGetResourceDesc = HIP_API_ID_NONE, @@ -6264,6 +6265,8 @@ typedef struct hip_api_data_s { #define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {}; // hipGetTextureReference() #define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {}; +// hipMemGetHandleForAddressRange() +#define INIT_hipMemGetHandleForAddressRange_CB_ARGS_DATA(cb_data) {}; // hipTexObjectCreate() #define INIT_hipTexObjectCreate_CB_ARGS_DATA(cb_data) {}; // hipTexObjectDestroy() diff --git a/projects/clr/hipamd/src/hip_api_trace.cpp b/projects/clr/hipamd/src/hip_api_trace.cpp index 6f225ed897..59992f5241 100644 --- a/projects/clr/hipamd/src/hip_api_trace.cpp +++ b/projects/clr/hipamd/src/hip_api_trace.cpp @@ -826,6 +826,9 @@ hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream, unsigne hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args); hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params, void** extra); +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, +
hipMemRangeHandleType handleType, + unsigned long long flags); } // namespace hip @@ -1341,6 +1344,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) { ptrDispatchTable->hipEventRecordWithFlags_fn = hip::hipEventRecordWithFlags; ptrDispatchTable->hipLaunchKernelExC_fn = hip::hipLaunchKernelExC; ptrDispatchTable->hipDrvLaunchKernelEx_fn = hip::hipDrvLaunchKernelEx; + ptrDispatchTable->hipMemGetHandleForAddressRange_fn = hip::hipMemGetHandleForAddressRange; } #if HIP_ROCPROFILER_REGISTER > 0 @@ -1981,15 +1985,17 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipLinkDestroy_fn , 473) // HIP_RUNTIME_API_TABLE_STEP_VERSION == 11 HIP_ENFORCE_ABI(HipDispatchTable, hipLaunchKernelExC_fn, 474); HIP_ENFORCE_ABI(HipDispatchTable, hipDrvLaunchKernelEx_fn, 475); +// HIP_RUNTIME_API_TABLE_STEP_VERSION == 12 +HIP_ENFORCE_ABI(HipDispatchTable, hipMemGetHandleForAddressRange_fn, 476); // if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below // will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.: // // HIP_ENFORCE_ABI(<table>, <functor>, 8) // // HIP_ENFORCE_ABI_VERSIONING(<table>
, 9) <- 8 + 1 = 9 -HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 476) +HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 477) -static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 11, +static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 12, "If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function " "pointers and then update this check so it is true"); #endif diff --git a/projects/clr/hipamd/src/hip_hcc.map.in b/projects/clr/hipamd/src/hip_hcc.map.in index d79cd16a5f..d8a3077fff 100644 --- a/projects/clr/hipamd/src/hip_hcc.map.in +++ b/projects/clr/hipamd/src/hip_hcc.map.in @@ -601,6 +601,7 @@ hip_6.5 { global: hipLaunchKernelExC; hipDrvLaunchKernelEx; + hipMemGetHandleForAddressRange; local: *; } hip_6.4; diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index a5144c3432..b2a59273fe 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -19,6 +19,7 @@ THE SOFTWARE. */ #include +#include "device.hpp" #include "hip/driver_types.h" #include "hip_internal.hpp" #include "hip_platform.hpp" @@ -4279,4 +4280,27 @@ hipError_t hipExternalMemoryGetMappedMipmappedArray( HIP_RETURN(ihipMipmapArrayCreate(mipmap, &allocateArray, mipmapDesc->numLevels, (size_t)mipmapDesc->offset, buf)); } -} // namespace hip + +// Exports the device address range starting at dptr as a shareable handle. Only +// hipMemRangeHandleTypeDmaBufFd is accepted; the resulting file descriptor is written to the int +// that handle points to. Returns hipErrorInvalidValue on bad arguments or export failure. +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags) { + HIP_INIT_API(hipMemGetHandleForAddressRange, handle, dptr, size, handleType, flags); + + // Validate the arguments. We do not support any flags at this time.
+ if (handle == nullptr || dptr == nullptr || size == 0 || + handleType != hipMemRangeHandleTypeDmaBufFd || flags != 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + if (!device->GetHandleForAddressRange(dptr, size, handle)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +} // namespace hip diff --git a/projects/clr/hipamd/src/hip_table_interface.cpp b/projects/clr/hipamd/src/hip_table_interface.cpp index bfbde80d5a..7749c1f233 100644 --- a/projects/clr/hipamd/src/hip_table_interface.cpp +++ b/projects/clr/hipamd/src/hip_table_interface.cpp @@ -1877,4 +1877,11 @@ hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** kernel, void** extra) { return hip::GetHipDispatchTable()->hipDrvLaunchKernelEx_fn(config, f, kernel, extra); +} + +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, + hipMemRangeHandleType handleType, + unsigned long long flags) { + return hip::GetHipDispatchTable()->hipMemGetHandleForAddressRange_fn(handle, dptr, size, + handleType, flags); } \ No newline at end of file diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index eca7b55168..c6290b4417 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -1126,6 +1126,32 @@ std::vector Device::getActiveQueues() { } return std::vector(activeQueues.begin(), activeQueues.end()); } + +// ================================================================================================= +// Exports the allocation backing dev_ptr as a shareable handle written through handle. Pointers +// mapped via the VMM APIs take a separate ROCr export path. Returns false on any failure. +bool Device::GetHandleForAddressRange(void* dev_ptr, size_t size, void* handle) { + // Check if the ptr is created through VMM APIs, if true we use different ROCr APIs. + amd::Memory* amd_base_obj = amd::MemObjMap::FindVirtualMemObj(dev_ptr); + const bool is_vmm_ptr =
(amd_base_obj != nullptr); + + // Even if it is VMM ptr, check to make sure the memory is mapped. On hipMalloc'ed ptrs, + // make sure the memory is allocated. + amd::Memory* amd_mem_obj = amd::MemObjMap::FindMemObj(dev_ptr); + if (amd_mem_obj == nullptr) { + DevLogPrintfError("Cannot retrieve amd_mem_obj for dev_ptr: %p", dev_ptr); + return false; + } + + device::Memory* dev_mem = amd_mem_obj->getDeviceMemory(*this); + if (dev_mem == nullptr) { + DevLogPrintfError("Cannot retrieve device memory for dev_ptr: %p", dev_ptr); + return false; + } + + return dev_mem->GetFDHandleForMem(dev_ptr, size, is_vmm_ptr, handle); +} + } // namespace amd namespace amd::device { diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 6372d571ad..10fd8e45f0 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -952,6 +952,9 @@ class Memory : public amd::HeapObject { //! Get current access of the memory in device. MemAccess GetAccess() const { return memAccess_; } + //! Retrieves a shareable handle for a device address range (hipMalloc'ed or VMM-mapped). + virtual bool GetFDHandleForMem(void* dev_ptr, size_t size, bool vmm, void* handle) { return false; } + protected: enum Flags { HostMemoryDirectAccess = 0x00000001, //!< GPU has direct access to the host memory @@ -2183,6 +2186,9 @@ class Device : public RuntimeObject { static bool IsGPUInError() { return (gpu_error_ != CL_SUCCESS); } static cl_int GetGPUError() { return gpu_error_; } + //! Retrieves a shareable handle for the allocation backing dev_ptr; returns false on failure. + bool GetHandleForAddressRange(void* dev_ptr, size_t size, void* handle); + protected: //!
Enable the specified extension char* getExtensionString(); diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp index bee2110c43..7a39d2a55f 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.cpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp @@ -1049,6 +1049,62 @@ bool Buffer::ExportHandle(void* handle) const { return true; } +// ================================================================================================ +// Exports the memory backing dev_ptr as a dma-buf file descriptor and stores it in the int that +// handle points to. vmm selects the virtual-memory export path. Returns false on failure. +bool Buffer::GetFDHandleForMem(void* dev_ptr, size_t size, bool vmm, void* handle) { + int dmabuffd = -1; + size_t offset = 0; + + // In case of vmm, we use a different set of APIs for retrieving the dmabuffd. + if (vmm) { + hsa_amd_vmem_alloc_handle_t mem_handle; + + // Retrieve the corresponding phys_mem handle for the mapped dev_ptr. + hsa_status_t hsa_status = hsa_amd_vmem_retain_alloc_handle(&mem_handle, dev_ptr); + if (hsa_status != HSA_STATUS_SUCCESS) { + LogPrintfError("Cannot retain alloc handle for dev_ptr: %p hsa returned status: %d", + dev_ptr, hsa_status); + return false; + } + + // Now, retrieve the shareable handle (fd in linux) for the phys_mem handle. + hsa_status = hsa_amd_vmem_export_shareable_handle(&dmabuffd, mem_handle, 0); + + // Balance the retain above whether or not the export succeeded; otherwise the reference taken + // on the allocation handle is never dropped. + if (hsa_amd_vmem_handle_release(mem_handle) != HSA_STATUS_SUCCESS) { + LogPrintfError("Cannot release alloc handle for dev_ptr: %p", dev_ptr); + } + + if (hsa_status != HSA_STATUS_SUCCESS) { + LogPrintfError("Cannot get shareable handle for mem_handle: %llu, hsa returned status: %d", + static_cast<unsigned long long>(mem_handle.handle), hsa_status); + return false; + } + } else { + // Retrieve a shareable handle for the device ptr.
+ // NOTE(review): offset (where dev_ptr sits inside the exported dma-buf) is not returned to the + // caller -- confirm interior pointers are never passed here. + hsa_status_t hsa_status = hsa_amd_portable_export_dmabuf(dev_ptr, size, &dmabuffd, &offset); + if (hsa_status != HSA_STATUS_SUCCESS) { + LogPrintfError("Cannot export a portable fd for dev_ptr: %p with size: %zu, " + "hsa returned status: %d", dev_ptr, size, hsa_status); + return false; + } + } + + // 0 is a valid file descriptor; only a negative value means no fd was produced. + if (dmabuffd < 0) { + LogPrintfError("Invalid file descriptor handle: %d returned", dmabuffd); + return false; + } + + // As per spec, handle passed through HIP API is ptr to int. + *(static_cast<int*>(handle)) = dmabuffd; + return true; +} + // ======================================= roc::Image ============================================= typedef struct ChannelOrderMap { uint32_t cl_channel_order; diff --git a/projects/clr/rocclr/device/rocm/rocmemory.hpp b/projects/clr/rocclr/device/rocm/rocmemory.hpp index 03d434d8f9..262be44457 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.hpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.hpp @@ -173,6 +173,9 @@ class Buffer : public roc::Memory { virtual bool ExportHandle(void* handle) const final; + // Exports the memory backing dev_ptr as a dma-buf fd written through handle. + virtual bool GetFDHandleForMem(void* dev_ptr, size_t size, bool vmm, void* handle) final; + // Recreate the device memory using new size and alignment. bool recreate(size_t newSize, size_t newAlignment, bool forceSystem);