diff --git a/libdrm.cpp b/libdrm.cpp index 77f455b570..b16907ba40 100644 --- a/libdrm.cpp +++ b/libdrm.cpp @@ -67,3 +67,68 @@ HSAKMTAPI int hsaKmtamdgpu_query_gpu_info(void *dev, info->gpu_counter_freq = pDevice->GPUCounterFrequency() / 1000ull; return 0; } + +/* Import a DMA-buf through hsaKmtImportDMABufHandle and return it as a buffer object handle. */ +/* dev and type are not used. Returns 0 on success, -1 on failure or when output is null. */ +HSAKMTAPI int hsaKmtamdgpu_bo_import(amdgpu_device_handle dev, + enum amdgpu_bo_handle_type type, + uint32_t shared_handle, + struct amdgpu_bo_import_result *output) { + if (output == nullptr) + return -1; + + void *MemoryAddress = nullptr; + HSAKMT_STATUS ret = hsaKmtImportDMABufHandle(shared_handle, &MemoryAddress); + if (ret != HSAKMT_STATUS_SUCCESS) + return -1; + + /* The address returned by hsaKmtImportDMABufHandle becomes the drm buf handle. */ + /* NOTE(review): hsaKmtamdgpu_bo_va_op casts this handle to wsl::thunk::GpuMemory*; confirm the import returns the object's address. */ + output->buf_handle = reinterpret_cast(MemoryAddress); + return 0; +} + + +/* Map or unmap a buffer object at a GPU virtual address. */ +/* Handles AMDGPU_VA_OP_MAP and AMDGPU_VA_OP_UNMAP only; flags is not used. */ +/* Returns 0 on success, -1 on failure, on a null bo, or on any other ops value. */ +HSAKMTAPI int hsaKmtamdgpu_bo_va_op(amdgpu_bo_handle bo, + uint64_t offset, + uint64_t size, + uint64_t addr, + uint64_t flags, + uint32_t ops) { + /* The bo handle is treated as a pointer to the GpuMemory object. */ + wsl::thunk::GpuMemory *gpu_mem = reinterpret_cast(bo); + /* Checked at run time because assert() is compiled out under NDEBUG. */ + if (gpu_mem == nullptr) + return -1; + + switch(ops) { + case AMDGPU_VA_OP_MAP: + { + auto code = gpu_mem->MapGpuVirtualAddress(reinterpret_cast(addr), size, offset); + if (code != ErrorCode::Success) + return -1; + + /* A mapped buffer is also made resident. */ + code = gpu_mem->MakeResident(); + if (code != ErrorCode::Success) + return -1; + } + break; + case AMDGPU_VA_OP_UNMAP: + { + auto code = gpu_mem->UnmapGpuVirtualAddress(reinterpret_cast(addr), size, offset); + if (code != ErrorCode::Success) + return -1; + /* The result of Evict() is not checked. */ + gpu_mem->Evict(); + } + break; + default: + /* Unsupported operation: report failure instead of returning success. */ + return -1; + } + return 0; +} diff --git a/libhsakmt.ver b/libhsakmt.ver index a203bbc90a..dbc8abf336 100644 --- a/libhsakmt.ver +++ b/libhsakmt.ver @@ -78,6 +78,7 @@ hsaKmtSetXNACKMode; hsaKmtGetXNACKMode; hsaKmtOpenSMI; hsaKmtExportDMABufHandle; +hsaKmtImportDMABufHandle; hsaKmtWaitOnEvent_Ext; hsaKmtWaitOnMultipleEvents_Ext; hsaKmtReplaceAsanHeaderPage; @@ -92,6 +93,9 @@ hsaKmtPcSamplingSupport; hsaKmtGetVersionCapInfo; hsaKmtQueueRingDoorbell; hsaKmtamdgpu_query_gpu_info; 
+hsaKmtamdgpu_bo_import; +hsaKmtamdgpu_bo_va_op; + local: *; }; diff --git a/memory.cpp b/memory.cpp index e5f3c7a1c8..acfc3b6d2f 100644 --- a/memory.cpp +++ b/memory.cpp @@ -145,7 +145,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode, if (MemFlags.ui32.Scratch && MemFlags.ui32.HostAccess && SizeInBytes > 0x80000000) MemFlags.ui32.OnlyAddress = 1; - if (!MemFlags.ui32.NonPaged || zfb_support || MemFlags.ui32.GTTAccess) { + create_info.alignment = Alignment; + create_info.va_hint = reinterpret_cast(*MemoryAddress); + if ((PreferredNode == 0 && !MemFlags.ui32.NonPaged) + || zfb_support || MemFlags.ui32.GTTAccess) { /* If allocate VRAM under ZFB mode */ if (zfb_support && MemFlags.ui32.NonPaged == 1) MemFlags.ui32.CoarseGrain = 1; @@ -165,12 +168,22 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode, // create_info.mem_flags |= rocr_proxy::kKernarg; create_info.flags.physical_only = MemFlags.ui32.NoAddress; create_info.flags.interprocess = MemFlags.ui32.NoAddress; - create_info.flags.locked = 0; //!!(alloc_flags & AllocatePinned); + create_info.flags.locked = MemFlags.ui32.NoSubstitute;//AllocatePinned + create_info.flags.virtual_alloc = MemFlags.ui32.OnlyAddress; + /* A virtual-only or physical-only request is a VMM allocation; force the local domain. */ + if (create_info.flags.virtual_alloc || create_info.flags.physical_only) + create_info.domain = rocr_proxy::AllocDomain::kLocal; auto code = dev->CreateGpuMemory(create_info, &gpu_mem); if (code == ErrorCode::Success) { - *MemoryAddress = reinterpret_cast(gpu_mem->GpuAddress()); std::lock_guard gard(*allocation_map_lock_); + + /* For physical-only and dmabuf-backed allocations, use the GpuMemory object's address as the thunk handle. */ + if (create_info.flags.physical_only || create_info.dmabuf_fd > 0) + *MemoryAddress = reinterpret_cast(gpu_mem); + else + *MemoryAddress = reinterpret_cast(gpu_mem->GpuAddress()); + allocation_map_[*MemoryAddress] = Allocation( gpu_mem->GetGpuMemoryHandle(), 
*MemoryAddress, (uint64_t)*MemoryAddress, create_info.size, false, nullptr, SizeInBytes, @@ -261,26 +274,89 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags( return HSAKMT_STATUS_SUCCESS; } -HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes( - HSAuint64 GraphicsResourceHandle, - HsaGraphicsResourceInfo *GraphicsResourceInfo, HSAuint64 NumberOfNodes, - HSAuint32 *NodeArray) { +/* Register a graphics resource by importing GraphicsResourceHandle as a DMA-buf through hsaKmtImportDMABufHandle. */ +/* NumberOfNodes is only logged and NodeArray is not used. */ +HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle, + HsaGraphicsResourceInfo *GraphicsResourceInfo, + HSAuint64 NumberOfNodes, + HSAuint32 *NodeArray) { CHECK_DXG_OPEN(); - assert(false); - return HSAKMT_STATUS_SUCCESS; + pr_debug("[%s] number of nodes %lu\n", __func__, NumberOfNodes); + + if (GraphicsResourceInfo == nullptr) + return HSAKMT_STATUS_ERROR; + + /* NodeId is fixed to 1, matching the get_wddmdev(1) device used by hsaKmtImportDMABufHandle. */ + GraphicsResourceInfo->NodeId = 1; + return hsaKmtImportDMABufHandle(GraphicsResourceHandle, &GraphicsResourceInfo->MemoryAddress); } + +/* Export the allocation registered under MemoryAddress as a DMA-buf via GpuMemory::ExportPhysicalHandle(DMABufFd). */ +/* MemorySizeInBytes is not used. Returns HSAKMT_STATUS_ERROR when the address is unknown or the export fails. */ HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress, HSAuint64 MemorySizeInBytes, int *DMABufFd, HSAuint64 *Offset) { CHECK_DXG_OPEN(); - assert(false); + + std::lock_guard gard(*allocation_map_lock_); + auto it = allocation_map_.find(MemoryAddress); + if (it == allocation_map_.end()) + return HSAKMT_STATUS_ERROR; + + auto gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); + auto code = gpu_mem->ExportPhysicalHandle(DMABufFd); + if (code != ErrorCode::Success) + return HSAKMT_STATUS_ERROR; + + /* The lookup only matches an allocation's own key, so the exported range starts at offset 0. */ + if (Offset != nullptr) + *Offset = 0; return HSAKMT_STATUS_SUCCESS; } + +/* Import a DMA-buf file descriptor as GPU memory on the device returned by get_wddmdev(1) and record it in allocation_map_. */ +/* On success *MemoryAddress receives gpu_mem->GpuAddress(); otherwise HSAKMT_STATUS_ERROR is returned. */ +HSAKMT_STATUS HSAKMTAPI hsaKmtImportDMABufHandle(int DMABufFd, + void **MemoryAddress) { + + CHECK_DXG_OPEN(); + + wsl::thunk::WDDMDevice* dev = get_wddmdev(1); + /* get_wddmdev() can return nullptr, and the result pointer is written below. */ + if (dev == nullptr || MemoryAddress == nullptr) + return HSAKMT_STATUS_ERROR; + + wsl::thunk::GpuMemory *gpu_mem = nullptr; + wsl::thunk::GpuMemoryCreateInfo create_info{}; + create_info.dmabuf_fd = DMABufFd; + + auto code = dev->CreateGpuMemory(create_info, &gpu_mem); + if (code == ErrorCode::Success) { + /* NOTE(review): hsaKmtAllocMemoryAlign hands out the GpuMemory object's address when dmabuf_fd > 0 and hsaKmtamdgpu_bo_va_op casts its handle to GpuMemory*, but the GPU address is returned here; confirm which one callers expect. */ + *MemoryAddress = reinterpret_cast(gpu_mem->GpuAddress()); + std::lock_guard gard(*allocation_map_lock_); + /* 
+ * gpu_mem->Flags() still needs to be converted back from GpuMemoryCreateFlags to + * HsaMemFlags; see hsaKmtAllocMemoryAlign for reference. + * */ + allocation_map_[*MemoryAddress] = Allocation( + gpu_mem->GetGpuMemoryHandle(), *MemoryAddress, (uint64_t)*MemoryAddress, + gpu_mem->Size(), false, nullptr, gpu_mem->ClientSize(), + 1, gpu_mem->Flags()); + + return HSAKMT_STATUS_SUCCESS; + } + + return HSAKMT_STATUS_ERROR; + +} + + HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress, HSAuint64 SizeInBytes, HsaSharedMemoryHandle *SharedMemoryHandle) { diff --git a/topology.cpp b/topology.cpp index d2c350066f..0ac6d48c32 100644 --- a/topology.cpp +++ b/topology.cpp @@ -1695,4 +1695,4 @@ wsl::thunk::WDDMDevice *get_wddmdev(uint32_t node_id) { return nullptr; return wdevices_[node_id - 1]; -} \ No newline at end of file +} diff --git a/version.cpp b/version.cpp index b71d036fc4..ba87575a2d 100644 --- a/version.cpp +++ b/version.cpp @@ -38,15 +38,15 @@ hsaKmtGetVersionCapInfo(HsaVersionCapability *VersionCapInfo) { CHECK_DXG_OPEN(); VersionCapInfo->Value = 0; + VersionCapInfo->ui64.Vmem = 1; + VersionCapInfo->ui64.dmabuf = 1; /*VersionCapInfo->ui64.InterruptSignal = 0; VersionCapInfo->ui64.Sdma = 0; VersionCapInfo->ui64.SdmaXgmi = 0; VersionCapInfo->ui64.Image = 0; VersionCapInfo->ui64.EventAge = 0; VersionCapInfo->ui64.Scratch = 0; - VersionCapInfo->ui64.Vmem = 0; - VersionCapInfo->ui64.dmabuf = 0; VersionCapInfo->ui64.XNack = 0;*/ return HSAKMT_STATUS_SUCCESS; -} \ No newline at end of file +}