diff --git a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt_virtio.h b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt_virtio.h index 7ce3bbeaa0..d2b1a3e1de 100644 --- a/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt_virtio.h +++ b/projects/rocr-runtime/libhsakmt/include/hsakmt/hsakmt_virtio.h @@ -44,6 +44,9 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtOpenKFD(void); HSAKMT_STATUS HSAKMTAPI vhsaKmtCloseKFD(void); HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, HsaMemFlags MemFlags, void** MemoryAddress); +HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemoryAlign(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, + HSAuint64 Alignment, HsaMemFlags MemFlags, + void** MemoryAddress); HSAKMT_STATUS HSAKMTAPI vhsaKmtFreeMemory(void* MemoryAddress, HSAuint64 SizeInBytes); HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPUNodes(void* MemoryAddress, HSAuint64 MemorySizeInBytes, HSAuint64* AlternateVAGPU, @@ -56,6 +59,10 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPU(void* MemoryAddress, HSAuint64 Mem HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemoryWithFlags(void* MemoryAddress, HSAuint64 MemorySizeInBytes, HsaMemFlags MemFlags); +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemory(void* MemoryAddress, HSAuint64 MemorySizeInBytes); +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemoryToNodes(void* MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAuint32 NumberOfNodes, HSAuint32* NodeArray); HSAKMT_STATUS HSAKMTAPI vhsaKmtDeregisterMemory(void* MemoryAddress); HSAKMT_STATUS HSAKMTAPI vhsaKmtGetVersion(HsaVersionInfo* v); HSAKMT_STATUS HSAKMTAPI vhsaKmtAcquireSystemProperties(HsaSystemProperties* SystemProperties); @@ -105,12 +112,87 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtCreateQueue(HSAuint32 NodeId, HSA_QUEUE_TYPE Type void* QueueAddress, HSAuint64 QueueSizeInBytes, HsaEvent* Event, HsaQueueResource* QueueResource); HSAKMT_STATUS HSAKMTAPI vhsaKmtDestroyQueue(HSA_QUEUEID QueueId); +HSAKMT_STATUS HSAKMTAPI vhsaKmtUpdateQueue(HSA_QUEUEID QueueId, HSAuint32 
QueuePercentage, + HSA_QUEUE_PRIORITY Priority, void* QueueAddress, + HSAuint64 QueueSize, HsaEvent* Event); +HSAKMT_STATUS HSAKMTAPI vhsaKmtGetQueueInfo(HSA_QUEUEID QueueId, HsaQueueInfo* QueueInfo); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetQueueCUMask(HSA_QUEUEID QueueId, HSAuint32 CUMaskCount, + HSAuint32* QueueCUMask); +HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocQueueGWS(HSA_QUEUEID QueueId, HSAuint32 nGWS, + HSAuint32* firstGWS); +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodesExt( + HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo* GraphicsResourceInfo, + HSAuint64 NumberOfNodes, HSAuint32* NodeArray, HSA_REGISTER_MEM_FLAGS RegisterFlags); HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes( HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo* GraphicsResourceInfo, HSAuint64 NumberOfNodes, HSAuint32* NodeArray); +HSAKMT_STATUS HSAKMTAPI vhsaKmtMapGraphicHandle(HSAuint32 NodeId, HSAuint64 GraphicDeviceHandle, + HSAuint64 GraphicResourceHandle, + HSAuint64 GraphicResourceOffset, + HSAuint64 GraphicResourceSize, + HSAuint64* FlatMemoryAddress); +HSAKMT_STATUS HSAKMTAPI vhsaKmtUnmapGraphicHandle(HSAuint32 NodeId, HSAuint64 FlatMemoryAddress, + HSAuint64 SizeInBytes); +HSAKMT_STATUS HSAKMTAPI vhsaKmtExportDMABufHandle(void* MemoryAddress, HSAuint64 MemorySizeInBytes, + int* DMABufFd, HSAuint64* Offset); HSAKMT_STATUS HSAKMTAPI vhsaKmtGetRuntimeCapabilities(HSAuint32* caps_mask); +HSAKMT_STATUS HSAKMTAPI vhsaKmtModelEnabled(bool* enable); +HSAKMT_STATUS HSAKMTAPI vhsaKmtOpenSMI(HSAuint32 NodeId, int* fd); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetXNACKMode(HSAint32 enable); +HSAKMT_STATUS HSAKMTAPI vhsaKmtShareMemory(void* MemoryAddress, HSAuint64 SizeInBytes, + HsaSharedMemoryHandle* SharedMemoryHandle); +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterSharedHandleToNodes( + const HsaSharedMemoryHandle* SharedMemoryHandle, void** MemoryAddress, HSAuint64* SizeInBytes, + HSAuint64 NumberOfNodes, HSAuint32* NodeArray); +HSAKMT_STATUS HSAKMTAPI 
vhsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle* SharedMemoryHandle, + void** MemoryAddress, HSAuint64* SizeInBytes); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetMemoryUserData(const void* Pointer, void* UserData); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetMemoryPolicy(HSAuint32 Node, HSAuint32 DefaultPolicy, + HSAuint32 AlternatePolicy, + void* MemoryAddressAlternate, + HSAuint64 MemorySizeInBytes); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSVMGetAttr(void* start_addr, HSAuint64 size, unsigned int nattr, + HSA_SVM_ATTRIBUTE* attrs); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSVMSetAttr(void* start_addr, HSAuint64 size, unsigned int nattr, + HSA_SVM_ATTRIBUTE* attrs); +HSAKMT_STATUS HSAKMTAPI vhsaKmtReplaceAsanHeaderPage(void* addr); +HSAKMT_STATUS HSAKMTAPI vhsaKmtReturnAsanHeaderPage(void* addr); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSPMAcquire(HSAuint32 PreferredNode); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSPMRelease(HSAuint32 PreferredNode); +HSAKMT_STATUS HSAKMTAPI vhsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode, HSAuint32 SizeInBytes, + HSAuint32* timeout, HSAuint32* SizeCopied, + void* DestMemoryAddress, bool* isSPMDataLoss); +HSAKMT_STATUS HSAKMTAPI vhsaKmtAisReadWriteFile(void* MemoryAddress, HSAuint64 MemorySizeInBytes, + HSAint32 fd, HSAint64 file_offset, + HsaAisFlags AisFlags, HSAuint64* SizeCopiedInBytes, + HSAint32* status); +HSAKMT_STATUS HSAKMTAPI vhsaKmtProcessVMRead(HSAuint32 Pid, HsaMemoryRange* LocalMemoryArray, + HSAuint64 LocalMemoryArrayCount, + HsaMemoryRange* RemoteMemoryArray, + HSAuint64 RemoteMemoryArrayCount, + HSAuint64* SizeCopied); +HSAKMT_STATUS HSAKMTAPI vhsaKmtProcessVMWrite(HSAuint32 Pid, HsaMemoryRange* LocalMemoryArray, + HSAuint64 LocalMemoryArrayCount, + HsaMemoryRange* RemoteMemoryArray, + HSAuint64 RemoteMemoryArrayCount, + HSAuint64* SizeCopied); int vamdgpu_query_gpu_info(amdgpu_device_handle dev, void* out); +int vamdgpu_device_initialize(int fd, uint32_t* major_version, uint32_t* minor_version, + amdgpu_device_handle* device_handle); +int 
vamdgpu_device_deinitialize(amdgpu_device_handle device_handle); +int vamdgpu_device_get_fd(amdgpu_device_handle device_handle); +int vdrmCommandWriteRead(int fd, unsigned long drmCommandIndex, void* data, unsigned long size); +int vamdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void** cpu); +int vamdgpu_bo_free(amdgpu_bo_handle buf_handle); +int vamdgpu_bo_export(amdgpu_bo_handle buf_handle, enum amdgpu_bo_handle_type type, + uint32_t* shared_handle); +int vamdgpu_bo_import(amdgpu_device_handle dev, enum amdgpu_bo_handle_type type, + uint32_t shared_handle, struct amdgpu_bo_import_result* output); +int vamdgpu_bo_va_op(amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr, + uint64_t flags, uint32_t ops); +int vamdgpu_bo_query_info(amdgpu_bo_handle bo, struct amdgpu_bo_info* info); +int vamdgpu_bo_set_metadata(amdgpu_bo_handle bo, struct amdgpu_bo_metadata* info); #ifdef __cplusplus } diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_amdgpu.c b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_amdgpu.c index 4aca737221..06e7d92237 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_amdgpu.c +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_amdgpu.c @@ -23,6 +23,17 @@ #include "hsakmt/hsakmt_virtio.h" #include "hsakmt_virtio_device.h" +/* amdgpu device initialize/deinitialize will be called in vhsakmtopen + * so just return ENOSYS here to avoid duplicate implementation + */ +int vamdgpu_device_initialize(int fd, uint32_t* major_version, uint32_t* minor_version, + amdgpu_device_handle* device_handle) { + return -ENOSYS; +} +int vamdgpu_device_deinitialize(amdgpu_device_handle device_handle) { + return -ENOSYS; +} + int vamdgpu_query_gpu_info(amdgpu_device_handle handle, void* out) { CHECK_VIRTIO_KFD_OPEN(); @@ -43,8 +54,330 @@ int vamdgpu_query_gpu_info(amdgpu_device_handle handle, void* out) { return ret; } +int vamdgpu_device_get_fd(amdgpu_device_handle device_handle) { + 
CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_node* node = NULL; + int fd = -1; + + pthread_mutex_lock(&dev->vhsakmt_mutex); + for (uint32_t i = 0; i < dev->sys_props->NumNodes; i++) { + if (dev->vhsakmt_nodes[i].amdgpu_device_handle == device_handle) { + node = &dev->vhsakmt_nodes[i]; + fd = node->amdgpu_fd; + break; + } + } + pthread_mutex_unlock(&dev->vhsakmt_mutex); + + return fd; +} + +int vdrmCommandWriteRead(int fd, unsigned long drmCommandIndex, void* data, unsigned long size) { + CHECK_VIRTIO_KFD_OPEN(); + + if (size > VHSAKMT_CCMD_QUERY_DRM_CMD_WRITE_READ_MAX_SIZE) + return -EINVAL; + + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_ccmd_query_info_rsp* rsp; + struct vhsakmt_ccmd_query_info_req req = { + .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req) + size), + .type = VHSAKMT_CCMD_QUERY_DRM_CMD_WRITE_READ, + .drm_cmd_write_read_args = + { + .fd = fd, + .drmCommandIndex = drmCommandIndex, + .size = size, + }, + }; + + memcpy(req.payload, data, size); + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, + sizeof(struct vhsakmt_ccmd_query_info_rsp) + size); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + memcpy(data, rsp->payload, size); + + return rsp->ret; +} + HSAKMT_STATUS vhsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId, HsaAMDGPUDeviceHandle* DeviceHandle) { CHECK_VIRTIO_KFD_OPEN(); - return HSAKMT_STATUS_SUCCESS; + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_node* node = vhsakmt_get_node_by_id(dev, NodeId); + if (!node) return HSAKMT_STATUS_INVALID_HANDLE; + /* Fast path: reuse the handle cached on this node by an earlier successful query. */ + if (node->amdgpu_device_handle) { + *DeviceHandle = node->amdgpu_device_handle; + return HSAKMT_STATUS_SUCCESS; + } + + pthread_mutex_lock(&dev->vhsakmt_mutex); + if (node->amdgpu_device_handle) { + *DeviceHandle = node->amdgpu_device_handle; + pthread_mutex_unlock(&dev->vhsakmt_mutex); + return HSAKMT_STATUS_SUCCESS; + } + + struct 
vhsakmt_ccmd_query_info_rsp* rsp; + struct vhsakmt_ccmd_query_info_req req = { + .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)), + .type = VHSAKMT_CCMD_QUERY_AMDGPU_DEVICE_HANDLE, + .NodeID = NodeId, + }; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp)); + if (!rsp) { + pthread_mutex_unlock(&dev->vhsakmt_mutex); + return -ENOMEM; + } + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) { pthread_mutex_unlock(&dev->vhsakmt_mutex); return rsp->ret; } /* never cache a handle/fd from a failed query */ + node->amdgpu_device_handle = (void*)rsp->device_handle_rsp.amdgpu_device_handle; + node->amdgpu_fd = (int)rsp->device_handle_rsp.fd; + pthread_mutex_unlock(&dev->vhsakmt_mutex); + + *DeviceHandle = node->amdgpu_device_handle; + return rsp->ret; +} + +int vamdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void** cpu) { + return 0; +} +/* Send AMDGPU_BO_FREE to the host, then drop one local import reference if the BO was imported. */ +int vamdgpu_bo_free(amdgpu_bo_handle buf_handle) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle vdev = vhsakmt_dev(); + vhsakmt_bo_handle vbo = (vhsakmt_bo_handle)buf_handle; + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_AMDGPU_BO_FREE, + .buf_handle = (uint64_t)buf_handle, + .res_id = vbo->real.res_id, + }; + + rsp = vhsakmt_alloc_rsp(vdev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(vdev, &req.hdr, __FUNCTION__); + + pthread_mutex_lock(&vbo->amdgpu_bo.lock); + if (vbo->amdgpu_bo.imported) { + if (vhsakmt_atomic_dec_return(&vbo->amdgpu_bo.refcount) > 0) { + pthread_mutex_unlock(&vbo->amdgpu_bo.lock); + return HSAKMT_STATUS_SUCCESS; + } + vbo->amdgpu_bo.import_size = 0; + vbo->amdgpu_bo.imported = false; + vbo->bo_type &= ~(uint32_t)(VHSA_BO_AMDGPU); /* parenthesize the mask: the macro is a bare `1 << 6`, so `~MACRO` would negate only the 1 */ + } + pthread_mutex_unlock(&vbo->amdgpu_bo.lock); + + return rsp->ret; +} + +int vamdgpu_bo_export(amdgpu_bo_handle buf_handle, enum amdgpu_bo_handle_type type, + uint32_t* shared_handle) { + CHECK_VIRTIO_KFD_OPEN(); + + 
vhsakmt_device_handle vdev = vhsakmt_dev(); + vhsakmt_bo_handle bo = (vhsakmt_bo_handle)buf_handle; + + if (type != amdgpu_bo_handle_type_kms) { + vhsa_err("%s: unsupported export type: %u\n", __FUNCTION__, type); + return -EINVAL; + } + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_AMDGPU_EXPORT, + .res_id = bo->real.res_id, + .amdgpu_export_args = + { + .buf_handle = (uint64_t)buf_handle, + .type = (uint32_t)type, + }, + }; + + rsp = vhsakmt_alloc_rsp(vdev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(vdev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + *shared_handle = rsp->shared_handle; + + return rsp->ret; +} + +static vhsakmt_bo_handle vhsakmt_bo_from_resid(vhsakmt_device_handle dev, uint32_t res_id) { + vhsakmt_bo_handle bo; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_MAP_USERPTR, + .res_id = res_id, + }; + struct vhsakmt_ccmd_memory_rsp* rsp = + vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return NULL; + + rsp->map_userptr_rsp.userptr_handle = 0; + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + + bo = vhsakmt_find_bo_by_addr(dev, (void*)rsp->map_userptr_rsp.userptr_handle); + + return bo; +} + +int vamdgpu_bo_import(amdgpu_device_handle dev, enum amdgpu_bo_handle_type type, + uint32_t shared_handle, struct amdgpu_bo_import_result* output) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle vdev = vhsakmt_dev(); + vhsakmt_bo_handle obj; + uint32_t bo_handle, res_id; + int r; + + if (type != amdgpu_bo_handle_type_dma_buf_fd) { + vhsa_err("%s: unsupported import type: %u\n", __FUNCTION__, type); + return -EINVAL; + } + + r = vhsakmt_handle_to_resid(vdev, shared_handle, &res_id, &bo_handle); + if (r) return r; + 
+ obj = vhsakmt_bo_from_resid(vdev, res_id); + if (!obj) return HSAKMT_STATUS_INVALID_HANDLE; + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_AMDGPU_IMPORT, + .res_id = res_id, + .amdgpu_import_args = + { + .dev = (int64_t)dev, + .type = (uint32_t)type, + .shared_handle = shared_handle, + }, + }; + + rsp = vhsakmt_alloc_rsp(vdev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(vdev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + pthread_mutex_lock(&obj->amdgpu_bo.lock); + if (obj->amdgpu_bo.imported) { + vhsa_debug("%s: bo already imported for shared_handle: %u\n", __FUNCTION__, shared_handle); + vhsakmt_atomic_inc(&obj->amdgpu_bo.refcount); + output->alloc_size = obj->amdgpu_bo.import_size; + output->buf_handle = (amdgpu_bo_handle)obj; + pthread_mutex_unlock(&obj->amdgpu_bo.lock); + return HSAKMT_STATUS_SUCCESS; + } + + memcpy(output, &rsp->amdgpu_import_rsp.output, sizeof(struct amdgpu_bo_import_result)); + + obj->bo_type |= VHSA_BO_AMDGPU; + obj->amdgpu_bo.imported = true; + obj->amdgpu_bo.import_size = output->alloc_size; + atomic_store(&obj->amdgpu_bo.refcount, 1); + pthread_mutex_unlock(&obj->amdgpu_bo.lock); + + output->buf_handle = (amdgpu_bo_handle)obj; + + return rsp->ret; +} + +int vamdgpu_bo_va_op(amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr, + uint64_t flags, uint32_t ops) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle vdev = vhsakmt_dev(); + vhsakmt_bo_handle vbo = (vhsakmt_bo_handle)bo; + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_AMDGPU_VA_OP, + .res_id = vbo->real.res_id, + .amdgpu_va_op_args = + { + .bo = (uint64_t)bo, + .offset = offset, + .size = size, + .addr = addr, + .flags = 
flags, + .ops = ops, + }, + }; + + rsp = vhsakmt_alloc_rsp(vdev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(vdev, &req.hdr, __FUNCTION__); + return rsp->ret; +} + +int vamdgpu_bo_query_info(amdgpu_bo_handle bo, struct amdgpu_bo_info* info) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle vdev = vhsakmt_dev(); + vhsakmt_bo_handle vbo = (vhsakmt_bo_handle)bo; + + if (!(vbo->bo_type & VHSA_BO_AMDGPU)) return -EINVAL; + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_AMDGPU_BO_QUERY_INFO, + .res_id = vbo->real.res_id, + }; + + rsp = vhsakmt_alloc_rsp(vdev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(vdev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + memcpy(info, &rsp->query_bo_info, sizeof(struct amdgpu_bo_info)); + + return rsp->ret; +} + +int vamdgpu_bo_set_metadata(amdgpu_bo_handle bo, struct amdgpu_bo_metadata* info) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle vdev = vhsakmt_dev(); + vhsakmt_bo_handle vbo = (vhsakmt_bo_handle)bo; + + if (!(vbo->bo_type & VHSA_BO_AMDGPU)) return -EINVAL; + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD( + MEMORY, sizeof(struct vhsakmt_ccmd_memory_req) + sizeof(struct amdgpu_bo_metadata)), + .type = VHSAKMT_CCMD_MEMORY_AMDGPU_BO_SET_METADATA, + .res_id = vbo->real.res_id, + .amdgpu_bo_metadata = *info, + }; + rsp = vhsakmt_alloc_rsp(vdev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(vdev, &req.hdr, __FUNCTION__); + return rsp->ret; } diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_device.h b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_device.h index 101f4b0d23..d9f962ba0f 100644 --- 
a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_device.h +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_device.h @@ -37,6 +37,8 @@ extern "C" { #define vhsakmt_atomic_inc_return(ptr) (atomic_fetch_add((ptr), 1) + 1) #define vhsakmt_atomic_dec_return(ptr) (atomic_fetch_sub((ptr), 1) - 1) +#define vhsakmt_atomic_inc(ptr) ((void)atomic_fetch_add((ptr), 1)) +#define vhsakmt_atomic_dec(ptr) ((void)atomic_fetch_sub((ptr), 1)) #define VHSA_VPTR_TO_UINT64(vptr) ((uint64_t)(unsigned long)(vptr)) #define VHSA_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v)) @@ -73,7 +75,7 @@ extern vhsakmt_device_handle dev_list; #define VHSA_BO_QUEUE_RW_PTR 1 << 4 /* queue read write ptr, from host map to guest*/ /* allocated from KFD, but used for AQL queue read write ptr */ #define VHSA_BO_QUEUE_AQL_RW_PTR 1 << 5 -#define VHSA_BO_CLGL 1 << 6 /* CLGL memory, imported from mesa GL */ +#define VHSA_BO_AMDGPU (1 << 6) /* amdgpu bo; parenthesized so that ~VHSA_BO_AMDGPU and casts apply to the whole mask */ /* allocated from KFD, but is scratch memory, do not need map and unmap in ioctrl */ #define VHSA_BO_SCRATCH 1 << 7 #define VHSA_BO_QUEUE 1 << 8 @@ -92,6 +94,8 @@ struct vhsakmt_node { void* doorbell_base; uint64_t scratch_start; uint64_t scratch_size; + HsaAMDGPUDeviceHandle amdgpu_device_handle; + int amdgpu_fd; }; struct vhsakmt_device { @@ -143,7 +147,16 @@ struct vhsakmt_bo { vHsaEvent* event; uint64_t queue_id; vhsakmt_bo_handle rw_bo; - void* gl_meta_data; + struct + { + void* gl_meta_data; + uint64_t import_size; + bool imported : 1; + int refcount; + pthread_mutex_t lock; /* protects imported, import_size and refcount operations */ + } amdgpu_bo; + + void* user_data; }; /*hsakmt_virtio_memory.c*/ @@ -179,10 +192,12 @@ int vhsakmt_set_node_doorbell(vhsakmt_device_handle dev, uint32_t node, void* do void* vhsakmt_node_doorbell(vhsakmt_device_handle dev, uint32_t node); bool vhsakmt_is_scratch_mem(vhsakmt_device_handle dev, void* addr); bool vhsakmt_is_userptr(vhsakmt_device_handle dev, void* addr); +struct vhsakmt_node* 
vhsakmt_get_node_by_id(vhsakmt_device_handle dev, uint32_t node_id); /*hsakmt_virtio_device.c*/ int vhsakmt_execbuf_cpu(vhsakmt_device_handle dev, struct vhsakmt_ccmd_req* req, const char* from); void* vhsakmt_alloc_rsp(vhsakmt_device_handle dev, struct vhsakmt_ccmd_req* req, uint32_t sz); +int vhsakmt_handle_to_resid(vhsakmt_device_handle dev, uint32_t handle, uint32_t* res_id, uint32_t* bo_handle); /*hsakmt_virtio_event.c*/ void* vhsakmt_event_host_handle(HsaEvent* h); diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_memory.c b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_memory.c index 19b8483d1e..83d7cf40a9 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_memory.c +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_memory.c @@ -129,8 +129,8 @@ static vhsakmt_bo_handle vhsakmt_find_userptr(vhsakmt_device_handle dev, unsigne } static void vhsakmt_destroy_userptr(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) { - hsakmt_interval_tree_remove(&dev->userptr_tree, &bo->itn); pthread_mutex_destroy(&bo->map_mutex); + pthread_mutex_destroy(&bo->amdgpu_bo.lock); struct drm_gem_close drm_req = { .handle = bo->real.handle, @@ -224,6 +224,7 @@ int vhsakmt_init_host_blob(vhsakmt_device_handle dev, size_t size, uint32_t blob bo->bo_type = bo_type; bo->host_addr = va_handle; pthread_mutex_init(&bo->map_mutex, NULL); + pthread_mutex_init(&bo->amdgpu_bo.lock, NULL); atomic_store(&bo->real.map_count, 0); atomic_store(&bo->refcount, 1); bo->real.handle = args.bo_handle; @@ -260,6 +261,7 @@ static int vhsakmt_init_userptr_blob(vhsakmt_device_handle dev, void* addr, size userptr->bo_type = VHSA_BO_USERPTR; userptr->cpu_addr = addr; pthread_mutex_init(&userptr->map_mutex, NULL); + pthread_mutex_init(&userptr->amdgpu_bo.lock, NULL); atomic_store(&userptr->real.map_count, 0); atomic_store(&userptr->refcount, 1); userptr->real.handle = args.bo_handle; @@ -295,8 +297,9 @@ int 
vhsakmt_create_mappable_blob_bo(vhsakmt_device_handle dev, size_t size, uint return r; } -HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, - HsaMemFlags MemFlags, void** MemoryAddress) { +HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemoryAlign(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, + HSAuint64 Alignment, HsaMemFlags MemFlags, + void** MemoryAddress) { vhsakmt_device_handle dev = vhsakmt_dev(); struct vhsakmt_ccmd_memory_rsp* rsp; vhsakmt_bo_handle bo; @@ -310,6 +313,7 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 Si .PreferredNode = PreferredNode, .SizeInBytes = SizeInBytes, .MemFlags = MemFlags, + .Alignment = Alignment, }, }; @@ -351,6 +355,11 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 Si return rsp->ret; } +HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, + HsaMemFlags MemFlags, void** MemoryAddress) { + return vhsaKmtAllocMemoryAlign(PreferredNode, SizeInBytes, 0, MemFlags, MemoryAddress); +} + int vhsakmt_bo_free(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) { bo_entry entry; int r; @@ -372,9 +381,10 @@ int vhsakmt_bo_free(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) { if (bo->event) free(bo->event); - if (bo->gl_meta_data) free(bo->gl_meta_data); + if (bo->amdgpu_bo.gl_meta_data) free(bo->amdgpu_bo.gl_meta_data); pthread_mutex_destroy(&bo->map_mutex); + pthread_mutex_destroy(&bo->amdgpu_bo.lock); r = vhsakmt_destroy_handle(dev, bo); @@ -412,6 +422,7 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPUNodes(void* MemoryAddress, HSAuint6 struct vhsakmt_ccmd_memory_req* req; struct vhsakmt_ccmd_memory_rsp* rsp; vhsakmt_bo_handle bo; + uint64_t addr_offset = 0; req = (void*)calloc(1, req_len); if (!req) return -ENOMEM; @@ -428,9 +439,13 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPUNodes(void* MemoryAddress, HSAuint6 } else if (!dev->use_svm) { bo = vhsakmt_find_userptr(dev, (uint64_t)MemoryAddress, 
(uint64_t)MemoryAddress + MemorySizeInBytes - 1UL); - if (bo) - req->map_to_GPU_nodes_args.MemoryAddress = - (uint64_t)bo->host_addr + ((char*)MemoryAddress - (char*)bo->cpu_addr); + if (bo) { + req->res_id = bo->real.res_id; + req->map_to_GPU_nodes_args.MemoryAddress = (uint64_t)bo->host_addr; + req->map_to_GPU_nodes_args.MemorySizeInBytes = (uint64_t)bo->size; + addr_offset = (uint64_t)MemoryAddress - (uint64_t)bo->cpu_addr; + } + } if (!bo) { @@ -447,6 +462,13 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtMapMemoryToGPUNodes(void* MemoryAddress, HSAuint6 } vhsakmt_execbuf_cpu(dev, &req->hdr, __FUNCTION__); + if (rsp->ret) { + free(req); + return rsp->ret; + } + + if (!dev->use_svm) + rsp->alternate_vagpu += addr_offset; *AlternateVAGPU = rsp->alternate_vagpu; @@ -722,6 +744,26 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemoryWithFlags(void* MemoryAddress, return rsp->ret; } +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemory(void* MemoryAddress, HSAuint64 MemorySizeInBytes) { + CHECK_VIRTIO_KFD_OPEN(); + + HsaMemFlags flags = {0}; + flags.ui32.CoarseGrain = 1; + flags.ui32.ExtendedCoherent = 0; + + return vhsaKmtRegisterMemoryWithFlags(MemoryAddress, MemorySizeInBytes, flags); +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterMemoryToNodes(void* MemoryAddress, + HSAuint64 MemorySizeInBytes, + HSAuint32 NumberOfNodes, + HSAuint32* NodeArray) { + CHECK_VIRTIO_KFD_OPEN(); + + // Not used in ROCR so no implementation is performed here. 
+ return HSAKMT_STATUS_NOT_IMPLEMENTED; +} + static int vhsakmt_remove_clgl_bo(vhsakmt_device_handle dev, vhsakmt_bo_handle bo) { struct vhsakmt_ccmd_memory_rsp* rsp; struct vhsakmt_ccmd_memory_req req = { @@ -747,6 +789,9 @@ static int vhsakmt_deregister_userptr_non_svm(vhsakmt_device_handle dev, void* M size_t page_size = getpagesize(); unsigned long aligned_addr = ((uint64_t)MemoryAddress / page_size) * page_size; interval_tree_node_t* n; + vhsakmt_bo_handle* bos_to_free = NULL; + int free_count = 0; + int free_capacity = 0; pthread_mutex_lock(&dev->bo_handles_mutex); @@ -766,7 +811,7 @@ static int vhsakmt_deregister_userptr_non_svm(vhsakmt_device_handle dev, void* M n = hsakmt_interval_tree_iter_next(&dev->userptr_tree, n, aligned_addr, aligned_addr); } - /* Second pass: Free all userptrs if all refcounts are <= 0 */ + /* Second pass: Collect BOs to free and remove from tree */ if (can_free_all) { n = hsakmt_interval_tree_iter_first(&dev->userptr_tree, aligned_addr, aligned_addr); while (n) { @@ -778,7 +823,21 @@ static int vhsakmt_deregister_userptr_non_svm(vhsakmt_device_handle dev, void* M vhsa_debug("%s: destroying userptr: %p, size: %x, res_id: %d\n", __FUNCTION__, bo->cpu_addr, bo->size, bo->real.res_id); - vhsakmt_destroy_userptr(dev, bo); + /* Grow the array before unlinking: if realloc fails the BO stays in the tree instead of being leaked. */ + if (free_count >= free_capacity) { + int new_capacity = free_capacity == 0 ? 
32 : free_capacity * 2; + vhsakmt_bo_handle* new_array = realloc(bos_to_free, new_capacity * sizeof(vhsakmt_bo_handle)); + if (!new_array) { + vhsa_err("%s: failed to allocate memory for BO array, freeing %d BOs\n", __FUNCTION__, free_count); + pthread_mutex_unlock(&dev->bo_handles_mutex); + goto cleanup; + } + bos_to_free = new_array; + free_capacity = new_capacity; + } + + hsakmt_interval_tree_remove(&dev->userptr_tree, &bo->itn); + bos_to_free[free_count++] = bo; } n = next; @@ -786,6 +845,17 @@ static int vhsakmt_deregister_userptr_non_svm(vhsakmt_device_handle dev, void* M } pthread_mutex_unlock(&dev->bo_handles_mutex); + +cleanup: + for (int i = 0; i < free_count; i++) { + vhsakmt_bo_handle bo = bos_to_free[i]; + vhsakmt_destroy_userptr(dev, bo); + } + + if (bos_to_free) { + free(bos_to_free); + } + return 0; } @@ -795,7 +865,7 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtDeregisterMemory(void* MemoryAddress) { vhsakmt_device_handle dev = vhsakmt_dev(); vhsakmt_bo_handle bo = vhsakmt_find_bo_by_addr(dev, MemoryAddress); - if (bo && (bo->bo_type & VHSA_BO_CLGL)) return vhsakmt_remove_clgl_bo(dev, bo); + if (bo && (bo->bo_type & VHSA_BO_AMDGPU)) return vhsakmt_remove_clgl_bo(dev, bo); if (!dev->use_svm) { return vhsakmt_deregister_userptr_non_svm(dev, MemoryAddress); @@ -877,13 +947,14 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig* return rsp->ret; } -static int vhsakmt_create_clgl_bo(vhsakmt_device_handle dev, void* addr, size_t size, - uint32_t res_id, uint32_t bo_handle, void* meta_data) { +static int vhsakmt_create_amdgpu_bo(vhsakmt_device_handle dev, void* addr, size_t size, + uint32_t res_id, uint32_t bo_handle, void* meta_data) { vhsakmt_bo_handle out = calloc(1, sizeof(struct vhsakmt_bo)); if (!out) return -ENOMEM; out->dev = dev; out->size = size; + pthread_mutex_init(&out->amdgpu_bo.lock, NULL); atomic_store(&out->real.map_count, 0); atomic_store(&out->refcount, 1); @@ -895,8 +966,8 @@ static int vhsakmt_create_clgl_bo(vhsakmt_device_handle dev, void* addr, size_t /* GL bo handle from 
GL context*/ out->real.handle = bo_handle; - out->bo_type |= VHSA_BO_CLGL; - if (meta_data) out->gl_meta_data = meta_data; + out->bo_type |= VHSA_BO_AMDGPU; + if (meta_data) out->amdgpu_bo.gl_meta_data = meta_data; out->host_addr = addr; @@ -905,25 +976,25 @@ static int vhsakmt_create_clgl_bo(vhsakmt_device_handle dev, void* addr, size_t return 0; } -static int vhsakmt_gfxhandle_to_resid(vhsakmt_device_handle dev, uint32_t gfx_handle, - uint32_t* res_id, uint32_t* bo_handle) { - int r = drmPrimeFDToHandle(dev->vgdev->fd, gfx_handle, bo_handle); +int vhsakmt_handle_to_resid(vhsakmt_device_handle dev, uint32_t handle, + uint32_t* res_id, uint32_t* bo_handle) { + int r = drmPrimeFDToHandle(dev->vgdev->fd, handle, bo_handle); if (r) { - vhsa_err("%s: drmPrimeFDToHandle failed for handle: %u\n", __FUNCTION__, gfx_handle); + vhsa_err("%s: drmPrimeFDToHandle failed for handle: %u\n", __FUNCTION__, handle); return r; } virtio_gpu_res_id(dev->vgdev, *bo_handle, res_id); - vhsa_debug("%s: register praphics handle: handle: %d, bo_handle: %d, res_id: %d\n", __FUNCTION__, - gfx_handle, *bo_handle, *res_id); + vhsa_debug("%s: drm handle: %d, bo_handle: %d, res_id: %d\n", __FUNCTION__, + handle, *bo_handle, *res_id); return 0; } -HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes( +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodesExt( HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo* GraphicsResourceInfo, - HSAuint64 NumberOfNodes, HSAuint32* NodeArray) { + HSAuint64 NumberOfNodes, HSAuint32* NodeArray, HSA_REGISTER_MEM_FLAGS RegisterFlags) { CHECK_VIRTIO_KFD_OPEN(); vhsakmt_device_handle dev = vhsakmt_dev(); @@ -945,13 +1016,15 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes( #ifdef CLGL_EXPORT_RESID req->reg_ghd_to_nodes.GraphicsResourceHandle = GraphicsResourceHandle; #else - r = vhsakmt_gfxhandle_to_resid(dev, GraphicsResourceHandle, &res_id, &bo_handle); + r = vhsakmt_handle_to_resid(dev, GraphicsResourceHandle, &res_id, 
&bo_handle); if (r) return r; req->reg_ghd_to_nodes.GraphicsResourceHandle = bo_handle; req->reg_ghd_to_nodes.res_handle = res_id; #endif + req->reg_ghd_to_nodes.flag = RegisterFlags.Value; + memcpy(req->payload, NodeArray, NumberOfNodes * sizeof(NodeArray)); rsp = @@ -981,9 +1054,9 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes( GraphicsResourceHandle, GraphicsResourceInfo->MemoryAddress, GraphicsResourceInfo->SizeInBytes); - r = vhsakmt_create_clgl_bo(dev, GraphicsResourceInfo->MemoryAddress, - GraphicsResourceInfo->SizeInBytes, res_id, bo_handle, - VHSA_UINT64_TO_VPTR(GraphicsResourceInfo->Metadata)); + r = vhsakmt_create_amdgpu_bo(dev, GraphicsResourceInfo->MemoryAddress, + GraphicsResourceInfo->SizeInBytes, res_id, bo_handle, + VHSA_UINT64_TO_VPTR(GraphicsResourceInfo->Metadata)); if (r) goto free_out; r = rsp->ret; @@ -994,3 +1067,315 @@ free_out: free(req); return r; } + +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterGraphicsHandleToNodes( + HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo* GraphicsResourceInfo, + HSAuint64 NumberOfNodes, HSAuint32* NodeArray) { + CHECK_VIRTIO_KFD_OPEN(); + + HSA_REGISTER_MEM_FLAGS flags = {0}; + + return vhsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle, GraphicsResourceInfo, + NumberOfNodes, NodeArray, flags); +} + +static int vhsakmt_export_dmabuf(vhsakmt_device_handle dev, uint32_t bo_handle, int* dmabuf_fd) { + return drmPrimeHandleToFD(dev->vgdev->fd, bo_handle, DRM_CLOEXEC | DRM_RDWR, dmabuf_fd); +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtMapGraphicHandle(HSAuint32 NodeId, HSAuint64 GraphicDeviceHandle, + HSAuint64 GraphicResourceHandle, + HSAuint64 GraphicResourceOffset, + HSAuint64 GraphicResourceSize, + HSAuint64* FlatMemoryAddress) { + CHECK_VIRTIO_KFD_OPEN(); + // Not implemented in baremetal so keep the stub here. 
+ return HSAKMT_STATUS_NOT_IMPLEMENTED; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtUnmapGraphicHandle(HSAuint32 NodeId, HSAuint64 FlatMemoryAddress, + HSAuint64 SizeInBytes) { + return vhsaKmtUnmapMemoryToGPU(VHSA_UINT64_TO_VPTR(FlatMemoryAddress)); +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtExportDMABufHandle(void* MemoryAddress, HSAuint64 MemorySizeInBytes, + int* DMABufFd, HSAuint64* Offset) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo = vhsakmt_find_bo_by_addr(dev, MemoryAddress); + if (!bo) return HSAKMT_STATUS_INVALID_HANDLE; + int r; + + r = vhsakmt_export_dmabuf(dev, bo->real.handle, DMABufFd); + if (r) { + vhsa_err("%s: export dmabuf failed for handle: %d\n", + __FUNCTION__, bo->real.handle); + return -HSAKMT_STATUS_ERROR; + } + + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_EXPORT_DMABUF, + .export_dmabuf_args = + { + .MemoryAddress = (uint64_t)MemoryAddress, + .MemorySizeInBytes = MemorySizeInBytes, + }, + .res_id = bo->real.res_id, + }; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + *Offset = rsp->export_dmabuf_rsp.offset; + + vhsa_debug("%s: gva: %p, size: %lx, dmabuf_fd: %d, offset: %lx, resid: %x \n", __FUNCTION__, + MemoryAddress, MemorySizeInBytes, *DMABufFd, *Offset, bo->real.res_id); + + return r; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtShareMemory(void* MemoryAddress, HSAuint64 SizeInBytes, + HsaSharedMemoryHandle* SharedMemoryHandle) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo; + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = 
VHSAKMT_CCMD_MEMORY_SHARE_MEMORY, + .share_memory_args = + { + .MemoryAddress = (uint64_t)MemoryAddress, + .MemorySizeInBytes = SizeInBytes, + }, + }; + + bo = vhsakmt_find_bo_by_addr(dev, MemoryAddress); + if (!bo) return HSAKMT_STATUS_INVALID_PARAMETER; + + req.res_id = bo->real.res_id; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + memcpy(SharedMemoryHandle, &rsp->share_memory_rsp.SharedMemoryHandle, + sizeof(HsaSharedMemoryHandle)); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterSharedHandleToNodes( + const HsaSharedMemoryHandle* SharedMemoryHandle, void** MemoryAddress, HSAuint64* SizeInBytes, + HSAuint64 NumberOfNodes, HSAuint32* NodeArray) { + CHECK_VIRTIO_KFD_OPEN(); + if (NumberOfNodes > VHSAKMT_MEMORY_MAX_NODES) return -EINVAL; + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo; + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD( + MEMORY, sizeof(struct vhsakmt_ccmd_memory_req) + NumberOfNodes * sizeof(NodeArray)), + .type = VHSAKMT_CCMD_MEMORY_REGISTER_SHARED_HANDLE, + .blob_id = vhsakmt_atomic_inc_return(&dev->next_blob_id), + .register_shared_handle_args = { + .NumberOfNodes = NumberOfNodes, + }}; + int r; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + memcpy(req.payload, NodeArray, NumberOfNodes * sizeof(NodeArray)); + memcpy(&req.register_shared_handle_args.SharedMemoryHandle, SharedMemoryHandle, + sizeof(HsaSharedMemoryHandle)); + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + if (!rsp->register_shared_handle_rsp.memory_handle || !rsp->register_shared_handle_rsp.size) + return -ENOMEM; + + // treat as VHSA_BO_KFD_MEM for shared handle memory + r = vhsakmt_init_host_blob(dev, 
rsp->register_shared_handle_rsp.size, VIRTGPU_BLOB_MEM_HOST3D, + VIRTGPU_BLOB_FLAG_USE_MAPPABLE, req.blob_id, VHSA_BO_KFD_MEM, + (void*)rsp->register_shared_handle_rsp.memory_handle, &bo); + if (r) return r; + + r = vhsakmt_bo_cpu_map(bo, &bo->cpu_addr, bo->host_addr); + if (r) { + free(bo); + return -ENOMEM; + } + + *MemoryAddress = bo->cpu_addr; + *SizeInBytes = rsp->register_shared_handle_rsp.size; + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle* SharedMemoryHandle, + void** MemoryAddress, HSAuint64* SizeInBytes) { + return vhsaKmtRegisterSharedHandleToNodes(SharedMemoryHandle, MemoryAddress, SizeInBytes, 0, + NULL); +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetMemoryPolicy(HSAuint32 Node, HSAuint32 DefaultPolicy, + HSAuint32 AlternatePolicy, + void* MemoryAddressAlternate, + HSAuint64 MemorySizeInBytes) { + CHECK_VIRTIO_KFD_OPEN(); + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_ccmd_memory_rsp* rsp; + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, sizeof(struct vhsakmt_ccmd_memory_req)), + .type = VHSAKMT_CCMD_MEMORY_SET_MEM_POLICY, + .set_mem_policy_args = { + .Node = Node, + .DefaultPolicy = DefaultPolicy, + .AlternatePolicy = AlternatePolicy, + .MemoryAddressAlternate = (uint64_t)MemoryAddressAlternate, + .MemorySizeInBytes = MemorySizeInBytes, + }}; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetMemoryUserData(const void* Pointer, void* UserData) { + CHECK_VIRTIO_KFD_OPEN(); + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo = vhsakmt_find_bo_by_addr(dev, VHSA_UINT64_TO_VPTR(Pointer)); + if (!bo) return HSAKMT_STATUS_INVALID_HANDLE; + + pthread_mutex_lock(&dev->bo_handles_mutex); + bo->user_data = UserData; + pthread_mutex_unlock(&dev->bo_handles_mutex); + + return 
HSAKMT_STATUS_SUCCESS; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSVMGetAttr(void* start_addr, HSAuint64 size, unsigned int nattr, + HSA_SVM_ATTRIBUTE* attrs) { + CHECK_VIRTIO_KFD_OPEN(); + if (nattr > VHSAKMT_MEMORY_MAX_NATTR) return -EINVAL; + + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_ccmd_memory_rsp* rsp; + vhsakmt_bo_handle bo; + size_t req_len = sizeof(struct vhsakmt_ccmd_memory_req) + nattr * sizeof(HSA_SVM_ATTRIBUTE); + size_t rsp_len = sizeof(struct vhsakmt_ccmd_memory_rsp) + nattr * sizeof(HSA_SVM_ATTRIBUTE); + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, req_len), + .type = VHSAKMT_CCMD_MEMORY_SVM_GET_ATTR, + .svm_attr_args = + { + .start_addr = (uint64_t)start_addr, + .size = size, + .nattr = nattr, + }, + }; + + bo = vhsakmt_find_bo_by_addr(dev, start_addr); + if (!bo) return HSAKMT_STATUS_INVALID_HANDLE; + req.res_id = bo->real.res_id; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, rsp_len); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + memcpy(req.payload, attrs, nattr * sizeof(HSA_SVM_ATTRIBUTE)); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSVMSetAttr(void* start_addr, HSAuint64 size, unsigned int nattr, + HSA_SVM_ATTRIBUTE* attrs) { + CHECK_VIRTIO_KFD_OPEN(); + if (nattr > VHSAKMT_MEMORY_MAX_NATTR) return -EINVAL; + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo; + struct vhsakmt_ccmd_memory_rsp* rsp; + size_t req_len = sizeof(struct vhsakmt_ccmd_memory_req) + nattr * sizeof(HSA_SVM_ATTRIBUTE); + struct vhsakmt_ccmd_memory_req req = { + .hdr = VHSAKMT_CCMD(MEMORY, req_len), + .type = VHSAKMT_CCMD_MEMORY_SVM_SET_ATTR, + .svm_attr_args = + { + .start_addr = (uint64_t)start_addr, + .size = size, + .nattr = nattr, + }, + }; + + bo = vhsakmt_find_bo_by_addr(dev, start_addr); + if (!bo) return HSAKMT_STATUS_INVALID_HANDLE; + req.res_id = bo->real.res_id; + + memcpy(req.payload, attrs, nattr * 
sizeof(HSA_SVM_ATTRIBUTE)); + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_memory_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtReplaceAsanHeaderPage(void* addr) { + CHECK_VIRTIO_KFD_OPEN(); + // Not implemented so keep the stub here. + return HSAKMT_STATUS_NOT_IMPLEMENTED; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtReturnAsanHeaderPage(void* addr) { + CHECK_VIRTIO_KFD_OPEN(); + // Not implemented so keep the stub here. + return HSAKMT_STATUS_NOT_IMPLEMENTED; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtAisReadWriteFile(void* MemoryAddress, HSAuint64 MemorySizeInBytes, + HSAint32 fd, HSAint64 file_offset, + HsaAisFlags AisFlags, HSAuint64* SizeCopiedInBytes, + HSAint32* status) { + CHECK_VIRTIO_KFD_OPEN(); + // Not implemented so keep the stub here. + return HSAKMT_STATUS_NOT_IMPLEMENTED; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtProcessVMRead(HSAuint32 Pid, HsaMemoryRange* LocalMemoryArray, + HSAuint64 LocalMemoryArrayCount, + HsaMemoryRange* RemoteMemoryArray, + HSAuint64 RemoteMemoryArrayCount, + HSAuint64* SizeCopied) { + CHECK_VIRTIO_KFD_OPEN(); + // Not implemented in baremetal so keep the stub here. + return HSAKMT_STATUS_NOT_IMPLEMENTED; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtProcessVMWrite(HSAuint32 Pid, HsaMemoryRange* LocalMemoryArray, + HSAuint64 LocalMemoryArrayCount, + HsaMemoryRange* RemoteMemoryArray, + HSAuint64 RemoteMemoryArrayCount, + HSAuint64* SizeCopied) { + CHECK_VIRTIO_KFD_OPEN(); + // Not implemented in baremetal so keep the stub here. 
+ return HSAKMT_STATUS_NOT_IMPLEMENTED; +} diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_openclose.c b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_openclose.c index 0e68a54052..2f8810d320 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_openclose.c +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_openclose.c @@ -128,12 +128,17 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtOpenKFD(void) { static void vhsakmt_device_destroy(struct vhsakmt_device* dev) { pthread_mutex_destroy(&dev->bo_handles_mutex); + pthread_mutex_destroy(&dev->vhsakmt_mutex); vhsakmt_dereserve_va(dev->vm_start, dev->vm_size); if (dev->sys_props) free(dev->sys_props); if (dev->vhsakmt_nodes) free(dev->vhsakmt_nodes); virtio_gpu_close(dev->vgdev); + if (dev == dev_list) + dev_list = NULL; + + free(dev); } HSAKMT_STATUS HSAKMTAPI vhsaKmtCloseKFD(void) { diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_proto.h b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_proto.h index ef3581c32b..ee23ea7be5 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_proto.h +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_proto.h @@ -109,6 +109,12 @@ enum vhsakmt_ccmd_query_type { VHSAKMT_CCMD_QUERY_TILE_CONFIG, VHSAKMT_CCMD_QUERY_NANO_TIME, VHSAKMT_CCMD_QUERY_GET_RUNTIME_CAPS, + VHSAKMT_CCMD_QUERY_AMDGPU_DEVICE_HANDLE, + VHSAKMT_CCMD_QUERY_DRM_CMD_WRITE_READ, + VHSAKMT_CCMD_QUERY_SET_XNACK_MODE, + VHSAKMT_CCMD_QUERY_SPM_ACQUIRE, + VHSAKMT_CCMD_QUERY_SPM_RELEASE, + VHSAKMT_CCMD_QUERY_SPM_SET_DST_BUFFER, }; #define QUERY_PTR_INFO_MAX_MAPPED_NODES 3 @@ -153,6 +159,15 @@ typedef struct _query_open_kfd_args { } query_open_kfd_args; VHSAKMT_STATIC_ASSERT_SIZE(_query_open_kfd_args) +typedef struct _query_spm_set_dst_buffer_args { + uint32_t PreferredNode; + uint32_t SizeInBytes; + uint32_t timeout; + uint32_t res_id; + uint64_t DestMemoryAddress; +} query_spm_set_dst_buffer_args; 
+VHSAKMT_STATIC_ASSERT_SIZE(_query_spm_set_dst_buffer_args) + typedef struct _query_open_kfd_rsp { uint64_t vm_start; uint64_t vm_size; @@ -164,6 +179,27 @@ typedef struct _query_nano_time_rsp { } query_nano_time_rsp; VHSAKMT_STATIC_ASSERT_SIZE(_query_nano_time_rsp) +typedef struct _query_drm_cmd_write_read_args { + uint64_t fd; + uint64_t drmCommandIndex; + uint64_t size; +} query_drm_cmd_write_read_args; +VHSAKMT_STATIC_ASSERT_SIZE(_query_drm_cmd_write_read_args) + +typedef struct _query_device_handle_rsp { + uint64_t amdgpu_device_handle; + uint64_t fd; +} query_device_handle_rsp; +VHSAKMT_STATIC_ASSERT_SIZE(_query_device_handle_rsp) + +typedef struct _query_spm_set_dst_buffer_rsp { + uint32_t SizeCopied; + uint32_t timeout; + uint8_t IsTileDataLoss; + uint8_t pad[7]; +} query_spm_set_dst_buffer_rsp; +VHSAKMT_STATIC_ASSERT_SIZE(_query_spm_set_dst_buffer_rsp) + struct vhsakmt_ccmd_query_info_req { struct vhsakmt_ccmd_req hdr; struct drm_amdgpu_info info; @@ -178,6 +214,9 @@ struct vhsakmt_ccmd_query_info_req { query_req_node_io_link_args node_io_link_args; query_tile_config tile_config_args; query_open_kfd_args open_kfd_args; + query_drm_cmd_write_read_args drm_cmd_write_read_args; + query_spm_set_dst_buffer_args spm_set_dst_buffer_args; + HSAint32 xnack_mode; }; uint8_t payload[]; @@ -186,8 +225,9 @@ VHSAKMT_DEFINE_CAST(vhsakmt_ccmd_req, vhsakmt_ccmd_query_info_req) VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_query_info_req) #define VHSAKMT_CCMD_QUERY_MAX_TILE_CONFIG 128 #define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_MEM_PROP 128 -#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_CACHE_PROP 128 -#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_IO_LINK_PROP 128 +#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_CACHE_PROP 512 +#define VHSAKMT_CCMD_QUERY_MAX_GET_NOD_IO_LINK_PROP 512 +#define VHSAKMT_CCMD_QUERY_DRM_CMD_WRITE_READ_MAX_SIZE 128 struct vhsakmt_ccmd_query_info_rsp { struct vhsakmt_ccmd_rsp hdr; @@ -203,6 +243,8 @@ struct vhsakmt_ccmd_query_info_rsp { HsaNodeProperties node_props; int32_t 
xnack_mode; HsaClockCounters clock_counters; + query_device_handle_rsp device_handle_rsp; + query_spm_set_dst_buffer_rsp spm_set_dst_buffer_rsp; uint32_t caps; uint64_t pad[9]; }; @@ -319,13 +361,29 @@ enum vhsakmt_ccmd_memory_type { VHSAKMT_CCMD_MEMORY_REG_MEM_WITH_FLAG, VHSAKMT_CCMD_MEMORY_DEREG_MEM, VHSAKMT_CCMD_MEMORY_MAP_USERPTR, + VHSAKMT_CCMD_MEMORY_EXPORT_DMABUF, + VHSAKMT_CCMD_MEMORY_AMDGPU_IMPORT, + VHSAKMT_CCMD_MEMORY_AMDGPU_EXPORT, + VHSAKMT_CCMD_MEMORY_AMDGPU_VA_OP, + VHSAKMT_CCMD_MEMORY_AMDGPU_BO_FREE, + VHSAKMT_CCMD_MEMORY_SHARE_MEMORY, + VHSAKMT_CCMD_MEMORY_REGISTER_SHARED_HANDLE, + VHSAKMT_CCMD_MEMORY_SET_MEM_POLICY, + VHSAKMT_CCMD_MEMORY_SVM_GET_ATTR, + VHSAKMT_CCMD_MEMORY_SVM_SET_ATTR, + VHSAKMT_CCMD_MEMORY_AMDGPU_BO_QUERY_INFO, + VHSAKMT_CCMD_MEMORY_AMDGPU_BO_SET_METADATA, }; +#define VHSAKMT_MEMORY_MAX_NODES 32 +#define VHSAKMT_MEMORY_MAX_NATTR 32 + typedef struct _memory_req_alloc_args { uint32_t PreferredNode; HsaMemFlags MemFlags; uint64_t SizeInBytes; uint64_t MemoryAddress; + uint64_t Alignment; } memory_req_alloc_args; VHSAKMT_STATIC_ASSERT_SIZE(_memory_req_alloc_args) @@ -362,16 +420,87 @@ typedef struct _memory_reg_mem_with_flag { } memory_reg_mem_with_flag; VHSAKMT_STATIC_ASSERT_SIZE(_memory_reg_mem_with_flag) +typedef struct _memory_amdgpu_import_args { + int64_t dev; + uint32_t type; + uint32_t shared_handle; +} memory_amdgpu_import_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_amdgpu_import_args) + +typedef struct _memory_amdgpu_export_args { + uint64_t buf_handle; + uint32_t type; + uint32_t pad; +} memory_amdgpu_export_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_amdgpu_export_args) + +typedef struct _memory_amdgpu_va_op_args { + uint64_t bo; + uint64_t offset; + uint64_t size; + uint64_t addr; + uint64_t flags; + uint32_t ops; + uint32_t pad; +} memory_amdgpu_va_op_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_amdgpu_va_op_args) + +typedef struct _memory_export_dmabuf_args { + uint64_t MemoryAddress; + uint64_t MemorySizeInBytes; +} 
memory_export_dmabuf_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_export_dmabuf_args) + +typedef struct _memory_share_memory_args { + uint64_t MemoryAddress; + uint64_t MemorySizeInBytes; +} memory_share_memory_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_share_memory_args) + +typedef struct _memory_register_shared_handle_args { + HsaSharedMemoryHandle SharedMemoryHandle; + uint64_t NumberOfNodes; +} memory_register_shared_handle_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_register_shared_handle_args) + +typedef struct _memory_set_mem_policy_args { + uint32_t Node; + uint32_t DefaultPolicy; + uint32_t AlternatePolicy; + uint32_t pad; + uint64_t MemoryAddressAlternate; + uint64_t MemorySizeInBytes; +} memory_set_mem_policy_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_set_mem_policy_args) + +typedef struct _memory_svm_attr_args { + uint64_t start_addr; + uint64_t size; + uint32_t nattr; + uint32_t pad; +} memory_svm_attr_args; +VHSAKMT_STATIC_ASSERT_SIZE(_memory_svm_attr_args) + struct vhsakmt_ccmd_memory_req { struct vhsakmt_ccmd_req hdr; union { uint64_t MemoryAddress; + uint64_t buf_handle; uint32_t Node; memory_req_alloc_args alloc_args; memory_req_map_to_GPU_nodes_args map_to_GPU_nodes_args; memory_req_free_args free_args; memory_map_mem_to_gpu_args map_to_GPU_args; memory_reg_mem_with_flag reg_mem_with_flag; + memory_export_dmabuf_args export_dmabuf_args; + memory_amdgpu_import_args amdgpu_import_args; + memory_amdgpu_export_args amdgpu_export_args; + memory_amdgpu_va_op_args amdgpu_va_op_args; + memory_share_memory_args share_memory_args; + memory_register_shared_handle_args register_shared_handle_args; + memory_set_mem_policy_args set_mem_policy_args; + memory_svm_attr_args svm_attr_args; + struct amdgpu_bo_metadata amdgpu_bo_metadata; }; uint64_t blob_id; uint32_t type; @@ -388,14 +517,43 @@ typedef struct _vhsakmt_ccmd_memory_map_userptr_rsp { } vhsakmt_ccmd_memory_map_userptr_rsp; VHSAKMT_STATIC_ASSERT_SIZE(_vhsakmt_ccmd_memory_map_userptr_rsp) +typedef struct 
_vhsakmt_ccmd_memory_export_dmabuf_rsp { + int64_t dmabuf_fd; + uint64_t offset; +} vhsakmt_ccmd_memory_export_dmabuf_rsp; +VHSAKMT_STATIC_ASSERT_SIZE(_vhsakmt_ccmd_memory_export_dmabuf_rsp) + +typedef struct _vhsakmt_ccmd_memory_amdgpu_import_rsp +{ + struct amdgpu_bo_import_result output; +}vhsakmt_ccmd_memory_amdgpu_import_rsp; +VHSAKMT_STATIC_ASSERT_SIZE(_vhsakmt_ccmd_memory_amdgpu_import_rsp) + +typedef struct _vhsakmt_ccmd_memory_share_memory_rsp { + HsaSharedMemoryHandle SharedMemoryHandle; +} vhsakmt_ccmd_memory_share_memory_rsp; +VHSAKMT_STATIC_ASSERT_SIZE(_vhsakmt_ccmd_memory_share_memory_rsp) + +typedef struct _vhsakmt_ccmd_memory_register_shared_handle_rsp { + uint64_t memory_handle; + uint64_t size; +} vhsakmt_ccmd_memory_register_shared_handle_rsp; +VHSAKMT_STATIC_ASSERT_SIZE(_vhsakmt_ccmd_memory_register_shared_handle_rsp) + struct vhsakmt_ccmd_memory_rsp { struct vhsakmt_ccmd_rsp hdr; int32_t ret; union { vhsakmt_ccmd_memory_map_userptr_rsp map_userptr_rsp; + vhsakmt_ccmd_memory_export_dmabuf_rsp export_dmabuf_rsp; uint64_t memory_handle; uint64_t alternate_vagpu; uint64_t available_bytes; + vhsakmt_ccmd_memory_amdgpu_import_rsp amdgpu_import_rsp; + uint32_t shared_handle; + vhsakmt_ccmd_memory_share_memory_rsp share_memory_rsp; + vhsakmt_ccmd_memory_register_shared_handle_rsp register_shared_handle_rsp; + struct amdgpu_bo_info query_bo_info; }; uint8_t payload[]; }; @@ -407,8 +565,15 @@ VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_memory_rsp) enum vhsakmt_ccmd_queue_type { VHSAKMT_CCMD_QUEUE_CREATE, VHSAKMT_CCMD_QUEUE_DESTROY, + VHSAKMT_CCMD_QUEUE_UPDATE, + VHSAKMT_CCMD_QUEUE_GET_INFO, + VHSAKMT_CCMD_QUEUE_SET_CU_MASK, + VHSAKMT_CCMD_QUEUE_ALLOC_GWS, }; +#define VHSAKMT_CCMD_QUEUE_MAX_CU_MASK_SIZE 128 +#define VHSAKMT_CCMD_QUEUE_MAX_GWS_SIZE 128 + typedef struct _vHsaQueueResource { HsaQueueResource r; uint64_t host_doorbell; @@ -437,11 +602,24 @@ typedef struct _queue_req_create { } queue_req_create; VHSAKMT_STATIC_ASSERT_SIZE(_queue_req_create) 
+typedef struct _queue_req_update { + HSA_QUEUEID QueueId; + uint32_t QueuePercentage; + uint32_t pad; + HSA_QUEUE_PRIORITY Priority; + uint64_t QueueAddress; + uint64_t QueueSizeInBytes; +} queue_req_update; +VHSAKMT_STATIC_ASSERT_SIZE(_queue_req_update) + struct vhsakmt_ccmd_queue_req { struct vhsakmt_ccmd_req hdr; union { HSA_QUEUEID QueueId; queue_req_create create_queue_args; + queue_req_update update_queue_args; + uint32_t CUMaskCount; + uint32_t nGWS; }; uint64_t blob_id; /* For queue create, queue resource */ uint64_t rw_ptr_blob_id; /* For queue create, r/w ptr memory mapping */ @@ -459,6 +637,8 @@ struct vhsakmt_ccmd_queue_rsp { struct vhsakmt_ccmd_rsp hdr; int32_t ret; vHsaQueueResource vqueue_res; + uint32_t pad; + HsaQueueInfo queue_info; uint8_t payload[]; }; VHSAKMT_STATIC_ASSERT_SIZE(vhsakmt_ccmd_queue_rsp) @@ -474,7 +654,7 @@ typedef struct _gl_inter_req_reg_ghd_to_nodes { uint64_t GraphicsResourceHandle; uint64_t NumberOfNodes; // NodeArray in payload uint32_t res_handle; - uint32_t pad; + uint32_t flag; } gl_inter_req_reg_ghd_to_nodes; VHSAKMT_STATIC_ASSERT_SIZE(_gl_inter_req_reg_ghd_to_nodes) diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_queues.c b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_queues.c index 5ab12698a8..78ef57bc6b 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_queues.c +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_queues.c @@ -251,3 +251,115 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtDestroyQueue(HSA_QUEUEID QueueId) { return r; } + +HSAKMT_STATUS HSAKMTAPI vhsaKmtUpdateQueue(HSA_QUEUEID QueueId, HSAuint32 QueuePercentage, + HSA_QUEUE_PRIORITY Priority, void* QueueAddress, + HSAuint64 QueueSize, HsaEvent* Event) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo = (vhsakmt_bo_handle)QueueId; + vhsakmt_bo_handle queue_mem_bo; + struct vhsakmt_ccmd_queue_rsp* rsp; + struct vhsakmt_ccmd_queue_req req = { + .hdr = 
VHSAKMT_CCMD(QUEUE, sizeof(struct vhsakmt_ccmd_queue_req)), + .type = VHSAKMT_CCMD_QUEUE_UPDATE, + .res_id = bo->real.res_id, + .update_queue_args = + { + .QueueId = QueueId, + .QueuePercentage = QueuePercentage, + .Priority = Priority, + .QueueAddress = (uint64_t)QueueAddress, + .QueueSizeInBytes = QueueSize, + }, + }; + + queue_mem_bo = vhsakmt_find_bo_by_addr(dev, (void*)QueueAddress); + if (!queue_mem_bo) return -EINVAL; + + req.queue_mem_res_id = queue_mem_bo->real.res_id; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_queue_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtGetQueueInfo(HSA_QUEUEID QueueId, HsaQueueInfo* QueueInfo) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo = (vhsakmt_bo_handle)QueueId; + struct vhsakmt_ccmd_queue_rsp* rsp; + struct vhsakmt_ccmd_queue_req req = { + .hdr = VHSAKMT_CCMD(QUEUE, sizeof(struct vhsakmt_ccmd_queue_req)), + .type = VHSAKMT_CCMD_QUEUE_GET_INFO, + .res_id = bo->real.res_id, + }; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_queue_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + *QueueInfo = rsp->queue_info; + + return HSAKMT_STATUS_SUCCESS; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetQueueCUMask(HSA_QUEUEID QueueId, HSAuint32 CUMaskCount, + HSAuint32* QueueCUMask) { + CHECK_VIRTIO_KFD_OPEN(); + if (CUMaskCount > VHSAKMT_CCMD_QUEUE_MAX_CU_MASK_SIZE) return -EINVAL; + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo = (vhsakmt_bo_handle)QueueId; + struct vhsakmt_ccmd_queue_rsp* rsp; + struct vhsakmt_ccmd_queue_req req = { + .hdr = VHSAKMT_CCMD(QUEUE, + sizeof(struct vhsakmt_ccmd_queue_req) + CUMaskCount * sizeof(HSAuint32)), + .type = VHSAKMT_CCMD_QUEUE_SET_CU_MASK, + .res_id = bo->real.res_id, + .CUMaskCount = CUMaskCount, + }; + + 
memcpy(req.payload, QueueCUMask, CUMaskCount * sizeof(HSAuint32)); + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_queue_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + memcpy(QueueCUMask, rsp->payload, CUMaskCount * sizeof(HSAuint32)); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtAllocQueueGWS(HSA_QUEUEID QueueId, HSAuint32 nGWS, + HSAuint32* firstGWS) { + CHECK_VIRTIO_KFD_OPEN(); + if (nGWS > VHSAKMT_CCMD_QUEUE_MAX_GWS_SIZE) return -EINVAL; + + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo = (vhsakmt_bo_handle)QueueId; + struct vhsakmt_ccmd_queue_rsp* rsp; + struct vhsakmt_ccmd_queue_req req = { + .hdr = VHSAKMT_CCMD(QUEUE, sizeof(struct vhsakmt_ccmd_queue_req) + nGWS * sizeof(HSAuint32)), + .type = VHSAKMT_CCMD_QUEUE_ALLOC_GWS, + .res_id = bo->real.res_id, + .nGWS = nGWS, + }; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_queue_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + memcpy(firstGWS, rsp->payload, nGWS * sizeof(HSAuint32)); + + return rsp->ret; +} diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_topology.c b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_topology.c index 8c06e26459..0b8869c40a 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_topology.c +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_topology.c @@ -340,3 +340,115 @@ HSAKMT_STATUS HSAKMTAPI vhsaKmtGetRuntimeCapabilities(HSAuint32* caps_mask) { return rsp->ret; } + +HSAKMT_STATUS HSAKMTAPI vhsaKmtModelEnabled(bool* enable) { + CHECK_VIRTIO_KFD_OPEN(); + + // pre-silicon models are not supported in virtio mode + *enable = false; + + return HSAKMT_STATUS_SUCCESS; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtOpenSMI(HSAuint32 NodeId, int* fd) { + CHECK_VIRTIO_KFD_OPEN(); + + // not supported yet in virtio 
mode + return HSAKMT_STATUS_NOT_SUPPORTED; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSetXNACKMode(HSAint32 enable) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_ccmd_query_info_rsp* rsp; + struct vhsakmt_ccmd_query_info_req req = { + .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)), + .xnack_mode = enable, + .type = VHSAKMT_CCMD_QUERY_SET_XNACK_MODE, + }; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSPMAcquire(HSAuint32 PreferredNode) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_ccmd_query_info_rsp* rsp; + struct vhsakmt_ccmd_query_info_req req = { + .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)), + .NodeID = PreferredNode, + .type = VHSAKMT_CCMD_QUERY_SPM_ACQUIRE, + }; + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSPMRelease(HSAuint32 PreferredNode) { + CHECK_VIRTIO_KFD_OPEN(); + + vhsakmt_device_handle dev = vhsakmt_dev(); + struct vhsakmt_ccmd_query_info_rsp* rsp; + struct vhsakmt_ccmd_query_info_req req = { + .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)), + .NodeID = PreferredNode, + .type = VHSAKMT_CCMD_QUERY_SPM_RELEASE, + }; + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp)); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + + return rsp->ret; +} + +HSAKMT_STATUS HSAKMTAPI vhsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode, HSAuint32 SizeInBytes, + HSAuint32* timeout, HSAuint32* SizeCopied, + void* DestMemoryAddress, bool* isSPMDataLoss) { + CHECK_VIRTIO_KFD_OPEN(); 
+ + vhsakmt_device_handle dev = vhsakmt_dev(); + vhsakmt_bo_handle bo; + bool use_userptr = false; + struct vhsakmt_ccmd_query_info_rsp* rsp; + struct vhsakmt_ccmd_query_info_req req = { + .hdr = VHSAKMT_CCMD(QUERY_INFO, sizeof(struct vhsakmt_ccmd_query_info_req)), + .type = VHSAKMT_CCMD_QUERY_SPM_SET_DST_BUFFER, + .spm_set_dst_buffer_args = { + .PreferredNode = PreferredNode, + .SizeInBytes = SizeInBytes, + .timeout = *timeout, + .DestMemoryAddress = (uint64_t)DestMemoryAddress, + }}; + + bo = vhsakmt_find_bo_by_addr(dev, DestMemoryAddress); + if (!bo) { + use_userptr = true; + if (SizeInBytes > (dev->shmem_bo->size >> 2)) return HSAKMT_STATUS_INVALID_PARAMETER; + } else + req.spm_set_dst_buffer_args.res_id = bo->real.res_id; + + rsp = vhsakmt_alloc_rsp(dev, &req.hdr, sizeof(struct vhsakmt_ccmd_query_info_rsp) + SizeInBytes); + if (!rsp) return -ENOMEM; + + vhsakmt_execbuf_cpu(dev, &req.hdr, __FUNCTION__); + if (rsp->ret) return rsp->ret; + + if (use_userptr) memcpy(DestMemoryAddress, rsp->payload, SizeInBytes); + + *SizeCopied = rsp->spm_set_dst_buffer_rsp.SizeCopied; + *timeout = rsp->spm_set_dst_buffer_rsp.timeout; + *isSPMDataLoss = rsp->spm_set_dst_buffer_rsp.IsTileDataLoss; + + return rsp->ret; +} diff --git a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_vm.c b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_vm.c index 151b374180..86aa0a1a66 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_vm.c +++ b/projects/rocr-runtime/libhsakmt/src/virtio/hsakmt_virtio_vm.c @@ -111,3 +111,10 @@ void* vhsakmt_node_doorbell(vhsakmt_device_handle dev, uint32_t node) { return dev->vhsakmt_nodes[node].doorbell_base; } + +struct vhsakmt_node* vhsakmt_get_node_by_id(vhsakmt_device_handle dev, uint32_t node_id) { + if (!dev->vhsakmt_nodes || !dev->sys_props) return NULL; + if (node_id >= dev->sys_props->NumNodes) return NULL; + + return &dev->vhsakmt_nodes[node_id]; +} diff --git 
a/projects/rocr-runtime/libhsakmt/src/virtio/libhsakmt_virtio.ver b/projects/rocr-runtime/libhsakmt/src/virtio/libhsakmt_virtio.ver index 1d3c323e17..1f58b87aa1 100644 --- a/projects/rocr-runtime/libhsakmt/src/virtio/libhsakmt_virtio.ver +++ b/projects/rocr-runtime/libhsakmt/src/virtio/libhsakmt_virtio.ver @@ -3,12 +3,15 @@ global: vhsaKmtOpenKFD; vhsaKmtCloseKFD; vhsaKmtAllocMemory; +vhsaKmtAllocMemoryAlign; vhsaKmtFreeMemory; vhsaKmtMapMemoryToGPUNodes; vhsaKmtUnmapMemoryToGPU; vhsaKmtAvailableMemory; vhsaKmtMapMemoryToGPU; vhsaKmtRegisterMemoryWithFlags; +vhsaKmtRegisterMemory; +vhsaKmtRegisterMemoryToNodes; vhsaKmtDeregisterMemory; vhsaKmtGetVersion; vhsaKmtAcquireSystemProperties; @@ -37,9 +40,46 @@ vhsaKmtSetTrapHandler; vhsaKmtCreateQueueExt; vhsaKmtCreateQueue; vhsaKmtDestroyQueue; +vhsaKmtUpdateQueue; +vhsaKmtGetQueueInfo; +vhsaKmtSetQueueCUMask; +vhsaKmtAllocQueueGWS; +vhsaKmtRegisterGraphicsHandleToNodesExt; vhsaKmtRegisterGraphicsHandleToNodes; +vhsaKmtMapGraphicHandle; +vhsaKmtUnmapGraphicHandle; +vhsaKmtExportDMABufHandle; vhsaKmtGetRuntimeCapabilities; +vhsaKmtModelEnabled; +vhsaKmtOpenSMI; +vhsaKmtSetXNACKMode; +vhsaKmtShareMemory; +vhsaKmtRegisterSharedHandleToNodes; +vhsaKmtRegisterSharedHandle; +vhsaKmtSetMemoryUserData; +vhsaKmtSetMemoryPolicy; +vhsaKmtSVMGetAttr; +vhsaKmtSVMSetAttr; +vhsaKmtReplaceAsanHeaderPage; +vhsaKmtReturnAsanHeaderPage; +vhsaKmtSPMAcquire; +vhsaKmtSPMRelease; +vhsaKmtSPMSetDestBuffer; +vhsaKmtAisReadWriteFile; +vhsaKmtProcessVMRead; +vhsaKmtProcessVMWrite; vamdgpu_query_gpu_info; +vamdgpu_device_initialize; +vamdgpu_device_deinitialize; +vamdgpu_device_get_fd; +vdrmCommandWriteRead; +vamdgpu_bo_cpu_map; +vamdgpu_bo_free; +vamdgpu_bo_export; +vamdgpu_bo_import; +vamdgpu_bo_va_op; +vamdgpu_bo_query_info; +vamdgpu_bo_set_metadata; local: *; }; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp 
b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp index a6a832b3c6..efd32709ca 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/virtio/amd_kfd_virtio_driver.cpp @@ -45,6 +45,7 @@ #include #include +#include #include "core/inc/amd_gpu_agent.h" #include "core/inc/amd_memory_region.h" @@ -55,6 +56,20 @@ extern r_debug _amdgpu_r_debug; namespace rocr { namespace AMD { +__forceinline uint64_t drm_perm(hsa_access_permission_t perm) { + switch (perm) { + case HSA_ACCESS_PERMISSION_RO: + return AMDGPU_VM_PAGE_READABLE; + case HSA_ACCESS_PERMISSION_WO: + return AMDGPU_VM_PAGE_WRITEABLE; + case HSA_ACCESS_PERMISSION_RW: + return AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; + case HSA_ACCESS_PERMISSION_NONE: + default: + return 0; + } +} + KfdVirtioDriver::KfdVirtioDriver(std::string devnode_name) : core::Driver(core::DriverType::KFD_VIRTIO, std::move(devnode_name)) {} @@ -448,26 +463,73 @@ hsa_status_t KfdVirtioDriver::AllocQueueGWS(HSA_QUEUEID queue_id, uint32_t num_G } hsa_status_t KfdVirtioDriver::ExportDMABuf(void* mem, size_t size, int* dmabuf_fd, size_t* offset) { - return HSA_STATUS_ERROR; + int dmabuf_fd_res = -1; + size_t offset_res = 0; + HSAKMT_STATUS status = + vhsaKmtExportDMABufHandle(mem, size, &dmabuf_fd_res, &offset_res); + if (status != HSAKMT_STATUS_SUCCESS) { + if (status == HSAKMT_STATUS_INVALID_PARAMETER) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + *dmabuf_fd = dmabuf_fd_res; + *offset = offset_res; + + return HSA_STATUS_SUCCESS; } hsa_status_t KfdVirtioDriver::ImportDMABuf(int dmabuf_fd, core::Agent& agent, core::ShareableHandle& handle) { - return HSA_STATUS_ERROR; + auto &gpu_agent = static_cast(agent); + amdgpu_bo_import_result res; + auto ret = vamdgpu_bo_import( + gpu_agent.libDrmDev(), amdgpu_bo_handle_type_dma_buf_fd, dmabuf_fd, 
&res); + if (ret) + return HSA_STATUS_ERROR; + + handle.handle = reinterpret_cast(res.buf_handle); + return HSA_STATUS_SUCCESS; } hsa_status_t KfdVirtioDriver::Map(core::ShareableHandle handle, void* mem, size_t offset, size_t size, hsa_access_permission_t perms) { - return HSA_STATUS_ERROR; + const auto ldrm_bo = reinterpret_cast(handle.handle); + if (!ldrm_bo) + return HSA_STATUS_ERROR; + + if (vamdgpu_bo_va_op(ldrm_bo, offset, size, reinterpret_cast(mem), + drm_perm(perms), AMDGPU_VA_OP_MAP) != 0) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; } hsa_status_t KfdVirtioDriver::Unmap(core::ShareableHandle handle, void* mem, size_t offset, size_t size) { - return HSA_STATUS_ERROR; + const auto ldrm_bo = reinterpret_cast(handle.handle); + if (!ldrm_bo) + return HSA_STATUS_ERROR; + + if (vamdgpu_bo_va_op(ldrm_bo, offset, size, reinterpret_cast(mem), 0, + AMDGPU_VA_OP_UNMAP) != 0) + return HSA_STATUS_ERROR; + + return HSA_STATUS_SUCCESS; } hsa_status_t KfdVirtioDriver::ReleaseShareableHandle(core::ShareableHandle& handle) { - return HSA_STATUS_ERROR; + const auto ldrm_bo = reinterpret_cast(handle.handle); + if (!ldrm_bo) + return HSA_STATUS_ERROR; + + const auto ret = vamdgpu_bo_free(ldrm_bo); + if (ret) + return HSA_STATUS_ERROR; + + handle = {}; + return HSA_STATUS_SUCCESS; } hsa_status_t KfdVirtioDriver::GetTileConfig(uint32_t node_id, HsaGpuTileConfig* config) const {