diff --git a/projects/rocr-runtime/src/memory.c b/projects/rocr-runtime/src/memory.c
index acc45dc89c..37cb88f1ef 100644
--- a/projects/rocr-runtime/src/memory.c
+++ b/projects/rocr-runtime/src/memory.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include "fmm.h" // presumably declares fmm_is_inside_some_aperture()/fmm_release() used by hsaKmtFreeMemory -- confirm
 
 HSAKMT_STATUS
 HSAKMTAPI
@@ -39,9 +40,49 @@ hsaKmtSetMemoryPolicy(
     HSAuint64 MemorySizeInBytes
     )
 {
+    HSAKMT_STATUS result;
+    uint32_t gpu_id; // written by validate_nodeid() below, then passed to the ioctl
+
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    result = validate_nodeid(Node, &gpu_id); // helper defined elsewhere; presumably translates Node into a KFD gpu_id -- confirm
+    if (result != HSAKMT_STATUS_SUCCESS)
+        return result; // propagate the validation status unchanged
+
+    // Only the two known caching policies are accepted, for both the default and the alternate policy. You get CC everywhere anyway.
+    if ((DefaultPolicy != HSA_CACHING_CACHED && DefaultPolicy != HSA_CACHING_NONCACHED)
+        || (AlternatePolicy != HSA_CACHING_CACHED && AlternatePolicy != HSA_CACHING_NONCACHED))
+    {
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+    }
+
+    CHECK_PAGE_MULTIPLE(MemoryAddressAlternate); // macro defined elsewhere; presumably returns early when the value is not page aligned -- confirm
+    CHECK_PAGE_MULTIPLE(MemorySizeInBytes);
+
+    struct kfd_ioctl_set_memory_policy_args args;
+    memset(&args, 0, sizeof(args)); // zero every field before filling in the ones set below
+
+    args.gpu_id = gpu_id;
+    args.default_policy = (DefaultPolicy == HSA_CACHING_CACHED) ? KFD_IOC_CACHE_POLICY_COHERENT : KFD_IOC_CACHE_POLICY_NONCOHERENT; // CACHED -> COHERENT, NONCACHED -> NONCOHERENT
+    args.alternate_policy = (AlternatePolicy == HSA_CACHING_CACHED) ? KFD_IOC_CACHE_POLICY_COHERENT : KFD_IOC_CACHE_POLICY_NONCOHERENT; // same mapping as default_policy
+    args.alternate_aperture_base = (uintptr_t)MemoryAddressAlternate;
+    args.alternate_aperture_size = MemorySizeInBytes;
+
+    int err = kfd_ioctl(KFD_IOC_SET_MEMORY_POLICY, &args);
+
+    return (err == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS; // -1 from kfd_ioctl is the only result treated as failure
+}
+
+static HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags) // maps an HSA_PAGE_SIZE_* flag value to a page size in bytes
+{
+    switch (pageSizeFlags)
+    {
+    case HSA_PAGE_SIZE_4KB: return 4*1024;
+    case HSA_PAGE_SIZE_64KB: return 64*1024;
+    case HSA_PAGE_SIZE_2MB: return 2*1024*1024;
+    case HSA_PAGE_SIZE_1GB: return 1024*1024*1024;
+    default: assert(false); return 4*1024; // unknown flag: trips the assert; when asserts are compiled out (NDEBUG) falls back to 4 KiB
+    }
 }
 
 HSAKMT_STATUS
@@ -54,8 +95,29 @@ hsaKmtAllocMemory(
     )
 {
     CHECK_KFD_OPEN();
+    HSAKMT_STATUS result;
+    uint32_t gpu_id; // only passed to validate_nodeid(); not read again in this function
+
+    result = validate_nodeid(PreferredNode, &gpu_id); // rejects the request early if PreferredNode does not validate
+    if (result != HSAKMT_STATUS_SUCCESS)
+        return result;
+
+    // The requested size must be a multiple of the selected page size, except for GDS memory (GDS?)
+    HSAuint64 page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
+    if ((SizeInBytes & (page_size-1)) && !MemFlags.ui32.GDSMemory){ // mask test relies on page_size being a power of two
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+    }
+
+    if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged){ // the only flag combination this implementation services
+        int err = posix_memalign(MemoryAddress, page_size, SizeInBytes); // on success the new pointer is stored through MemoryAddress
+        if (err == 0)
+            return HSAKMT_STATUS_SUCCESS;
+        else
+            return HSAKMT_STATUS_NO_MEMORY; // every nonzero posix_memalign result is reported as out-of-memory
+    }
+    else
+        return HSAKMT_STATUS_INVALID_PARAMETER; // any other flag combination is rejected
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
 }
 
 HSAKMT_STATUS
@@ -65,9 +127,17 @@ hsaKmtFreeMemory(
     HSAuint64 SizeInBytes //IN
     )
 {
+    HSAKMT_STATUS hsa_status = HSAKMT_STATUS_SUCCESS; // stays SUCCESS unless fmm_release() reports a failure
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    if (fmm_is_inside_some_aperture(MemoryAddress)){ // aperture addresses are handed back to the fmm layer
+        if (fmm_release( MemoryAddress, SizeInBytes))
+            hsa_status = HSAKMT_STATUS_INVALID_PARAMETER; // a nonzero fmm_release() result is reported as a bad parameter
+    }
+    else
+        free(MemoryAddress); // presumably memory obtained via posix_memalign in hsaKmtAllocMemory -- confirm; SizeInBytes is not used on this path
+
+    return hsa_status;
 }
 
 HSAKMT_STATUS
@@ -79,7 +149,7 @@ hsaKmtRegisterMemory(
 {
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    return HSAKMT_STATUS_SUCCESS; // stub: no work is done after CHECK_KFD_OPEN(); success is reported unconditionally
 }
 
 HSAKMT_STATUS
@@ -90,7 +160,7 @@ hsaKmtDeregisterMemory(
 {
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    return HSAKMT_STATUS_SUCCESS; // stub: no work is done after CHECK_KFD_OPEN(); success is reported unconditionally
 }
 
 HSAKMT_STATUS
@@ -103,7 +173,13 @@ hsaKmtMapMemoryToGPU(
 {
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    // GPUVM is not supported in the stub, so there should never be a request for a GPUVA.
+    if (AlternateVAGPU) // optional out-parameter: written only when the caller supplied a pointer
+    {
+        *AlternateVAGPU = 0; // always reports 0 as the alternate GPU virtual address
+    }
+
+    return HSAKMT_STATUS_SUCCESS;
 }
 
 HSAKMT_STATUS
@@ -114,5 +190,5 @@ hsaKmtUnmapMemoryToGPU(
 {
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    return HSAKMT_STATUS_SUCCESS; // stub: no work is done after CHECK_KFD_OPEN(); success is reported unconditionally
 }