diff --git a/rocclr/platform/context.cpp b/rocclr/platform/context.cpp index ef6c276431..df536108f0 100644 --- a/rocclr/platform/context.cpp +++ b/rocclr/platform/context.cpp @@ -297,17 +297,31 @@ void Context::hostFree(void* ptr) const { AlignedMemory::deallocate(ptr); } -void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags) { +void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags, + const amd::Device* curDev) { unsigned int numSVMDev = svmAllocDevice_.size(); if (numSVMDev < 1) { - return NULL; + return nullptr; } - void* svmPtrAlloced = NULL; - void* tempPtr = NULL; + void* svmPtrAlloced = nullptr; amd::ScopedLock lock(&ctxLock_); + + if (curDev != nullptr) { + if (!(flags & CL_MEM_SVM_ATOMICS) || + (curDev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) { + svmPtrAlloced = curDev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced); + if (svmPtrAlloced == nullptr) { + return nullptr; + } + } + } + for (const auto& dev : svmAllocDevice_) { + if (dev == curDev) { + continue; + } // check if the device support svm platform atomics, // skipped allocation for platform atomics if not supported by this device if ((flags & CL_MEM_SVM_ATOMICS) && @@ -315,8 +329,8 @@ void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags) { continue; } svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced); - if (svmPtrAlloced == NULL) { - return NULL; + if (svmPtrAlloced == nullptr) { + return nullptr; } } return svmPtrAlloced; diff --git a/rocclr/platform/context.hpp b/rocclr/platform/context.hpp index 99b8cc5fe6..4ce9f07652 100644 --- a/rocclr/platform/context.hpp +++ b/rocclr/platform/context.hpp @@ -150,8 +150,10 @@ class Context : public RuntimeObject { * @param size Allocation size, in bytes * @param alignment Desired alignment, in bytes * @param flags The flags to create a svm space + * @param curDev Device to allocate on first, before the context's other SVM devices (may be nullptr) */ - void* svmAlloc(size_t size, size_t alignment, 
cl_svm_mem_flags flags = CL_MEM_READ_WRITE); + void* svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags = CL_MEM_READ_WRITE, + const amd::Device* curDev = nullptr); /** * Release SVM buffer diff --git a/rocclr/platform/memory.cpp b/rocclr/platform/memory.cpp index 27b0362367..8e179dcea9 100644 --- a/rocclr/platform/memory.cpp +++ b/rocclr/platform/memory.cpp @@ -1454,12 +1454,13 @@ bool SvmBuffer::Contains(uintptr_t ptr) { } // The allocation flags are ignored for now. -void* SvmBuffer::malloc(Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment) { +void* SvmBuffer::malloc(Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment, + const amd::Device* curDev) { bool atomics = (flags & CL_MEM_SVM_ATOMICS) != 0; - void* ret = context.svmAlloc(size, alignment, flags); - if (ret == NULL) { + void* ret = context.svmAlloc(size, alignment, flags, curDev); + if (ret == nullptr) { LogError("Unable to allocate aligned memory"); - return NULL; + return nullptr; } uintptr_t ret_u = reinterpret_cast<uintptr_t>(ret); Add(ret_u, ret_u + size); diff --git a/rocclr/platform/memory.hpp b/rocclr/platform/memory.hpp index 0775639823..8a2761fafa 100644 --- a/rocclr/platform/memory.hpp +++ b/rocclr/platform/memory.hpp @@ -594,7 +594,8 @@ class Image : public Memory { class SvmBuffer : AllStatic { public: //! Allocate a shared buffer that is accessible by all devices in the context - static void* malloc(Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment); + static void* malloc(Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment, + const amd::Device* curDev = nullptr); //! Release shared buffer static void free(const Context& context, void* ptr);