P4 to Git Change 1527848 by gandryey@gera-w8 on 2018/03/15 17:11:43
SWDEV-79445 - OCL generic changes and code clean-up - Add suballocation support for local (invisible) memory. It should significantly improve the memory footprint and TLB usage with 2MB pages. - The implementation uses the BuddyAllocator provided in PAL. - The chunk allocation size is 64MB, the min allocation is 4KB and the max is 4MB. GPU_MAX_SUBALLOC_SIZE controls the max size in KB. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#76 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#56 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#77 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#285 edit
Tento commit je obsažen v:
@@ -8,6 +8,7 @@
|
||||
#include "palGpuMemory.h"
|
||||
#include "palImage.h"
|
||||
#include "palFormatInfo.h"
|
||||
#include "util/palSysMemory.h"
|
||||
|
||||
//
|
||||
/// Memory Object Type
|
||||
|
||||
@@ -54,6 +54,10 @@ void PalDeviceUnload() { pal::Device::tearDown(); }
|
||||
|
||||
namespace pal {
|
||||
|
||||
Util::GenericAllocator NullDevice::allocator_;
|
||||
char* Device::platformObj_;
|
||||
Pal::IPlatform* Device::platform_;
|
||||
|
||||
NullDevice::Compiler* NullDevice::compiler_;
|
||||
AppProfile Device::appProfile_;
|
||||
|
||||
@@ -183,6 +187,7 @@ bool NullDevice::init() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
|
||||
uint xNACKSupported) {
|
||||
online_ = false;
|
||||
@@ -736,7 +741,7 @@ bool Device::create(Pal::IDevice* device) {
|
||||
if (!amd::Device::create()) {
|
||||
return false;
|
||||
}
|
||||
resourceList_ = new std::list<GpuMemoryReference*>();
|
||||
resourceList_ = new std::list<Resource*>();
|
||||
if (nullptr == resourceList_) {
|
||||
return false;
|
||||
}
|
||||
@@ -865,7 +870,7 @@ bool Device::create(Pal::IDevice* device) {
|
||||
size_t resourceCacheSize = settings().resourceCacheSize_;
|
||||
// Create resource cache.
|
||||
// \note Cache must be created before any resource creation to avoid nullptr check
|
||||
resourceCache_ = new ResourceCache(resourceCacheSize);
|
||||
resourceCache_ = new ResourceCache(this, resourceCacheSize);
|
||||
if (nullptr == resourceCache_) {
|
||||
return false;
|
||||
}
|
||||
@@ -925,8 +930,6 @@ bool Device::create(Pal::IDevice* device) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static Pal::IPlatform* platform;
|
||||
|
||||
bool Device::initializeHeapResources() {
|
||||
amd::ScopedLock k(lockForInitHeap_);
|
||||
if (!heapInitComplete_) {
|
||||
@@ -998,7 +1001,7 @@ bool Device::initializeHeapResources() {
|
||||
xferQueue_->enableSyncedBlit();
|
||||
|
||||
// Create RGP capture manager
|
||||
rgpCaptureMgr_ = RgpCaptureMgr::Create(platform, *this);
|
||||
rgpCaptureMgr_ = RgpCaptureMgr::Create(platform_, *this);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -1096,8 +1099,6 @@ static int reportHook(int reportType, char* message, int* returnValue) {
|
||||
}
|
||||
#endif // _WIN32 & DEBUG
|
||||
|
||||
static char* platformObj;
|
||||
|
||||
bool Device::init() {
|
||||
uint32_t numDevices = 0;
|
||||
bool useDeviceList = false;
|
||||
@@ -1123,7 +1124,7 @@ bool Device::init() {
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
size_t size = Pal::GetPlatformSize();
|
||||
platformObj = new char[size];
|
||||
platformObj_ = new char[size];
|
||||
Pal::PlatformCreateInfo info = {};
|
||||
info.flags.disableGpuTimeout = true;
|
||||
#if !defined(PAL_BUILD_DTIF)
|
||||
@@ -1138,14 +1139,14 @@ bool Device::init() {
|
||||
info.maxSvmSize = static_cast<Pal::gpusize>(OCL_SET_SVM_SIZE * Mi);
|
||||
|
||||
// PAL init
|
||||
if (Pal::Result::Success != Pal::CreatePlatform(info, platformObj, &platform)) {
|
||||
if (Pal::Result::Success != Pal::CreatePlatform(info, platformObj_, &platform_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the total number of active devices
|
||||
// Count up all the devices in the system.
|
||||
Pal::IDevice* deviceList[Pal::MaxDevices] = {};
|
||||
platform->EnumerateDevices(&numDevices, &deviceList[0]);
|
||||
platform_->EnumerateDevices(&numDevices, &deviceList[0]);
|
||||
|
||||
uint ordinal = 0;
|
||||
const char* selectDeviceByName = nullptr;
|
||||
@@ -1175,8 +1176,8 @@ bool Device::init() {
|
||||
}
|
||||
|
||||
void Device::tearDown() {
|
||||
platform->Destroy();
|
||||
delete platformObj;
|
||||
platform_->Destroy();
|
||||
delete platformObj_;
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (compiler_ != nullptr) {
|
||||
|
||||
@@ -120,7 +120,12 @@ class NullDevice : public amd::Device {
|
||||
amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
|
||||
#endif
|
||||
|
||||
//! Forwards the allocation request to the static PAL generic allocator and returns its result
//! NOTE(review): presumably provided so the device can act as the allocator type of
//! Util::BuddyAllocator<Device> - confirm against the PAL allocator requirements
void* Alloc(const Util::AllocInfo& allocInfo) { return allocator_.Alloc(allocInfo); }
|
||||
//! Forwards the free request to the static PAL generic allocator
void Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); }
|
||||
|
||||
protected:
|
||||
static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
|
||||
|
||||
Pal::AsicRevision asicRevision_; //!< ASIC revision
|
||||
Pal::GfxIpLevel ipLevel_; //!< Device IP level
|
||||
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
|
||||
@@ -464,6 +469,9 @@ class Device : public NullDevice {
|
||||
//! Returns PAL device properties
|
||||
const Pal::DeviceProperties& properties() const { return properties_; }
|
||||
|
||||
//! Returns PAL platform interface
|
||||
Pal::IPlatform* iPlat() const { return platform_; }
|
||||
|
||||
//! Returns PAL device interface
|
||||
Pal::IDevice* iDev() const { return device_; }
|
||||
|
||||
@@ -496,19 +504,19 @@ class Device : public NullDevice {
|
||||
bool resGLFree(void* GLplatformContext, void* mbResHandle, uint type) const;
|
||||
|
||||
//! Adds a resource to the global list
|
||||
void addResource(GpuMemoryReference* mem) const {
|
||||
void addResource(Resource* res) const {
|
||||
amd::ScopedLock lock(lockResources());
|
||||
auto findIt = std::find(resourceList_->begin(), resourceList_->end(), mem);
|
||||
mem->events_.resize(numOfVgpus());
|
||||
auto findIt = std::find(resourceList_->begin(), resourceList_->end(), res);
|
||||
res->resizeGpuEvents(numOfVgpus() - 1);
|
||||
if (resourceList_->end() == findIt) {
|
||||
resourceList_->push_back(mem);
|
||||
resourceList_->push_back(res);
|
||||
}
|
||||
}
|
||||
|
||||
//! Removes a resource from the global list
|
||||
void removeResource(GpuMemoryReference* mem) const {
|
||||
void removeResource(Resource* res) const {
|
||||
amd::ScopedLock lock(lockResources());
|
||||
resourceList_->remove(mem);
|
||||
resourceList_->remove(res);
|
||||
}
|
||||
|
||||
//! Resizes global resource list to accumulate a new queue
|
||||
@@ -566,6 +574,9 @@ class Device : public NullDevice {
|
||||
bool glAssociate(void* GLplatformContext, void* GLdeviceContext) const;
|
||||
bool glDissociate(void* GLplatformContext, void* GLdeviceContext) const;
|
||||
|
||||
static char* platformObj_; //!< Memory allocated for PAL platform object
|
||||
static Pal::IPlatform* platform_; //!< Pointer to the PAL platform object
|
||||
|
||||
amd::Context* context_; //!< A dummy context for internal allocations
|
||||
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
|
||||
amd::Monitor*
|
||||
@@ -592,7 +603,7 @@ class Device : public NullDevice {
|
||||
Pal::IDevice* device_; //!< PAL device object
|
||||
std::atomic<Pal::gpusize> freeMem[Pal::GpuHeap::GpuHeapCount]; //!< Free memory counter
|
||||
amd::Monitor* lockResourceOps_; //!< Lock to serialise resource access
|
||||
std::list<GpuMemoryReference*>* resourceList_; //!< Active resource list
|
||||
std::list<Resource*>* resourceList_; //!< Active resource list
|
||||
RgpCaptureMgr* rgpCaptureMgr_; //!< RGP capture manager
|
||||
};
|
||||
|
||||
|
||||
@@ -89,14 +89,14 @@ void Segment::copy(size_t offset, const void* src, size_t size) {
|
||||
amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
|
||||
VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
|
||||
Memory& xferBuf = gpuAccess_->dev().xferWrite().acquire();
|
||||
size_t tmpSize = std::min(static_cast<size_t>(xferBuf.vmSize()), size);
|
||||
size_t tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
|
||||
size_t srcOffs = 0;
|
||||
while (size != 0) {
|
||||
xferBuf.hostWrite(&gpu, reinterpret_cast<const_address>(src) + srcOffs, 0, tmpSize);
|
||||
xferBuf.partialMemCopyTo(gpu, 0, (offset + srcOffs), tmpSize, *gpuAccess_, false, true);
|
||||
size -= tmpSize;
|
||||
srcOffs += tmpSize;
|
||||
tmpSize = std::min(static_cast<size_t>(xferBuf.vmSize()), size);
|
||||
tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
|
||||
}
|
||||
gpu.waitAllEngines();
|
||||
}
|
||||
|
||||
Rozdílový obsah nebyl zobrazen, protože je příliš veliký
Načíst rozdílové porovnání
@@ -6,6 +6,7 @@
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "device/pal/paldefs.hpp"
|
||||
#include "util/palBuddyAllocatorImpl.h"
|
||||
|
||||
//! \namespace pal PAL Resource Implementation
|
||||
namespace pal {
|
||||
@@ -16,7 +17,6 @@ class VirtualGPU;
|
||||
/*! \addtogroup PAL PAL Resource Implementation
|
||||
* @{
|
||||
*/
|
||||
|
||||
class GpuMemoryReference : public amd::ReferenceCountedObject {
|
||||
public:
|
||||
static GpuMemoryReference* Create(const Device& dev, const Pal::GpuMemoryCreateInfo& createInfo);
|
||||
@@ -36,12 +36,6 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
|
||||
//! Default constructor
|
||||
GpuMemoryReference(const Device& dev);
|
||||
|
||||
//! Resizes the events array to account the new queue
|
||||
void resizeGpuEvents(uint index) { events_.resize(index + 1); }
|
||||
|
||||
//! Erase an entry in the array for provided queue index
|
||||
void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
|
||||
|
||||
//! Get PAL memory object
|
||||
Pal::IGpuMemory* iMem() const { return gpuMem_; }
|
||||
|
||||
@@ -50,7 +44,6 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
|
||||
const Device& device_; //!< GPU device
|
||||
//! @note: This field is necessary for the thread safe release only
|
||||
VirtualGPU* gpu_; //!< Resource will be used only on this queue
|
||||
std::vector<GpuEvent> events_; //!< GPU events associated with the resource
|
||||
|
||||
protected:
|
||||
//! Default destructor
|
||||
@@ -64,6 +57,8 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
|
||||
GpuMemoryReference& operator=(const GpuMemoryReference&);
|
||||
};
|
||||
|
||||
static constexpr Pal::gpusize MaxGpuAlignment = 4 * Ki;
|
||||
|
||||
//! GPU resource
|
||||
class Resource : public amd::HeapObject {
|
||||
public:
|
||||
@@ -178,7 +173,7 @@ class Resource : public amd::HeapObject {
|
||||
uint imageArray_ : 1; //!< PAL resource is an array of images
|
||||
uint buffer_ : 1; //!< PAL resource is a buffer
|
||||
uint tiled_ : 1; //!< PAL resource is tiled
|
||||
uint SVMRes_ : 1; //!< SVM flag to the cal resource
|
||||
uint SVMRes_ : 1; //!< SVM flag to the pal resource
|
||||
uint scratch_ : 1; //!< Scratch buffer
|
||||
uint isAllocExecute_ : 1; //!< SVM resource allocation attribute for shader\cmdbuf
|
||||
uint isDoppTexture_ : 1; //!< PAL resource is for a DOPP desktop texture
|
||||
@@ -205,9 +200,9 @@ class Resource : public amd::HeapObject {
|
||||
//! Destructor of the resource
|
||||
virtual ~Resource();
|
||||
|
||||
/*! \brief Creates a CAL object, associated with the resource
|
||||
/*! \brief Creates a PAL object, associated with the resource
|
||||
*
|
||||
* \return True if we succesfully created a CAL resource
|
||||
* \return True if we successfully created a PAL resource
|
||||
*/
|
||||
virtual bool create(MemoryType memType, //!< memory type
|
||||
CreateParams* params = 0 //!< special parameters for resource allocation
|
||||
@@ -263,7 +258,7 @@ class Resource : public amd::HeapObject {
|
||||
uint64_t vmAddress() const { return iMem()->Desc().gpuVirtAddr + offset_; }
|
||||
|
||||
//! Returns global memory size
|
||||
uint64_t vmSize() const { return iMem()->Desc().size - offset_; }
|
||||
uint64_t vmSize() const { return desc_.width_ * elementSize(); }
|
||||
|
||||
//! Returns true if the resource has more than one mip level
|
||||
bool mipMapped() const { return (desc().mipLevels_ > 1) ? true : false; }
|
||||
@@ -290,7 +285,7 @@ class Resource : public amd::HeapObject {
|
||||
|
||||
//! Marks the resource as busy
|
||||
void setBusy(VirtualGPU& gpu, //!< Virtual GPU device object
|
||||
GpuEvent calEvent //!< CAL event
|
||||
GpuEvent calEvent //!< PAL event
|
||||
) const;
|
||||
|
||||
//! Wait for the resource
|
||||
@@ -326,7 +321,7 @@ class Resource : public amd::HeapObject {
|
||||
//! Get the mapped address of this resource
|
||||
address data() const { return reinterpret_cast<address>(address_); }
|
||||
|
||||
//! Frees all allocated CAL memories and resources,
|
||||
//! Frees all allocated PAL memories and resources,
|
||||
//! associated with this objects. And also destroys all rename structures
|
||||
//! Note: doesn't destroy the object itself
|
||||
void free();
|
||||
@@ -360,7 +355,42 @@ class Resource : public amd::HeapObject {
|
||||
//! Returns GPU event associated with this resource and specified queue
|
||||
GpuEvent* getGpuEvent(const VirtualGPU& gpu) const;
|
||||
|
||||
//! Resizes the events array to account the new queue
|
||||
void resizeGpuEvents(uint index) { events_.resize(index + 1); }
|
||||
|
||||
//! Erase an entry in the array for provided queue index
|
||||
void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
|
||||
|
||||
protected:
|
||||
/*! \brief Creates a PAL image object, associated with the resource
|
||||
*
|
||||
* \return True if we successfully created a PAL resource
|
||||
*/
|
||||
bool CreateImage(CreateParams* params //!< special parameters for resource allocation
|
||||
);
|
||||
|
||||
/*! \brief Creates a PAL interop object, associated with the resource
|
||||
*
|
||||
* \return True if we successfully created a PAL interop resource
|
||||
*/
|
||||
bool CreateInterop(CreateParams* params //!< special parameters for resource allocation
|
||||
);
|
||||
|
||||
/*! \brief Creates a PAL pinned object, associated with the resource
|
||||
*
|
||||
* \return True if we successfully created a PAL pinned resource
|
||||
*/
|
||||
bool CreatePinned(CreateParams* params //!< special parameters for resource allocation
|
||||
);
|
||||
|
||||
/*! \brief Creates a PAL SVM object, associated with the resource
|
||||
*
|
||||
* \return True if we successfully created a PAL SVM resource
|
||||
*/
|
||||
bool CreateSvm(CreateParams* params, //!< special parameters for resource allocation
|
||||
Pal::gpusize svmPtr
|
||||
);
|
||||
|
||||
uint elementSize_; //!< Size of a single element in bytes
|
||||
|
||||
private:
|
||||
@@ -424,6 +454,7 @@ class Resource : public amd::HeapObject {
|
||||
uint32_t curRename_; //!< Current active rename in the list
|
||||
RenameList renames_; //!< Rename resource list
|
||||
GpuMemoryReference* memRef_; //!< PAL resource reference
|
||||
Pal::gpusize subOffset_; //!< GPU memory offset in the original resource
|
||||
const Resource* viewOwner_; //!< GPU resource, which owns this view
|
||||
void* glInteropMbRes_; //!< Mb Res handle
|
||||
uint32_t glType_; //!< GL interop type
|
||||
@@ -438,26 +469,50 @@ class Resource : public amd::HeapObject {
|
||||
|
||||
uint32_t* hwState_; //!< HW state for image object
|
||||
uint64_t hwSrd_; //!< GPU pointer to HW SRD
|
||||
|
||||
//! Note: Access to the events are thread safe.
|
||||
mutable std::vector<GpuEvent> events_; //!< GPU events associated with the resource
|
||||
};
|
||||
|
||||
typedef Util::BuddyAllocator<Device> MemBuddyAllocator;
|
||||
|
||||
//! Suballocator for GPU memory. Keeps a map that pairs each GpuMemoryReference with a
//! MemBuddyAllocator (Util::BuddyAllocator<Device>).
//! NOTE(review): per the change description each reference is presumably a large chunk that
//! the buddy allocator carves into smaller allocations - confirm against the implementation
class MemorySubAllocator : public amd::HeapObject {
|
||||
public:
|
||||
//! Stores the device pointer; the constructor body itself performs no other work
MemorySubAllocator(Device* device) : device_(device) {}
|
||||
|
||||
//! Destructor, defined out of line
~MemorySubAllocator();
|
||||
|
||||
//! Requests a suballocation with the given size and alignment and returns a GPU memory
//! reference; \a offset is an output pointer.
//! NOTE(review): presumably \a offset receives the position of the suballocation inside the
//! returned memory, and the failure value is defined by the implementation - confirm
GpuMemoryReference* Allocate(Pal::gpusize size,
|
||||
Pal::gpusize alignment, Pal::gpusize* offset);
//! Releases the suballocation identified by the memory reference and offset.
//! NOTE(review): the meaning of the bool result is not visible here - presumably false when
//! \a ref is not managed by this allocator; confirm
bool Free(GpuMemoryReference* ref, Pal::gpusize offset);
|
||||
|
||||
private:
|
||||
Device* device_;  //!< Device passed in at construction
|
||||
|
||||
std::map<GpuMemoryReference*, MemBuddyAllocator*> mem_heap_;  //!< Buddy allocator per GPU memory reference
|
||||
|
||||
};
|
||||
|
||||
class ResourceCache : public amd::HeapObject {
|
||||
public:
|
||||
//! Default constructor
|
||||
ResourceCache(size_t cacheSizeLimit)
|
||||
: lockCacheOps_("PAL resource cache", true), cacheSize_(0), cacheSizeLimit_(cacheSizeLimit) {}
|
||||
ResourceCache(Device* device, size_t cacheSizeLimit)
|
||||
: lockCacheOps_("PAL resource cache", true)
|
||||
, cacheSize_(0)
|
||||
, cacheSizeLimit_(cacheSizeLimit)
|
||||
, memSubAllocLocal_(device) {}
|
||||
|
||||
//! Default destructor
|
||||
~ResourceCache();
|
||||
|
||||
//! Adds a CAL resource to the cache
|
||||
bool addGpuMemory(Resource::Descriptor* desc, //!< Resource descriptor - cache key
|
||||
GpuMemoryReference* ref //!< Resource reference
|
||||
//! Adds a PAL resource to the cache
|
||||
bool addGpuMemory(Resource::Descriptor* desc, //!< Resource descriptor - cache key
|
||||
GpuMemoryReference* ref, //!< Resource reference
|
||||
Pal::gpusize offset //!< Original resource offset
|
||||
);
|
||||
|
||||
//! Finds a CAL resource from the cache
|
||||
//! Finds a PAL resource from the cache
|
||||
GpuMemoryReference* findGpuMemory(
|
||||
Resource::Descriptor* desc, //!< Resource descriptor - cache key
|
||||
Pal::gpusize size, Pal::gpusize alignment);
|
||||
Pal::gpusize size, Pal::gpusize alignment, Pal::gpusize* offset);
|
||||
|
||||
//! Destroys cache
|
||||
bool free(size_t minCacheEntries = 0);
|
||||
@@ -477,8 +532,10 @@ class ResourceCache : public amd::HeapObject {
|
||||
size_t cacheSize_; //!< Current cache size in bytes
|
||||
const size_t cacheSizeLimit_; //!< Cache size limit in bytes
|
||||
|
||||
//! CAL resource cache
|
||||
//! PAL resource cache
|
||||
std::list<std::pair<Resource::Descriptor*, GpuMemoryReference*> > resCache_;
|
||||
|
||||
MemorySubAllocator memSubAllocLocal_; //!< Allocator for suballocations in Local
|
||||
};
|
||||
|
||||
/*@}*/} // namespace pal
|
||||
|
||||
@@ -138,6 +138,12 @@ Settings::Settings() {
|
||||
rgpSqttDispCount_ = PAL_RGP_DISP_COUNT;
|
||||
rgpSqttWaitIdle_ = true;
|
||||
rgpSqttForceDisable_ = false;
|
||||
|
||||
// Sub allocation parameters
|
||||
subAllocationMinSize_ = 4 * Ki;
|
||||
subAllocationChunkSize_ = 64 * Mi;
|
||||
subAllocationMaxSize_ =
|
||||
std::min(static_cast<uint64_t>(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
|
||||
}
|
||||
|
||||
bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
|
||||
@@ -98,6 +98,10 @@ class Settings : public device::Settings {
|
||||
uint64_t maxAllocSize_; //!< Maximum single allocation size
|
||||
uint rgpSqttDispCount_; //!< The number of dispatches captured in SQTT
|
||||
|
||||
uint64_t subAllocationMinSize_; //!< Minimum size allowed for suballocations
|
||||
uint64_t subAllocationMaxSize_; //!< Maximum size allowed with suballocations
|
||||
uint64_t subAllocationChunkSize_; //!< Chunk size for suballocations
|
||||
|
||||
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
|
||||
|
||||
//! Default constructor
|
||||
|
||||
@@ -409,7 +409,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
}
|
||||
|
||||
uint64_t curStart = memory->vmAddress();
|
||||
uint64_t curEnd = curStart + memory->vmSize();
|
||||
uint64_t curEnd = curStart + memory->size();
|
||||
|
||||
// Loop through all memory objects in the queue and find dependency
|
||||
// @note don't include objects from the current kernel
|
||||
@@ -1974,6 +1974,7 @@ void VirtualGPU::PostDeviceEnqueue(
|
||||
uint64_t vmParentWrap,
|
||||
GpuEvent* gpuEvent)
|
||||
{
|
||||
uint32_t id = gpuEvent->id;
|
||||
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
|
||||
|
||||
// Ensure exclusive access to the device queue
|
||||
@@ -2055,6 +2056,9 @@ void VirtualGPU::PostDeviceEnqueue(
|
||||
iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
|
||||
vmParentWrap + offsetof(AmdAqlWrap, child_counter),
|
||||
signalAddr, dev().settings().useDeviceQueue_);
|
||||
if (id != gpuEvent->id) {
|
||||
LogError("Something is wrong. ID mismatch!\n");
|
||||
}
|
||||
eventEnd(MainEngine, *gpuEvent);
|
||||
}
|
||||
|
||||
@@ -2203,6 +2207,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
if (profiling() || state_.profileEnabled_) {
|
||||
addBarrier();
|
||||
}
|
||||
if (id != gpuEvent.id) {
|
||||
LogError("Something is wrong. ID mismatch!\n");
|
||||
}
|
||||
eventEnd(MainEngine, gpuEvent);
|
||||
|
||||
// Execute scheduler for device enqueue
|
||||
@@ -2210,9 +2217,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
PostDeviceEnqueue(kernel, hsaKernel, gpuDefQueue, vmDefQueue, vmParentWrap, &gpuEvent);
|
||||
}
|
||||
|
||||
if (id != gpuEvent.id) {
|
||||
LogError("Something is wrong. ID mismatch!\n");
|
||||
}
|
||||
// Update the global GPU event
|
||||
setGpuEvent(gpuEvent, needFlush);
|
||||
|
||||
@@ -2266,7 +2270,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) {
|
||||
}
|
||||
}
|
||||
|
||||
void VirtualGPU::releaseMemory(GpuMemoryReference* mem, GpuEvent* event) {
|
||||
void VirtualGPU::releaseMemory(GpuMemoryReference* mem) {
|
||||
queues_[MainEngine]->removeCmdMemRef(mem);
|
||||
queues_[SdmaEngine]->removeCmdMemRef(mem);
|
||||
}
|
||||
|
||||
@@ -314,7 +314,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
|
||||
virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd);
|
||||
|
||||
void releaseMemory(GpuMemoryReference* mem, GpuEvent* event);
|
||||
void releaseMemory(GpuMemoryReference* mem);
|
||||
|
||||
void flush(amd::Command* list = nullptr, bool wait = false);
|
||||
bool terminate() { return true; }
|
||||
|
||||
@@ -86,6 +86,8 @@ release(size_t, GPU_PINNED_MIN_XFER_SIZE, 512, \
|
||||
"The minimal buffer size for pinned read/write transfers in KBytes") \
|
||||
release(size_t, GPU_RESOURCE_CACHE_SIZE, 64, \
|
||||
"The resource cache size in MB") \
|
||||
release(size_t, GPU_MAX_SUBALLOC_SIZE, 4096, \
|
||||
"The maximum size accepted for suballocaitons in KB") \
|
||||
release(uint, GPU_ASYNC_MEM_COPY, 0, \
|
||||
"Enables async memory transfers with DRM engine") \
|
||||
release(bool, GPU_FORCE_64BIT_PTR, 0, \
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele