From 65de22f0ed74b6b507eb96f908505ad84ef3b58f Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 13 Oct 2017 14:10:40 -0400 Subject: [PATCH] P4 to Git Change 1469850 by gandryey@gera-w8 on 2017/10/13 13:56:50 SWDEV-79445 - OCL generic changes and code clean-up - Remove obsolete/unused code Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#62 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#34 edit [ROCm/clr commit: 59ed7d24454a746175f8e59e0f45fc7aad150b71] --- .../runtime/device/pal/paldebugmanager.cpp | 2 +- .../rocclr/runtime/device/pal/palvirtual.cpp | 106 +++--------------- .../rocclr/runtime/device/pal/palvirtual.hpp | 37 +----- 3 files changed, 22 insertions(+), 123 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp b/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp index 7ed056645f..124de40991 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp @@ -143,7 +143,7 @@ void GpuDebugManager::unregisterDebugger() { void GpuDebugManager::flushCache(uint32_t mask) { HwDbgGpuCacheMask cacheMask(mask); - device()->xferQueue()->flushCuCaches(cacheMask); + //device()->xferQueue()->flushCuCaches(cacheMask); } diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 3eb5001971..bbf53a634a 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -707,13 +707,13 @@ VirtualGPU::VirtualGPU(Device& device) engineID_(MainEngine), gpuDevice_(static_cast(device)), execution_("Virtual GPU execution lock", true), - printfDbg_(nullptr), printfDbgHSA_(nullptr), tsCache_(nullptr), dmaFlushMgmt_(device), hwRing_(0), readjustTimeGPU_(0), - currTs_(nullptr), + lastTS_(nullptr), + profileTs_(nullptr), vqHeader_(nullptr), virtualQueue_(nullptr), schedParams_(nullptr), @@ -722,10 +722,6 @@ VirtualGPU::VirtualGPU(Device& device) maskGroups_(1), hsaQueueMem_(nullptr), cmdAllocator_(nullptr) { - memset(&cal_, 0, sizeof(CalVirtualDesc)); - for (uint i = 0; i < AllEngines; ++i) { - cal_.events_[i].invalidate(); - } // Note: Virtual GPU device creation must be a thread safe operation index_ = gpuDevice_.numOfVgpus_++; @@ -829,14 +825,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs, return false; } - // Create Printf class - printfDbg_ = new PrintfDbg(gpuDevice_); - if ((nullptr == printfDbg_) || !printfDbg_->create()) { - delete printfDbg_; - LogError("Could not allocate debug buffer for printf()!"); - return false; - } - // Create HSAILPrintf class printfDbgHSA_ = new PrintfDbgHSA(gpuDevice_); if (nullptr == printfDbgHSA_) { @@ -930,9 +918,6 @@ VirtualGPU::~VirtualGPU() { freeCbQueue_.pop(); } - // Destroy printf object - delete printfDbg_; - // Destroy printfHSA object delete printfDbgHSA_; @@ -1833,53 +1818,6 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& vcmd) { profilingEnd(vcmd); } -void VirtualGPU::findIterations(const amd::NDRangeContainer& sizes, const amd::NDRange& local, - amd::NDRange& groups, amd::NDRange& remainder, size_t& extra) { - size_t dimensions = sizes.dimensions(); - - if (cal()->iterations_ > 1) { - size_t iterations = cal()->iterations_; - cal_.iterations_ = 1; - - // Find the total amount of all groups - groups = sizes.global() / local; - if (dev().settings().partialDispatch_) { - for (uint j = 0; j < dimensions; ++j) { - if ((sizes.global()[j] % local[j]) != 0) { - groups[j]++; - } - } - } - - // Calculate the real number of required iterations and - // the workgroup size of each iteration - for (int j = (dimensions - 1); j >= 0; --j) { - // Find possible size of each iteration - size_t tmp = (groups[j] / iterations); - // Make sure the group size is more than 1 - if (tmp > 0) { - remainder = groups; - remainder[j] = (groups[j] % tmp); - - extra = ((groups[j] / tmp) + - // Check for the remainder - ((remainder[j] != 0) ? 1 : 0)); - // Recalculate the number of iterations - cal_.iterations_ *= extra; - if (remainder[j] == 0) { - extra = 0; - } - groups[j] = tmp; - break; - } else { - iterations = ((iterations / groups[j]) + (((iterations % groups[j]) != 0) ? 1 : 0)); - cal_.iterations_ *= groups[j]; - groups[j] = 1; - } - } - } -} - void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) { // Make sure VirtualGPU has an exclusive access to the resources amd::ScopedLock lock(execution()); @@ -2651,7 +2589,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) { bool gpuCommand = false; for (uint i = 0; i < AllEngines; ++i) { - if (cal_.events_[i].isValid()) { + if (events_[i].isValid()) { gpuCommand = true; } } @@ -2668,10 +2606,10 @@ void VirtualGPU::flush(amd::Command* list, bool wait) { } if (nullptr == cb) { - cb = new CommandBatch(list, cal()->events_, cal()->lastTS_); + cb = new CommandBatch(list, events_, lastTS_); } else { freeCbQueue_.pop(); - cb->init(list, cal()->events_, cal()->lastTS_); + cb->init(list, events_, lastTS_); } } @@ -2684,12 +2622,12 @@ void VirtualGPU::flush(amd::Command* list, bool wait) { // if runtime didn't submit any commands //! @note: it's safe to invalidate events, since //! we already saved them with the batch creation step above - cal_.events_[i].invalidate(); + events_[i].invalidate(); } } // Mark last TS as nullptr, so runtime won't process empty batches with the old TS - cal_.lastTS_ = nullptr; + lastTS_ = nullptr; if (nullptr != cb) { cbQueue_.push(cb); } @@ -2721,7 +2659,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) { void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); } void VirtualGPU::setGpuEvent(GpuEvent gpuEvent, bool flush) { - cal_.events_[engineID_] = gpuEvent; + events_[engineID_] = gpuEvent; // Flush current DMA buffer if requested if (flush) { @@ -2738,7 +2676,7 @@ void VirtualGPU::flushDMA(uint engineID) { //! but L1 still has to be invalidated. } - isDone(&cal_.events_[engineID]); + isDone(&events_[engineID]); } bool VirtualGPU::waitAllEngines(CommandBatch* cb) { @@ -2747,7 +2685,7 @@ bool VirtualGPU::waitAllEngines(CommandBatch* cb) { // If command batch is nullptr then wait for the current if (nullptr == cb) { - events = cal_.events_; + events = events_; } else { events = cb->events_; } @@ -2844,7 +2782,7 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) { } // Save the TimeStamp object in the current OCL event command.setData(ts); - currTs_ = ts; + profileTs_ = ts; state_.profileEnabled_ = true; } } @@ -2855,7 +2793,7 @@ void VirtualGPU::profilingEnd(amd::Command& command) { if (ts != nullptr) { // Check if the command actually did any GPU submission if (ts->isValid()) { - cal_.lastTS_ = ts; + lastTS_ = ts; } else { // Destroy the TimeStamp object tsCache_->freeTimeStamp(ts); @@ -2949,13 +2887,13 @@ void VirtualGPU::addDoppRef(const Memory* memory, bool lastDoppCmd, bool pfpaDop } void VirtualGPU::profileEvent(EngineType engine, bool type) const { - if (nullptr == currTs_) { + if (nullptr == profileTs_) { return; } if (type) { - currTs_->begin((engine == SdmaEngine) ? true : false); + profileTs_->begin((engine == SdmaEngine) ? true : false); } else { - currTs_->end((engine == SdmaEngine) ? true : false); + profileTs_->end((engine == SdmaEngine) ? true : false); } } @@ -3105,20 +3043,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, uint64_t kernelTable) { virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, Wait); } -void VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask) { - Unimplemented(); - /* - //! @todo: fix issue of no event available for the flush/invalidate cache command - InvalidateSqCaches(cache_mask.sqICache_, - cache_mask.sqKCache_, - cache_mask.tcL1_, - cache_mask.tcL2_); - */ - flushDMA(engineID_); - - return; -} - void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt, HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) { amd::HwDebugManager* dbgManager = dev().hwDebugMgr(); diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp index 6b0d2dcd54..2318dcd729 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp @@ -205,13 +205,6 @@ class VirtualGPU : public device::VirtualDevice { State() : value_(0) {} }; - //! CAL descriptor for the GPU virtual device - struct CalVirtualDesc : public amd::EmbeddedObject { - GpuEvent events_[AllEngines]; //!< Last known GPU events - uint iterations_; //!< Number of iterations for the execution - TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU - }; - typedef std::vector constbufs_t; class MemoryDependency : public amd::EmbeddedObject { @@ -327,9 +320,6 @@ class VirtualGPU : public device::VirtualDevice { //! Returns GPU device object associated with this kernel const Device& dev() const { return gpuDevice_; } - //! Returns CAL descriptor of the virtual device - const CalVirtualDesc* cal() const { return &cal_; } - //! Set the last known GPU event void setGpuEvent(GpuEvent gpuEvent, //!< GPU event for tracking bool flush = false //!< TRUE if flush is required @@ -401,9 +391,6 @@ class VirtualGPU : public device::VirtualDevice { //! Returns the virtual gpu unique index uint index() const { return index_; } - //! Get the PrintfDbg object - PrintfDbg& printfDbg() const { return *printfDbg_; } - //! Get the PrintfDbgHSA object PrintfDbgHSA& printfDbgHSA() const { return *printfDbgHSA_; } @@ -425,9 +412,6 @@ class VirtualGPU : public device::VirtualDevice { //! Returns the HW ring used on this virtual device uint hwRing() const { return hwRing_; } - //! Returns current timestamp object for profiling - TimeStamp* currTs() const { return cal_.lastTS_; } - //! Returns virtual queue object for device enqueuing Memory* vQueue() const { return virtualQueue_; } @@ -439,10 +423,6 @@ class VirtualGPU : public device::VirtualDevice { ); EngineType engineID_; //!< Engine ID for this VirtualGPU - State state_; //!< virtual GPU current state - CalVirtualDesc cal_; //!< CAL virtual device descriptor - - void flushCuCaches(HwDbgGpuCacheMask cache_mask); //!< flush/invalidate SQ cache //! Returns PAL command buffer interface Pal::ICmdBuffer* iCmd() const { @@ -530,14 +510,6 @@ class VirtualGPU : public device::VirtualDevice { MemoryRange() : start_(0), end_(0) {} }; - //! Finds total amount of necessary iterations - inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes - const amd::NDRange& local, //!< Local workgroup size - amd::NDRange& groups, //!< Calculated workgroup sizes - amd::NDRange& remainder, //!< Calculated remainder sizes - size_t& extra //!< Amount of extra executions for remainder - ); - //! Allocates constant buffers bool allocConstantBuffers(); @@ -592,7 +564,6 @@ class VirtualGPU : public device::VirtualDevice { amd::Monitor execution_; //!< Lock to serialise access to all device objects uint index_; //!< The virtual device unique index - PrintfDbg* printfDbg_; //!< GPU printf implemenation PrintfDbgHSA* printfDbgHSA_; //!< HSAIL printf implemenation TimeStampCache* tsCache_; //!< TimeStamp cache @@ -609,8 +580,12 @@ class VirtualGPU : public device::VirtualDevice { uint hwRing_; //!< HW ring used on this virtual device - uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps - TimeStamp* currTs_; //!< current timestamp for command + State state_; //!< virtual GPU current state + GpuEvent events_[AllEngines]; //!< Last known GPU events + + uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps + TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU + TimeStamp* profileTs_; //!< current profiling timestamp for command AmdVQueueHeader* vqHeader_; //!< Sysmem copy for virtual queue header Memory* virtualQueue_; //!< Virtual device queue