From 65de22f0ed74b6b507eb96f908505ad84ef3b58f Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 13 Oct 2017 14:10:40 -0400
Subject: [PATCH] P4 to Git Change 1469850 by gandryey@gera-w8 on 2017/10/13
13:56:50
SWDEV-79445 - OCL generic changes and code clean-up
- Remove obsolete/unused code
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#34 edit
[ROCm/clr commit: 59ed7d24454a746175f8e59e0f45fc7aad150b71]
---
.../runtime/device/pal/paldebugmanager.cpp | 2 +-
.../rocclr/runtime/device/pal/palvirtual.cpp | 106 +++---------------
.../rocclr/runtime/device/pal/palvirtual.hpp | 37 +-----
3 files changed, 22 insertions(+), 123 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp b/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp
index 7ed056645f..124de40991 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp
@@ -143,7 +143,7 @@ void GpuDebugManager::unregisterDebugger() {
void GpuDebugManager::flushCache(uint32_t mask) {
HwDbgGpuCacheMask cacheMask(mask);
- device()->xferQueue()->flushCuCaches(cacheMask);
+ //device()->xferQueue()->flushCuCaches(cacheMask);
}
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 3eb5001971..bbf53a634a 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -707,13 +707,13 @@ VirtualGPU::VirtualGPU(Device& device)
engineID_(MainEngine),
gpuDevice_(static_cast(device)),
execution_("Virtual GPU execution lock", true),
- printfDbg_(nullptr),
printfDbgHSA_(nullptr),
tsCache_(nullptr),
dmaFlushMgmt_(device),
hwRing_(0),
readjustTimeGPU_(0),
- currTs_(nullptr),
+ lastTS_(nullptr),
+ profileTs_(nullptr),
vqHeader_(nullptr),
virtualQueue_(nullptr),
schedParams_(nullptr),
@@ -722,10 +722,6 @@ VirtualGPU::VirtualGPU(Device& device)
maskGroups_(1),
hsaQueueMem_(nullptr),
cmdAllocator_(nullptr) {
- memset(&cal_, 0, sizeof(CalVirtualDesc));
- for (uint i = 0; i < AllEngines; ++i) {
- cal_.events_[i].invalidate();
- }
// Note: Virtual GPU device creation must be a thread safe operation
index_ = gpuDevice_.numOfVgpus_++;
@@ -829,14 +825,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
return false;
}
- // Create Printf class
- printfDbg_ = new PrintfDbg(gpuDevice_);
- if ((nullptr == printfDbg_) || !printfDbg_->create()) {
- delete printfDbg_;
- LogError("Could not allocate debug buffer for printf()!");
- return false;
- }
-
// Create HSAILPrintf class
printfDbgHSA_ = new PrintfDbgHSA(gpuDevice_);
if (nullptr == printfDbgHSA_) {
@@ -930,9 +918,6 @@ VirtualGPU::~VirtualGPU() {
freeCbQueue_.pop();
}
- // Destroy printf object
- delete printfDbg_;
-
// Destroy printfHSA object
delete printfDbgHSA_;
@@ -1833,53 +1818,6 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& vcmd) {
profilingEnd(vcmd);
}
-void VirtualGPU::findIterations(const amd::NDRangeContainer& sizes, const amd::NDRange& local,
- amd::NDRange& groups, amd::NDRange& remainder, size_t& extra) {
- size_t dimensions = sizes.dimensions();
-
- if (cal()->iterations_ > 1) {
- size_t iterations = cal()->iterations_;
- cal_.iterations_ = 1;
-
- // Find the total amount of all groups
- groups = sizes.global() / local;
- if (dev().settings().partialDispatch_) {
- for (uint j = 0; j < dimensions; ++j) {
- if ((sizes.global()[j] % local[j]) != 0) {
- groups[j]++;
- }
- }
- }
-
- // Calculate the real number of required iterations and
- // the workgroup size of each iteration
- for (int j = (dimensions - 1); j >= 0; --j) {
- // Find possible size of each iteration
- size_t tmp = (groups[j] / iterations);
- // Make sure the group size is more than 1
- if (tmp > 0) {
- remainder = groups;
- remainder[j] = (groups[j] % tmp);
-
- extra = ((groups[j] / tmp) +
- // Check for the remainder
- ((remainder[j] != 0) ? 1 : 0));
- // Recalculate the number of iterations
- cal_.iterations_ *= extra;
- if (remainder[j] == 0) {
- extra = 0;
- }
- groups[j] = tmp;
- break;
- } else {
- iterations = ((iterations / groups[j]) + (((iterations % groups[j]) != 0) ? 1 : 0));
- cal_.iterations_ *= groups[j];
- groups[j] = 1;
- }
- }
- }
-}
-
void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
@@ -2651,7 +2589,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
bool gpuCommand = false;
for (uint i = 0; i < AllEngines; ++i) {
- if (cal_.events_[i].isValid()) {
+ if (events_[i].isValid()) {
gpuCommand = true;
}
}
@@ -2668,10 +2606,10 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
}
if (nullptr == cb) {
- cb = new CommandBatch(list, cal()->events_, cal()->lastTS_);
+ cb = new CommandBatch(list, events_, lastTS_);
} else {
freeCbQueue_.pop();
- cb->init(list, cal()->events_, cal()->lastTS_);
+ cb->init(list, events_, lastTS_);
}
}
@@ -2684,12 +2622,12 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
// if runtime didn't submit any commands
//! @note: it's safe to invalidate events, since
//! we already saved them with the batch creation step above
- cal_.events_[i].invalidate();
+ events_[i].invalidate();
}
}
// Mark last TS as nullptr, so runtime won't process empty batches with the old TS
- cal_.lastTS_ = nullptr;
+ lastTS_ = nullptr;
if (nullptr != cb) {
cbQueue_.push(cb);
}
@@ -2721,7 +2659,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); }
void VirtualGPU::setGpuEvent(GpuEvent gpuEvent, bool flush) {
- cal_.events_[engineID_] = gpuEvent;
+ events_[engineID_] = gpuEvent;
// Flush current DMA buffer if requested
if (flush) {
@@ -2738,7 +2676,7 @@ void VirtualGPU::flushDMA(uint engineID) {
//! but L1 still has to be invalidated.
}
- isDone(&cal_.events_[engineID]);
+ isDone(&events_[engineID]);
}
bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
@@ -2747,7 +2685,7 @@ bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
// If command batch is nullptr then wait for the current
if (nullptr == cb) {
- events = cal_.events_;
+ events = events_;
} else {
events = cb->events_;
}
@@ -2844,7 +2782,7 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) {
}
// Save the TimeStamp object in the current OCL event
command.setData(ts);
- currTs_ = ts;
+ profileTs_ = ts;
state_.profileEnabled_ = true;
}
}
@@ -2855,7 +2793,7 @@ void VirtualGPU::profilingEnd(amd::Command& command) {
if (ts != nullptr) {
// Check if the command actually did any GPU submission
if (ts->isValid()) {
- cal_.lastTS_ = ts;
+ lastTS_ = ts;
} else {
// Destroy the TimeStamp object
tsCache_->freeTimeStamp(ts);
@@ -2949,13 +2887,13 @@ void VirtualGPU::addDoppRef(const Memory* memory, bool lastDoppCmd, bool pfpaDop
}
void VirtualGPU::profileEvent(EngineType engine, bool type) const {
- if (nullptr == currTs_) {
+ if (nullptr == profileTs_) {
return;
}
if (type) {
- currTs_->begin((engine == SdmaEngine) ? true : false);
+ profileTs_->begin((engine == SdmaEngine) ? true : false);
} else {
- currTs_->end((engine == SdmaEngine) ? true : false);
+ profileTs_->end((engine == SdmaEngine) ? true : false);
}
}
@@ -3105,20 +3043,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, uint64_t kernelTable) {
virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, Wait);
}
-void VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask) {
- Unimplemented();
- /*
- //! @todo: fix issue of no event available for the flush/invalidate cache command
- InvalidateSqCaches(cache_mask.sqICache_,
- cache_mask.sqKCache_,
- cache_mask.tcL1_,
- cache_mask.tcL2_);
- */
- flushDMA(engineID_);
-
- return;
-}
-
void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt,
HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) {
amd::HwDebugManager* dbgManager = dev().hwDebugMgr();
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
index 6b0d2dcd54..2318dcd729 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
@@ -205,13 +205,6 @@ class VirtualGPU : public device::VirtualDevice {
State() : value_(0) {}
};
- //! CAL descriptor for the GPU virtual device
- struct CalVirtualDesc : public amd::EmbeddedObject {
- GpuEvent events_[AllEngines]; //!< Last known GPU events
- uint iterations_; //!< Number of iterations for the execution
- TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU
- };
-
typedef std::vector constbufs_t;
class MemoryDependency : public amd::EmbeddedObject {
@@ -327,9 +320,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns GPU device object associated with this kernel
const Device& dev() const { return gpuDevice_; }
- //! Returns CAL descriptor of the virtual device
- const CalVirtualDesc* cal() const { return &cal_; }
-
//! Set the last known GPU event
void setGpuEvent(GpuEvent gpuEvent, //!< GPU event for tracking
bool flush = false //!< TRUE if flush is required
@@ -401,9 +391,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns the virtual gpu unique index
uint index() const { return index_; }
- //! Get the PrintfDbg object
- PrintfDbg& printfDbg() const { return *printfDbg_; }
-
//! Get the PrintfDbgHSA object
PrintfDbgHSA& printfDbgHSA() const { return *printfDbgHSA_; }
@@ -425,9 +412,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns the HW ring used on this virtual device
uint hwRing() const { return hwRing_; }
- //! Returns current timestamp object for profiling
- TimeStamp* currTs() const { return cal_.lastTS_; }
-
//! Returns virtual queue object for device enqueuing
Memory* vQueue() const { return virtualQueue_; }
@@ -439,10 +423,6 @@ class VirtualGPU : public device::VirtualDevice {
);
EngineType engineID_; //!< Engine ID for this VirtualGPU
- State state_; //!< virtual GPU current state
- CalVirtualDesc cal_; //!< CAL virtual device descriptor
-
- void flushCuCaches(HwDbgGpuCacheMask cache_mask); //!< flush/invalidate SQ cache
//! Returns PAL command buffer interface
Pal::ICmdBuffer* iCmd() const {
@@ -530,14 +510,6 @@ class VirtualGPU : public device::VirtualDevice {
MemoryRange() : start_(0), end_(0) {}
};
- //! Finds total amount of necessary iterations
- inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes
- const amd::NDRange& local, //!< Local workgroup size
- amd::NDRange& groups, //!< Calculated workgroup sizes
- amd::NDRange& remainder, //!< Calculated remainder sizes
- size_t& extra //!< Amount of extra executions for remainder
- );
-
//! Allocates constant buffers
bool allocConstantBuffers();
@@ -592,7 +564,6 @@ class VirtualGPU : public device::VirtualDevice {
amd::Monitor execution_; //!< Lock to serialise access to all device objects
uint index_; //!< The virtual device unique index
- PrintfDbg* printfDbg_; //!< GPU printf implemenation
PrintfDbgHSA* printfDbgHSA_; //!< HSAIL printf implemenation
TimeStampCache* tsCache_; //!< TimeStamp cache
@@ -609,8 +580,12 @@ class VirtualGPU : public device::VirtualDevice {
uint hwRing_; //!< HW ring used on this virtual device
- uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps
- TimeStamp* currTs_; //!< current timestamp for command
+ State state_; //!< virtual GPU current state
+ GpuEvent events_[AllEngines]; //!< Last known GPU events
+
+ uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps
+ TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU
+ TimeStamp* profileTs_; //!< current profiling timestamp for command
AmdVQueueHeader* vqHeader_; //!< Sysmem copy for virtual queue header
Memory* virtualQueue_; //!< Virtual device queue