P4 to Git Change 1469850 by gandryey@gera-w8 on 2017/10/13 13:56:50
SWDEV-79445 - OCL generic changes and code clean-up
- Remove obsolete/unused code
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#34 edit
[ROCm/clr commit: 59ed7d2445]
This commit is contained in:
@@ -143,7 +143,7 @@ void GpuDebugManager::unregisterDebugger() {
|
||||
|
||||
void GpuDebugManager::flushCache(uint32_t mask) {
|
||||
HwDbgGpuCacheMask cacheMask(mask);
|
||||
device()->xferQueue()->flushCuCaches(cacheMask);
|
||||
//device()->xferQueue()->flushCuCaches(cacheMask);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -707,13 +707,13 @@ VirtualGPU::VirtualGPU(Device& device)
|
||||
engineID_(MainEngine),
|
||||
gpuDevice_(static_cast<Device&>(device)),
|
||||
execution_("Virtual GPU execution lock", true),
|
||||
printfDbg_(nullptr),
|
||||
printfDbgHSA_(nullptr),
|
||||
tsCache_(nullptr),
|
||||
dmaFlushMgmt_(device),
|
||||
hwRing_(0),
|
||||
readjustTimeGPU_(0),
|
||||
currTs_(nullptr),
|
||||
lastTS_(nullptr),
|
||||
profileTs_(nullptr),
|
||||
vqHeader_(nullptr),
|
||||
virtualQueue_(nullptr),
|
||||
schedParams_(nullptr),
|
||||
@@ -722,10 +722,6 @@ VirtualGPU::VirtualGPU(Device& device)
|
||||
maskGroups_(1),
|
||||
hsaQueueMem_(nullptr),
|
||||
cmdAllocator_(nullptr) {
|
||||
memset(&cal_, 0, sizeof(CalVirtualDesc));
|
||||
for (uint i = 0; i < AllEngines; ++i) {
|
||||
cal_.events_[i].invalidate();
|
||||
}
|
||||
|
||||
// Note: Virtual GPU device creation must be a thread safe operation
|
||||
index_ = gpuDevice_.numOfVgpus_++;
|
||||
@@ -829,14 +825,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create Printf class
|
||||
printfDbg_ = new PrintfDbg(gpuDevice_);
|
||||
if ((nullptr == printfDbg_) || !printfDbg_->create()) {
|
||||
delete printfDbg_;
|
||||
LogError("Could not allocate debug buffer for printf()!");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create HSAILPrintf class
|
||||
printfDbgHSA_ = new PrintfDbgHSA(gpuDevice_);
|
||||
if (nullptr == printfDbgHSA_) {
|
||||
@@ -930,9 +918,6 @@ VirtualGPU::~VirtualGPU() {
|
||||
freeCbQueue_.pop();
|
||||
}
|
||||
|
||||
// Destroy printf object
|
||||
delete printfDbg_;
|
||||
|
||||
// Destroy printfHSA object
|
||||
delete printfDbgHSA_;
|
||||
|
||||
@@ -1833,53 +1818,6 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& vcmd) {
|
||||
profilingEnd(vcmd);
|
||||
}
|
||||
|
||||
void VirtualGPU::findIterations(const amd::NDRangeContainer& sizes, const amd::NDRange& local,
|
||||
amd::NDRange& groups, amd::NDRange& remainder, size_t& extra) {
|
||||
size_t dimensions = sizes.dimensions();
|
||||
|
||||
if (cal()->iterations_ > 1) {
|
||||
size_t iterations = cal()->iterations_;
|
||||
cal_.iterations_ = 1;
|
||||
|
||||
// Find the total amount of all groups
|
||||
groups = sizes.global() / local;
|
||||
if (dev().settings().partialDispatch_) {
|
||||
for (uint j = 0; j < dimensions; ++j) {
|
||||
if ((sizes.global()[j] % local[j]) != 0) {
|
||||
groups[j]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the real number of required iterations and
|
||||
// the workgroup size of each iteration
|
||||
for (int j = (dimensions - 1); j >= 0; --j) {
|
||||
// Find possible size of each iteration
|
||||
size_t tmp = (groups[j] / iterations);
|
||||
// Make sure the group size is more than 1
|
||||
if (tmp > 0) {
|
||||
remainder = groups;
|
||||
remainder[j] = (groups[j] % tmp);
|
||||
|
||||
extra = ((groups[j] / tmp) +
|
||||
// Check for the remainder
|
||||
((remainder[j] != 0) ? 1 : 0));
|
||||
// Recalculate the number of iterations
|
||||
cal_.iterations_ *= extra;
|
||||
if (remainder[j] == 0) {
|
||||
extra = 0;
|
||||
}
|
||||
groups[j] = tmp;
|
||||
break;
|
||||
} else {
|
||||
iterations = ((iterations / groups[j]) + (((iterations % groups[j]) != 0) ? 1 : 0));
|
||||
cal_.iterations_ *= groups[j];
|
||||
groups[j] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
|
||||
// Make sure VirtualGPU has an exclusive access to the resources
|
||||
amd::ScopedLock lock(execution());
|
||||
@@ -2651,7 +2589,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
bool gpuCommand = false;
|
||||
|
||||
for (uint i = 0; i < AllEngines; ++i) {
|
||||
if (cal_.events_[i].isValid()) {
|
||||
if (events_[i].isValid()) {
|
||||
gpuCommand = true;
|
||||
}
|
||||
}
|
||||
@@ -2668,10 +2606,10 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
}
|
||||
|
||||
if (nullptr == cb) {
|
||||
cb = new CommandBatch(list, cal()->events_, cal()->lastTS_);
|
||||
cb = new CommandBatch(list, events_, lastTS_);
|
||||
} else {
|
||||
freeCbQueue_.pop();
|
||||
cb->init(list, cal()->events_, cal()->lastTS_);
|
||||
cb->init(list, events_, lastTS_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2684,12 +2622,12 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
// if runtime didn't submit any commands
|
||||
//! @note: it's safe to invalidate events, since
|
||||
//! we already saved them with the batch creation step above
|
||||
cal_.events_[i].invalidate();
|
||||
events_[i].invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
// Mark last TS as nullptr, so runtime won't process empty batches with the old TS
|
||||
cal_.lastTS_ = nullptr;
|
||||
lastTS_ = nullptr;
|
||||
if (nullptr != cb) {
|
||||
cbQueue_.push(cb);
|
||||
}
|
||||
@@ -2721,7 +2659,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
|
||||
void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); }
|
||||
|
||||
void VirtualGPU::setGpuEvent(GpuEvent gpuEvent, bool flush) {
|
||||
cal_.events_[engineID_] = gpuEvent;
|
||||
events_[engineID_] = gpuEvent;
|
||||
|
||||
// Flush current DMA buffer if requested
|
||||
if (flush) {
|
||||
@@ -2738,7 +2676,7 @@ void VirtualGPU::flushDMA(uint engineID) {
|
||||
//! but L1 still has to be invalidated.
|
||||
}
|
||||
|
||||
isDone(&cal_.events_[engineID]);
|
||||
isDone(&events_[engineID]);
|
||||
}
|
||||
|
||||
bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
|
||||
@@ -2747,7 +2685,7 @@ bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
|
||||
|
||||
// If command batch is nullptr then wait for the current
|
||||
if (nullptr == cb) {
|
||||
events = cal_.events_;
|
||||
events = events_;
|
||||
} else {
|
||||
events = cb->events_;
|
||||
}
|
||||
@@ -2844,7 +2782,7 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) {
|
||||
}
|
||||
// Save the TimeStamp object in the current OCL event
|
||||
command.setData(ts);
|
||||
currTs_ = ts;
|
||||
profileTs_ = ts;
|
||||
state_.profileEnabled_ = true;
|
||||
}
|
||||
}
|
||||
@@ -2855,7 +2793,7 @@ void VirtualGPU::profilingEnd(amd::Command& command) {
|
||||
if (ts != nullptr) {
|
||||
// Check if the command actually did any GPU submission
|
||||
if (ts->isValid()) {
|
||||
cal_.lastTS_ = ts;
|
||||
lastTS_ = ts;
|
||||
} else {
|
||||
// Destroy the TimeStamp object
|
||||
tsCache_->freeTimeStamp(ts);
|
||||
@@ -2949,13 +2887,13 @@ void VirtualGPU::addDoppRef(const Memory* memory, bool lastDoppCmd, bool pfpaDop
|
||||
}
|
||||
|
||||
void VirtualGPU::profileEvent(EngineType engine, bool type) const {
|
||||
if (nullptr == currTs_) {
|
||||
if (nullptr == profileTs_) {
|
||||
return;
|
||||
}
|
||||
if (type) {
|
||||
currTs_->begin((engine == SdmaEngine) ? true : false);
|
||||
profileTs_->begin((engine == SdmaEngine) ? true : false);
|
||||
} else {
|
||||
currTs_->end((engine == SdmaEngine) ? true : false);
|
||||
profileTs_->end((engine == SdmaEngine) ? true : false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3105,20 +3043,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, uint64_t kernelTable) {
|
||||
virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, Wait);
|
||||
}
|
||||
|
||||
void VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask) {
|
||||
Unimplemented();
|
||||
/*
|
||||
//! @todo: fix issue of no event available for the flush/invalidate cache command
|
||||
InvalidateSqCaches(cache_mask.sqICache_,
|
||||
cache_mask.sqKCache_,
|
||||
cache_mask.tcL1_,
|
||||
cache_mask.tcL2_);
|
||||
*/
|
||||
flushDMA(engineID_);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt,
|
||||
HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) {
|
||||
amd::HwDebugManager* dbgManager = dev().hwDebugMgr();
|
||||
|
||||
@@ -205,13 +205,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
State() : value_(0) {}
|
||||
};
|
||||
|
||||
//! CAL descriptor for the GPU virtual device
|
||||
struct CalVirtualDesc : public amd::EmbeddedObject {
|
||||
GpuEvent events_[AllEngines]; //!< Last known GPU events
|
||||
uint iterations_; //!< Number of iterations for the execution
|
||||
TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU
|
||||
};
|
||||
|
||||
typedef std::vector<ConstBuffer*> constbufs_t;
|
||||
|
||||
class MemoryDependency : public amd::EmbeddedObject {
|
||||
@@ -327,9 +320,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Returns GPU device object associated with this kernel
|
||||
const Device& dev() const { return gpuDevice_; }
|
||||
|
||||
//! Returns CAL descriptor of the virtual device
|
||||
const CalVirtualDesc* cal() const { return &cal_; }
|
||||
|
||||
//! Set the last known GPU event
|
||||
void setGpuEvent(GpuEvent gpuEvent, //!< GPU event for tracking
|
||||
bool flush = false //!< TRUE if flush is required
|
||||
@@ -401,9 +391,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Returns the virtual gpu unique index
|
||||
uint index() const { return index_; }
|
||||
|
||||
//! Get the PrintfDbg object
|
||||
PrintfDbg& printfDbg() const { return *printfDbg_; }
|
||||
|
||||
//! Get the PrintfDbgHSA object
|
||||
PrintfDbgHSA& printfDbgHSA() const { return *printfDbgHSA_; }
|
||||
|
||||
@@ -425,9 +412,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Returns the HW ring used on this virtual device
|
||||
uint hwRing() const { return hwRing_; }
|
||||
|
||||
//! Returns current timestamp object for profiling
|
||||
TimeStamp* currTs() const { return cal_.lastTS_; }
|
||||
|
||||
//! Returns virtual queue object for device enqueuing
|
||||
Memory* vQueue() const { return virtualQueue_; }
|
||||
|
||||
@@ -439,10 +423,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
);
|
||||
|
||||
EngineType engineID_; //!< Engine ID for this VirtualGPU
|
||||
State state_; //!< virtual GPU current state
|
||||
CalVirtualDesc cal_; //!< CAL virtual device descriptor
|
||||
|
||||
void flushCuCaches(HwDbgGpuCacheMask cache_mask); //!< flush/invalidate SQ cache
|
||||
|
||||
//! Returns PAL command buffer interface
|
||||
Pal::ICmdBuffer* iCmd() const {
|
||||
@@ -530,14 +510,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
MemoryRange() : start_(0), end_(0) {}
|
||||
};
|
||||
|
||||
//! Finds total amount of necessary iterations
|
||||
inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes
|
||||
const amd::NDRange& local, //!< Local workgroup size
|
||||
amd::NDRange& groups, //!< Calculated workgroup sizes
|
||||
amd::NDRange& remainder, //!< Calculated remainder sizes
|
||||
size_t& extra //!< Amount of extra executions for remainder
|
||||
);
|
||||
|
||||
//! Allocates constant buffers
|
||||
bool allocConstantBuffers();
|
||||
|
||||
@@ -592,7 +564,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
amd::Monitor execution_; //!< Lock to serialise access to all device objects
|
||||
uint index_; //!< The virtual device unique index
|
||||
|
||||
PrintfDbg* printfDbg_; //!< GPU printf implementation
|
||||
PrintfDbgHSA* printfDbgHSA_; //!< HSAIL printf implementation
|
||||
|
||||
TimeStampCache* tsCache_; //!< TimeStamp cache
|
||||
@@ -609,8 +580,12 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
|
||||
uint hwRing_; //!< HW ring used on this virtual device
|
||||
|
||||
uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps
|
||||
TimeStamp* currTs_; //!< current timestamp for command
|
||||
State state_; //!< virtual GPU current state
|
||||
GpuEvent events_[AllEngines]; //!< Last known GPU events
|
||||
|
||||
uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps
|
||||
TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU
|
||||
TimeStamp* profileTs_; //!< current profiling timestamp for command
|
||||
|
||||
AmdVQueueHeader* vqHeader_; //!< Sysmem copy for virtual queue header
|
||||
Memory* virtualQueue_; //!< Virtual device queue
|
||||
|
||||
Reference in new issue
Block a user