P4 to Git Change 1469850 by gandryey@gera-w8 on 2017/10/13 13:56:50

SWDEV-79445 - OCL generic changes and code clean-up
	- Remove obsolete/unused code

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#34 edit


[ROCm/clr commit: 59ed7d2445]
Этот коммит содержится в:
foreman
2017-10-13 14:10:40 -04:00
родитель a2715be56d
Коммит 65de22f0ed
3 изменённых файлов: 22 добавлений и 123 удалений
+1 -1
Просмотреть файл
@@ -143,7 +143,7 @@ void GpuDebugManager::unregisterDebugger() {
//! Forwards a cache flush request to the transfer queue of the device.
//! @param mask  Raw 32-bit value that is wrapped in a HwDbgGpuCacheMask
//!              before being handed to flushCuCaches().
//! NOTE(review): this listing looks like a diff with its +/- markers stripped,
//! so the live call and its commented-out twin below are presumably the
//! before/after versions of a single line - confirm against the repository.
void GpuDebugManager::flushCache(uint32_t mask) {
HwDbgGpuCacheMask cacheMask(mask);
device()->xferQueue()->flushCuCaches(cacheMask);
// Disabled form of the call above:
//device()->xferQueue()->flushCuCaches(cacheMask);
}
+15 -91
Просмотреть файл
@@ -707,13 +707,13 @@ VirtualGPU::VirtualGPU(Device& device)
engineID_(MainEngine),
gpuDevice_(static_cast<Device&>(device)),
execution_("Virtual GPU execution lock", true),
printfDbg_(nullptr),
printfDbgHSA_(nullptr),
tsCache_(nullptr),
dmaFlushMgmt_(device),
hwRing_(0),
readjustTimeGPU_(0),
currTs_(nullptr),
lastTS_(nullptr),
profileTs_(nullptr),
vqHeader_(nullptr),
virtualQueue_(nullptr),
schedParams_(nullptr),
@@ -722,10 +722,6 @@ VirtualGPU::VirtualGPU(Device& device)
maskGroups_(1),
hsaQueueMem_(nullptr),
cmdAllocator_(nullptr) {
memset(&cal_, 0, sizeof(CalVirtualDesc));
for (uint i = 0; i < AllEngines; ++i) {
cal_.events_[i].invalidate();
}
// Note: Virtual GPU device creation must be a thread safe operation
index_ = gpuDevice_.numOfVgpus_++;
@@ -829,14 +825,6 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
return false;
}
// Create Printf class
printfDbg_ = new PrintfDbg(gpuDevice_);
if ((nullptr == printfDbg_) || !printfDbg_->create()) {
delete printfDbg_;
LogError("Could not allocate debug buffer for printf()!");
return false;
}
// Create HSAILPrintf class
printfDbgHSA_ = new PrintfDbgHSA(gpuDevice_);
if (nullptr == printfDbgHSA_) {
@@ -930,9 +918,6 @@ VirtualGPU::~VirtualGPU() {
freeCbQueue_.pop();
}
// Destroy printf object
delete printfDbg_;
// Destroy printfHSA object
delete printfDbgHSA_;
@@ -1833,53 +1818,6 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& vcmd) {
profilingEnd(vcmd);
}
//! Splits a dispatch into several iterations when cal()->iterations_ is greater than 1.
//! When cal()->iterations_ is 0 or 1 the function does nothing and leaves every
//! output argument untouched.
//! @param sizes      Original workload sizes (dimensions and global size are read)
//! @param local      Local workgroup size used as the divisor of the global size
//! @param groups     [out] Number of workgroups per dimension for one iteration
//! @param remainder  [out] Copy of the total group counts, with the split dimension
//!                   replaced by the leftover groups that do not fill a whole iteration
//! @param extra      [out] Number of iterations in the split dimension (including the
//!                   one for the leftover), or 0 when there is no leftover
//! Side effect: cal_.iterations_ is reset to 1 and then rebuilt as the product of the
//! per-dimension iteration counts.
void VirtualGPU::findIterations(const amd::NDRangeContainer& sizes, const amd::NDRange& local,
amd::NDRange& groups, amd::NDRange& remainder, size_t& extra) {
size_t dimensions = sizes.dimensions();
if (cal()->iterations_ > 1) {
// Keep the requested count locally; the member is recomputed from scratch below
size_t iterations = cal()->iterations_;
cal_.iterations_ = 1;
// Find the total amount of all groups
groups = sizes.global() / local;
if (dev().settings().partialDispatch_) {
// With partial dispatch a trailing, partially filled group counts as a group
for (uint j = 0; j < dimensions; ++j) {
if ((sizes.global()[j] % local[j]) != 0) {
groups[j]++;
}
}
}
// Calculate the real number of required iterations and
// the workgroup size of each iteration
// The walk starts at the last dimension and moves towards dimension 0
for (int j = (dimensions - 1); j >= 0; --j) {
// Find possible size of each iteration
size_t tmp = (groups[j] / iterations);
// Split here only if every iteration gets at least one group in this dimension
if (tmp > 0) {
remainder = groups;
remainder[j] = (groups[j] % tmp);
extra = ((groups[j] / tmp) +
// Check for the remainder
((remainder[j] != 0) ? 1 : 0));
// Recalculate the number of iterations
cal_.iterations_ *= extra;
// No leftover groups: report 0 extra executions to the caller
if (remainder[j] == 0) {
extra = 0;
}
groups[j] = tmp;
// The split is complete; dimensions below j keep their full group count
break;
} else {
// This dimension has fewer groups than requested iterations: run it one
// group at a time and carry the rest (rounded up) to the next dimension.
// NOTE(review): if groups[j] can be 0 here, the division below divides by
// zero - confirm callers rule that out.
iterations = ((iterations / groups[j]) + (((iterations % groups[j]) != 0) ? 1 : 0));
cal_.iterations_ *= groups[j];
groups[j] = 1;
}
}
}
}
void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
@@ -2651,7 +2589,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
bool gpuCommand = false;
for (uint i = 0; i < AllEngines; ++i) {
if (cal_.events_[i].isValid()) {
if (events_[i].isValid()) {
gpuCommand = true;
}
}
@@ -2668,10 +2606,10 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
}
if (nullptr == cb) {
cb = new CommandBatch(list, cal()->events_, cal()->lastTS_);
cb = new CommandBatch(list, events_, lastTS_);
} else {
freeCbQueue_.pop();
cb->init(list, cal()->events_, cal()->lastTS_);
cb->init(list, events_, lastTS_);
}
}
@@ -2684,12 +2622,12 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
// if runtime didn't submit any commands
//! @note: it's safe to invalidate events, since
//! we already saved them with the batch creation step above
cal_.events_[i].invalidate();
events_[i].invalidate();
}
}
// Mark last TS as nullptr, so runtime won't process empty batches with the old TS
cal_.lastTS_ = nullptr;
lastTS_ = nullptr;
if (nullptr != cb) {
cbQueue_.push(cb);
}
@@ -2721,7 +2659,7 @@ void VirtualGPU::flush(amd::Command* list, bool wait) {
//! Turns on synchronization in the blit manager owned by this virtual GPU (blitMgr_)
void VirtualGPU::enableSyncedBlit() const { return blitMgr_->enableSynchronization(); }
void VirtualGPU::setGpuEvent(GpuEvent gpuEvent, bool flush) {
cal_.events_[engineID_] = gpuEvent;
events_[engineID_] = gpuEvent;
// Flush current DMA buffer if requested
if (flush) {
@@ -2738,7 +2676,7 @@ void VirtualGPU::flushDMA(uint engineID) {
//! but L1 still has to be invalidated.
}
isDone(&cal_.events_[engineID]);
isDone(&events_[engineID]);
}
bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
@@ -2747,7 +2685,7 @@ bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
// If command batch is nullptr then wait for the current
if (nullptr == cb) {
events = cal_.events_;
events = events_;
} else {
events = cb->events_;
}
@@ -2844,7 +2782,7 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) {
}
// Save the TimeStamp object in the current OCL event
command.setData(ts);
currTs_ = ts;
profileTs_ = ts;
state_.profileEnabled_ = true;
}
}
@@ -2855,7 +2793,7 @@ void VirtualGPU::profilingEnd(amd::Command& command) {
if (ts != nullptr) {
// Check if the command actually did any GPU submission
if (ts->isValid()) {
cal_.lastTS_ = ts;
lastTS_ = ts;
} else {
// Destroy the TimeStamp object
tsCache_->freeTimeStamp(ts);
@@ -2949,13 +2887,13 @@ void VirtualGPU::addDoppRef(const Memory* memory, bool lastDoppCmd, bool pfpaDop
}
void VirtualGPU::profileEvent(EngineType engine, bool type) const {
if (nullptr == currTs_) {
if (nullptr == profileTs_) {
return;
}
if (type) {
currTs_->begin((engine == SdmaEngine) ? true : false);
profileTs_->begin((engine == SdmaEngine) ? true : false);
} else {
currTs_->end((engine == SdmaEngine) ? true : false);
profileTs_->end((engine == SdmaEngine) ? true : false);
}
}
@@ -3105,20 +3043,6 @@ void VirtualGPU::writeVQueueHeader(VirtualGPU& hostQ, uint64_t kernelTable) {
virtualQueue_->writeRawData(hostQ, 0, sizeof(AmdVQueueHeader), vqHeader_, Wait);
}
//! Placeholder for the flush/invalidate of the SQ caches.
//! The live code only reports Unimplemented() and then calls flushDMA() for the
//! engine of this queue (engineID_).
//! @param cache_mask  Selection of caches to flush; referenced only by the
//!                    commented-out InvalidateSqCaches() call below.
void VirtualGPU::flushCuCaches(HwDbgGpuCacheMask cache_mask) {
Unimplemented();
/*
//! @todo: fix issue of no event available for the flush/invalidate cache command
InvalidateSqCaches(cache_mask.sqICache_,
cache_mask.sqKCache_,
cache_mask.tcL1_,
cache_mask.tcL2_);
*/
flushDMA(engineID_);
return;
}
void VirtualGPU::buildKernelInfo(const HSAILKernel& hsaKernel, hsa_kernel_dispatch_packet_t* aqlPkt,
HwDbgKernelInfo& kernelInfo, amd::Event* enqueueEvent) {
amd::HwDebugManager* dbgManager = dev().hwDebugMgr();
+6 -31
Просмотреть файл
@@ -205,13 +205,6 @@ class VirtualGPU : public device::VirtualDevice {
State() : value_(0) {}
};
//! CAL descriptor for the GPU virtual device.
//! VirtualGPU keeps one instance in its cal_ member and exposes it through cal().
struct CalVirtualDesc : public amd::EmbeddedObject {
GpuEvent events_[AllEngines]; //!< Last known GPU events, one slot per engine index
uint iterations_; //!< Number of iterations for the execution (recomputed by findIterations())
TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU; nullptr when none is pending
};
typedef std::vector<ConstBuffer*> constbufs_t;
class MemoryDependency : public amd::EmbeddedObject {
@@ -327,9 +320,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns GPU device object associated with this kernel
const Device& dev() const { return gpuDevice_; }
//! Returns CAL descriptor of the virtual device
const CalVirtualDesc* cal() const { return &cal_; }
//! Set the last known GPU event
void setGpuEvent(GpuEvent gpuEvent, //!< GPU event for tracking
bool flush = false //!< TRUE if flush is required
@@ -401,9 +391,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns the virtual gpu unique index
uint index() const { return index_; }
//! Get the PrintfDbg object.
//! Dereferences printfDbg_ without a null check, so the object must already exist.
PrintfDbg& printfDbg() const { return *printfDbg_; }
//! Get the PrintfDbgHSA object.
//! Dereferences printfDbgHSA_ without a null check, so the object must already exist.
PrintfDbgHSA& printfDbgHSA() const { return *printfDbgHSA_; }
@@ -425,9 +412,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Returns the HW ring used on this virtual device
uint hwRing() const { return hwRing_; }
//! Returns the last timestamp stored in the CAL descriptor (cal_.lastTS_).
//! Despite its name, this accessor does not return the currTs_ member.
TimeStamp* currTs() const { return cal_.lastTS_; }
//! Returns virtual queue object for device enqueuing
Memory* vQueue() const { return virtualQueue_; }
@@ -439,10 +423,6 @@ class VirtualGPU : public device::VirtualDevice {
);
EngineType engineID_; //!< Engine ID for this VirtualGPU
State state_; //!< virtual GPU current state
CalVirtualDesc cal_; //!< CAL virtual device descriptor
void flushCuCaches(HwDbgGpuCacheMask cache_mask); //!< flush/invalidate SQ cache
//! Returns PAL command buffer interface
Pal::ICmdBuffer* iCmd() const {
@@ -530,14 +510,6 @@ class VirtualGPU : public device::VirtualDevice {
MemoryRange() : start_(0), end_(0) {}
};
//! Finds total amount of necessary iterations
inline void findIterations(const amd::NDRangeContainer& sizes, //!< Original workload sizes
const amd::NDRange& local, //!< Local workgroup size
amd::NDRange& groups, //!< Calculated workgroup sizes
amd::NDRange& remainder, //!< Calculated remainder sizes
size_t& extra //!< Amount of extra executions for remainder
);
//! Allocates constant buffers
bool allocConstantBuffers();
@@ -592,7 +564,6 @@ class VirtualGPU : public device::VirtualDevice {
amd::Monitor execution_; //!< Lock to serialise access to all device objects
uint index_; //!< The virtual device unique index
PrintfDbg* printfDbg_; //!< GPU printf implementation
PrintfDbgHSA* printfDbgHSA_; //!< HSAIL printf implementation
TimeStampCache* tsCache_; //!< TimeStamp cache
@@ -609,8 +580,12 @@ class VirtualGPU : public device::VirtualDevice {
uint hwRing_; //!< HW ring used on this virtual device
uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps
TimeStamp* currTs_; //!< current timestamp for command
State state_; //!< virtual GPU current state
GpuEvent events_[AllEngines]; //!< Last known GPU events
uint64_t readjustTimeGPU_; //!< Readjust time between GPU and CPU timestamps
TimeStamp* lastTS_; //!< Last timestamp executed on Virtual GPU
TimeStamp* profileTs_; //!< current profiling timestamp for command
AmdVQueueHeader* vqHeader_; //!< Sysmem copy for virtual queue header
Memory* virtualQueue_; //!< Virtual device queue