P4 to Git Change 1311278 by jatang@jatang-opencl-hsa-stg2 on 2016/09/06 14:13:56
SWDEV-101315 - Fix PerfCounter not working under CodeXL. (1) Map each ORCA PerfCounter block to the corresponding PAL PerfCounter block/instance. (2) CodeXL may try to create PerfCounters that do not exist in HW; handle that case and return 0 as the result. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#21 edit
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
|
||||
#include "device/pal/palcounters.hpp"
|
||||
#include "device/pal/palvirtual.hpp"
|
||||
#include <array>
|
||||
|
||||
namespace pal {
|
||||
|
||||
@@ -60,8 +61,13 @@ PalCounterReference::~PalCounterReference()
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t PalCounterReference::result(uint index)
|
||||
uint64_t PalCounterReference::result(int index)
|
||||
{
|
||||
if (index < 0) {
|
||||
// These are counters that have no corresponding PalSample created
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (layout_ != nullptr) {
|
||||
assert(index <= layout_->sampleCount && "index not in range");
|
||||
const Pal::GlobalSampleLayout& sample = layout_->samples[index];
|
||||
@@ -118,6 +124,7 @@ bool PalCounterReference::finalize()
|
||||
Pal::GlobalCounterLayout layout = {};
|
||||
iPerf()->GetGlobalCounterLayout(&layout);
|
||||
|
||||
assert(layout.sampleCount == numExpCounters_);
|
||||
size_t size = sizeof(Pal::GlobalCounterLayout) + (sizeof(Pal::GlobalSampleLayout) * (layout.sampleCount - 1));
|
||||
layout_ = reinterpret_cast<Pal::GlobalCounterLayout*>(new char[size]);
|
||||
if (layout_ != nullptr) {
|
||||
@@ -131,6 +138,223 @@ bool PalCounterReference::finalize()
|
||||
}
|
||||
}
|
||||
|
||||
// ORCA (cmndefs.h) -> PAL (palPerfExperiment.h) block translation table that
// PerfCounter::convertInfo() applies for Pal::GfxIpLevel::GfxIp7.
// The array index is the ORCA block id; each element is
// {PAL block id, PAL block instance}.
// NOTE(review): PA_SU and PA_SC both translate to {0x04, 0} and PAL block 0x05
// is never produced by this table -- confirm PA_SC is not meant to be 0x05.
static const std::array<std::pair<int, int>, 83> ciBlockIdOrcaToPal = {{
    // ORCA 0-3: CB0-CB3
    {0x0F, 0}, {0x0F, 1}, {0x0F, 2}, {0x0F, 3},
    // ORCA 4: CPF
    {0x01, 0},
    // ORCA 5-8: DB0-DB3
    {0x0E, 0}, {0x0E, 1}, {0x0E, 2}, {0x0E, 3},
    // ORCA 9-10: GRBM, GRBMSE
    {0x12, 0}, {0x13, 0},
    // ORCA 11-12: PA_SU, PA_SC
    {0x04, 0}, {0x04, 0},
    // ORCA 13: SPI
    {0x06, 0},
    // ORCA 14-21: SQ, SQ_ES, SQ_GS, SQ_VS, SQ_PS, SQ_LS, SQ_HS, SQ_CS
    {0x07, 0}, {0x07, 0}, {0x07, 0}, {0x07, 0},
    {0x07, 0}, {0x07, 0}, {0x07, 0}, {0x07, 0},
    // ORCA 22: SX
    {0x08, 0},
    // ORCA 23-33: TA0-TA10
    {0x09, 0}, {0x09, 1}, {0x09, 2}, {0x09, 3},
    {0x09, 4}, {0x09, 5}, {0x09, 6}, {0x09, 7},
    {0x09, 8}, {0x09, 9}, {0x09, 10},
    // ORCA 34-35: TCA0-TCA1
    {0x0D, 0}, {0x0D, 1},
    // ORCA 36-51: TCC0-TCC15
    {0x0C, 0},  {0x0C, 1},  {0x0C, 2},  {0x0C, 3},
    {0x0C, 4},  {0x0C, 5},  {0x0C, 6},  {0x0C, 7},
    {0x0C, 8},  {0x0C, 9},  {0x0C, 10}, {0x0C, 11},
    {0x0C, 12}, {0x0C, 13}, {0x0C, 14}, {0x0C, 15},
    // ORCA 52-62: TD0-TD10
    {0x0A, 0}, {0x0A, 1}, {0x0A, 2}, {0x0A, 3},
    {0x0A, 4}, {0x0A, 5}, {0x0A, 6}, {0x0A, 7},
    {0x0A, 8}, {0x0A, 9}, {0x0A, 10},
    // ORCA 63-73: TCP0-TCP10
    {0x0B, 0}, {0x0B, 1}, {0x0B, 2}, {0x0B, 3},
    {0x0B, 4}, {0x0B, 5}, {0x0B, 6}, {0x0B, 7},
    {0x0B, 8}, {0x0B, 9}, {0x0B, 10},
    // ORCA 74-78: GDS, VGT, IA, MC, SRBM
    {0x10, 0}, {0x03, 0}, {0x02, 0}, {0x16, 0}, {0x11, 0},
    // ORCA 79-82: TCS, WD, CPG, CPC
    {0x1A, 0}, {0x19, 0}, {0x17, 0}, {0x18, 0},
}};
|
||||
|
||||
// ORCA -> PAL block translation table that PerfCounter::convertInfo() applies
// for Pal::GfxIpLevel::GfxIp8.
// The array index is the ORCA block id; each element is
// {PAL block id, PAL block instance}.
//
// The extent is 97 because exactly 97 rows are listed. The table used to be
// declared with 98 elements, which left a value-initialized {0, 0} entry at
// index 97; convertInfo() bounds-checks against size(), so ORCA id 97 was
// silently remapped to PAL block 0 / instance 0.
// NOTE(review): unlike the GfxIp7 table there is no TCS row between SRBM and
// WD. If the ORCA ids for this family do include TCS, add that row (and bump
// the extent) instead -- confirm against cmndefs.h.
// NOTE(review): PA_SU and PA_SC both translate to {0x04, 0} and PAL block 0x05
// is never produced by this table -- confirm PA_SC is not meant to be 0x05.
static const std::array<std::pair<int, int>, 97> viBlockIdOrcaToPal = {{
    // ORCA 0-3: CB0-CB3
    {0x0F, 0}, {0x0F, 1}, {0x0F, 2}, {0x0F, 3},
    // ORCA 4: CPF
    {0x01, 0},
    // ORCA 5-8: DB0-DB3
    {0x0E, 0}, {0x0E, 1}, {0x0E, 2}, {0x0E, 3},
    // ORCA 9-10: GRBM, GRBMSE
    {0x12, 0}, {0x13, 0},
    // ORCA 11-12: PA_SU, PA_SC
    {0x04, 0}, {0x04, 0},
    // ORCA 13: SPI
    {0x06, 0},
    // ORCA 14-21: SQ, SQ_ES, SQ_GS, SQ_VS, SQ_PS, SQ_LS, SQ_HS, SQ_CS
    {0x07, 0}, {0x07, 0}, {0x07, 0}, {0x07, 0},
    {0x07, 0}, {0x07, 0}, {0x07, 0}, {0x07, 0},
    // ORCA 22: SX
    {0x08, 0},
    // ORCA 23-38: TA0-TA15
    {0x09, 0},  {0x09, 1},  {0x09, 2},  {0x09, 3},
    {0x09, 4},  {0x09, 5},  {0x09, 6},  {0x09, 7},
    {0x09, 8},  {0x09, 9},  {0x09, 10}, {0x09, 11},
    {0x09, 12}, {0x09, 13}, {0x09, 14}, {0x09, 15},
    // ORCA 39-40: TCA0-TCA1
    {0x0D, 0}, {0x0D, 1},
    // ORCA 41-56: TCC0-TCC15
    {0x0C, 0},  {0x0C, 1},  {0x0C, 2},  {0x0C, 3},
    {0x0C, 4},  {0x0C, 5},  {0x0C, 6},  {0x0C, 7},
    {0x0C, 8},  {0x0C, 9},  {0x0C, 10}, {0x0C, 11},
    {0x0C, 12}, {0x0C, 13}, {0x0C, 14}, {0x0C, 15},
    // ORCA 57-72: TD0-TD15
    {0x0A, 0},  {0x0A, 1},  {0x0A, 2},  {0x0A, 3},
    {0x0A, 4},  {0x0A, 5},  {0x0A, 6},  {0x0A, 7},
    {0x0A, 8},  {0x0A, 9},  {0x0A, 10}, {0x0A, 11},
    {0x0A, 12}, {0x0A, 13}, {0x0A, 14}, {0x0A, 15},
    // ORCA 73-88: TCP0-TCP15
    {0x0B, 0},  {0x0B, 1},  {0x0B, 2},  {0x0B, 3},
    {0x0B, 4},  {0x0B, 5},  {0x0B, 6},  {0x0B, 7},
    {0x0B, 8},  {0x0B, 9},  {0x0B, 10}, {0x0B, 11},
    {0x0B, 12}, {0x0B, 13}, {0x0B, 14}, {0x0B, 15},
    // ORCA 89-93: GDS, VGT, IA, MC, SRBM
    {0x10, 0}, {0x03, 0}, {0x02, 0}, {0x16, 0}, {0x11, 0},
    // ORCA 94-96: WD, CPG, CPC
    {0x19, 0}, {0x17, 0}, {0x18, 0},
}};
|
||||
|
||||
void PerfCounter::convertInfo()
|
||||
{
|
||||
switch (dev().ipLevel()) {
|
||||
case Pal::GfxIpLevel::GfxIp7:
|
||||
if (info_.blockIndex_ < ciBlockIdOrcaToPal.size()) {
|
||||
auto p = ciBlockIdOrcaToPal[info_.blockIndex_];
|
||||
info_.blockIndex_ = std::get<0>(p);
|
||||
info_.counterIndex_ = std::get<1>(p);
|
||||
}
|
||||
break;
|
||||
case Pal::GfxIpLevel::GfxIp8:
|
||||
if (info_.blockIndex_ < viBlockIdOrcaToPal.size()) {
|
||||
auto p = viBlockIdOrcaToPal[info_.blockIndex_];
|
||||
info_.blockIndex_ = std::get<0>(p);
|
||||
info_.counterIndex_ = std::get<1>(p);
|
||||
}
|
||||
break;
|
||||
case Pal::GfxIpLevel::GfxIp9:
|
||||
Unimplemented();
|
||||
break;
|
||||
default:
|
||||
Unimplemented();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
PerfCounter::~PerfCounter()
|
||||
{
|
||||
if (palRef_ == nullptr) {
|
||||
@@ -144,7 +368,7 @@ PerfCounter::~PerfCounter()
|
||||
bool
|
||||
PerfCounter::create()
|
||||
{
|
||||
index_ = palRef_->retain() - 2;
|
||||
palRef_->retain();
|
||||
|
||||
// Initialize the counter
|
||||
Pal::PerfCounterInfo counterInfo = {};
|
||||
@@ -153,11 +377,15 @@ PerfCounter::create()
|
||||
counterInfo.instance = info_.counterIndex_;
|
||||
counterInfo.eventId = info_.eventIndex_;
|
||||
Pal::Result result = iPerf()->AddCounter(counterInfo);
|
||||
if (result != Pal::Result::Success) {
|
||||
if (result == Pal::Result::Success) {
|
||||
index_ = palRef_->getPalCounterIndex();
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
// Get here when there's no HW PerfCounter matching the counterInfo
|
||||
index_ = -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
||||
@@ -25,7 +25,8 @@ public:
|
||||
, gpu_(gpu)
|
||||
, memory_(nullptr)
|
||||
, cpuAddr_(nullptr)
|
||||
, layout_(nullptr) {}
|
||||
, layout_(nullptr)
|
||||
, numExpCounters_(0) {}
|
||||
|
||||
//! Get PAL counter
|
||||
Pal::IPerfExperiment* iPerf() const { return perfExp_; }
|
||||
@@ -37,7 +38,10 @@ public:
|
||||
bool finalize();
|
||||
|
||||
//! Returns the PAL counter results
|
||||
uint64_t result(uint index);
|
||||
uint64_t result(int index);
|
||||
|
||||
//! Get the latest Experiment Counter index
|
||||
uint getPalCounterIndex() { return numExpCounters_++; };
|
||||
|
||||
protected:
|
||||
//! Default destructor
|
||||
@@ -50,11 +54,12 @@ private:
|
||||
//! Disable operator=
|
||||
PalCounterReference& operator=(const PalCounterReference&);
|
||||
|
||||
VirtualGPU& gpu_; //!< The virtual GPU device object
|
||||
Pal::IPerfExperiment* perfExp_; //!< PAL performance experiment object
|
||||
Pal::GlobalCounterLayout* layout_; //!< Layout of the result
|
||||
Memory* memory_;
|
||||
void* cpuAddr_; //!< CPU address of memory_
|
||||
VirtualGPU& gpu_; //!< The virtual GPU device object
|
||||
Pal::IPerfExperiment* perfExp_; //!< PAL performance experiment object
|
||||
Pal::GlobalCounterLayout* layout_; //!< Layout of the result
|
||||
Memory* memory_; //!< Memory used by PAL performance experiment
|
||||
void* cpuAddr_; //!< CPU address of memory_
|
||||
uint numExpCounters_; //!< Number of Experiment Counter created
|
||||
};
|
||||
|
||||
//! Performance counter implementation on GPU
|
||||
@@ -83,6 +88,7 @@ public:
|
||||
info_.blockIndex_ = blockIndex;
|
||||
info_.counterIndex_ = counterIndex;
|
||||
info_.eventIndex_ = eventIndex;
|
||||
convertInfo();
|
||||
}
|
||||
|
||||
//! Destructor for the GPU PerfCounter object
|
||||
@@ -102,7 +108,7 @@ public:
|
||||
//! Returns the virtual GPU device
|
||||
const VirtualGPU& gpu() const { return palRef_->gpu(); }
|
||||
|
||||
//! Returns the CAL performance counter descriptor
|
||||
//! Returns the PAL performance counter descriptor
|
||||
const Info* info() const { return &info_; }
|
||||
|
||||
//! Returns the Info structure for performance counter
|
||||
@@ -115,10 +121,13 @@ private:
|
||||
//! Disable default operator=
|
||||
PerfCounter& operator=(const PerfCounter&);
|
||||
|
||||
//! Convert info from ORCA to PAL
|
||||
void convertInfo();
|
||||
|
||||
const Device& gpuDevice_; //!< The backend device
|
||||
PalCounterReference* palRef_; //!< Reference counter
|
||||
Info info_; //!< The info structure for perfcounter
|
||||
uint index_; //!< Counter index in the CAL container
|
||||
int index_; //!< Counter index in the PAL container
|
||||
};
|
||||
|
||||
} // namespace pal
|
||||
|
||||
@@ -2344,19 +2344,16 @@ VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd)
|
||||
return;
|
||||
}
|
||||
else if (gpuCounter->create()) {
|
||||
amdCounter->setDeviceCounter(gpuCounter);
|
||||
newExperiment = true;
|
||||
}
|
||||
else {
|
||||
LogPrintfError("We failed to allocate a perfcounter in CAL.\
|
||||
LogPrintfError("We failed to allocate a perfcounter in PAL.\
|
||||
Block: %d, counter: #d, event: %d",
|
||||
gpuCounter->info()->blockIndex_,
|
||||
gpuCounter->info()->counterIndex_,
|
||||
gpuCounter->info()->eventIndex_);
|
||||
delete gpuCounter;
|
||||
vcmd.setStatus(CL_INVALID_OPERATION);
|
||||
return;
|
||||
}
|
||||
amdCounter->setDeviceCounter(gpuCounter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user