SWDEV-364604 - Add ROCclr support for hipEventDisableSystemFence

Change-Id: I6127b432a8759359359a1890fda85bc401be6a56
Этот коммит содержится в:
Saleel Kudchadker
2023-01-20 15:34:24 -08:00
коммит произвёл Rahul Garg
родитель 4830dd168c
Коммит 3e603d986a
6 изменённых файлов: 31 добавлений и 20 удалений
+3
Просмотреть файл
@@ -1260,6 +1260,9 @@ class VirtualDevice : public amd::HeapObject {
//! Returns fence state of the VirtualGPU (pure virtual; each backend supplies its own)
virtual bool isFenceDirty() const = 0;
//! Resets fence state of the VirtualGPU (pure virtual; each backend supplies its own)
virtual void resetFenceDirty() = 0;
private:
//! Disable default copy assignment (declared private)
VirtualDevice& operator=(const VirtualDevice&);
+2
Просмотреть файл
@@ -359,6 +359,8 @@ class VirtualGPU : public device::VirtualDevice {
//! Fence state query for this backend: always reports "not dirty"
bool isFenceDirty() const { return false; }
//! Fence state reset for this backend: intentionally a no-op, since isFenceDirty() never reports dirty
void resetFenceDirty() {}
//! Returns the GPU device object (gpuDevice_) associated with this virtual GPU
const Device& dev() const { return gpuDevice_; }
+24 -18
Просмотреть файл
@@ -78,13 +78,8 @@ static constexpr uint16_t kBarrierPacketHeader =
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
// AQL header for a BARRIER_AND packet with the barrier bit set and both the
// acquire and release fence scopes set to HSA_FENCE_SCOPE_AGENT.
static constexpr uint16_t kBarrierPacketAgentScopeHeader =
(HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
// AQL header for a BARRIER_AND packet with the barrier bit set and both the
// acquire and release fence scopes set to HSA_FENCE_SCOPE_NONE.
// NOTE(review): the packet-type operand is OR-ed in twice below; the repeat does
// not change the value but looks like a leftover — confirm and drop one copy.
static constexpr uint16_t kNopPacketHeader =
(HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) |
(HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
@@ -99,14 +94,16 @@ static constexpr uint16_t kBarrierPacketReleaseHeader =
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
// AQL header for a VENDOR_SPECIFIC packet with the barrier bit set and both the
// acquire and release fence scopes set to HSA_FENCE_SCOPE_SYSTEM.
// NOTE(review): the packet-type and barrier-bit operands each appear twice below;
// the repeats do not change the value but look like leftovers — confirm and dedupe.
static constexpr uint16_t kBarrierVendorPacketHeader =
(HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
// AQL header for a VENDOR_SPECIFIC packet with the barrier bit set and both the
// acquire and release fence scopes set to HSA_FENCE_SCOPE_AGENT.
static constexpr uint16_t kBarrierVendorPacketAgentScopeHeader =
(HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE) | (1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
// AQL header for a VENDOR_SPECIFIC packet with the barrier bit set and both the
// acquire and release fence scopes set to HSA_FENCE_SCOPE_NONE.
static constexpr uint16_t kBarrierVendorPacketNopScopeHeader =
(HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_NONE << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
// Constant hsa_barrier_and_packet_t whose first initializer is
// kBarrierPacketAcquireHeader; every remaining initializer is zero.
static constexpr hsa_barrier_and_packet_t kBarrierAcquirePacket = {
kBarrierPacketAcquireHeader, 0, 0, {{0}}, 0, {0}};
@@ -989,6 +986,7 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal,
uint64_t index = hsa_queue_add_write_index_screlease(gpu_queue_, 1);
uint64_t read = hsa_queue_load_read_index_relaxed(gpu_queue_);
fence_dirty_ = true;
auto cache_state = extractAqlBits(packetHeader, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE);
if (!skipSignal) {
@@ -1001,7 +999,9 @@ void VirtualGPU::dispatchBarrierPacket(uint16_t packetHeader, bool skipSignal,
}
// Reset fence_dirty_ flag if we submit a barrier
fence_dirty_ = false;
if (cache_state == amd::Device::kCacheStateSystem) {
fence_dirty_ = false;
}
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= queueMask);
hsa_barrier_and_packet_t* aql_loc =
@@ -1063,6 +1063,10 @@ void VirtualGPU::dispatchBarrierValuePacket(uint16_t packetHeader, bool resolveD
}
}
fence_dirty_ = true;
auto cache_state = extractAqlBits(packetHeader, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE);
if (completionSignal.handle == 0) {
// Get active signal for current dispatch if profiling is necessary
barrier_value_packet_.completion_signal =
@@ -1072,6 +1076,11 @@ void VirtualGPU::dispatchBarrierValuePacket(uint16_t packetHeader, bool resolveD
barrier_value_packet_.completion_signal = completionSignal;
}
// Reset fence_dirty_ flag only when the submitted barrier's release fence scope is system
if (cache_state == amd::Device::kCacheStateSystem) {
fence_dirty_ = false;
}
uint64_t index = hsa_queue_add_write_index_screlease(gpu_queue_, 1);
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= queueMask);
hsa_amd_barrier_value_packet_t* aql_loc = &(reinterpret_cast<hsa_amd_barrier_value_packet_t*>(
@@ -1079,9 +1088,6 @@ void VirtualGPU::dispatchBarrierValuePacket(uint16_t packetHeader, bool resolveD
*aql_loc = barrier_value_packet_;
packet_store_release(reinterpret_cast<uint32_t*>(aql_loc), packetHeader, rest);
auto cache_state = extractAqlBits(packetHeader, HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE,
HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE);
hsa_signal_store_screlease(gpu_queue_->doorbell_signal, index);
ClPrint(amd::LOG_DEBUG, amd::LOG_AQL,
@@ -3252,11 +3258,11 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) {
if (timestamp_ != nullptr) {
const Settings& settings = dev().settings();
int32_t releaseFlags = vcmd.getEventScope();
if (releaseFlags == Device::CacheState::kCacheStateAgent) {
if (releaseFlags == Device::CacheState::kCacheStateIgnore) {
if (settings.barrier_value_packet_ && vcmd.profilingInfo().marker_ts_) {
dispatchBarrierValuePacket(kBarrierVendorPacketAgentScopeHeader, true);
dispatchBarrierValuePacket(kBarrierVendorPacketNopScopeHeader, true);
} else {
dispatchBarrierPacket(kBarrierPacketAgentScopeHeader, false);
dispatchBarrierPacket(kNopPacketHeader, false);
}
} else {
// Submit a barrier with cache flushes.
+1
Просмотреть файл
@@ -407,6 +407,7 @@ class VirtualGPU : public device::VirtualDevice {
void* allocKernArg(size_t size, size_t alignment);
//! Returns the current value of the fence_dirty_ flag
bool isFenceDirty() const { return fence_dirty_; }
//! Clears the fence_dirty_ flag
void resetFenceDirty() { fence_dirty_ = false; }
// } roc OpenCL integration
private:
//! Dispatches a barrier with blocking HSA signals
-1
Просмотреть файл
@@ -436,7 +436,6 @@ NDRangeKernelCommand::NDRangeKernelCommand(HostQueue& queue, const EventWaitList
profilingInfo_.clear();
profilingInfo_.callback_ = nullptr;
profilingInfo_.marker_ts_ = true;
setEventScope(amd::Device::kCacheStateSystem);
}
kernel_.retain();
}
+1 -1
Просмотреть файл
@@ -120,7 +120,7 @@ void HostQueue::finish() {
return;
}
}
if (nullptr == command || vdev()->isHandlerPending()) {
if (nullptr == command || vdev()->isHandlerPending() || vdev()->isFenceDirty()) {
if (nullptr != command) {
command->release();
}