From 96a59a73b8f6259f2e3df0b78483bf1ed582066c Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 28 Aug 2014 18:45:10 -0400
Subject: [PATCH] P4 to Git Change 1071369 by gandryey@gera-dev-w7 on
2014/08/28 18:09:49
ECR #304775 - Add a pointer to the profiling_capture_event_info values.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#26 edit
---
rocclr/runtime/device/gpu/gpusched.hpp | 1 +
rocclr/runtime/device/gpu/gpuschedcl.cpp | 26 ++++++++++++++++--------
2 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/rocclr/runtime/device/gpu/gpusched.hpp b/rocclr/runtime/device/gpu/gpusched.hpp
index 5ace3cb0c7..1fa72efdc7 100644
--- a/rocclr/runtime/device/gpu/gpusched.hpp
+++ b/rocclr/runtime/device/gpu/gpusched.hpp
@@ -54,6 +54,7 @@ struct AmdEvent {
uint32_t state; //!< [LRO/SRW] Event state: START, END, COMPLETE
uint32_t counter; //!< [LRW] Event retain/release counter. 0 means the event is free
uint64_t timer[3]; //!< [LRO/SWO] Timer values for profiling for each state
+ uint64_t captureInfo; //!< [LRW/SRO] Profiling capture info for CLK_PROFILING_COMMAND_EXEC_TIME
};
struct SchedulerParam {
diff --git a/rocclr/runtime/device/gpu/gpuschedcl.cpp b/rocclr/runtime/device/gpu/gpuschedcl.cpp
index 85ba4fb9a7..3871935c1d 100644
--- a/rocclr/runtime/device/gpu/gpuschedcl.cpp
+++ b/rocclr/runtime/device/gpu/gpuschedcl.cpp
@@ -78,6 +78,7 @@ typedef struct _AmdEvent {
uint state; //!< [LRO/SRW] Event state: START, END, COMPLETE
uint counter; //!< [LRW] Event retain/release counter. 0 means the event is free
ulong timer[3]; //!< [LRO/SWO] Timer values for profiling for each state
+ ulong capture_info; //!< [LRW/SRO] Profiling capture info for CLK_PROFILING_COMMAND_EXEC_TIME
} AmdEvent;
typedef struct _SchedulerParam {
@@ -165,6 +166,7 @@ const uint ResumeExecution = 0x80000000; // 0x81000000
const uint StallExecution = 0x00000000; // 0x01000000
const uint WavefrontSize = 64;
const uint MaxWaveSize = 0x400;
+const uint CL_DONE = 0xffff;
static inline void
dispatch(
@@ -466,19 +468,27 @@ scheduler(
else if (slotState == AQL_WRAP_DONE) {
// Was CL_EVENT requested?
if (event != 0) {
+ // Record the END timestamp only once: skip if the event is already marked CL_DONE
+ if (event->state != CL_DONE) {
+ event->timer[PROFILING_COMMAND_END] =
+ (__hsail_get_clock() * (ulong)param->eng_clk) >> 10;
+ event->state = CL_DONE;
+ }
// The current dispatch doesn't have any outstanding children
if (disp->child_counter == 0) {
- event->state = CL_COMPLETE;
- event->timer[PROFILING_COMMAND_END] =
event->timer[PROFILING_COMMAND_COMPLETE] =
(__hsail_get_clock() * (ulong)param->eng_clk) >> 10;
+ event->state = CL_COMPLETE;
+ if (event->capture_info != 0) {
+ __global ulong* values = (__global ulong*)event->capture_info;
+ values[0] = event->timer[PROFILING_COMMAND_END] -
+ event->timer[PROFILING_COMMAND_START];
+ values[1] = event->timer[PROFILING_COMMAND_COMPLETE] -
+ event->timer[PROFILING_COMMAND_START];
+ }
+ releaseEvent(event, (__global uint *)queue->event_slot_mask,
+ (__global AmdEvent *)queue->event_slots);
}
- else {
- event->timer[PROFILING_COMMAND_END] =
- (__hsail_get_clock() * (ulong)param->eng_clk) >> 10;
- }
- releaseEvent(event, (__global uint *)queue->event_slot_mask,
- (__global AmdEvent *)queue->event_slots);
}
// The current dispatch doesn't have any outstanding children
if (disp->child_counter == 0) {