Refactor events and add initial event option for hipHccModuleLaunchKernel
- Change hipEvent_t to a class.
- Move event logic inside the class.
- Add _type to distinguish Independent, StartCommand, and StopCommand events.
  A StartCommand event reports the begin tick of its marker as the timestamp; Independent and StopCommand events report the end tick.
Change-Id: I4ddd694f2645a3ff7170c9111dc1d3e39931ca21
[ROCm/hip commit: cfa3155082]
Этот коммит содержится в:
коммит произвёл
Ben Sander
родитель
7502166e5a
Коммит
eaeecbd461
@@ -30,6 +30,54 @@ THE SOFTWARE.
|
||||
//---
|
||||
|
||||
|
||||
// Construct an event in the "Created" state.
//   flags - creation flags, stored verbatim in _flags.
// The new event is not bound to a stream (_stream is NULL), has a zero
// timestamp, and starts out as an Independent event.
ihipEvent_t::ihipEvent_t(unsigned flags)
    : _state(hipEventStatusCreated),
      _stream(NULL),
      _flags(flags),
      _type(hipEventTypeIndependent),
      _timestamp(0)
{
}
|
||||
|
||||
|
||||
|
||||
// Attach to an existing completion future:
|
||||
void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf, ihipEventType_t eventType)
|
||||
{
|
||||
_state = hipEventStatusRecording;
|
||||
_marker = *cf;
|
||||
_type = eventType;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ihipEvent_t::setTimestamp()
|
||||
{
|
||||
if (_state == hipEventStatusRecorded) {
|
||||
// already recorded, done:
|
||||
return;
|
||||
} else {
|
||||
// TODO - use completion-future functions to obtain ticks and timestamps:
|
||||
hsa_signal_t *sig = static_cast<hsa_signal_t*> (_marker.get_native_handle());
|
||||
if (sig) {
|
||||
if (hsa_signal_load_acquire(*sig) == 0) {
|
||||
|
||||
if ((_type == hipEventTypeIndependent) || (_type == hipEventTypeStopCommand)) {
|
||||
_timestamp = _marker.get_end_tick();
|
||||
} else if (_type == hipEventTypeStartCommand) {
|
||||
_timestamp = _marker.get_begin_tick();
|
||||
} else {
|
||||
assert(0); // TODO - move to debug assert
|
||||
_timestamp = 0;
|
||||
}
|
||||
|
||||
_state = hipEventStatusRecorded;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
|
||||
{
|
||||
hipError_t e = hipSuccess;
|
||||
@@ -37,12 +85,8 @@ hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
|
||||
// TODO-IPC - support hipEventInterprocess.
|
||||
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming;
|
||||
if ((flags & ~supportedFlags) == 0) {
|
||||
ihipEvent_t *eh = new ihipEvent_t();
|
||||
ihipEvent_t *eh = new ihipEvent_t(flags);
|
||||
|
||||
eh->_state = hipEventStatusCreated;
|
||||
eh->_stream = NULL;
|
||||
eh->_flags = flags;
|
||||
eh->_timestamp = 0;
|
||||
*event = eh;
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
@@ -141,8 +185,8 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
|
||||
ihipEvent_t *start_eh = start;
|
||||
ihipEvent_t *stop_eh = stop;
|
||||
|
||||
ihipSetTs(start);
|
||||
ihipSetTs(stop);
|
||||
start->setTimestamp();
|
||||
stop->setTimestamp();
|
||||
|
||||
hipError_t status = hipSuccess;
|
||||
*ms = 0.0f;
|
||||
@@ -151,7 +195,7 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
|
||||
if ((start_eh->_state == hipEventStatusRecorded) && (stop_eh->_state == hipEventStatusRecorded)) {
|
||||
// Common case, we have good information for both events.
|
||||
|
||||
int64_t tickDiff = (stop_eh->_timestamp - start_eh->_timestamp);
|
||||
int64_t tickDiff = (stop_eh->timestamp() - start_eh->timestamp());
|
||||
|
||||
uint64_t freqHz;
|
||||
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
|
||||
|
||||
@@ -1641,23 +1641,6 @@ const char *ihipErrorString(hipError_t hip_error)
|
||||
};
|
||||
|
||||
|
||||
// Try to latch the timestamp of event `e` from its marker.
//
// Does nothing when the event is already "Recorded", when the marker has no
// native signal handle, or when that signal's value is still non-zero.
// Otherwise stores the marker's end tick and marks the event "Recorded".
void ihipSetTs(hipEvent_t e)
{
    ihipEvent_t *event = e;

    // already recorded, done:
    if (event->_state == hipEventStatusRecorded) {
        return;
    }

    // TODO - use completion-future functions to obtain ticks and timestamps:
    hsa_signal_t *signal = static_cast<hsa_signal_t*> (event->_marker.get_native_handle());
    if (signal && (hsa_signal_load_acquire(*signal) == 0)) {
        event->_timestamp = event->_marker.get_end_tick();
        event->_state = hipEventStatusRecorded;
    }
}
|
||||
|
||||
|
||||
// Returns true if copyEngineCtx can see the memory allocated on dstCtx and srcCtx.
|
||||
|
||||
@@ -584,22 +584,39 @@ private: // Data
|
||||
//----
|
||||
// Internal event structure:
|
||||
// Lifecycle state of an internal event.  Each enumerator is listed exactly
// once; the numeric values are fixed explicitly.
enum hipEventStatus_t {
    hipEventStatusUnitialized = 0, // event is uninitialized, must be "Created" before use.
    hipEventStatusCreated     = 1,
    hipEventStatusRecording   = 2, // event has been enqueued to record something.
    hipEventStatusRecorded    = 3, // event has been recorded - timestamps are valid.
} ;
|
||||
|
||||
// TODO - rename to ihip type of some kind
// Kind of event; ihipEvent_t::setTimestamp() uses it to pick which tick of the
// attached marker becomes the event's timestamp.
enum ihipEventType_t {
    hipEventTypeIndependent  = 0, // timestamp is the marker's end tick.
    hipEventTypeStartCommand = 1, // timestamp is the marker's begin tick.
    hipEventTypeStopCommand  = 2, // timestamp is the marker's end tick.
};
|
||||
|
||||
// internal hip event structure.
|
||||
struct ihipEvent_t {
|
||||
hipEventStatus_t _state;
|
||||
class ihipEvent_t {
|
||||
public:
|
||||
ihipEvent_t(unsigned flags);
|
||||
void attachToCompletionFuture(const hc::completion_future *cf, ihipEventType_t eventType);
|
||||
void setTimestamp();
|
||||
uint64_t timestamp() const { return _timestamp; } ;
|
||||
|
||||
public:
|
||||
hipEventStatus_t _state;
|
||||
|
||||
hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams.
|
||||
unsigned _flags;
|
||||
|
||||
hc::completion_future _marker;
|
||||
|
||||
private:
|
||||
ihipEventType_t _type;
|
||||
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
|
||||
friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
|
||||
} ;
|
||||
|
||||
|
||||
@@ -822,8 +839,6 @@ extern hipError_t ihipDeviceSetState();
|
||||
extern ihipDevice_t *ihipGetDevice(int);
|
||||
ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex);
|
||||
|
||||
extern void ihipSetTs(hipEvent_t e);
|
||||
|
||||
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t);
|
||||
|
||||
|
||||
@@ -364,10 +364,11 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
|
||||
|
||||
|
||||
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t *startEvent, hipEvent_t *stopEvent)
|
||||
{
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
@@ -446,7 +447,20 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
};
|
||||
|
||||
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize, nullptr/*completion_future*/);
|
||||
|
||||
hc::completion_future cf;
|
||||
|
||||
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize,
|
||||
(startEvent || stopEvent) ? &cf : nullptr);
|
||||
|
||||
|
||||
if (startEvent) {
|
||||
(*startEvent)->attachToCompletionFuture(&cf, hipEventTypeStartCommand);
|
||||
}
|
||||
if (stopEvent) {
|
||||
(*stopEvent)->attachToCompletionFuture (&cf, hipEventTypeStopCommand);
|
||||
}
|
||||
|
||||
|
||||
if(kernelParams != NULL){
|
||||
free(config[1]);
|
||||
@@ -470,7 +484,8 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(f,
|
||||
blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra));
|
||||
sharedMemBytes, hStream, kernelParams, extra,
|
||||
nullptr, nullptr));
|
||||
}
|
||||
|
||||
|
||||
@@ -478,7 +493,8 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t *startEvent, hipEvent_t *stopEvent)
|
||||
{
|
||||
HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
@@ -486,7 +502,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra));
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
|
||||
Ссылка в новой задаче
Block a user