diff --git a/include/hip/hcc_detail/hip_hcc.h b/include/hip/hcc_detail/hip_hcc.h index 889e04eb9f..fc04917931 100644 --- a/include/hip/hcc_detail/hip_hcc.h +++ b/include/hip/hcc_detail/hip_hcc.h @@ -70,15 +70,14 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules. * @param [in] kernelParams * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel. - * @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API. * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of the kernel launch. The event must be created before calling this API. * * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - - * If startNanos or stopNanos is specified, this API will record and return the start and stop timestamps for the command. The timestamps are collected on the GPU device - * and converted into ns resolution. Typically programs will specify both pointers. Collecting performance timestamps may have a small overhead (approx 1us). * * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please refer to hip_porting_driver_api.md for sample usage. + + * HIP/ROCm actually updates the start event when the associated kernel completes. */ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, @@ -91,8 +90,8 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, hipStream_t hStream, void **kernelParams, void **extra, - uint64_t *startNanos=nullptr, - uint64_t *stopNanos=nullptr + hipEvent_t startEvent=nullptr, + hipEvent_t stopEvent=nullptr ); // doxygen end HCC-specific features diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 459ea3ba2c..9c17c6e98c 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -604,6 +604,7 @@ public: void attachToCompletionFuture(const hc::completion_future *cf, ihipEventType_t eventType); void setTimestamp(); uint64_t timestamp() const { return _timestamp; } ; + ihipEventType_t type() const { return _type; }; public: hipEventStatus_t _state; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index c8555672c3..b359e7a63c 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -368,7 +368,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra, - hipEvent_t *startEvent, hipEvent_t *stopEvent) + hipEvent_t startEvent, hipEvent_t stopEvent) { auto ctx = ihipGetTlsDefaultCtx(); @@ -455,10 +455,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, if (startEvent) { - (*startEvent)->attachToCompletionFuture(&cf, hipEventTypeStartCommand); + startEvent->attachToCompletionFuture(&cf, hipEventTypeStartCommand); } if (stopEvent) { - (*stopEvent)->attachToCompletionFuture (&cf, hipEventTypeStopCommand); + stopEvent->attachToCompletionFuture (&cf, hipEventTypeStopCommand); } @@ -494,7 +494,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra, - hipEvent_t *startEvent, hipEvent_t *stopEvent) + hipEvent_t startEvent, hipEvent_t stopEvent) { HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ,