Finish adding start/stop event support to hipHccModuleLaunchKernel.
Change interface to use hipEvent_t rather than hipEvent_t*
Change-Id: I259062dc087a13d51dc27f84e1e8861f332a104d
[ROCm/hip commit: 6656d33f75]
Этот коммит содержится в:
@@ -70,15 +70,14 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the default stream is used with associated synchronization rules.
|
||||
* @param [in] kernelParams Kernel parameters. Not yet implemented in HIP; pass kernel arguments through extra instead.
|
||||
* @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API.
|
||||
* @param [in] stopEvent If non-null, specified event will be updated to track the stop time of the kernel launch. The event must be created before calling this API.
|
||||
*
|
||||
* @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
|
||||
|
||||
* If startNanos or stopNanos is specified, this API will record and return the start and stop timestamps for the command. The timestamps are collected on the GPU device
|
||||
* and converted into ns resolution. Typically programs will specify both pointers. Collecting performance timestamps may have a small overhead (approx 1us).
|
||||
*
|
||||
* @warning kernelParams argument is not yet implemented in HIP. Please use extra instead. Please refer to hip_porting_driver_api.md for sample usage.
|
||||
|
||||
* HIP/ROCm actually updates the start event when the associated kernel completes.
|
||||
*/
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX,
|
||||
@@ -91,8 +90,8 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
hipStream_t hStream,
|
||||
void **kernelParams,
|
||||
void **extra,
|
||||
uint64_t *startNanos=nullptr,
|
||||
uint64_t *stopNanos=nullptr
|
||||
hipEvent_t startEvent=nullptr,
|
||||
hipEvent_t stopEvent=nullptr
|
||||
);
|
||||
|
||||
// doxygen end HCC-specific features
|
||||
|
||||
@@ -604,6 +604,7 @@ public:
|
||||
void attachToCompletionFuture(const hc::completion_future *cf, ihipEventType_t eventType);
|
||||
void setTimestamp();
|
||||
uint64_t timestamp() const { return _timestamp; } ;
|
||||
ihipEventType_t type() const { return _type; };
|
||||
|
||||
public:
|
||||
hipEventStatus_t _state;
|
||||
|
||||
@@ -368,7 +368,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t *startEvent, hipEvent_t *stopEvent)
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent)
|
||||
{
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
@@ -455,10 +455,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
|
||||
|
||||
if (startEvent) {
|
||||
(*startEvent)->attachToCompletionFuture(&cf, hipEventTypeStartCommand);
|
||||
startEvent->attachToCompletionFuture(&cf, hipEventTypeStartCommand);
|
||||
}
|
||||
if (stopEvent) {
|
||||
(*stopEvent)->attachToCompletionFuture (&cf, hipEventTypeStopCommand);
|
||||
stopEvent->attachToCompletionFuture (&cf, hipEventTypeStopCommand);
|
||||
}
|
||||
|
||||
|
||||
@@ -494,7 +494,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t *startEvent, hipEvent_t *stopEvent)
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent)
|
||||
{
|
||||
HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
|
||||
Ссылка в новой задаче
Block a user