Files
rocm-systems/opencl/api/opencl/amdocl/cl_profile_amd.cpp
T
2014-07-04 16:17:05 -04:00

383 lines
12 KiB
C++

//
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
//
#include "cl_common.hpp"
#include "cl_profile_amd.h"
#include "platform/context.hpp"
#include "platform/command.hpp"
#include "platform/perfctr.hpp"
#include <cstring>
/*! \addtogroup API
* @{
*
* \addtogroup AMD_Extensions
* @{
*
*/
/*! \brief Creates a new HW performance counter for the specified
 *  OpenCL device.
 *
 *  \param device must be a valid OpenCL device.
 *
 *  \param properties a list of (property name, property value) pairs,
 *  terminated by CL_PERFCOUNTER_NONE. Every property name must be less than
 *  CL_PERFCOUNTER_LAST. NOTE(review): the names are presumably the block,
 *  counter and event indices of the HW counter to configure - confirm
 *  against cl_profile_amd.h.
 *
 *  \param errcode_ret receives the status of the call. It is always written
 *  through not_null(), so a caller that does not need the status can
 *  presumably pass NULL - confirm against not_null().
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_DEVICE if \a device is not a valid device.
 *  - CL_INVALID_VALUE if \a properties is NULL or contains a property name
 *    that is not less than CL_PERFCOUNTER_LAST.
 *  - CL_INVALID_OPERATION if we couldn't create the object.
 *
 *  \return Created perfcounter object, or NULL on failure.
 */
RUNTIME_ENTRY_RET(cl_perfcounter_amd, clCreatePerfCounterAMD, (
    cl_device_id                device,
    cl_perfcounter_property*    properties,
    cl_int*                     errcode_ret))
{
    // Make sure we have a valid device object
    if (!is_valid(device)) {
        *not_null(errcode_ret) = CL_INVALID_DEVICE;
        return NULL;
    }

    // Make sure we have a valid pointer to the performance counter properties.
    // Report the failure explicitly, so the caller never reads
    // an unwritten error code.
    if (NULL == properties) {
        *not_null(errcode_ret) = CL_INVALID_VALUE;
        return NULL;
    }

    // Walk the (name, value) pairs until the CL_PERFCOUNTER_NONE terminator
    amd::PerfCounter::Properties perfProperties;
    for (size_t idx = 0; properties[idx] != CL_PERFCOUNTER_NONE; idx += 2) {
        if (properties[idx] >= CL_PERFCOUNTER_LAST) {
            // Unknown property name - the list can't be interpreted
            *not_null(errcode_ret) = CL_INVALID_VALUE;
            return NULL;
        }
        perfProperties[properties[idx]] =
            static_cast<ulong>(properties[idx + 1]);
    }

    // Create the device perf counter
    amd::PerfCounter* perfCounter =
        new amd::PerfCounter(*as_amd(device), perfProperties);

    if (perfCounter == NULL) {
        *not_null(errcode_ret) = CL_INVALID_OPERATION;
        return NULL;
    }

    *not_null(errcode_ret) = CL_SUCCESS;
    return as_cl(perfCounter);
}
RUNTIME_EXIT
/*! \brief Decrements the perfcounter object reference count.
 *
 *  \param perf_counter the perfcounter object to release.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if release() was called on the object.
 *  - CL_INVALID_OPERATION if \a perf_counter is not a valid object.
 */
RUNTIME_ENTRY(cl_int, clReleasePerfCounterAMD, (
    cl_perfcounter_amd  perf_counter))
{
    // Only a valid handle may be converted to the runtime object
    if (is_valid(perf_counter)) {
        as_amd(perf_counter)->release();
        return CL_SUCCESS;
    }

    return CL_INVALID_OPERATION;
}
RUNTIME_EXIT
/*! \brief Increments the perfcounter object reference count.
 *
 *  \param perf_counter the perfcounter object to retain.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if retain() was called on the object.
 *  - CL_INVALID_OPERATION if \a perf_counter is not a valid object.
 */
RUNTIME_ENTRY(cl_int, clRetainPerfCounterAMD, (
    cl_perfcounter_amd  perf_counter))
{
    // Only a valid handle may be converted to the runtime object
    if (is_valid(perf_counter)) {
        as_amd(perf_counter)->retain();
        return CL_SUCCESS;
    }

    return CL_INVALID_OPERATION;
}
RUNTIME_EXIT
/*! \brief Enqueues the begin command for the specified counters.
 *
 *  \param command_queue must be a valid OpenCL command queue
 *  that is a host queue.
 *
 *  \param num_perf_counters the number of perfcounter objects in the array.
 *  It must be greater than 0.
 *
 *  \param perf_counters specifies an array of perfcounter objects. Every
 *  entry must be a valid perfcounter object that belongs to the same device
 *  as \a command_queue.
 *
 *  \param num_events_in_wait_list specify the number of events in
 *  \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
 *  greater than 0 if \a event_wait_list is not NULL.
 *
 *  \param event_wait_list specify [is a pointer to] events that need to
 *  complete before this particular command can be executed.
 *  If \a event_wait_list is NULL, then this particular command does not wait
 *  on any event to complete. If \a event_wait_list is NULL,
 *  \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
 *  the list of events pointed to by \a event_wait_list must be valid and
 *  \a num_events_in_wait_list must be greater than 0. The events specified in
 *  \a event_wait_list act as synchronization points.
 *
 *  \param event returns an event object that identifies this particular
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue a
 *  wait for this command to complete.
 *
 *  \return A non zero value if OpenCL failed to enqueue the begin command
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not valid
 *    or is not a host queue.
 *  - CL_INVALID_OPERATION if \a num_perf_counters is 0, \a perf_counters is
 *    NULL or one of its entries is not a valid perfcounter object.
 *  - CL_INVALID_DEVICE if a perfcounter object belongs to a device other
 *    than the device of \a command_queue.
 *  - CL_OUT_OF_HOST_MEMORY if the command object couldn't be allocated.
 *  - the error reported by amd::clSetEventWaitList() for a bad wait list.
 */
RUNTIME_ENTRY(cl_int, clEnqueueBeginPerfCounterAMD, (
    cl_command_queue    command_queue,
    cl_uint             num_perf_counters,
    cl_perfcounter_amd* perf_counters,
    cl_uint             num_events_in_wait_list,
    const cl_event*     event_wait_list,
    cl_event*           event))
{
    if (!is_valid(command_queue)) {
        return CL_INVALID_COMMAND_QUEUE;
    }

    if ((num_perf_counters == 0) || (perf_counters == NULL)) {
        return CL_INVALID_OPERATION;
    }

    amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
    if (NULL == hostQueue) {
        return CL_INVALID_COMMAND_QUEUE;
    }

    amd::PerfCounterCommand::PerfCounterList counters;

    // Place all counters into the list
    for (cl_uint i = 0; i < num_perf_counters; ++i) {
        // Validate the handle before it's converted and dereferenced,
        // a bad array entry must not crash the runtime
        if (!is_valid(perf_counters[i])) {
            return CL_INVALID_OPERATION;
        }

        amd::PerfCounter* amdPerf = as_amd(perf_counters[i]);

        // The counter has to live on the device that executes the queue
        if (&hostQueue->device() != &amdPerf->device()) {
            return CL_INVALID_DEVICE;
        }
        counters.push_back(amdPerf);
    }

    amd::Command::EventWaitList eventWaitList;
    cl_int err = amd::clSetEventWaitList(eventWaitList,
        hostQueue->context(), num_events_in_wait_list, event_wait_list);
    if (err != CL_SUCCESS) {
        return err;
    }

    // Create a new command for the performance counters
    amd::PerfCounterCommand* command =
        new amd::PerfCounterCommand(*hostQueue, eventWaitList, counters,
            amd::PerfCounterCommand::Begin);

    if (command == NULL) {
        return CL_OUT_OF_HOST_MEMORY;
    }

    // Submit the command to the device
    command->enqueue();

    // Hand the event over to the application or, if the application
    // didn't ask for it, drop our reference to the command right away
    *not_null(event) = as_cl(&command->event());
    if (event == NULL) {
        command->release();
    }

    return CL_SUCCESS;
}
RUNTIME_EXIT
/*! \brief Enqueues the end command for the specified counters.
 *
 *  \param command_queue must be a valid OpenCL command queue
 *  that is a host queue.
 *
 *  \param num_perf_counters the number of perfcounter objects in the array.
 *  It must be greater than 0.
 *
 *  \param perf_counters specifies an array of perfcounter objects. Every
 *  entry must be a valid perfcounter object that belongs to the same device
 *  as \a command_queue.
 *
 *  \param num_events_in_wait_list specify the number of events in
 *  \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
 *  greater than 0 if \a event_wait_list is not NULL.
 *
 *  \param event_wait_list specify [is a pointer to] events that need to
 *  complete before this particular command can be executed.
 *  If \a event_wait_list is NULL, then this particular command does not wait
 *  on any event to complete. If \a event_wait_list is NULL,
 *  \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
 *  the list of events pointed to by \a event_wait_list must be valid and
 *  \a num_events_in_wait_list must be greater than 0. The events specified in
 *  \a event_wait_list act as synchronization points.
 *
 *  \param event returns an event object that identifies this particular
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue a
 *  wait for this command to complete.
 *
 *  \return A non zero value if OpenCL failed to enqueue the end command
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not valid
 *    or is not a host queue.
 *  - CL_INVALID_OPERATION if \a num_perf_counters is 0, \a perf_counters is
 *    NULL or one of its entries is not a valid perfcounter object.
 *  - CL_INVALID_DEVICE if a perfcounter object belongs to a device other
 *    than the device of \a command_queue.
 *  - CL_OUT_OF_HOST_MEMORY if the command object couldn't be allocated.
 *  - the error reported by amd::clSetEventWaitList() for a bad wait list.
 */
RUNTIME_ENTRY(cl_int, clEnqueueEndPerfCounterAMD, (
    cl_command_queue    command_queue,
    cl_uint             num_perf_counters,
    cl_perfcounter_amd* perf_counters,
    cl_uint             num_events_in_wait_list,
    const cl_event*     event_wait_list,
    cl_event*           event))
{
    if (!is_valid(command_queue)) {
        return CL_INVALID_COMMAND_QUEUE;
    }

    if ((num_perf_counters == 0) || (perf_counters == NULL)) {
        return CL_INVALID_OPERATION;
    }

    amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
    if (NULL == hostQueue) {
        return CL_INVALID_COMMAND_QUEUE;
    }

    amd::PerfCounterCommand::PerfCounterList counters;

    // Place all counters into the list
    for (cl_uint i = 0; i < num_perf_counters; ++i) {
        // Validate the handle before it's converted and dereferenced,
        // a bad array entry must not crash the runtime
        if (!is_valid(perf_counters[i])) {
            return CL_INVALID_OPERATION;
        }

        amd::PerfCounter* amdPerf = as_amd(perf_counters[i]);

        // The counter has to live on the device that executes the queue
        if (&hostQueue->device() != &amdPerf->device()) {
            return CL_INVALID_DEVICE;
        }
        counters.push_back(amdPerf);
    }

    amd::Command::EventWaitList eventWaitList;
    cl_int err = amd::clSetEventWaitList(eventWaitList,
        hostQueue->context(), num_events_in_wait_list, event_wait_list);
    if (err != CL_SUCCESS) {
        return err;
    }

    // Create a new command for the performance counters
    amd::PerfCounterCommand* command =
        new amd::PerfCounterCommand(*hostQueue, eventWaitList, counters,
            amd::PerfCounterCommand::End);

    if (command == NULL) {
        return CL_OUT_OF_HOST_MEMORY;
    }

    // Submit the command to the device
    command->enqueue();

    // Hand the event over to the application or, if the application
    // didn't ask for it, drop our reference to the command right away
    *not_null(event) = as_cl(&command->event());
    if (event == NULL) {
        command->release();
    }

    return CL_SUCCESS;
}
RUNTIME_EXIT
/*! \brief Queries information about a perfcounter object,
 *  including the counter result.
 *
 *  \param perf_counter the perfcounter object for the information query.
 *
 *  \param param_name the information to query. Accepted values are
 *  CL_PERFCOUNTER_REFERENCE_COUNT, CL_PERFCOUNTER_GPU_BLOCK_INDEX,
 *  CL_PERFCOUNTER_GPU_COUNTER_INDEX, CL_PERFCOUNTER_GPU_EVENT_INDEX
 *  and CL_PERFCOUNTER_DATA.
 *
 *  \param param_value_size the size in bytes of the memory pointed to
 *  by \a param_value. Forwarded to amd::clGetInfo().
 *
 *  \param param_value the memory where the result is returned.
 *  Forwarded to amd::clGetInfo().
 *
 *  \param param_value_size_ret the size of the queried data.
 *  Forwarded to amd::clGetInfo().
 *
 *  The reference count is reported as cl_uint,
 *  all other values are reported as cl_ulong.
 *
 *  \return
 *  - the status of amd::clGetInfo() if \a param_name is accepted.
 *  - CL_PROFILING_INFO_NOT_AVAILABLE if CL_PERFCOUNTER_DATA is queried and
 *    the device counter reports 0xffffffffffffffff (presumably the result
 *    isn't available yet - confirm against device::PerfCounter::getInfo()).
 *  - CL_INVALID_OPERATION if \a perf_counter is not valid
 *    or has no device counter.
 *  - CL_INVALID_VALUE if \a param_name is not one of the accepted values.
 */
RUNTIME_ENTRY(cl_int, clGetPerfCounterInfoAMD, (
    cl_perfcounter_amd  perf_counter,
    cl_perfcounter_info param_name,
    size_t              param_value_size,
    void*               param_value,
    size_t*             param_value_size_ret))
{
    // Check if we have a valid performance counter
    if (!is_valid(perf_counter)) {
        return CL_INVALID_OPERATION;
    }

    amd::PerfCounter* amdCounter = as_amd(perf_counter);

    // Look up the device object that backs this performance counter
    const device::PerfCounter* deviceCounter = amdCounter->getDeviceCounter();
    if (NULL == deviceCounter) {
        return CL_INVALID_OPERATION;
    }

    switch (param_name) {
    case CL_PERFCOUNTER_REFERENCE_COUNT: {
        // The reference count comes from the runtime object itself
        cl_uint refCount = amdCounter->referenceCount();
        return amd::clGetInfo(refCount,
            param_value_size, param_value, param_value_size_ret);
    }
    case CL_PERFCOUNTER_GPU_BLOCK_INDEX:
    case CL_PERFCOUNTER_GPU_COUNTER_INDEX:
    case CL_PERFCOUNTER_GPU_EVENT_INDEX:
    case CL_PERFCOUNTER_DATA: {
        // Everything else is answered by the device counter
        cl_ulong value = deviceCounter->getInfo(param_name);

        // Only the counter result has a "not available" marker
        if ((param_name == CL_PERFCOUNTER_DATA) &&
            (value == static_cast<cl_ulong>(0xffffffffffffffffULL))) {
            return CL_PROFILING_INFO_NOT_AVAILABLE;
        }
        return amd::clGetInfo(value,
            param_value_size, param_value, param_value_size_ret);
    }
    default:
        break;
    }

    // Unknown query
    return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! @}
* @}
*/