387 satır
13 KiB
C++
387 satır
13 KiB
C++
/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE. */
|
|
|
|
#include "cl_common.hpp"
|
|
#include "cl_profile_amd.h"
|
|
#include "platform/context.hpp"
|
|
#include "platform/command.hpp"
|
|
#include "platform/perfctr.hpp"
|
|
#include "device/device.hpp"
|
|
#include <cstring>
|
|
|
|
/*! \addtogroup API
|
|
* @{
|
|
*
|
|
* \addtogroup AMD_Extensions
|
|
* @{
|
|
*
|
|
*/
|
|
|
|
/*! \brief Creates a new HW performance counter
|
|
* for the specified OpenCL context.
|
|
*
|
|
* \param device must be a valid OpenCL device.
|
|
*
|
|
* \param block_index index of the HW block to configure.
|
|
*
|
|
* \param counter_index index of the hardware counter
|
|
* within the block to configure.
|
|
*
|
|
* \param event_index Event you wish to count with
|
|
* the counter specified by block_index + counter_index
|
|
*
|
|
* \param perf_counter the created perfcounter object
|
|
*
|
|
* \param errcode_ret A non zero value if OpenCL failed to create PerfCounter
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_DEVICE if the specified context is invalid.
|
|
* - CL_INVALID_OPERATION if we couldn't create the object
|
|
*
|
|
* \return Created perfcounter object
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_perfcounter_amd, clCreatePerfCounterAMD,
|
|
(cl_device_id device, cl_perfcounter_property* properties, cl_int* errcode_ret)) {
|
|
// Make sure we have a valid device object
|
|
if (!is_valid(device)) {
|
|
*not_null(errcode_ret) = CL_INVALID_DEVICE;
|
|
return NULL;
|
|
}
|
|
|
|
// Make sure we have a valid pointer to the performance counter properties
|
|
if (NULL == properties) {
|
|
return NULL;
|
|
}
|
|
|
|
amd::PerfCounter::Properties perfProperties;
|
|
size_t size = 0;
|
|
while (properties[size] != CL_PERFCOUNTER_NONE) {
|
|
if (properties[size] < CL_PERFCOUNTER_LAST) {
|
|
perfProperties[properties[size]] = static_cast<ulong>(properties[size + 1]);
|
|
size += 2;
|
|
} else {
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
// Create the device perf counter
|
|
amd::PerfCounter* perfCounter = new amd::PerfCounter(*as_amd(device), perfProperties);
|
|
|
|
if (perfCounter == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_OPERATION;
|
|
return NULL;
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(perfCounter);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Destroy a performance counter object.
|
|
*
|
|
* \param perf_counter the perfcounter object for release
|
|
*
|
|
* \return A non zero value if OpenCL failed to release PerfCounter
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_OPERATION if we failed to release the object
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clReleasePerfCounterAMD, (cl_perfcounter_amd perf_counter)) {
|
|
if (!is_valid(perf_counter)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
as_amd(perf_counter)->release();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Increments the perfcounter object reference count.
|
|
*
|
|
* \param perf_counter the perfcounter object for retain
|
|
*
|
|
* \return A non zero value if OpenCL failed to retain PerfCounter
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_OPERATION if we failed to release the object
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clRetainPerfCounterAMD, (cl_perfcounter_amd perf_counter)) {
|
|
if (!is_valid(perf_counter)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
as_amd(perf_counter)->retain();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Enqueues the begin command for the specified counters.
|
|
*
|
|
* \param command_queue must be a valid OpenCL command queue.
|
|
*
|
|
* \param num_perf_counters the number of perfcounter objects in the array.
|
|
*
|
|
* \param perf_counters specifies an array of perfcounter objects.
|
|
*
|
|
* \param event_wait_list specify [is a pointer to] events that need to
|
|
* complete before this particular command can be executed.
|
|
* If \a event_wait_list is NULL, then this particular command does not wait
|
|
* on any event to complete. If \a event_wait_list is NULL,
|
|
* \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
|
|
* the list of events pointed to by \a event_wait_list must be valid and
|
|
* \a num_events_in_wait_list must be greater than 0. The events specified in
|
|
* \a event_wait_list act as synchronization points.
|
|
*
|
|
* \param num_events_in_wait_list specify the number of events in
|
|
* \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
|
|
* greater than 0 if \a event_wait_list is not NULL.
|
|
*
|
|
* \param event returns an event object that identifies this particular
|
|
* command and can be used to query or queue a wait for this particular
|
|
* command to complete. \a event can be NULL in which case it will not be
|
|
* possible for the application to query the status of this command or queue a
|
|
* wait for this command to complete.
|
|
*
|
|
* \return A non zero value if OpenCL failed to release PerfCounter
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_OPERATION if we failed to enqueue the begin operation
|
|
* - CL_INVALID_COMMAND_QUEUE if the queue is
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueBeginPerfCounterAMD,
|
|
(cl_command_queue command_queue, cl_uint num_perf_counters,
|
|
cl_perfcounter_amd* perf_counters, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
if ((num_perf_counters == 0) || (perf_counters == NULL)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::PerfCounterCommand::PerfCounterList counters;
|
|
|
|
// Place all counters into the list
|
|
for (cl_uint i = 0; i < num_perf_counters; ++i) {
|
|
amd::PerfCounter* amdPerf = as_amd(perf_counters[i]);
|
|
if (&hostQueue->device() == &amdPerf->device()) {
|
|
counters.push_back(amdPerf);
|
|
} else {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err =
|
|
amd::clSetEventWaitList(eventWaitList, *hostQueue, num_events_in_wait_list, event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
// Create a new command for the performance counters
|
|
amd::PerfCounterCommand* command = new amd::PerfCounterCommand(
|
|
*hostQueue, eventWaitList, counters, amd::PerfCounterCommand::Begin);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
// Submit the command to the device
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Enqueues the end command for the specified counters.
|
|
*
|
|
* \param command_queue must be a valid OpenCL command queue.
|
|
*
|
|
* \param num_perf_counters the number of perfcounter objects in the array.
|
|
*
|
|
* \param perf_counters specifies an array of perfcounter objects.
|
|
*
|
|
* \param event_wait_list specify [is a pointer to] events that need to
|
|
* complete before this particular command can be executed.
|
|
* If \a event_wait_list is NULL, then this particular command does not wait
|
|
* on any event to complete. If \a event_wait_list is NULL,
|
|
* \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
|
|
* the list of events pointed to by \a event_wait_list must be valid and
|
|
* \a num_events_in_wait_list must be greater than 0. The events specified in
|
|
* \a event_wait_list act as synchronization points.
|
|
*
|
|
* \param num_events_in_wait_list specify the number of events in
|
|
* \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
|
|
* greater than 0 if \a event_wait_list is not NULL.
|
|
*
|
|
* \param event returns an event object that identifies this particular
|
|
* command and can be used to query or queue a wait for this particular
|
|
* command to complete. \a event can be NULL in which case it will not be
|
|
* possible for the application to query the status of this command or queue a
|
|
* wait for this command to complete.
|
|
*
|
|
* \return A non zero value if OpenCL failed to release PerfCounter
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_OPERATION if we failed to enqueue the end operation
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueEndPerfCounterAMD,
|
|
(cl_command_queue command_queue, cl_uint num_perf_counters,
|
|
cl_perfcounter_amd* perf_counters, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
if ((num_perf_counters == 0) || (perf_counters == NULL)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::PerfCounterCommand::PerfCounterList counters;
|
|
|
|
// Place all counters into the list
|
|
for (cl_uint i = 0; i < num_perf_counters; ++i) {
|
|
amd::PerfCounter* amdPerf = as_amd(perf_counters[i]);
|
|
if (&hostQueue->device() == &amdPerf->device()) {
|
|
counters.push_back(amdPerf);
|
|
} else {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err =
|
|
amd::clSetEventWaitList(eventWaitList, *hostQueue, num_events_in_wait_list, event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
// Create a new command for the performance counters
|
|
amd::PerfCounterCommand* command = new amd::PerfCounterCommand(
|
|
*hostQueue, eventWaitList, counters, amd::PerfCounterCommand::End);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
// Submit the command to the device
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Retrieves the results from the counter objects.
|
|
*
|
|
* \param num_perf_counter the perfcounter object for the information query.
|
|
*
|
|
* \param perf_counters specifies an array of perfcounter objects.
|
|
*
|
|
* \param wait_event specifies the wait event, returned in
|
|
* the clEnqueueEndPerfCounterAMD.
|
|
*
|
|
* \param wait true if OpenCL should wait for the perfcounter data.
|
|
*
|
|
* \param values must be a valid pointer to an array of 64-bit values
|
|
* and the array size must be equal to num_perf_counters.
|
|
*
|
|
* \return
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished.
|
|
* - CL_INVALID_OPERATION if we failed to get the data
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetPerfCounterInfoAMD,
|
|
(cl_perfcounter_amd perf_counter, cl_perfcounter_info param_name,
|
|
size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
|
|
// Check if we have a valid performance counter
|
|
if (!is_valid(perf_counter)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
// Find the kernel, associated with the specified device
|
|
const device::PerfCounter* devCounter = as_amd(perf_counter)->getDeviceCounter();
|
|
|
|
// Make sure we found a valid performance counter
|
|
if (devCounter == NULL) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
// Get the corresponded parameters
|
|
switch (param_name) {
|
|
case CL_PERFCOUNTER_REFERENCE_COUNT: {
|
|
cl_uint count = as_amd(perf_counter)->referenceCount();
|
|
// Return the reference counter
|
|
return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PERFCOUNTER_GPU_BLOCK_INDEX:
|
|
case CL_PERFCOUNTER_GPU_COUNTER_INDEX:
|
|
case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
|
|
cl_ulong data = devCounter->getInfo(param_name);
|
|
// Return the device performance counter information
|
|
return amd::clGetInfo(data, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PERFCOUNTER_DATA: {
|
|
cl_ulong data = devCounter->getInfo(param_name);
|
|
if (static_cast<cl_ulong>(0xffffffffffffffffULL) == data) {
|
|
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
|
}
|
|
// Return the device performance counter result
|
|
return amd::clGetInfo(data, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
RUNTIME_ENTRY(cl_int, clSetDeviceClockModeAMD,
|
|
(cl_device_id device, cl_set_device_clock_mode_input_amd set_clock_mode_input,
|
|
cl_set_device_clock_mode_output_amd* set_clock_mode_output)) {
|
|
// Make sure we have a valid device object
|
|
if (!is_valid(device)) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
if (set_clock_mode_input.clock_mode >= CL_DEVICE_CLOCK_MODE_COUNT_AMD) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
amd::Device* amdDevice = as_amd(device);
|
|
bool ret = amdDevice->SetClockMode(set_clock_mode_input, set_clock_mode_output);
|
|
return (ret == true) ? CL_SUCCESS : CL_INVALID_OPERATION;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* @}
|
|
*/
|