5a0ecc8e48
Change-Id: I0ce8369a7ef569ac82c6e20266d0c98331a43e08
535 строки
21 KiB
C++
535 строки
21 KiB
C++
/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */
|
|
|
|
#include "cl_common.hpp"
|
|
#include "cl_thread_trace_amd.h"
|
|
#include "platform/context.hpp"
|
|
#include "platform/command.hpp"
|
|
#include "platform/threadtrace.hpp"
|
|
#include <cstring>
|
|
#include <vector>
|
|
#include <memory>
|
|
|
|
/*! \addtogroup API
 *  @{
 *
 *  \addtogroup AMD_Extensions
 *  @{
 *
 */
|
|
|
|
/*! \brief Creates a new HW threadTrace
|
|
*
|
|
* \param device must be a valid OpenCL device.
|
|
*
|
|
* \param threadTrace the created cl_threadtrace_amd object
|
|
*
|
|
* \param errcode_ret A non zero value if OpenCL failed to create cl_threadtrace_amd
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_DEVICE if the specified context is invalid.
|
|
* - CL_INVALID_OPERATION if we couldn't create the object
|
|
*
|
|
* \return Created cl_threadtrace_amd object
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_threadtrace_amd, clCreateThreadTraceAMD,
|
|
(cl_device_id device, cl_int* errcode_ret)) {
|
|
// Make sure we have a valid device object
|
|
if (!is_valid(device)) {
|
|
*not_null(errcode_ret) = CL_INVALID_DEVICE;
|
|
return NULL;
|
|
}
|
|
|
|
// Create the device thread trace object
|
|
amd::ThreadTrace* threadTrace = new amd::ThreadTrace(*as_amd(device));
|
|
|
|
if (threadTrace == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_OPERATION;
|
|
return NULL;
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(threadTrace);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
///*! \brief Destroy a threadTrace object.
|
|
// *
|
|
// * \param threadTrace the cl_threadtrace_amd object for release
|
|
// *
|
|
// * \return A non zero value if OpenCL failed to release cl_threadtrace_amd
|
|
// * - CL_SUCCESS if the function is executed successfully.
|
|
// * - CL_INVALID_OPERATION if we failed to release the object
|
|
// */
|
|
RUNTIME_ENTRY(cl_int, clReleaseThreadTraceAMD, (cl_threadtrace_amd threadTrace)) {
|
|
if (!is_valid(threadTrace)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
as_amd(threadTrace)->release();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
//
|
|
// *! \brief Increments the cl_threadtrace_amd object reference count.
|
|
// *
|
|
// * \param threadTrace the cl_threadtrace_amd object for retain
|
|
// *
|
|
// * \return A non zero value if OpenCL failed to retain cl_threadtrace_amd
|
|
// * - CL_SUCCESS if the function is executed successfully.
|
|
// * - CL_INVALID_OPERATION if we failed to release the object
|
|
// */
|
|
RUNTIME_ENTRY(cl_int, clRetainThreadTraceAMD, (cl_threadtrace_amd threadTrace)) {
|
|
if (!is_valid(threadTrace)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
as_amd(threadTrace)->retain();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
//
|
|
// *! \brief Sets the cl_threadtrace_amd object configuration parameter.
|
|
// *
|
|
// * \param thread_trace the cl_threadtrace_amd object to set configuration parameter
|
|
// *
|
|
// * \param config_param the cl_thread_trace_param
|
|
// *
|
|
// * \param param_value corresponding to configParam
|
|
// *
|
|
// * \return A non zero value if OpenCL failed to set threadTrace buffer parameter
|
|
// * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object.
|
|
// * - CL_INVALID_VALUE if the invalid config_param or param_value enum values , are used.
|
|
// * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or
|
|
// event_wait_list is not NULL and num_events_in_wait_list is 0,
|
|
// * - or if event objects in event_wait_list are not valid events.
|
|
// * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
|
// implementation on the device.
|
|
// * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
|
// OpenCL implementation on the host.
|
|
// */
|
|
RUNTIME_ENTRY(cl_int, clSetThreadTraceParamAMD,
|
|
(cl_threadtrace_amd thread_trace, cl_thread_trace_param config_param,
|
|
cl_uint param_value)) {
|
|
if (!is_valid(thread_trace)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
switch (config_param) {
|
|
case CL_THREAD_TRACE_PARAM_TOKEN_MASK:
|
|
if (param_value > CL_THREAD_TRACE_TOKEN_MASK_ALL_SI) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
as_amd(thread_trace)->setTokenMask(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_REG_MASK:
|
|
if (param_value > CL_THREAD_TRACE_REG_MASK_ALL_SI) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
as_amd(thread_trace)->setRegMask(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_VM_ID_MASK:
|
|
if (param_value > CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
as_amd(thread_trace)->setVmIdMask(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK:
|
|
if (param_value > CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
as_amd(thread_trace)->setInstMask(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET:
|
|
as_amd(thread_trace)->setCU(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET:
|
|
as_amd(thread_trace)->setSH(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_SIMD_MASK:
|
|
as_amd(thread_trace)->setSIMD(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_USER_DATA:
|
|
as_amd(thread_trace)->setUserData(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_CAPTURE_MODE:
|
|
if (param_value > CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
as_amd(thread_trace)->setCaptureMode(param_value);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_IS_WRAPPED:
|
|
as_amd(thread_trace)->setIsWrapped(true);
|
|
break;
|
|
case CL_THREAD_TRACE_PARAM_RANDOM_SEED:
|
|
as_amd(thread_trace)->setRandomSeed(param_value);
|
|
break;
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Get specific information about the OpenCL Thread Trace.
|
|
*
|
|
* \param threadTrace_info_param is an enum that identifies the Thread Trace information being
|
|
* queried.
|
|
*
|
|
* \param param_value is a pointer to memory location where appropriate values
|
|
* for a given \a threadTrace_info_param will be returned. If \a param_value is NULL,
|
|
* it is ignored.
|
|
*
|
|
* \param param_value_size specifies the size in bytes of memory pointed to by
|
|
* \a param_value. This size in bytes must be >= size of return type.
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data being
|
|
* queried by param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* CL_INVALID_OPERATION if cl_threadtrace_amd object is not valid
|
|
* - CL_INVALID_VALUE if \a param_name is not one of the supported
|
|
* values or if size in bytes specified by \a param_value_size is < size of
|
|
* return type and \a param_value is not a NULL value.
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
*
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetThreadTraceInfoAMD,
|
|
(cl_threadtrace_amd thread_trace /* threadTrace */,
|
|
cl_threadtrace_info thread_trace_info_param, size_t param_value_size,
|
|
void* param_value, size_t* param_value_size_ret)) {
|
|
if (!is_valid(thread_trace)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
// Find the thread trace object, associated with the specified device
|
|
const device::ThreadTrace* devThreadTrace = as_amd(thread_trace)->getDeviceThreadTrace();
|
|
|
|
const size_t seNum = as_amd(thread_trace)->deviceSeNumThreadTrace();
|
|
switch (thread_trace_info_param) {
|
|
case CL_THREAD_TRACE_SE: {
|
|
return amd::clGetInfo(seNum, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_THREAD_TRACE_BUFFERS_SIZE: {
|
|
// Make sure we found a valid thread trace object
|
|
if (devThreadTrace == NULL) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
std::unique_ptr<uint> bufSize2Se(new uint[seNum]);
|
|
|
|
if (bufSize2Se.get() == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
if (!devThreadTrace->info(thread_trace_info_param, bufSize2Se.get(), seNum)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
const size_t valueSize = seNum * sizeof(unsigned int);
|
|
|
|
if (param_value != NULL && param_value_size < valueSize) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
*not_null(param_value_size_ret) = valueSize;
|
|
|
|
if (param_value != NULL) {
|
|
::memcpy(param_value, bufSize2Se.get(), valueSize);
|
|
if (param_value_size > valueSize) {
|
|
::memset(static_cast<address>(param_value) + valueSize, '\0',
|
|
param_value_size - valueSize);
|
|
}
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/* \brief Enqueues the command for the specified thread trace object.
|
|
*
|
|
* \param command_queue must be a valid OpenCL command queue.
|
|
*
|
|
* \param thread_trace specifies the cl_threadtrace_amd object.
|
|
*
|
|
* \param event_wait_list specify [is a pointer to] events that need to
|
|
* complete before this particular command can be executed.
|
|
* If \a event_wait_list is NULL, then this particular command does not wait
|
|
* on any event to complete. If \a event_wait_list is NULL,
|
|
* \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
|
|
* the list of events pointed to by \a event_wait_list must be valid and
|
|
* \a num_events_in_wait_list must be greater than 0. The events specified in
|
|
* \a event_wait_list act as synchronization points.
|
|
*
|
|
* \param num_events_in_wait_list specify the number of events in
|
|
* \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
|
|
* greater than 0 if \a event_wait_list is not NULL.
|
|
*
|
|
* \param event returns an event object that identifies this particular
|
|
* command and can be used to query or queue a wait for this particular
|
|
* command to complete. \a event can be NULL in which case it will not be
|
|
* possible for the application to query the status of this command or queue a
|
|
* wait for this command to complete.
|
|
* \return A non zero value if OpenCL failed to release threadTrace
|
|
* - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
|
|
* - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list
|
|
* are not the same.
|
|
* - CL_INVALID_VALUE if the thread_trace is invalid thread trace object .
|
|
* - CL_INVALID_VALUE if the invalid command name enum value , not described in the
|
|
* cl_threadtrace_command_name_amd, is used.
|
|
* - CL_INVALID_OPERATION if the command enqueue failed. It can happen in the following cases:
|
|
* o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not
|
|
* bound..
|
|
* o END_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued.
|
|
* o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not
|
|
* queued.
|
|
* o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not
|
|
* queued.
|
|
* - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or
|
|
* event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in
|
|
* event_wait_list are not valid events.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
|
* implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL
|
|
* implementation on the host.
|
|
*/
|
|
|
|
RUNTIME_ENTRY(cl_int, clEnqueueThreadTraceCommandAMD,
|
|
(cl_command_queue command_queue, cl_threadtrace_amd thread_trace,
|
|
cl_threadtrace_command_name_amd command_name, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
// Check if command queue is valid
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
// Check if thread trace is valid
|
|
if (!is_valid(thread_trace)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
amd::ThreadTrace* amdThreadTrace = as_amd(thread_trace);
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
// Check that device associated with the command queue is the same as with thread trace
|
|
if (&hostQueue->device() != &amdThreadTrace->device()) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, num_events_in_wait_list,
|
|
event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
// Create a new command for the threadTraces
|
|
amd::ThreadTraceCommand* command = NULL;
|
|
switch (command_name) {
|
|
case CL_THREAD_TRACE_BEGIN_COMMAND:
|
|
if ((amdThreadTrace->getState() != amd::ThreadTrace::MemoryBound) &&
|
|
(amdThreadTrace->getState() != amd::ThreadTrace::End)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
amdThreadTrace->setState(amd::ThreadTrace::Begin);
|
|
command = new amd::ThreadTraceCommand(
|
|
*hostQueue, eventWaitList, static_cast<const void*>(&amdThreadTrace->threadTraceConfig()),
|
|
*amdThreadTrace, amd::ThreadTraceCommand::Begin, CL_COMMAND_THREAD_TRACE);
|
|
break;
|
|
case CL_THREAD_TRACE_END_COMMAND:
|
|
if ((amdThreadTrace->getState() != amd::ThreadTrace::Begin) &&
|
|
(amdThreadTrace->getState() != amd::ThreadTrace::Pause)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
amdThreadTrace->setState(amd::ThreadTrace::End);
|
|
command = new amd::ThreadTraceCommand(*hostQueue, eventWaitList,
|
|
&amdThreadTrace->threadTraceConfig(), *amdThreadTrace,
|
|
amd::ThreadTraceCommand::End, CL_COMMAND_THREAD_TRACE);
|
|
break;
|
|
case CL_THREAD_TRACE_PAUSE_COMMAND:
|
|
if (amdThreadTrace->getState() != amd::ThreadTrace::Begin) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
amdThreadTrace->setState(amd::ThreadTrace::Pause);
|
|
command = new amd::ThreadTraceCommand(
|
|
*hostQueue, eventWaitList, &amdThreadTrace->threadTraceConfig(), *amdThreadTrace,
|
|
amd::ThreadTraceCommand::Pause, CL_COMMAND_THREAD_TRACE);
|
|
break;
|
|
case CL_THREAD_TRACE_RESUME_COMMAND:
|
|
if (amdThreadTrace->getState() != amd::ThreadTrace::Pause) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
amdThreadTrace->setState(amd::ThreadTrace::Begin);
|
|
command = new amd::ThreadTraceCommand(
|
|
*hostQueue, eventWaitList, &amdThreadTrace->threadTraceConfig(), *amdThreadTrace,
|
|
amd::ThreadTraceCommand::Resume, CL_COMMAND_THREAD_TRACE);
|
|
break;
|
|
}
|
|
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
// Submit the command to the device
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
//
|
|
///*! \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace
|
|
///recording..
|
|
// *
|
|
// * \param command_queue must be a valid OpenCL command queue.
|
|
// *
|
|
// * \param thread_trace specifies the cl_threadtrace_amd object.
|
|
// *
|
|
// * \param mem_objects the cl_mem objects for trace recording
|
|
// *
|
|
// * \param mem_objects_num the number of cl_mem objects in the mem_objects
|
|
// *
|
|
// * \param buffer_size the size of each cl_mem object from mem_objects
|
|
// *
|
|
// * \param event_wait_list specify [is a pointer to] events that need to
|
|
// * complete before this particular command can be executed.
|
|
// * If \a event_wait_list is NULL, then this particular command does not wait
|
|
// * on any event to complete. If \a event_wait_list is NULL,
|
|
// * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
|
|
// * the list of events pointed to by \a event_wait_list must be valid and
|
|
// * \a num_events_in_wait_list must be greater than 0. The events specified in
|
|
// * \a event_wait_list act as synchronization points.
|
|
// *
|
|
// * \param num_events_in_wait_list specify the number of events in
|
|
// * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
|
|
// * greater than 0 if \a event_wait_list is not NULL.
|
|
// *
|
|
// * \param event returns an event object that identifies this particular
|
|
// * command and can be used to query or queue a wait for this particular
|
|
// * command to complete. \a event can be NULL in which case it will not be
|
|
// * possible for the application to query the status of this command or queue a
|
|
// * wait for this command to complete.
|
|
// * \return A non zero value if OpenCL failed to set threadTrace buffer parameter
|
|
// * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
|
|
// * - CL_INVALID_CONTEXT if the context associated with command_queue and events in
|
|
// event_wait_list are not the same.
|
|
// * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object.
|
|
// * - CL_INVALID_VALUE if the buffer_size is negative or zero.
|
|
// * - CL_INVALID_VALUE if the sub_buffers_num I less than 1.
|
|
// * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of
|
|
// the [GPU] device.
|
|
// * - CL_INVALID_MEM_OBJECT if one on memory objects in the mem_objects array is not a valid
|
|
// memory object or memory_objects is NULL.
|
|
// * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store
|
|
// associated from the memory objects of the mem_objects array.
|
|
// * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or
|
|
// event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in
|
|
// event_wait_list are not valid events.
|
|
// * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
|
// implementation on the device.
|
|
// * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the
|
|
// * OpenCL implementation on the host.
|
|
// */
|
|
RUNTIME_ENTRY(cl_int, clEnqueueBindThreadTraceBufferAMD,
|
|
(cl_command_queue command_queue, cl_threadtrace_amd thread_trace, cl_mem* mem_objects,
|
|
cl_uint mem_objects_num, cl_uint buffer_size, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
// Check if command queue is valid
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
// Check if thread trace is valid
|
|
if (!is_valid(thread_trace)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
// Check if input values are valid
|
|
if ((mem_objects == NULL) || (buffer_size <= 0)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
amd::ThreadTrace* amdThreadTrace = as_amd(thread_trace);
|
|
|
|
// Check if the number of bound memory objects is the same as the number of SEs
|
|
if (amdThreadTrace->deviceSeNumThreadTrace() != mem_objects_num) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
// Check if memory objects ,bound the thread trace,are valid
|
|
for (size_t i = 0; i < mem_objects_num; ++i) {
|
|
cl_mem obj = mem_objects[i];
|
|
if (!is_valid(obj)) {
|
|
return CL_INVALID_MEM_OBJECT;
|
|
}
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
// Check that device associated with the command queue is the same as with thread trace
|
|
if (&hostQueue->device() != &amdThreadTrace->device()) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, num_events_in_wait_list,
|
|
event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
amdThreadTrace->setState(amd::ThreadTrace::MemoryBound);
|
|
// Create a new ThreadTraceMemObjectsCommand command
|
|
amd::ThreadTraceMemObjectsCommand* command = new amd::ThreadTraceMemObjectsCommand(
|
|
*hostQueue, eventWaitList, mem_objects_num, mem_objects, buffer_size, *amdThreadTrace,
|
|
CL_COMMAND_THREAD_TRACE_MEM);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
// Make sure we have memory for the command execution
|
|
if (!command->validateMemory()) {
|
|
delete command;
|
|
return CL_OUT_OF_RESOURCES;
|
|
}
|
|
// Submit the command to the device
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
 *  @}
 */
|