diff --git a/projects/clr/opencl/api/opencl/amdocl/cl_context.cpp b/projects/clr/opencl/api/opencl/amdocl/cl_context.cpp
index 8472cac544..ffb4ac21ed 100644
--- a/projects/clr/opencl/api/opencl/amdocl/cl_context.cpp
+++ b/projects/clr/opencl/api/opencl/amdocl/cl_context.cpp
@@ -23,6 +23,7 @@
 #include "cl_thread_trace_amd.h"
 #include "cl_debugger_amd.h"
 #include "cl_lqdflash_amd.h"
+#include "cl_p2p_amd.h"
 
 #include
 #include
@@ -503,6 +504,9 @@ CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddress(const char* func_na
 #if cl_amd_liquid_flash
       CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReadSsgFileAMD);
       CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSsgFileAMD);
 #endif  // cl_amd_liquid_flash
+#if cl_amd_copy_buffer_p2p
+      CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueCopyBufferP2PAMD);
+#endif  // cl_amd_copy_buffer_p2p
       break;
     case 'G':
diff --git a/projects/clr/opencl/api/opencl/amdocl/cl_device.cpp b/projects/clr/opencl/api/opencl/amdocl/cl_device.cpp
index fe01c15661..e41211af94 100644
--- a/projects/clr/opencl/api/opencl/amdocl/cl_device.cpp
+++ b/projects/clr/opencl/api/opencl/amdocl/cl_device.cpp
@@ -557,6 +557,34 @@ RUNTIME_ENTRY(cl_int, clGetDeviceInfo,
 #define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E
     CASE(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, numRTQueues_);
     CASE(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, numRTCUs_);
+    case CL_DEVICE_NUM_P2P_DEVICES_AMD: {
+      // Reports the element count of p2pDevices_ as a cl_uint.
+      cl_uint num_p2p_devices = static_cast<cl_uint>(as_amd(device)->p2pDevices_.size());
+      return amd::clGetInfo(num_p2p_devices, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_DEVICE_P2P_DEVICES_AMD: {
+      // Copies the contents of p2pDevices_ into param_value.
+      // NOTE(review): the byte count assumes each element of p2pDevices_ has the size of a
+      // cl_device_id -- confirm against the declaration of p2pDevices_.
+      const size_t valueSize = as_amd(device)->p2pDevices_.size() * sizeof(cl_device_id);
+      if (param_value != NULL) {
+        if (param_value_size < valueSize) {
+          return CL_INVALID_VALUE;
+        }
+        // Write only when the caller supplied a buffer: a size-only query passes
+        // param_value == NULL, and copying through it would dereference a null pointer.
+        if (valueSize != 0) {
+          memcpy(param_value, as_amd(device)->p2pDevices_.data(), valueSize);
+        }
+        // Zero whatever part of the caller's buffer lies beyond the copied bytes.
+        if (param_value_size > valueSize) {
+          ::memset(static_cast<char*>(param_value) + valueSize, '\0',
+                   param_value_size - valueSize);
+        }
+      }
+      *not_null(param_value_size_ret) = valueSize;
+      return CL_SUCCESS;
+    }
     default:
       break;
   }
diff --git a/projects/clr/opencl/api/opencl/amdocl/cl_p2p_amd.cpp b/projects/clr/opencl/api/opencl/amdocl/cl_p2p_amd.cpp
new file mode 100644
index 0000000000..3932b83c01
--- /dev/null
+++ b/projects/clr/opencl/api/opencl/amdocl/cl_p2p_amd.cpp
@@ -0,0 +1,108 @@
+//
+// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
+//
+#include "cl_common.hpp"
+#include <CL/cl_ext.h>  // NOTE(review): header name lost in extraction -- confirm
+
+#include "cl_p2p_amd.h"
+#include "platform/object.hpp"
+
+// Validates the arguments, then builds and enqueues an amd::CopyMemoryP2PCommand that copies
+// cb bytes from src_buffer (starting at src_offset) to dst_buffer (starting at dst_offset).
+//
+// Returns CL_SUCCESS after the command has been enqueued, otherwise:
+//   CL_INVALID_COMMAND_QUEUE         - command_queue is invalid or asHostQueue() returned NULL
+//   CL_INVALID_MEM_OBJECT            - a mem object is invalid or asBuffer() returned NULL
+//   CL_INVALID_CONTEXT               - the queue's context differs from src_buffer's context
+//   CL_INVALID_VALUE                 - validateRegion() rejected the source or destination region
+//   CL_MEM_COPY_OVERLAP              - both handles name the same buffer and the ranges intersect
+//   CL_INVALID_EVENT_WAIT_LIST       - count/pointer mismatch, or an invalid event in the list
+//   CL_OUT_OF_HOST_MEMORY            - the command pointer is NULL after allocation
+//   CL_MEM_OBJECT_ALLOCATION_FAILURE - command->validateMemory() returned false
+RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferP2PAMD,
+              (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
+               size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list,
+               const cl_event* event_wait_list, cl_event* event)) {
+  if (!is_valid(command_queue)) {
+    return CL_INVALID_COMMAND_QUEUE;
+  }
+
+  if (!is_valid(src_buffer) || !is_valid(dst_buffer)) {
+    return CL_INVALID_MEM_OBJECT;
+  }
+  amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer();
+  amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer();
+  if (srcBuffer == NULL || dstBuffer == NULL) {
+    return CL_INVALID_MEM_OBJECT;
+  }
+
+  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
+  if (NULL == queue) {
+    return CL_INVALID_COMMAND_QUEUE;
+  }
+  amd::HostQueue& hostQueue = *queue;
+
+  // Only src_buffer's context is compared with the queue's context.
+  // NOTE(review): dst_buffer's context is not checked here -- confirm that this is intended.
+  if (hostQueue.context() != srcBuffer->getContext()) {
+    return CL_INVALID_CONTEXT;
+  }
+
+  amd::Coord3D srcOffset(src_offset, 0, 0);
+  amd::Coord3D dstOffset(dst_offset, 0, 0);
+  amd::Coord3D size(cb, 1, 1);
+
+  if (!srcBuffer->validateRegion(srcOffset, size) || !dstBuffer->validateRegion(dstOffset, size)) {
+    return CL_INVALID_VALUE;
+  }
+
+  // Within a single buffer, reject the copy when [src_offset, src_offset + cb) and
+  // [dst_offset, dst_offset + cb) intersect.
+  if (srcBuffer == dstBuffer && ((src_offset <= dst_offset && dst_offset < src_offset + cb) ||
+                                 (dst_offset <= src_offset && src_offset < dst_offset + cb))) {
+    return CL_MEM_COPY_OVERLAP;
+  }
+
+  // The wait-list count and pointer must be both empty or both set.
+  if ((num_events_in_wait_list == 0 && event_wait_list != NULL) ||
+      (num_events_in_wait_list != 0 && event_wait_list == NULL)) {
+    return CL_INVALID_EVENT_WAIT_LIST;
+  }
+
+  amd::Command::EventWaitList eventWaitList;
+  for (cl_uint i = 0; i < num_events_in_wait_list; ++i) {
+    // Named waitEvent so that it does not shadow the `event` output parameter.
+    cl_event waitEvent = event_wait_list[i];
+    if (!is_valid(waitEvent)) {
+      return CL_INVALID_EVENT_WAIT_LIST;
+    }
+    amd::Event* amdEvent = as_amd(waitEvent);
+    eventWaitList.push_back(amdEvent);
+  }
+
+  amd::CopyMemoryP2PCommand* command =
+      new amd::CopyMemoryP2PCommand(hostQueue, CL_COMMAND_COPY_BUFFER, eventWaitList, *srcBuffer,
+                                    *dstBuffer, srcOffset, dstOffset, size);
+
+  if (command == NULL) {
+    return CL_OUT_OF_HOST_MEMORY;
+  }
+
+  // Make sure we have memory for the command execution
+  if (!command->validateMemory()) {
+    delete command;
+    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+  }
+
+  command->enqueue();
+
+  // Publish the command's event through *event; when the caller passed NULL for event,
+  // release the command here instead.
+  *not_null(event) = as_cl(&command->event());
+  if (event == NULL) {
+    command->release();
+  }
+  return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
diff --git a/projects/clr/opencl/api/opencl/amdocl/cl_p2p_amd.h b/projects/clr/opencl/api/opencl/amdocl/cl_p2p_amd.h
new file mode 100644
index 0000000000..7f9fa0c58c
--- /dev/null
+++ b/projects/clr/opencl/api/opencl/amdocl/cl_p2p_amd.h
@@ -0,0 +1,20 @@
+#ifndef __CL_P2P_AMD_H
+#define __CL_P2P_AMD_H
+
+#include "CL/cl_ext.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /*__cplusplus*/
+
+/* Entry point of the cl_amd_copy_buffer_p2p extension; defined in cl_p2p_amd.cpp. */
+extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBufferP2PAMD(
+    cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
+    size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
+
+#ifdef __cplusplus
+} /*extern "C"*/
+#endif /*__cplusplus*/
+
+#endif
diff --git a/projects/clr/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h b/projects/clr/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h
index f7032f88a2..9e332016c1 100644
--- a/projects/clr/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h
+++ b/projects/clr/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h
@@ -513,4 +513,24 @@ typedef CL_API_ENTRY cl_int
                                            const cl_event * /*event_wait_list*/,
                                            cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2;
 
+/*************************
+* cl_amd_copy_buffer_p2p *
+**************************/
+#define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088
+#define CL_DEVICE_P2P_DEVICES_AMD 0x4089
+
+#define cl_amd_copy_buffer_p2p 1
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clEnqueueCopyBufferP2PAMD_fn)(cl_command_queue /*command_queue*/,
+                                             cl_mem /*src_buffer*/,
+                                             cl_mem /*dst_buffer*/,
+                                             size_t /*src_offset*/,
+                                             size_t /*dst_offset*/,
+                                             size_t /*cb*/,
+                                             cl_uint /*num_events_in_wait_list*/,
+                                             const cl_event* /*event_wait_list*/,
+                                             cl_event* /*event*/) CL_EXT_SUFFIX__VERSION_1_2;
+
+
 #endif /* CL_VERSION_1_2 */