P4 to Git Change 1421208 by gandryey@gera-w8 on 2017/06/12 13:15:22
SWDEV-124171 - adding support for p2p OCL in rocm stack
- Add the cl_amd_copy_buffer_p2p extension for P2P transfers. The extension adds a new API entry point, clEnqueueCopyBufferP2PAMD(), which transfers CL buffers between different CL contexts on different GPUs. If P2P isn't possible, a double copy is performed instead.
- Also the app can query the P2P support capabilities for the device. A list of P2P accessible devices can be returned for the current device
http://ocltc.amd.com/reviews/r/12913/
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#54 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_p2p_amd.cpp#1 add
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_p2p_amd.h#1 add
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#287 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#141 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#54 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#84 edit
[ROCm/clr commit: 628503e964]
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
#include "cl_thread_trace_amd.h"
|
||||
#include "cl_debugger_amd.h"
|
||||
#include "cl_lqdflash_amd.h"
|
||||
#include "cl_p2p_amd.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glext.h>
|
||||
@@ -503,6 +504,9 @@ CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddress(const char* func_na
|
||||
#if cl_amd_liquid_flash
|
||||
CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReadSsgFileAMD);
|
||||
CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSsgFileAMD);
|
||||
#endif // cl_amd_liquid_flash
|
||||
#if cl_amd_copy_buffer_p2p
|
||||
CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueCopyBufferP2PAMD);
|
||||
#endif // cl_amd_copy_buffer_p2p
|
||||
break;
|
||||
case 'G':
|
||||
|
||||
@@ -557,6 +557,24 @@ RUNTIME_ENTRY(cl_int, clGetDeviceInfo,
|
||||
#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E
|
||||
CASE(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, numRTQueues_);
|
||||
CASE(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, numRTCUs_);
|
||||
case CL_DEVICE_NUM_P2P_DEVICES_AMD: {
  // Report how many entries the device's p2pDevices_ list currently holds.
  cl_uint p2pDeviceCount = static_cast<cl_uint>(as_amd(device)->p2pDevices_.size());
  return amd::clGetInfo(p2pDeviceCount, param_value_size, param_value, param_value_size_ret);
}
|
||||
case CL_DEVICE_P2P_DEVICES_AMD: {
  // Copies the contents of the device's p2pDevices_ list into the caller's buffer.
  //
  // A NULL param_value is a pure size query: nothing is copied and only
  // param_value_size_ret is written. (Previously the memcpy ran unconditionally
  // and would write through a NULL param_value.)
  const size_t valueSize = as_amd(device)->p2pDevices_.size() * sizeof(cl_device_id);
  if (param_value != NULL) {
    if (param_value_size < valueSize) {
      // Caller's buffer cannot hold the whole list.
      return CL_INVALID_VALUE;
    }
    if (valueSize != 0) {
      // Skip the copy for an empty list so memcpy is never handed a
      // possibly-null source pointer.
      memcpy(param_value, as_amd(device)->p2pDevices_.data(), valueSize);
    }
    if (param_value_size > valueSize) {
      // Zero the unused tail of the caller's buffer.
      ::memset(static_cast<char*>(param_value) + valueSize, '\0', param_value_size - valueSize);
    }
  }
  // not_null() lets the size be stored even when the caller passed no size pointer.
  *not_null(param_value_size_ret) = valueSize;
  return CL_SUCCESS;
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
//
|
||||
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#include "cl_common.hpp"
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#include "cl_p2p_amd.h"
|
||||
#include "platform/object.hpp"
|
||||
|
||||
RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferP2PAMD,
|
||||
(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
|
||||
size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list, cl_event* event)) {
|
||||
if (!is_valid(command_queue)) {
|
||||
return CL_INVALID_COMMAND_QUEUE;
|
||||
}
|
||||
|
||||
if (!is_valid(src_buffer) || !is_valid(dst_buffer)) {
|
||||
return CL_INVALID_MEM_OBJECT;
|
||||
}
|
||||
amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer();
|
||||
amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer();
|
||||
if (srcBuffer == NULL || dstBuffer == NULL) {
|
||||
return CL_INVALID_MEM_OBJECT;
|
||||
}
|
||||
|
||||
amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
|
||||
if (NULL == queue) {
|
||||
return CL_INVALID_COMMAND_QUEUE;
|
||||
}
|
||||
amd::HostQueue& hostQueue = *queue;
|
||||
|
||||
if (hostQueue.context() != srcBuffer->getContext()) {
|
||||
return CL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
amd::Coord3D srcOffset(src_offset, 0, 0);
|
||||
amd::Coord3D dstOffset(dst_offset, 0, 0);
|
||||
amd::Coord3D size(cb, 1, 1);
|
||||
|
||||
if (!srcBuffer->validateRegion(srcOffset, size) || !dstBuffer->validateRegion(dstOffset, size)) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
if (srcBuffer == dstBuffer && ((src_offset <= dst_offset && dst_offset < src_offset + cb) ||
|
||||
(dst_offset <= src_offset && src_offset < dst_offset + cb))) {
|
||||
return CL_MEM_COPY_OVERLAP;
|
||||
}
|
||||
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
if ((num_events_in_wait_list == 0 && event_wait_list != NULL)
|
||||
|| (num_events_in_wait_list != 0 && event_wait_list == NULL)) {
|
||||
return CL_INVALID_EVENT_WAIT_LIST;
|
||||
}
|
||||
|
||||
while (num_events_in_wait_list-- > 0) {
|
||||
cl_event event = *event_wait_list++;
|
||||
amd::Event* amdEvent = as_amd(event);
|
||||
if (!is_valid(event)) {
|
||||
return CL_INVALID_EVENT_WAIT_LIST;
|
||||
}
|
||||
eventWaitList.push_back(amdEvent);
|
||||
}
|
||||
|
||||
amd::CopyMemoryP2PCommand* command =
|
||||
new amd::CopyMemoryP2PCommand(hostQueue, CL_COMMAND_COPY_BUFFER, eventWaitList, *srcBuffer,
|
||||
*dstBuffer, srcOffset, dstOffset, size);
|
||||
|
||||
if (command == NULL) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
// Make sure we have memory for the command execution
|
||||
if (!command->validateMemory()) {
|
||||
delete command;
|
||||
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
|
||||
}
|
||||
|
||||
command->enqueue();
|
||||
|
||||
*not_null(event) = as_cl(&command->event());
|
||||
if (event == NULL) {
|
||||
command->release();
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
RUNTIME_EXIT
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
#ifndef __CL_P2P_AMD_H
|
||||
#define __CL_P2P_AMD_H
|
||||
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
/* cl_amd_copy_buffer_p2p entry point: enqueues on command_queue a copy of cb bytes
 * from src_buffer (starting at src_offset) into dst_buffer (starting at dst_offset).
 * event_wait_list / num_events_in_wait_list give the events to wait on, and event,
 * when non-NULL, receives the event for the enqueued command.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferP2PAMD(cl_command_queue command_queue,
                          cl_mem src_buffer,
                          cl_mem dst_buffer,
                          size_t src_offset,
                          size_t dst_offset,
                          size_t cb,
                          cl_uint num_events_in_wait_list,
                          const cl_event* event_wait_list,
                          cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /*extern "C"*/
|
||||
#endif /*__cplusplus*/
|
||||
|
||||
#endif
|
||||
@@ -513,6 +513,26 @@ typedef CL_API_ENTRY cl_int
|
||||
const cl_event * /*event_wait_list*/,
|
||||
cl_event * /*event*/) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/*************************
|
||||
* cl_amd_copy_buffer_p2p *
|
||||
**************************/
|
||||
#define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088
|
||||
#define CL_DEVICE_P2P_DEVICES_AMD 0x4089
|
||||
|
||||
#define cl_amd_copy_buffer_p2p 1
|
||||
|
||||
/* Function-pointer type with the signature of clEnqueueCopyBufferP2PAMD. */
typedef CL_API_ENTRY cl_int(CL_API_CALL* clEnqueueCopyBufferP2PAMD_fn)(
    cl_command_queue /*command_queue*/, cl_mem /*src_buffer*/, cl_mem /*dst_buffer*/,
    size_t /*src_offset*/, size_t /*dst_offset*/, size_t /*cb*/,
    cl_uint /*num_events_in_wait_list*/, const cl_event* /*event_wait_list*/,
    cl_event* /*event*/) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
#endif /* CL_VERSION_1_2 */
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user