17943639e4
SWDEV-180872 - Runtime support changes for Cooperative Group Features - Initial implementation of the core functionality. Disabled by default. Use GPU_ENABLE_COOP_GROUPS=1 to enable the feature. - Runtime uses device queue for cooperative executions with a synchronization on the launched queue. - The current implementation is pure runtime change and it can work if only one app uses this feature. No ROCr/KFD support was added or tested - Only inline assembler was tested Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_device_runtime.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#15 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#17 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#28 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#338 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#606 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#171 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#142 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#135 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#61 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#127 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#37 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocschedcl.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#75 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#94 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#92 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#311 edit
217 строки
6.9 KiB
C++
217 строки
6.9 KiB
C++
/*
|
|
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
#include <hip/hip_runtime.h>
|
|
|
|
#include "hip_internal.hpp"
|
|
|
|
/**
 * Returns a device handle for the given device ordinal.
 *
 * @param device    [out] Receives the handle; the handle is the ordinal itself.
 * @param deviceId  Ordinal of the requested device.
 * @return hipErrorInvalidValue if `device` is null, hipErrorInvalidDevice if
 *         `deviceId` does not index an entry of g_devices, hipSuccess otherwise.
 */
hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) {
  HIP_INIT_API(device, deviceId);

  if (device == nullptr) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // Reject ordinals that do not refer to a known device. The other device
  // queries in this file index g_devices with the handle, so handing out an
  // unchecked ordinal here would only defer the failure to a later call.
  if (deviceId < 0 || static_cast<size_t>(deviceId) >= g_devices.size()) {
    HIP_RETURN(hipErrorInvalidDevice);
  }

  *device = deviceId;

  HIP_RETURN(hipSuccess);
}
|
|
|
|
/**
 * Accepts a cache-configuration request for a function.
 *
 * The request is not acted upon: neither `func` nor `cacheConfig` is consulted
 * beyond passing `cacheConfig` to HIP_INIT_API.
 *
 * @param func         Function the configuration would apply to (unused).
 * @param cacheConfig  Requested cache configuration (unused).
 * @return Always hipSuccess.
 */
hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
  HIP_INIT_API(cacheConfig);

  // Nothing to do: there is currently no way to set the cache configuration.
  HIP_RETURN(hipSuccess);
}
|
|
|
|
/**
 * Reports the global memory size of a device.
 *
 * @param bytes   [out] Receives `globalMemSize_` from the device's info record.
 * @param device  Device handle, used as an index into g_devices.
 * @return hipErrorInvalidDevice if `device` is out of range,
 *         hipErrorInvalidValue if `bytes` is null, hipSuccess otherwise.
 */
hipError_t hipDeviceTotalMem(size_t *bytes, hipDevice_t device) {
  HIP_INIT_API(bytes, device);

  // The device check comes first, so an invalid handle wins over a null
  // output pointer when both are wrong.
  const bool deviceInRange =
      (device >= 0) && (static_cast<size_t>(device) < g_devices.size());
  if (!deviceInRange) {
    HIP_RETURN(hipErrorInvalidDevice);
  }

  if (bytes == nullptr) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // Query the first device of the selected g_devices entry.
  *bytes = g_devices[device]->devices()[0]->info().globalMemSize_;

  HIP_RETURN(hipSuccess);
}
|
|
|
|
/**
 * Reports the major/minor compute capability of a device.
 *
 * Both numbers are derived from the device's `gfxipVersion_`: the major part
 * is the value divided by 100, the minor part is the remainder.
 *
 * @param major   [out] Receives gfxipVersion_ / 100.
 * @param minor   [out] Receives gfxipVersion_ % 100.
 * @param device  Device handle, used as an index into g_devices.
 * @return hipErrorInvalidDevice if `device` is out of range,
 *         hipErrorInvalidValue if either output pointer is null,
 *         hipSuccess otherwise.
 */
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
  HIP_INIT_API(major, minor, device);

  const bool deviceInRange =
      (device >= 0) && (static_cast<size_t>(device) < g_devices.size());
  if (!deviceInRange) {
    HIP_RETURN(hipErrorInvalidDevice);
  }

  if (!(major != nullptr && minor != nullptr)) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // Query the first device of the selected g_devices entry.
  const auto& devInfo = g_devices[device]->devices()[0]->info();
  *major = devInfo.gfxipVersion_ / 100;
  *minor = devInfo.gfxipVersion_ % 100;

  HIP_RETURN(hipSuccess);
}
|
|
|
|
/**
 * Public entry point for querying the number of devices.
 *
 * Delegates to ihipDeviceGetCount and returns its status through HIP_RETURN.
 *
 * @param count  [out] Receives the device count.
 * @return The status produced by ihipDeviceGetCount.
 */
hipError_t hipDeviceGetCount(int* count) {
  HIP_INIT_API(count);

  const hipError_t countStatus = ihipDeviceGetCount(count);
  HIP_RETURN(countStatus);
}
|
|
|
|
/**
 * Internal helper that reports how many entries g_devices holds.
 *
 * Uses plain `return` rather than HIP_INIT_API / HIP_RETURN.
 *
 * @param count  [out] Receives the number of entries in g_devices.
 * @return hipErrorInvalidValue if `count` is null, hipSuccess otherwise.
 */
hipError_t ihipDeviceGetCount(int* count) {
  if (count != nullptr) {
    // Every entry of g_devices counts as one available device.
    *count = static_cast<int>(g_devices.size());
    return hipSuccess;
  }

  return hipErrorInvalidValue;
}
|
|
|
|
/**
 * Copies the board name of a device into a caller-supplied buffer.
 *
 * The name is never truncated: if the buffer cannot hold the whole name plus
 * its terminating zero byte, the call fails and the buffer is left untouched.
 *
 * @param name    [out] Destination buffer.
 * @param len     Size of `name` in bytes; must be positive.
 * @param device  Device handle, used as an index into g_devices.
 * @return hipErrorInvalidDevice if `device` is out of range,
 *         hipErrorInvalidValue if `name` is null, `len` is not positive, or
 *         the buffer is too small, hipSuccess otherwise.
 */
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) {
  // `name` is passed as void* here, as in the original call.
  HIP_INIT_API(static_cast<void*>(name), len, device);

  const bool deviceInRange =
      (device >= 0) && (static_cast<size_t>(device) < g_devices.size());
  if (!deviceInRange) {
    HIP_RETURN(hipErrorInvalidDevice);
  }

  if (name == nullptr || len <= 0) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // Query the first device of the selected g_devices entry.
  const auto& devInfo = g_devices[device]->devices()[0]->info();
  const auto nameLen = ::strlen(devInfo.boardName_);

  // Largest name length that still leaves room for the trailing zero byte.
  const auto maxNameLen = static_cast<cl_uint>(len - 1);
  if (nameLen > maxNameLen) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // nameLen + 1 bytes: the characters of the name plus its terminator.
  ::strncpy(name, devInfo.boardName_, nameLen + 1);

  HIP_RETURN(hipSuccess);
}
|
|
|
|
/**
 * Fills a hipDeviceProp_t structure describing a device.
 *
 * The structure is built in a zero-initialized local and copied to `*props`
 * only at the end, so fields not assigned below are left at zero.
 *
 * @param props   [out] Receives the populated property structure.
 * @param device  Device handle, used as an index into g_devices.
 * @return hipErrorInvalidValue if `props` is null, hipErrorInvalidDevice if
 *         `device` is out of range, hipSuccess otherwise.
 */
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) {
  HIP_INIT_API(props, device);

  if (props == nullptr) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  // The unsigned conversion makes negative handles compare as out of range.
  if (unsigned(device) >= g_devices.size()) {
    HIP_RETURN(hipErrorInvalidDevice);
  }

  // Query the first device of the selected g_devices entry.
  const auto& info = g_devices[device]->devices()[0]->info();

  hipDeviceProp_t result = {0};

  // --- Identification ---
  // Copies at most 128 bytes of the board name.
  ::strncpy(result.name, info.boardName_, 128);
  result.gcnArch = info.gfxipVersion_;
  result.major = info.gfxipVersion_ / 100;
  result.minor = info.gfxipVersion_ % 100;

  // --- Memory ---
  result.totalGlobalMem = info.globalMemSize_;
  result.totalConstMem = info.maxConstantBufferSize_;
  result.sharedMemPerBlock = info.localMemSizePerCU_;
  result.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
  result.l2CacheSize = info.l2CacheSize_;
  // Bus width is computed as 32 per global memory channel.
  result.memoryBusWidth = info.globalMemChannels_ * 32;
  result.canMapHostMemory = 1;

  // --- Execution resources ---
  // NOTE(review): regsPerBlock is filled from the SGPR count — confirm this is
  // the intended register figure.
  result.regsPerBlock = info.availableSGPRs_;
  result.warpSize = info.wavefrontWidth_;
  result.maxThreadsPerBlock = info.maxWorkGroupSize_;
  result.maxThreadsDim[0] = info.maxWorkItemSizes_[0];
  result.maxThreadsDim[1] = info.maxWorkItemSizes_[1];
  result.maxThreadsDim[2] = info.maxWorkItemSizes_[2];
  // Grid dimensions are reported as INT32_MAX rather than queried.
  result.maxGridSize[0] = INT32_MAX;
  result.maxGridSize[1] = INT32_MAX;
  result.maxGridSize[2] = INT32_MAX;
  result.multiProcessorCount = info.maxComputeUnits_;
  result.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_;
  result.computeMode = 0;
  result.concurrentKernels = 1;

  // --- Clocks ---
  // Engine and memory clocks are scaled by 1000 from the info values
  // (presumably a unit conversion — TODO confirm the source units).
  result.clockRate = info.maxEngineClockFrequency_ * 1000;
  result.memoryClockRate = info.maxMemoryClockFrequency_ * 1000;
  result.clockInstructionRate = info.timeStampFrequency_;

  // --- Architecture feature flags (hard-coded, not queried) ---
  result.arch.hasGlobalInt32Atomics = 1;
  result.arch.hasGlobalFloatAtomicExch = 1;
  result.arch.hasSharedInt32Atomics = 1;
  result.arch.hasSharedFloatAtomicExch = 1;
  result.arch.hasFloatAtomicAdd = 0;
  result.arch.hasGlobalInt64Atomics = 1;
  result.arch.hasSharedInt64Atomics = 1;
  result.arch.hasDoubles = 1;
  result.arch.hasWarpVote = 0;
  result.arch.hasWarpBallot = 0;
  result.arch.hasWarpShuffle = 0;
  result.arch.hasFunnelShift = 0;
  result.arch.hasThreadFenceSystem = 1;
  result.arch.hasSyncThreadsExt = 0;
  result.arch.hasSurfaceFuncs = 0;
  result.arch.has3dGrid = 1;
  result.arch.hasDynamicParallelism = 0;

  // --- PCI topology ---
  // NOTE(review): pciDomainID is taken from the PCIe *function* field —
  // confirm whether a real domain value is available.
  result.pciDomainID = info.deviceTopology_.pcie.function;
  result.pciBusID = info.deviceTopology_.pcie.bus;
  result.pciDeviceID = info.deviceTopology_.pcie.device;
  // isMultiGpuBoard is not populated and stays at its zero-initialized value.

  // --- Cooperative groups ---
  result.cooperativeLaunch = info.cooperativeGroups_;
  result.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;

  *props = result;
  HIP_RETURN(hipSuccess);
}
|
|
|
|
/**
 * Placeholder for retrieving the hc::accelerator of a device.
 *
 * Not implemented: the assertion fires when assertions are enabled; otherwise
 * the call returns hipErrorUnknown and `*acc` is not written.
 *
 * @param deviceId  Device ordinal (unused).
 * @param acc       [out] Would receive the accelerator (unused).
 * @return hipErrorUnknown.
 */
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) {
  HIP_INIT_API(deviceId, acc);

  // Fail loudly where assertions are enabled.
  assert(0 && "Unimplemented");

  HIP_RETURN(hipErrorUnknown);
}
|
|
|
|
/**
 * Placeholder for retrieving the hc::accelerator_view of a stream.
 *
 * Not implemented: the assertion fires when assertions are enabled; otherwise
 * the call returns hipErrorUnknown and `*av` is not written.
 *
 * @param stream  Stream to query (unused).
 * @param av      [out] Would receive the accelerator view (unused).
 * @return hipErrorUnknown.
 */
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) {
  HIP_INIT_API(stream, av);

  // Fail loudly where assertions are enabled.
  assert(0 && "Unimplemented");

  HIP_RETURN(hipErrorUnknown);
}
|