Merge HIP/VDI branch 'amd-staging' into lmoriche/amd-master-next

Change-Id: Iabaab4e72815ba483a1330ec6a1130f2b86676f0
This commit is contained in:
Laurent Morichetti
2020-01-29 11:27:07 -08:00
25 changed files with 8036 additions and 0 deletions
+34
View File
@@ -0,0 +1,34 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "vdi_common.hpp"
#include <icd/loader/icd_dispatch.h>
KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0};
amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_};
RUNTIME_ENTRY(cl_int, clGetDeviceIDs,
(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
cl_device_id* devices, cl_uint* num_devices)) {
return CL_SUCCESS;
}
RUNTIME_EXIT
+34
View File
@@ -0,0 +1,34 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "platform/activity.hpp"
extern "C" void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg) {
activity_prof::CallbacksTable::init(reinterpret_cast<activity_prof::id_callback_fun_t>(id_callback),
reinterpret_cast<activity_prof::callback_fun_t>(op_callback),
arg);
}
extern "C" bool hipEnableActivityCallback(unsigned op, bool enable) {
return activity_prof::CallbacksTable::SetEnabled(op, enable);
}
extern "C" const char* hipGetCmdName(unsigned op) {
return getOclCommandKindString(static_cast<uint32_t>(op));
}
+382
View File
@@ -0,0 +1,382 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
#include "platform/runtime.hpp"
#include "utils/flags.hpp"
#include "utils/versions.hpp"
std::vector<amd::Context*> g_devices;
namespace hip {
thread_local amd::Context* g_context = nullptr;
thread_local std::stack<amd::Context*> g_ctxtStack;
thread_local hipError_t g_lastError = hipSuccess;
std::once_flag g_ihipInitialized;
amd::Context* host_context = nullptr;
std::map<amd::Context*, amd::HostQueue*> g_nullStreams;
void init() {
if (!amd::Runtime::initialized()) {
amd::IS_HIP = true;
GPU_NUM_MEM_DEPENDENCY = 0;
amd::Runtime::init();
}
const std::vector<amd::Device*>& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false);
for (unsigned int i=0; i<devices.size(); i++) {
const std::vector<amd::Device*> device(1, devices[i]);
amd::Context* context = new amd::Context(device, amd::Context::Info());
if (!context) return;
if (context && CL_SUCCESS != context->create(nullptr)) {
context->release();
} else {
g_devices.push_back(context);
}
}
host_context = new amd::Context(devices, amd::Context::Info());
if (!host_context) return;
if (host_context && CL_SUCCESS != host_context->create(nullptr)) {
host_context->release();
}
PlatformState::instance().init();
}
amd::Context* getCurrentContext() {
return g_context;
}
void setCurrentContext(unsigned int index) {
assert(index<g_devices.size());
g_context = g_devices[index];
}
amd::HostQueue* getQueue(hipStream_t stream) {
if (stream == nullptr) {
syncStreams();
return getNullStream();
} else {
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
if ((s->flags & hipStreamNonBlocking) == 0) {
getNullStream()->finish();
}
return s->asHostQueue();
}
}
amd::HostQueue* getNullStream(amd::Context& context) {
auto stream = g_nullStreams.find(&context);
if (stream == g_nullStreams.end()) {
amd::Device* device = context.devices()[0];
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
amd::HostQueue* queue = new amd::HostQueue(context, *device, properties,
amd::CommandQueue::RealTimeDisabled,
amd::CommandQueue::Priority::Normal);
g_nullStreams[&context] = queue;
return queue;
}
return stream->second;
}
amd::HostQueue* getNullStream() {
amd::Context* context = getCurrentContext();
return context ? getNullStream(*context) : nullptr;
}
};
using namespace hip;
hipError_t hipInit(unsigned int flags) {
HIP_INIT_API(hipInit, flags);
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) {
HIP_INIT_API(hipCtxCreate, ctx, flags, device);
if (static_cast<size_t>(device) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidValue);
}
*ctx = reinterpret_cast<hipCtx_t>(g_devices[device]);
// Increment ref count for device primary context
g_devices[device]->retain();
g_ctxtStack.push(g_devices[device]);
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
HIP_INIT_API(hipCtxSetCurrent, ctx);
if (ctx == nullptr) {
if(!g_ctxtStack.empty()) {
g_ctxtStack.pop();
}
} else {
hip::g_context = reinterpret_cast<amd::Context*>(as_amd(ctx));
if(!g_ctxtStack.empty()) {
g_ctxtStack.pop();
}
g_ctxtStack.push(hip::getCurrentContext());
}
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxGetCurrent, ctx);
*ctx = reinterpret_cast<hipCtx_t>(hip::getCurrentContext());
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig);
*pConfig = hipSharedMemBankSizeFourByte;
HIP_RETURN(hipSuccess);
}
hipError_t hipRuntimeGetVersion(int *runtimeVersion) {
HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion);
if (!runtimeVersion) {
HIP_RETURN(hipErrorInvalidValue);
}
*runtimeVersion = AMD_PLATFORM_BUILD_NUMBER;
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxDestroy(hipCtx_t ctx) {
HIP_INIT_API(hipCtxDestroy, ctx);
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
if (amdContext == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
// Release last tracked command
hip::getNullStream()->setLastQueuedCommand(nullptr);
// Need to remove the ctx of calling thread if its the top one
if (!g_ctxtStack.empty() && g_ctxtStack.top() == amdContext) {
g_ctxtStack.pop();
}
// Remove context from global context list
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == amdContext) {
// Decrement ref count for device primary context
amdContext->release();
}
}
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxPopCurrent, ctx);
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
if (amdContext == nullptr) {
HIP_RETURN(hipErrorInvalidContext);
}
if (!g_ctxtStack.empty()) {
amdContext = g_ctxtStack.top();
g_ctxtStack.pop();
} else {
HIP_RETURN(hipErrorInvalidContext);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
HIP_INIT_API(hipCtxPushCurrent, ctx);
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
if (amdContext == nullptr) {
HIP_RETURN(hipErrorInvalidContext);
}
hip::g_context = amdContext;
g_ctxtStack.push(hip::getCurrentContext());
HIP_RETURN(hipSuccess);
}
hipError_t hipDriverGetVersion(int* driverVersion) {
HIP_INIT_API(hipDriverGetVersion, driverVersion);
auto* deviceHandle = g_devices[0]->devices()[0];
const auto& info = deviceHandle->info();
if (driverVersion) {
*driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 +
AMD_PLATFORM_REVISION_NUMBER;
} else {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxGetDevice(hipDevice_t* device) {
HIP_INIT_API(hipCtxGetDevice, device);
if (device != nullptr) {
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == hip::getCurrentContext()) {
*device = static_cast<hipDevice_t>(i);
HIP_RETURN(hipSuccess);
}
}
} else {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipErrorInvalidContext);
}
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
HIP_INIT_API(hipCtxGetApiVersion, apiVersion);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) {
HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
HIP_INIT_API(hipCtxSetSharedMemConfig, config);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipCtxSynchronize(void) {
HIP_INIT_API(hipCtxSynchronize, 1);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipCtxGetFlags(unsigned int* flags) {
HIP_INIT_API(hipCtxGetFlags, flags);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) {
HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active);
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
if (flags != nullptr) {
*flags = 0;
}
if (active != nullptr) {
*active = (g_devices[dev] == hip::getCurrentContext())? 1 : 0;
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
HIP_INIT_API(hipDevicePrimaryCtxRelease, dev);
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev);
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
if (pctx == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
*pctx = reinterpret_cast<hipCtx_t>(g_devices[dev]);
HIP_RETURN(hipSuccess);
}
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
HIP_INIT_API(hipDevicePrimaryCtxReset, dev);
HIP_RETURN(hipSuccess);
}
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags);
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
} else {
HIP_RETURN(hipErrorContextAlreadyInUse);
}
}
+234
View File
@@ -0,0 +1,234 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) {
HIP_INIT_API(hipDeviceGet, device, deviceId);
if (device != nullptr) {
*device = deviceId;
} else {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
};
hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig);
// No way to set cache config yet.
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) {
HIP_INIT_API(hipDeviceTotalMem, bytes, device);
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
if (bytes == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
auto* deviceHandle = g_devices[device]->devices()[0];
const auto& info = deviceHandle->info();
*bytes = info.globalMemSize_;
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
HIP_INIT_API(hipDeviceComputeCapability, major, minor, device);
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
if (major == nullptr || minor == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
auto* deviceHandle = g_devices[device]->devices()[0];
const auto& info = deviceHandle->info();
*major = info.gfxipVersion_ / 100;
*minor = info.gfxipVersion_ % 100;
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetCount(int* count) {
HIP_INIT_API(NONE, count);
HIP_RETURN(ihipDeviceGetCount(count));
}
hipError_t ihipDeviceGetCount(int* count) {
if (count == nullptr) {
return hipErrorInvalidValue;
}
// Get all available devices
*count = g_devices.size();
if (*count < 1) {
return hipErrorNoDevice;
}
return hipSuccess;
}
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) {
HIP_INIT_API(hipDeviceGetName, (void*)name, len, device);
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
if (name == nullptr || len <= 0) {
HIP_RETURN(hipErrorInvalidValue);
}
auto* deviceHandle = g_devices[device]->devices()[0];
const auto& info = deviceHandle->info();
const auto nameLen = ::strlen(info.boardName_);
// Make sure that the size of `dest` is big enough to hold `src` including
// trailing zero byte
if (nameLen > (cl_uint)(len - 1)) {
HIP_RETURN(hipErrorInvalidValue);
}
::strncpy(name, info.boardName_, (nameLen + 1));
HIP_RETURN(hipSuccess);
}
hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) {
HIP_INIT_API(hipGetDeviceProperties, props, device);
if (props == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
if (unsigned(device) >= g_devices.size()) {
HIP_RETURN(hipErrorInvalidDevice);
}
auto* deviceHandle = g_devices[device]->devices()[0];
hipDeviceProp_t deviceProps = {0};
const auto& info = deviceHandle->info();
::strncpy(deviceProps.name, info.boardName_, 128);
deviceProps.totalGlobalMem = info.globalMemSize_;
deviceProps.sharedMemPerBlock = info.localMemSizePerCU_;
deviceProps.regsPerBlock = info.availableSGPRs_;
deviceProps.warpSize = info.wavefrontWidth_;
deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_;
deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0];
deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1];
deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2];
deviceProps.maxGridSize[0] = INT32_MAX;
deviceProps.maxGridSize[1] = INT32_MAX;
deviceProps.maxGridSize[2] = INT32_MAX;
deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000;
deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000;
deviceProps.memoryBusWidth = info.globalMemChannels_ * 32;
deviceProps.totalConstMem = info.maxConstantBufferSize_;
deviceProps.major = info.gfxipVersion_ / 100;
deviceProps.minor = info.gfxipVersion_ % 100;
deviceProps.multiProcessorCount = info.maxComputeUnits_;
deviceProps.l2CacheSize = info.l2CacheSize_;
deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_;
deviceProps.computeMode = 0;
deviceProps.clockInstructionRate = info.timeStampFrequency_;
deviceProps.arch.hasGlobalInt32Atomics = 1;
deviceProps.arch.hasGlobalFloatAtomicExch = 1;
deviceProps.arch.hasSharedInt32Atomics = 1;
deviceProps.arch.hasSharedFloatAtomicExch = 1;
deviceProps.arch.hasFloatAtomicAdd = 0;
deviceProps.arch.hasGlobalInt64Atomics = 1;
deviceProps.arch.hasSharedInt64Atomics = 1;
deviceProps.arch.hasDoubles = 1;
deviceProps.arch.hasWarpVote = 0;
deviceProps.arch.hasWarpBallot = 0;
deviceProps.arch.hasWarpShuffle = 0;
deviceProps.arch.hasFunnelShift = 0;
deviceProps.arch.hasThreadFenceSystem = 1;
deviceProps.arch.hasSyncThreadsExt = 0;
deviceProps.arch.hasSurfaceFuncs = 0;
deviceProps.arch.has3dGrid = 1;
deviceProps.arch.hasDynamicParallelism = 0;
deviceProps.concurrentKernels = 1;
deviceProps.pciDomainID = info.deviceTopology_.pcie.function;
deviceProps.pciBusID = info.deviceTopology_.pcie.bus;
deviceProps.pciDeviceID = info.deviceTopology_.pcie.device;
deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
//deviceProps.isMultiGpuBoard = info.;
deviceProps.canMapHostMemory = 1;
deviceProps.gcnArch = info.gfxipVersion_;
deviceProps.cooperativeLaunch = info.cooperativeGroups_;
deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;
deviceProps.maxTexture1D = info.imageMaxBufferSize_;
deviceProps.maxTexture2D[0] = info.image2DMaxWidth_;
deviceProps.maxTexture2D[1] = info.image2DMaxHeight_;
deviceProps.maxTexture3D[0] = info.image3DMaxWidth_;
deviceProps.maxTexture3D[1] = info.image3DMaxHeight_;
deviceProps.maxTexture3D[2] = info.image3DMaxDepth_;
deviceProps.hdpMemFlushCntl = nullptr;
deviceProps.hdpRegFlushCntl = nullptr;
deviceProps.memPitch = info.maxMemAllocSize_;
deviceProps.textureAlignment = std::max(info.imageBaseAddressAlignment_, info.imagePitchAlignment_);
deviceProps.kernelExecTimeoutEnabled = 0;
deviceProps.ECCEnabled = info.errorCorrectionSupport_? 1:0;
*props = deviceProps;
HIP_RETURN(hipSuccess);
}
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) {
HIP_INIT_API(NONE, deviceId, acc);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) {
HIP_INIT_API(NONE, stream, av);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
+543
View File
@@ -0,0 +1,543 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
HIP_INIT_API(hipChooseDevice, device, properties);
if (device == nullptr || properties == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
*device = 0;
cl_uint maxMatchedCount = 0;
int count = 0;
ihipDeviceGetCount(&count);
for (cl_int i = 0; i< count; ++i) {
hipDeviceProp_t currentProp = {0};
cl_uint validPropCount = 0;
cl_uint matchedCount = 0;
hipError_t err = hipGetDeviceProperties(&currentProp, i);
if (properties->major != 0) {
validPropCount++;
if(currentProp.major >= properties->major) {
matchedCount++;
}
}
if (properties->minor != 0) {
validPropCount++;
if(currentProp.minor >= properties->minor) {
matchedCount++;
}
}
if(properties->totalGlobalMem != 0) {
validPropCount++;
if(currentProp.totalGlobalMem >= properties->totalGlobalMem) {
matchedCount++;
}
}
if(properties->sharedMemPerBlock != 0) {
validPropCount++;
if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) {
matchedCount++;
}
}
if(properties->maxThreadsPerBlock != 0) {
validPropCount++;
if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) {
matchedCount++;
}
}
if(properties->totalConstMem != 0) {
validPropCount++;
if(currentProp.totalConstMem >= properties->totalConstMem ) {
matchedCount++;
}
}
if(properties->multiProcessorCount != 0) {
validPropCount++;
if(currentProp.multiProcessorCount >=
properties->multiProcessorCount ) {
matchedCount++;
}
}
if(properties->maxThreadsPerMultiProcessor != 0) {
validPropCount++;
if(currentProp.maxThreadsPerMultiProcessor >=
properties->maxThreadsPerMultiProcessor ) {
matchedCount++;
}
}
if(properties->memoryClockRate != 0) {
validPropCount++;
if(currentProp.memoryClockRate >= properties->memoryClockRate ) {
matchedCount++;
}
}
if(properties->memoryBusWidth != 0) {
validPropCount++;
if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) {
matchedCount++;
}
}
if(properties->l2CacheSize != 0) {
validPropCount++;
if(currentProp.l2CacheSize >= properties->l2CacheSize ) {
matchedCount++;
}
}
if(properties->regsPerBlock != 0) {
validPropCount++;
if(currentProp.regsPerBlock >= properties->regsPerBlock ) {
matchedCount++;
}
}
if(properties->maxSharedMemoryPerMultiProcessor != 0) {
validPropCount++;
if(currentProp.maxSharedMemoryPerMultiProcessor >=
properties->maxSharedMemoryPerMultiProcessor ) {
matchedCount++;
}
}
if(properties->warpSize != 0) {
validPropCount++;
if(currentProp.warpSize >= properties->warpSize ) {
matchedCount++;
}
}
if(validPropCount == matchedCount) {
*device = matchedCount > maxMatchedCount ? i : *device;
maxMatchedCount = std::max(matchedCount, maxMatchedCount);
}
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device);
if (pi == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
int count = 0;
ihipDeviceGetCount(&count);
if (device < 0 || device >= count) {
HIP_RETURN(hipErrorInvalidDevice);
}
//FIXME: should we cache the props, or just select from deviceHandle->info_?
hipDeviceProp_t prop = {0};
hipError_t err = hipGetDeviceProperties(&prop, device);
if (err != hipSuccess) {
HIP_RETURN(err);
}
switch (attr) {
case hipDeviceAttributeMaxThreadsPerBlock:
*pi = prop.maxThreadsPerBlock;
break;
case hipDeviceAttributeMaxBlockDimX:
*pi = prop.maxThreadsDim[0];
break;
case hipDeviceAttributeMaxBlockDimY:
*pi = prop.maxThreadsDim[1];
break;
case hipDeviceAttributeMaxBlockDimZ:
*pi = prop.maxThreadsDim[2];
break;
case hipDeviceAttributeMaxGridDimX:
*pi = prop.maxGridSize[0];
break;
case hipDeviceAttributeMaxGridDimY:
*pi = prop.maxGridSize[1];
break;
case hipDeviceAttributeMaxGridDimZ:
*pi = prop.maxGridSize[2];
break;
case hipDeviceAttributeMaxSharedMemoryPerBlock:
*pi = prop.sharedMemPerBlock;
break;
case hipDeviceAttributeTotalConstantMemory:
*pi = prop.totalConstMem;
break;
case hipDeviceAttributeWarpSize:
*pi = prop.warpSize;
break;
case hipDeviceAttributeMaxRegistersPerBlock:
*pi = prop.regsPerBlock;
break;
case hipDeviceAttributeClockRate:
*pi = prop.clockRate;
break;
case hipDeviceAttributeMemoryClockRate:
*pi = prop.memoryClockRate;
break;
case hipDeviceAttributeMemoryBusWidth:
*pi = prop.memoryBusWidth;
break;
case hipDeviceAttributeMultiprocessorCount:
*pi = prop.multiProcessorCount;
break;
case hipDeviceAttributeComputeMode:
*pi = prop.computeMode;
break;
case hipDeviceAttributeL2CacheSize:
*pi = prop.l2CacheSize;
break;
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
*pi = prop.maxThreadsPerMultiProcessor;
break;
case hipDeviceAttributeComputeCapabilityMajor:
*pi = prop.major;
break;
case hipDeviceAttributeComputeCapabilityMinor:
*pi = prop.minor;
break;
case hipDeviceAttributePciBusId:
*pi = prop.pciBusID;
break;
case hipDeviceAttributeConcurrentKernels:
*pi = prop.concurrentKernels;
break;
case hipDeviceAttributePciDeviceId:
*pi = prop.pciDeviceID;
break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
*pi = prop.maxSharedMemoryPerMultiProcessor;
break;
case hipDeviceAttributeIsMultiGpuBoard:
*pi = prop.isMultiGpuBoard;
break;
case hipDeviceAttributeCooperativeLaunch:
*pi = prop.cooperativeLaunch;
break;
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
*pi = prop.cooperativeMultiDeviceLaunch;
break;
case hipDeviceAttributeMaxTexture1DWidth:
*pi = prop.maxTexture1D;
break;
case hipDeviceAttributeMaxTexture2DWidth:
*pi = prop.maxTexture2D[0];
break;
case hipDeviceAttributeMaxTexture2DHeight:
*pi = prop.maxTexture2D[1];
break;
case hipDeviceAttributeMaxTexture3DWidth:
*pi = prop.maxTexture3D[0];
break;
case hipDeviceAttributeMaxTexture3DHeight:
*pi = prop.maxTexture3D[1];
break;
case hipDeviceAttributeMaxTexture3DDepth:
*pi = prop.maxTexture3D[2];
break;
case hipDeviceAttributeHdpMemFlushCntl:
*reinterpret_cast<unsigned int**>(pi) = prop.hdpMemFlushCntl;
break;
case hipDeviceAttributeHdpRegFlushCntl:
*reinterpret_cast<unsigned int**>(pi) = prop.hdpRegFlushCntl;
break;
case hipDeviceAttributeMaxPitch:
*pi = prop.memPitch;
break;
case hipDeviceAttributeTextureAlignment:
*pi = prop.textureAlignment;
break;
case hipDeviceAttributeKernelExecTimeout:
*pi = prop.kernelExecTimeoutEnabled;
break;
case hipDeviceAttributeCanMapHostMemory:
*pi = prop.canMapHostMemory;
break;
case hipDeviceAttributeEccEnabled:
*pi = prop.ECCEnabled;
break;
default:
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr);
if (device == nullptr || pciBusIdstr == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
int pciBusID = -1;
int pciDeviceID = -1;
int pciDomainID = -1;
if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) {
int count = 0;
ihipDeviceGetCount(&count);
for (cl_int i = 0; i < count; i++) {
int pi = 0;
hipDevice_t dev;
hipDeviceGet(&dev, i);
hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, dev);
if (pciBusID == pi) {
*device = i;
break;
}
}
}
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) {
HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig);
if(cacheConfig == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
*cacheConfig = hipFuncCache_t();
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) {
HIP_INIT_API(hipDeviceGetLimit, pValue, limit);
if(pValue == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
if(limit == hipLimitMallocHeapSize) {
hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, ihipGetDevice());
*pValue = prop.totalGlobalMem;
HIP_RETURN(hipSuccess);
} else {
HIP_RETURN(hipErrorUnsupportedLimit);
}
}
/**
hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) {
assert(0);
HIP_RETURN(hipSuccess);
}
**/
hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) {
HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device);
int count;
ihipDeviceGetCount(&count);
if (device < 0 || device > count) {
HIP_RETURN(hipErrorInvalidDevice);
}
if (pciBusId == nullptr || len < 0) {
HIP_RETURN(hipErrorInvalidValue);
}
hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, device);
snprintf (pciBusId, len, "%04x:%02x:%02x.0",
prop.pciDomainID,
prop.pciBusID,
prop.pciDeviceID);
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) {
HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig);
*pConfig = hipSharedMemBankSizeFourByte;
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceReset ( void ) {
HIP_INIT_API(hipDeviceReset);
/* FIXME */
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) {
HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig);
// No way to set cache config yet.
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) {
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
HIP_INIT_API(hipDeviceSetSharedMemConfig, config);
// No way to set cache config yet.
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceSynchronize ( void ) {
HIP_INIT_API(hipDeviceSynchronize);
hip::syncStreams();
amd::HostQueue* queue = hip::getNullStream();
if (!queue) {
HIP_RETURN(hipErrorOutOfMemory);
}
queue->finish();
HIP_RETURN(hipSuccess);
}
int ihipGetDevice() {
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == hip::getCurrentContext()) {
return i;
}
}
return -1;
}
hipError_t hipGetDevice ( int* deviceId ) {
HIP_INIT_API(hipGetDevice, deviceId);
if (deviceId != nullptr) {
int dev = ihipGetDevice();
if (dev == -1) {
HIP_RETURN(hipErrorNoDevice);
}
*deviceId = dev;
HIP_RETURN(hipSuccess);
} else {
HIP_RETURN(hipErrorInvalidValue);
}
}
hipError_t hipGetDeviceCount ( int* count ) {
HIP_INIT_API(hipGetDeviceCount, count);
HIP_RETURN(ihipDeviceGetCount(count));
}
hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) {
HIP_INIT_API(NONE, handle, event);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) {
HIP_INIT_API(NONE, event, handle);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipSetDevice ( int device ) {
HIP_INIT_API(hipSetDevice, device);
if (static_cast<unsigned int>(device) < g_devices.size()) {
hip::setCurrentContext(device);
HIP_RETURN(hipSuccess);
}
HIP_RETURN(hipErrorInvalidDevice);
}
hipError_t hipSetDeviceFlags ( unsigned int flags ) {
HIP_INIT_API(hipSetDeviceFlags, flags);
/* FIXME */
/* Not all of Ctx may be implemented */
unsigned supportedFlags =
hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax;
if (flags & (~supportedFlags)) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipSetValidDevices ( int* device_arr, int len ) {
HIP_INIT_API(NONE, device_arr, len);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) {
HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount);
amd::Device* amd_dev_obj1 = nullptr;
amd::Device* amd_dev_obj2 = nullptr;
const int numDevices = static_cast<int>(g_devices.size());
if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) {
HIP_RETURN(hipErrorInvalidDevice);
}
if ((linktype == nullptr) || (hopcount == nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
}
amd_dev_obj1 = g_devices[device1]->devices()[0];
amd_dev_obj2 = g_devices[device2]->devices()[0];
if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) {
HIP_RETURN(hipErrorInvalidHandle);
}
HIP_RETURN(hipSuccess);
}
+166
View File
@@ -0,0 +1,166 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
hipError_t hipGetLastError()
{
HIP_INIT_API(hipGetLastError);
hipError_t err = hip::g_lastError;
hip::g_lastError = hipSuccess;
return err;
}
hipError_t hipPeekAtLastError()
{
HIP_INIT_API(hipPeekAtLastError);
hipError_t err = hip::g_lastError;
HIP_RETURN(err);
}
const char *hipGetErrorName(hipError_t hip_error)
{
switch (hip_error) {
case hipSuccess:
return "hipSuccess";
case hipErrorOutOfMemory:
return "hipErrorOutOfMemory";
case hipErrorNotInitialized:
return "hipErrorNotInitialized";
case hipErrorDeinitialized:
return "hipErrorDeinitialized";
case hipErrorProfilerDisabled:
return "hipErrorProfilerDisabled";
case hipErrorProfilerNotInitialized:
return "hipErrorProfilerNotInitialized";
case hipErrorProfilerAlreadyStarted:
return "hipErrorProfilerAlreadyStarted";
case hipErrorProfilerAlreadyStopped:
return "hipErrorProfilerAlreadyStopped";
case hipErrorInvalidImage:
return "hipErrorInvalidImage";
case hipErrorInvalidContext:
return "hipErrorInvalidContext";
case hipErrorContextAlreadyCurrent:
return "hipErrorContextAlreadyCurrent";
case hipErrorMapFailed:
return "hipErrorMapFailed";
case hipErrorUnmapFailed:
return "hipErrorUnmapFailed";
case hipErrorArrayIsMapped:
return "hipErrorArrayIsMapped";
case hipErrorAlreadyMapped:
return "hipErrorAlreadyMapped";
case hipErrorNoBinaryForGpu:
return "hipErrorNoBinaryForGpu";
case hipErrorAlreadyAcquired:
return "hipErrorAlreadyAcquired";
case hipErrorNotMapped:
return "hipErrorNotMapped";
case hipErrorNotMappedAsArray:
return "hipErrorNotMappedAsArray";
case hipErrorNotMappedAsPointer:
return "hipErrorNotMappedAsPointer";
case hipErrorECCNotCorrectable:
return "hipErrorECCNotCorrectable";
case hipErrorUnsupportedLimit:
return "hipErrorUnsupportedLimit";
case hipErrorContextAlreadyInUse:
return "hipErrorContextAlreadyInUse";
case hipErrorPeerAccessUnsupported:
return "hipErrorPeerAccessUnsupported";
case hipErrorInvalidKernelFile:
return "hipErrorInvalidKernelFile";
case hipErrorInvalidGraphicsContext:
return "hipErrorInvalidGraphicsContext";
case hipErrorInvalidSource:
return "hipErrorInvalidSource";
case hipErrorFileNotFound:
return "hipErrorFileNotFound";
case hipErrorSharedObjectSymbolNotFound:
return "hipErrorSharedObjectSymbolNotFound";
case hipErrorSharedObjectInitFailed:
return "hipErrorSharedObjectInitFailed";
case hipErrorOperatingSystem:
return "hipErrorOperatingSystem";
case hipErrorSetOnActiveProcess:
return "hipErrorSetOnActiveProcess";
case hipErrorInvalidHandle:
return "hipErrorInvalidHandle";
case hipErrorNotFound:
return "hipErrorNotFound";
case hipErrorIllegalAddress:
return "hipErrorIllegalAddress";
case hipErrorMissingConfiguration:
return "hipErrorMissingConfiguration";
case hipErrorLaunchFailure:
return "hipErrorLaunchFailure";
case hipErrorPriorLaunchFailure:
return "hipErrorPriorLaunchFailure";
case hipErrorLaunchTimeOut:
return "hipErrorLaunchTimeOut";
case hipErrorLaunchOutOfResources:
return "hipErrorLaunchOutOfResources";
case hipErrorInvalidDeviceFunction:
return "hipErrorInvalidDeviceFunction";
case hipErrorInvalidConfiguration:
return "hipErrorInvalidConfiguration";
case hipErrorInvalidDevice:
return "hipErrorInvalidDevice";
case hipErrorInvalidValue:
return "hipErrorInvalidValue";
case hipErrorInvalidDevicePointer:
return "hipErrorInvalidDevicePointer";
case hipErrorInvalidMemcpyDirection:
return "hipErrorInvalidMemcpyDirection";
case hipErrorUnknown:
return "hipErrorUnknown";
case hipErrorNotReady:
return "hipErrorNotReady";
case hipErrorNoDevice:
return "hipErrorNoDevice";
case hipErrorPeerAccessAlreadyEnabled:
return "hipErrorPeerAccessAlreadyEnabled";
case hipErrorPeerAccessNotEnabled:
return "hipErrorPeerAccessNotEnabled";
case hipErrorRuntimeMemory:
return "hipErrorRuntimeMemory";
case hipErrorRuntimeOther:
return "hipErrorRuntimeOther";
case hipErrorHostMemoryAlreadyRegistered:
return "hipErrorHostMemoryAlreadyRegistered";
case hipErrorHostMemoryNotRegistered:
return "hipErrorHostMemoryNotRegistered";
case hipErrorTbd:
return "hipErrorTbd";
default:
return "hipErrorUnknown";
};
}
const char *hipGetErrorString(hipError_t hip_error)
{
return hipGetErrorName(hip_error);
}
+264
View File
@@ -0,0 +1,264 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_event.hpp"
namespace hip {
bool Event::ready() {
event_->notifyCmdQueue();
return (event_->status() == CL_COMPLETE);
}
hipError_t Event::query() {
amd::ScopedLock lock(lock_);
if (event_ == nullptr) {
return hipErrorInvalidHandle;
}
return ready() ? hipSuccess : hipErrorNotReady;
}
hipError_t Event::synchronize() {
amd::ScopedLock lock(lock_);
if (event_ == nullptr) {
return hipErrorInvalidHandle;
}
event_->awaitCompletion();
return hipSuccess;
}
hipError_t Event::elapsedTime(Event& eStop, float& ms) {
amd::ScopedLock startLock(lock_);
if (this == &eStop) {
if (event_ == nullptr) {
return hipErrorInvalidHandle;
}
if (flags & hipEventDisableTiming) {
return hipErrorInvalidHandle;
}
if (!ready()) {
return hipErrorNotReady;
}
ms = 0.f;
return hipSuccess;
}
amd::ScopedLock stopLock(eStop.lock_);
if (event_ == nullptr ||
eStop.event_ == nullptr) {
return hipErrorInvalidHandle;
}
if ((flags | eStop.flags) & hipEventDisableTiming) {
return hipErrorInvalidHandle;
}
if (!ready() || !eStop.ready()) {
return hipErrorNotReady;
}
if (event_ != eStop.event_) {
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
event_->profilingInfo().start_))/1000000.f;
} else {
ms = 0.f;
}
return hipSuccess;
}
hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) {
if (stream_ == hostQueue) return hipSuccess;
amd::ScopedLock lock(lock_);
bool retain = false;
if (event_ == nullptr) {
event_ = stream_->getLastQueuedCommand(true);
retain = true;
}
if (!event_->notifyCmdQueue()) {
return hipErrorLaunchOutOfResources;
}
amd::Command::EventWaitList eventWaitList;
eventWaitList.push_back(event_);
amd::Command* command = new amd::Marker(*hostQueue, false, eventWaitList);
if (command == NULL) {
return hipErrorOutOfMemory;
}
command->enqueue();
command->release();
if (retain) {
event_->release();
event_ = nullptr;
}
return hipSuccess;
}
void Event::addMarker(amd::HostQueue* queue, amd::Command* command) {
amd::ScopedLock lock(lock_);
stream_ = queue;
if (event_ != nullptr) {
event_->release();
}
event_ = &command->event();
}
}
hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) {
if (event == nullptr) {
return hipErrorInvalidValue;
}
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming |
hipEventReleaseToDevice | hipEventReleaseToSystem;
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
const bool illegalFlags =
(flags & ~supportedFlags) || // can't set any unsupported flags.
(flags & releaseFlags) == releaseFlags; // can't set both release flags
if (!illegalFlags) {
hip::Event* e = new hip::Event(flags);
if (e == nullptr) {
return hipErrorOutOfMemory;
}
*event = reinterpret_cast<hipEvent_t>(e);
} else {
return hipErrorInvalidValue;
}
return hipSuccess;
}
hipError_t ihipEventQuery(hipEvent_t event) {
if (event == nullptr) {
return hipErrorInvalidHandle;
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
return e->query();
}
hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) {
HIP_INIT_API(hipEventCreateWithFlags, event, flags);
HIP_RETURN(ihipEventCreateWithFlags(event, flags));
}
hipError_t hipEventCreate(hipEvent_t* event) {
HIP_INIT_API(hipEventCreate, event);
HIP_RETURN(ihipEventCreateWithFlags(event, 0));
}
hipError_t hipEventDestroy(hipEvent_t event) {
HIP_INIT_API(hipEventDestroy, event);
if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
}
delete reinterpret_cast<hip::Event*>(event);
HIP_RETURN(hipSuccess);
}
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) {
HIP_INIT_API(hipEventElapsedTime, ms, start, stop);
if (start == nullptr || stop == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
}
if (ms == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
hip::Event* eStart = reinterpret_cast<hip::Event*>(start);
hip::Event* eStop = reinterpret_cast<hip::Event*>(stop);
HIP_RETURN(eStart->elapsedTime(*eStop, *ms));
}
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
HIP_INIT_API(hipEventRecord, event, stream);
if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
amd::HostQueue* queue = hip::getQueue(stream);
amd::Command* command = queue->getLastQueuedCommand(true);
if (command == nullptr) {
command = new amd::Marker(*queue, false);
command->enqueue();
}
e->addMarker(queue, command);
HIP_RETURN(hipSuccess);
}
hipError_t hipEventSynchronize(hipEvent_t event) {
HIP_INIT_API(hipEventSynchronize, event);
if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
HIP_RETURN(e->synchronize());
}
hipError_t hipEventQuery(hipEvent_t event) {
HIP_INIT_API(hipEventQuery, event);
HIP_RETURN(ihipEventQuery(event));
}
+67
View File
@@ -0,0 +1,67 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_EVENT_H
#define HIP_EVENT_H
#include "hip_internal.hpp"
#include "thread/monitor.hpp"
namespace hip {
class TimerMarker: public amd::Marker {
public:
TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) {
profilingInfo_.enabled_ = true;
profilingInfo_.callback_ = nullptr;
profilingInfo_.start_ = profilingInfo_.end_ = 0;
}
};
class Event {
public:
Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(getNullStream()), event_(nullptr) {}
~Event() {
if (event_ != nullptr) {
event_->release();
}
}
unsigned int flags;
hipError_t query();
hipError_t synchronize();
hipError_t elapsedTime(Event& stop, float& ms);
hipError_t streamWait(amd::HostQueue* queue, uint flags);
void addMarker(amd::HostQueue* queue, amd::Command* command);
private:
amd::Monitor lock_;
amd::HostQueue* stream_;
amd::Event* event_;
bool ready();
};
};
#endif // HIP_EVEMT_H
+219
View File
@@ -0,0 +1,219 @@
EXPORTS
hipChooseDevice
hipCtxCreate
hipCtxDestroy
hipCtxDisablePeerAccess
hipCtxEnablePeerAccess
hipCtxGetApiVersion
hipCtxGetCacheConfig
hipCtxGetCurrent
hipCtxGetDevice
hipCtxGetFlags
hipCtxGetSharedMemConfig
hipCtxPopCurrent
hipCtxPushCurrent
hipCtxSetCacheConfig
hipCtxSetCurrent
hipCtxSetSharedMemConfig
hipCtxSynchronize
hipDeviceCanAccessPeer
hipDeviceComputeCapability
hipDeviceDisablePeerAccess
hipDeviceEnablePeerAccess
hipDeviceGet
hipDeviceGetAttribute
hipDeviceGetByPCIBusId
hipDeviceGetCacheConfig
hipDeviceGetStreamPriorityRange
hipDeviceGetLimit
hipDeviceGetName
hipDeviceGetPCIBusId
hipDeviceGetSharedMemConfig
hipDevicePrimaryCtxGetState
hipDevicePrimaryCtxRelease
hipDevicePrimaryCtxReset
hipDevicePrimaryCtxRetain
hipDevicePrimaryCtxSetFlags
hipDeviceReset
hipDeviceSetCacheConfig
hipDeviceSetSharedMemConfig
hipDeviceSynchronize
hipDeviceTotalMem
hipDriverGetVersion
hipEventCreate
hipEventCreateWithFlags
hipEventDestroy
hipEventElapsedTime
hipEventQuery
hipEventRecord
hipEventSynchronize
hipExtGetLinkTypeAndHopCount
hipExtLaunchMultiKernelMultiDevice
hipExtMallocWithFlags
hipExtModuleLaunchKernel
hipFree
hipFreeArray
hipFuncSetCacheConfig
hipGetDevice
hipGetDeviceCount
hipGetDeviceProperties
hipGetErrorName
hipGetErrorString
hipGetLastError
hipMemAllocHost
hipHostAlloc
hipHostFree
hipHostGetDevicePointer
hipHostGetFlags
hipHostMalloc
hipHostRegister
hipHostUnregister
hipInit
hipIpcCloseMemHandle
hipIpcGetMemHandle
hipIpcOpenMemHandle
hipMalloc
hipMalloc3D
hipMalloc3DArray
hipMallocManaged
hipArrayCreate
hipArray3DCreate
hipMallocArray
hipMemAllocPitch
hipMallocPitch
hipMemcpy
hipMemcpyWithStream
hipMemcpyParam2D
hipMemcpy2D
hipMemcpy2DAsync
hipMemcpy2DToArray
hipMemcpy3D
hipMemcpy3DAsync
hipMemcpyAsync
hipMemcpyDtoD
hipMemcpyDtoDAsync
hipMemcpyDtoH
hipMemcpyDtoHAsync
hipMemcpyFromSymbol
hipMemcpyFromSymbolAsync
hipMemcpyHtoD
hipMemcpyHtoDAsync
hipMemcpyPeer
hipMemcpyPeerAsync
hipMemcpyToArray
hipMemcpyFromArray
hipMemcpyToSymbol
hipMemcpyToSymbolAsync
hipMemGetAddressRange
hipGetSymbolAddress
hipGetSymbolSize
hipMemGetInfo
hipMemPtrGetInfo
hipMemset
hipMemsetAsync
hipMemsetD8
hipMemsetD8Async
hipMemsetD16
hipMemsetD16Async
hipMemsetD32
hipMemsetD32Async
hipMemset2D
hipMemset2DAsync
hipMemset3D
hipMemset3DAsync
hipModuleGetFunction
hipModuleGetGlobal
hipModuleGetTexRef
hipModuleLaunchKernel
hipModuleLaunchKernelExt
hipLaunchCooperativeKernel
hipLaunchCooperativeKernelMultiDevice
hipHccModuleLaunchKernel
hipModuleLoad
hipModuleLoadData
hipModuleLoadDataEx
hipModuleUnload
hipOccupancyMaxActiveBlocksPerMultiprocessor
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
hipFuncGetAttributes
hipPeekAtLastError
hipPointerGetAttributes
hipProfilerStart
hipProfilerStop
hipRuntimeGetVersion
hipSetDevice
hipSetDeviceFlags
hipStreamAddCallback
hipStreamCreate
hipStreamCreateWithFlags
hipStreamCreateWithPriority
hipStreamDestroy
hipStreamGetFlags
hipStreamQuery
hipStreamSynchronize
hipStreamWaitEvent
__hipPopCallConfiguration
__hipPushCallConfiguration
__hipRegisterFatBinary
__hipRegisterFunction
__hipRegisterVar
__hipUnregisterFatBinary
__gnu_h2f_ieee
__gnu_f2h_ieee
hipConfigureCall
hipSetupArgument
hipLaunchByPtr
hipLaunchKernel
hipRegisterApiCallback
hipRemoveApiCallback
hipRegisterActivityCallback
hipRemoveActivityCallback
hipApiName
hipKernelNameRef
hipCreateTextureObject
hipDestroyTextureObject
hipGetTextureObjectResourceDesc
hipGetTextureObjectResourceViewDesc
hipGetTextureObjectTextureDesc
hipBindTexture
hipBindTexture2D
hipBindTextureToArray
hipBindTextureToMipmappedArray
hipUnbindTexture
ihipUnbindTextureImpl
hipGetChannelDesc
hipGetTextureAlignmentOffset
hipGetTextureReference
hipTexRefSetFormat
hipTexRefSetFlags
hipTexRefSetFilterMode
hipTexRefSetAddressMode
hipTexRefSetArray
hipTexRefSetAddress
hipTexRefSetAddress2D
hipTexRefGetAddress
hipTexRefGetAddressMode
hipTexRefGetArray
hipTexRefSetArray
ihipBindTextureImpl
ihipBindTextureToArrayImpl
hipCreateChannelDesc
hipProfilerStart
hipProfilerStop
hipHccGetAccelerator
hipHccGetAcceleratorView
hipCreateSurfaceObject
hipDestroySurfaceObject
hipInitActivityCallback
hipEnableActivityCallback
hipGetCmdName
hiprtcAddNameExpression
hiprtcCompileProgram
hiprtcCreateProgram
hiprtcDestroyProgram
hiprtcGetLoweredName
hiprtcGetProgramLog
hiprtcGetProgramLogSize
hiprtcGetCode
hiprtcGetCodeSize
hiprtcGetErrorString
+239
View File
@@ -0,0 +1,239 @@
HIP_1.0 {
global:
hipChooseDevice;
hipCtxCreate;
hipCtxDestroy;
hipCtxDisablePeerAccess;
hipCtxEnablePeerAccess;
hipCtxGetApiVersion;
hipCtxGetCacheConfig;
hipCtxGetCurrent;
hipCtxGetDevice;
hipCtxGetFlags;
hipCtxGetSharedMemConfig;
hipCtxPopCurrent;
hipCtxPushCurrent;
hipCtxSetCacheConfig;
hipCtxSetCurrent;
hipCtxSetSharedMemConfig;
hipCtxSynchronize;
hipDeviceCanAccessPeer;
hipDeviceComputeCapability;
hipDeviceDisablePeerAccess;
hipDeviceEnablePeerAccess;
hipDeviceGet;
hipDeviceGetAttribute;
hipDeviceGetByPCIBusId;
hipDeviceGetCacheConfig;
hipDeviceGetStreamPriorityRange;
hipDeviceGetLimit;
hipDeviceGetName;
hipDeviceGetPCIBusId;
hipDeviceGetSharedMemConfig;
hipDevicePrimaryCtxGetState;
hipDevicePrimaryCtxRelease;
hipDevicePrimaryCtxReset;
hipDevicePrimaryCtxRetain;
hipDevicePrimaryCtxSetFlags;
hipDeviceReset;
hipDeviceSetCacheConfig;
hipDeviceSetSharedMemConfig;
hipDeviceSynchronize;
hipDeviceTotalMem;
hipDriverGetVersion;
hipEventCreate;
hipEventCreateWithFlags;
hipEventDestroy;
hipEventElapsedTime;
hipEventQuery;
hipEventRecord;
hipEventSynchronize;
hipExtGetLinkTypeAndHopCount;
hipExtLaunchMultiKernelMultiDevice;
hipExtMallocWithFlags;
hipExtModuleLaunchKernel;
hipFree;
hipFreeArray;
hipFuncSetCacheConfig;
hipGetDevice;
hipGetDeviceCount;
hipGetDeviceProperties;
hipGetErrorName;
hipGetErrorString;
hipGetLastError;
hipMemAllocHost;
hipHostAlloc;
hipHostFree;
hipHostGetDevicePointer;
hipHostGetFlags;
hipHostMalloc;
hipHostRegister;
hipHostUnregister;
hipInit;
hipIpcCloseMemHandle;
hipIpcGetMemHandle;
hipIpcOpenMemHandle;
hipMalloc;
hipMalloc3D;
hipMalloc3DArray;
hipMallocManaged;
hipArrayCreate;
hipArray3DCreate;
hipMallocArray;
hipMallocPitch;
hipMemAllocPitch;
hipMemcpy;
hipMemcpyWithStream;
hipMemcpyParam2D;
hipMemcpy2D;
hipMemcpy2DAsync;
hipMemcpy2DToArray;
hipMemcpy3D;
hipMemcpy3DAsync;
hipMemcpyAsync;
hipMemcpyDtoD;
hipMemcpyDtoDAsync;
hipMemcpyDtoH;
hipMemcpyDtoHAsync;
hipMemcpyFromSymbol;
hipMemcpyFromSymbolAsync;
hipMemcpyHtoD;
hipMemcpyHtoDAsync;
hipMemcpyPeer;
hipMemcpyPeerAsync;
hipMemcpyToArray;
hipMemcpyFromArray;
hipMemcpyToSymbol;
hipMemcpyToSymbolAsync;
hipMemGetAddressRange;
hipGetSymbolAddress;
hipGetSymbolSize;
hipMemGetInfo;
hipMemPtrGetInfo;
hipMemset;
hipMemsetAsync;
hipMemsetD8;
hipMemsetD8Async;
hipMemsetD16;
hipMemsetD16Async;
hipMemsetD32;
hipMemsetD32Async;
hipMemset2D;
hipMemset2DAsync;
hipMemset3D;
hipMemset3DAsync;
hipModuleGetFunction;
hipModuleGetGlobal;
hipModuleGetTexRef;
hipModuleLaunchKernel;
hipModuleLaunchKernelExt;
hipLaunchCooperativeKernel;
hipLaunchCooperativeKernelMultiDevice;
hipModuleLoad;
hipModuleLoadData;
hipModuleLoadDataEx;
hipModuleUnload;
hipOccupancyMaxActiveBlocksPerMultiprocessor;
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
hipFuncGetAttributes;
hipPeekAtLastError;
hipPointerGetAttributes;
hipProfilerStart;
hipProfilerStop;
hipRuntimeGetVersion;
hipSetDevice;
hipSetDeviceFlags;
hipStreamAddCallback;
hipStreamCreate;
hipStreamCreateWithFlags;
hipStreamCreateWithPriority;
hipStreamDestroy;
hipStreamGetFlags;
hipStreamQuery;
hipStreamSynchronize;
hipStreamWaitEvent;
__hipPopCallConfiguration;
__hipPushCallConfiguration;
__hipRegisterFatBinary;
__hipRegisterFunction;
__hipRegisterVar;
__hipUnregisterFatBinary;
__gnu_h2f_ieee;
__gnu_f2h_ieee;
hipConfigureCall;
hipSetupArgument;
hipLaunchByPtr;
hipLaunchKernel;
hipRegisterApiCallback;
hipRemoveApiCallback;
hipRegisterActivityCallback;
hipRemoveActivityCallback;
hipApiName;
hipKernelNameRef;
hipProfilerStart;
hipProfilerStop;
hiprtcCompileProgram;
hiprtcCreateProgram;
hiprtcDestroyProgram;
hiprtcGetLoweredName;
hiprtcGetProgramLog;
hiprtcGetProgramLogSize;
hiprtcGetCode;
hiprtcGetCodeSize;
hiprtcGetErrorString;
extern "C++" {
hip_impl::hipLaunchKernelGGLImpl*;
hip_impl::demangle*;
hipCreateTextureObject*;
hipDestroyTextureObject*;
hipGetTextureObjectResourceDesc*;
hipGetTextureObjectResourceViewDesc*;
hipGetTextureObjectTextureDesc*;
hipBindTexture*;
hipBindTexture2D*;
hipBindTextureToArray*;
hipBindTextureToMipmappedArray*;
hipUnbindTexture*;
ihipUnbindTextureImpl*;
hipGetChannelDesc*;
hipGetTextureAlignmentOffset*;
hipGetTextureReference*;
hipTexRefSetFormat*;
hipTexRefSetFlags*;
hipTexRefSetFilterMode*;
hipTexRefSetAddressMode*;
hipTexRefSetArray*;
hipTexRefSetAddress*;
hipTexRefSetAddress2D*;
hipTexRefGetAddress*;
hipTexRefGetAddressMode*;
hipTexRefGetArray*;
hipTexRefSetArray*;
hipCreateChannelDesc*;
ihipBindTextureImpl*;
ihipBindTextureToArrayImpl*;
hipHccGetAccelerator*;
hipHccGetAcceleratorView*;
hipCreateSurfaceObject*;
hipDestroySurfaceObject*;
hipHccModuleLaunchKernel*;
hipExtModuleLaunchKernel*;
hipInitActivityCallback*;
hipEnableActivityCallback*;
hipGetCmdName*;
hiprtcAddNameExpression*;
hiprtcCompileProgram*;
hiprtcCreateProgram*;
hiprtcDestroyProgram*;
hiprtcGetLoweredName*;
hiprtcGetProgramLog*;
hiprtcGetProgramLogSize*;
hiprtcGetCode*;
hiprtcGetCodeSize*;
hiprtcGetErrorString*;
hiprtcVersion*;
hiprtcGetTypeName*;
};
local:
*;
};
+75
View File
@@ -0,0 +1,75 @@
#define STR(__macro__) #__macro__
#define XSTR(__macro__) STR(__macro__)
#if defined(_DEBUG)
#define DEBUG_ONLY(x) x
#else
#define DEBUG_ONLY(x)
#endif
#define VERSION_PREFIX_MAJOR 2
#define VERSION_PREFIX_MINOR 0
#define APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 2 resource.
//
#include "winresrc.h"
#include "utils/versions.hpp"
/////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
// English (U.S.) resources
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
#ifdef _WIN32
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
#pragma code_page(1252)
#endif //_WIN32
/////////////////////////////////////////////////////////////////////////////
//
// Version
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
#else
FILEFLAGS 0x0L
#endif
FILEOS 0x40004L
FILETYPE 0x2L
FILESUBTYPE 0x0L
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "040904b0"
BEGIN
VALUE "Comments", " \0"
VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0"
VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
VALUE "InternalName", "OpenCL"
VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0"
VALUE "OriginalFilename", "OpenCL.dll"
VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0"
VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x409, 1200
END
END
#endif // English (U.S.) resources
/////////////////////////////////////////////////////////////////////////////
+58
View File
@@ -0,0 +1,58 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "hip_prof_api.h"
// HIP API callback/activity
api_callbacks_table_t callbacks_table;
extern const std::string& FunctionName(const hipFunction_t f);
const char* hipKernelNameRef(const hipFunction_t f) { return FunctionName(f).c_str(); }
hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
return callbacks_table.set_callback(id, reinterpret_cast<api_callbacks_table_t::fun_t>(fun), arg) ?
hipSuccess : hipErrorInvalidValue;
}
hipError_t hipRemoveApiCallback(uint32_t id) {
return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
}
hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) {
return callbacks_table.set_activity(id, reinterpret_cast<api_callbacks_table_t::act_t>(fun), arg) ?
hipSuccess : hipErrorInvalidValue;
}
hipError_t hipRemoveActivityCallback(uint32_t id) {
return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
}
hipError_t hipEnableTracing(bool enabled) {
callbacks_table.set_enabled(enabled);
return hipSuccess;
}
const char* hipApiName(uint32_t id) {
return hip_api_name(id);
}
+230
View File
@@ -0,0 +1,230 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_SRC_HIP_INTERNAL_H
#define HIP_SRC_HIP_INTERNAL_H
#include "vdi_common.hpp"
#include "hip_prof_api.h"
#include "trace_helper.h"
#include "utils/debug.hpp"
#include <unordered_set>
#include <thread>
#include <stack>
#include <mutex>
#include <iterator>
/*! IHIP IPC MEMORY Structure */
#define IHIP_IPC_MEM_HANDLE_SIZE 32
#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24)
typedef struct ihipIpcMemHandle_st {
char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr
size_t psize;
char reserved[IHIP_IPC_MEM_RESERVED_SIZE];
} ihipIpcMemHandle_t;
#define HIP_INIT() \
std::call_once(hip::g_ihipInitialized, hip::init); \
if (hip::g_context == nullptr && g_devices.size() > 0) { \
hip::g_context = g_devices[0]; \
}
// This macro should be called at the beginning of every HIP API.
#define HIP_INIT_API(cid, ...) \
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \
amd::Thread* thread = amd::Thread::current(); \
if (!VDI_CHECK_THREAD(thread)) { \
HIP_RETURN(hipErrorOutOfMemory); \
} \
HIP_INIT() \
HIP_CB_SPAWNER_OBJECT(cid);
#define HIP_RETURN(ret) \
hip::g_lastError = ret; \
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \
return hip::g_lastError;
namespace hc {
class accelerator;
class accelerator_view;
};
namespace hip {
extern std::once_flag g_ihipInitialized;
extern thread_local amd::Context* g_context;
extern thread_local hipError_t g_lastError;
extern amd::Context* host_context;
extern void init();
extern amd::Context* getCurrentContext();
extern void setCurrentContext(unsigned int index);
extern amd::HostQueue* getQueue(hipStream_t s);
extern amd::HostQueue* getNullStream(amd::Context&);
extern amd::HostQueue* getNullStream();
extern void syncStreams();
struct Function {
amd::Kernel* function_;
amd::Monitor lock_;
Function(amd::Kernel* f) : function_(f), lock_("function lock") {}
hipFunction_t asHipFunction() { return reinterpret_cast<hipFunction_t>(this); }
static Function* asFunction(hipFunction_t f) { return reinterpret_cast<Function*>(f); }
};
struct Stream {
amd::HostQueue* queue;
amd::Device* device;
amd::Context* context;
amd::CommandQueue::Priority priority;
unsigned int flags;
Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f);
void create();
amd::HostQueue* asHostQueue();
void destroy();
void finish();
};
};
struct ihipExec_t {
dim3 gridDim_;
dim3 blockDim_;
size_t sharedMem_;
hipStream_t hStream_;
std::vector<char> arguments_;
};
class PlatformState {
amd::Monitor lock_{"Guards global function map"};
std::unordered_map<const void*, std::vector<std::pair<hipModule_t, bool>>> modules_;
bool initialized_{false};
void digestFatBinary(const void* data, std::vector<std::pair<hipModule_t, bool>>& programs);
public:
void init();
std::vector<std::pair<hipModule_t, bool>>* addFatBinary(const void*data)
{
if (initialized_) {
digestFatBinary(data, modules_[data]);
}
return &modules_[data];
}
void removeFatBinary(std::vector<std::pair<hipModule_t, bool>>* module)
{
for (auto& mod : modules_) {
if (&mod.second == module) {
modules_.erase(&mod);
return;
}
}
}
struct RegisteredVar {
public:
RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {}
~RegisteredVar() {}
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
size_t getvarsize() const { return size_; };
size_t size_; // Size of the variable
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
amd::Memory* amd_mem_obj_;
};
struct DeviceFunction {
std::string deviceName;
std::vector< std::pair< hipModule_t, bool > >* modules;
std::vector<hipFunction_t> functions;
};
struct DeviceVar {
void* shadowVptr;
std::string hostVar;
size_t size;
std::vector< std::pair< hipModule_t, bool > >* modules;
std::vector<RegisteredVar> rvars;
bool dyn_undef;
};
private:
std::unordered_map<const void*, DeviceFunction > functions_;
std::unordered_multimap<std::string, DeviceVar > vars_;
static PlatformState* platform_;
PlatformState() {}
~PlatformState() {}
public:
static PlatformState& instance() {
return *platform_;
}
std::vector< std::pair<hipModule_t, bool> >* unregisterVar(hipModule_t hmod);
PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod);
void registerVar(const void* hostvar, const DeviceVar& var);
void registerFunction(const void* hostFunction, const DeviceFunction& func);
hipFunction_t getFunc(const void* hostFunction, int deviceId);
bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr);
bool getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod,
hipDeviceptr_t* dev_ptr, size_t* size_ptr);
bool getTexRef(const char* hostVar, textureReference** texRef);
bool getShadowVarInfo(std::string var_name, hipModule_t hmod,
void** var_addr, size_t* var_size);
void setupArgument(const void *arg, size_t size, size_t offset);
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
void popExec(ihipExec_t& exec);
};
extern std::vector<amd::Context*> g_devices;
extern hipError_t ihipDeviceGetCount(int* count);
extern int ihipGetDevice();
extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags);
extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset);
extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr,
size_t* var_size);
inline std::ostream& operator<<(std::ostream& os, const dim3& s) {
os << '{';
os << s.x;
os << ',';
os << s.y;
os << ',';
os << s.z;
os << '}';
return os;
}
#endif // HIP_SRC_HIP_INTERNAL_H
+1714
View File
File diff suppressed because it is too large Load Diff
+534
View File
@@ -0,0 +1,534 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include <libelf.h>
#include <fstream>
#include "hip_internal.hpp"
#include "platform/program.hpp"
#include "hip_event.hpp"
hipError_t ihipModuleLoadData(hipModule_t *module, const void *image);
const std::string& FunctionName(const hipFunction_t f)
{
return hip::Function::asFunction(f)->function_->name();
}
static uint64_t ElfSize(const void *emi)
{
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
uint64_t max_offset = ehdr->e_shoff;
uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum;
for (uint16_t i=0; i < ehdr->e_shnum; ++i){
uint64_t cur_offset = static_cast<uint64_t>(shdr[i].sh_offset);
if (max_offset < cur_offset) {
max_offset = cur_offset;
total_size = max_offset;
if(SHT_NOBITS != shdr[i].sh_type) {
total_size += static_cast<uint64_t>(shdr[i].sh_size);
}
}
}
return total_size;
}
hipError_t hipModuleLoad(hipModule_t* module, const char* fname)
{
HIP_INIT_API(hipModuleLoad, module, fname);
if (!fname) {
HIP_RETURN(hipErrorInvalidValue);
}
std::ifstream file(fname, std::ios::binary);
if (!file.is_open()) {
HIP_RETURN(hipErrorFileNotFound);
}
std::vector<char> tmp{std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{}};
HIP_RETURN(ihipModuleLoadData(module, tmp.data()));
}
bool ihipModuleUnregisterGlobal(hipModule_t hmod) {
std::vector< std::pair<hipModule_t, bool> >* modules =
PlatformState::instance().unregisterVar(hmod);
if (modules != nullptr) {
delete modules;
}
return true;
}
hipError_t hipModuleUnload(hipModule_t hmod)
{
HIP_INIT_API(hipModuleUnload, hmod);
if (hmod == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
if(!ihipModuleUnregisterGlobal(hmod)) {
HIP_RETURN(hipErrorInvalidSymbol);
}
program->release();
HIP_RETURN(hipSuccess);
}
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
{
HIP_INIT_API(hipModuleLoadData, module, image);
HIP_RETURN(ihipModuleLoadData(module, image));
}
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
unsigned int numOptions, hipJitOption* options,
void** optionsValues)
{
/* TODO: Pass options to Program */
HIP_INIT_API(hipModuleLoadData, module, image);
HIP_RETURN(ihipModuleLoadData(module, image));
}
extern bool __hipExtractCodeObjectFromFatBinary(const void* data,
const std::vector<const char*>& devices,
std::vector<std::pair<const void*, size_t>>& code_objs);
bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) {
std::vector<std::string> undef_vars;
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) {
return false;
}
for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) {
auto modules = new std::vector<std::pair<hipModule_t, bool> >(g_devices.size());
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
modules->at(dev) = std::make_pair(*module, true);
}
texture<float, hipTextureType1D, hipReadModeElementType>* tex_hptr
= new texture<float, hipTextureType1D, hipReadModeElementType>();
memset(tex_hptr, 0x00, sizeof(texture<float, hipTextureType1D, hipReadModeElementType>));
PlatformState::DeviceVar dvar{ reinterpret_cast<char*>(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules,
std::vector<PlatformState::RegisteredVar>{ g_devices.size()}, true };
PlatformState::instance().registerVar(it->c_str(), dvar);
}
return true;
}
bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) {
size_t var_size = 0;
hipDeviceptr_t device_ptr = nullptr;
std::vector<std::string> var_names;
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
if (!dev_program->getGlobalVarFromCodeObj(&var_names)) {
return false;
}
for (auto it = var_names.begin(); it != var_names.end(); ++it) {
auto modules = new std::vector<std::pair<hipModule_t, bool> >(g_devices.size());
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
modules->at(dev) = std::make_pair(*module, true);
}
PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules,
std::vector<PlatformState::RegisteredVar>{ g_devices.size()}, false };
PlatformState::instance().registerVar(it->c_str(), dvar);
}
return true;
}
hipError_t ihipModuleLoadData(hipModule_t *module, const void *image)
{
std::vector<std::pair<const void*, size_t>> code_objs;
if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs))
image = code_objs[0].first;
amd::Program* program = new amd::Program(*hip::getCurrentContext());
if (program == NULL) {
return hipErrorOutOfMemory;
}
program->setVarInfoCallBack(&getSvarInfo);
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) {
return hipErrorInvalidKernelFile;
}
*module = reinterpret_cast<hipModule_t>(as_cl(program));
if (!ihipModuleRegisterGlobal(program, module)) {
return hipErrorSharedObjectSymbolNotFound;
}
if (!ihipModuleRegisterUndefined(program, module)) {
return hipErrorSharedObjectSymbolNotFound;
}
if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) {
return hipErrorSharedObjectInitFailed;
}
return hipSuccess;
}
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name)
{
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
const amd::Symbol* symbol = program->findSymbol(name);
if (!symbol) {
HIP_RETURN(hipErrorNotFound);
}
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name);
if (!kernel) {
HIP_RETURN(hipErrorOutOfMemory);
}
hip::Function* f = new hip::Function(kernel);
*hfunc = f->asHipFunction();
HIP_RETURN(hipSuccess);
}
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name)
{
HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name);
/* Get address and size for the global symbol */
if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod,
dptr, bytes)) {
HIP_RETURN(hipErrorNotFound);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
{
HIP_INIT_API(hipFuncGetAttributes, attr, func);
if (!PlatformState::instance().getFuncAttr(func, attr)) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
HIP_RETURN(hipSuccess);
}
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) {
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params);
hip::Function* function = hip::Function::asFunction(f);
amd::Kernel* kernel = function->function_;
amd::ScopedLock lock(function->lock_);
hip::Event* eStart = reinterpret_cast<hip::Event*>(startEvent);
hip::Event* eStop = reinterpret_cast<hip::Event*>(stopEvent);
amd::HostQueue* queue = hip::getQueue(hStream);
const amd::Device& device = queue->vdev()->device();
if ((params & amd::NDRangeKernelCommand::CooperativeGroups) &&
!device.info().cooperativeGroups_) {
return hipErrorLaunchFailure;
}
if ((params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) &&
!device.info().cooperativeMultiDeviceGroups_) {
return hipErrorLaunchFailure;
}
if (!queue) {
return hipErrorOutOfMemory;
}
size_t globalWorkOffset[3] = {0};
size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ };
size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ };
amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
amd::Command::EventWaitList waitList;
address kernargs = nullptr;
// 'extra' is a struct that contains the following info: {
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
// HIP_LAUNCH_PARAM_END }
if (extra != nullptr) {
if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER ||
extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) {
return hipErrorNotInitialized;
}
kernargs = reinterpret_cast<address>(extra[1]);
}
const amd::KernelSignature& signature = kernel->signature();
for (size_t i = 0; i < signature.numParameters(); ++i) {
const amd::KernelParameterDescriptor& desc = signature.at(i);
if (kernelParams == nullptr) {
assert(kernargs != nullptr);
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
desc.type_ == T_POINTER/*svmBound*/);
} else {
assert(extra == nullptr);
kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/);
}
}
if(startEvent != nullptr) {
amd::Command* startCommand = new hip::TimerMarker(*queue);
startCommand->enqueue();
eStart->addMarker(queue, startCommand);
}
amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(
*queue, waitList, *kernel, ndrange, sharedMemBytes,
params, gridId, numGrids, prevGridSum, allGridSum, firstDevice);
if (!command) {
return hipErrorOutOfMemory;
}
// Capture the kernel arguments
if (CL_SUCCESS != command->captureAndValidate()) {
delete command;
return hipErrorOutOfMemory;
}
command->enqueue();
if(stopEvent != nullptr) {
eStop->addMarker(queue, command);
command->retain();
}
command->release();
return hipSuccess;
}
hipError_t hipModuleLaunchKernel(hipFunction_t f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra)
{
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra);
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr));
}
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags)
{
HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
sharedMemBytes, hStream,
kernelParams, extra, startEvent, stopEvent, flags);
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
}
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX,
uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent,
hipEvent_t stopEvent)
{
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra, startEvent, stopEvent);
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
}
hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX,
uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent,
hipEvent_t stopEvent)
{
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra, startEvent, stopEvent);
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
}
hipError_t hipLaunchCooperativeKernel(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
sharedMemBytes, hStream);
int deviceId = ihipGetDevice();
hipFunction_t func = PlatformState::instance().getFunc(f, deviceId);
if (func == nullptr) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z,
blockDim.x, blockDim.y, blockDim.z,
sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0,
amd::NDRangeKernelCommand::CooperativeGroups));
}
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags, uint32_t extFlags)
{
int numActiveGPUs = 0;
ihipDeviceGetCount(&numActiveGPUs);
if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) {
return hipErrorInvalidValue;
}
hipError_t result = hipErrorUnknown;
uint64_t allGridSize = 0;
for (int i = 0; i < numDevices; ++i) {
const hipLaunchParams& launch = launchParamsList[i];
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
}
uint64_t prevGridSize = 0;
uint32_t firstDevice = 0;
for (int i = 0; i < numDevices; ++i) {
const hipLaunchParams& launch = launchParamsList[i];
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.stream)->asHostQueue();
hipFunction_t func = nullptr;
// The order of devices in the launch may not match the order in the global array
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
// Find the matching device and request the kernel function
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
func = PlatformState::instance().getFunc(launch.func, dev);
// Save VDI index of the first device in the launch
if (i == 0) {
firstDevice = queue->vdev()->device().index();
}
break;
}
}
if (func == nullptr) {
result = hipErrorInvalidDeviceFunction;
HIP_RETURN(result);
}
result = ihipModuleLaunchKernel(func,
launch.gridDim.x * launch.blockDim.x,
launch.gridDim.y * launch.blockDim.y,
launch.gridDim.z * launch.blockDim.z,
launch.blockDim.x, launch.blockDim.y, launch.blockDim.z,
launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
if (result != hipSuccess) {
break;
}
prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
}
return result;
}
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags)
{
HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags);
return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags,
(amd::NDRangeKernelCommand::CooperativeGroups |
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups));
}
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags) {
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0);
}
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) {
HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name);
/* input args check */
if ((texRef == nullptr) || (name == nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
}
/* Get address and size for the global symbol */
if (!PlatformState::instance().getTexRef(name, texRef)) {
HIP_RETURN(hipErrorNotFound);
}
HIP_RETURN(hipSuccess);
}
+121
View File
@@ -0,0 +1,121 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx,
size_t sizeBytes) {
HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice,
size_t sizeBytes, hipStream_t stream) {
HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) {
HIP_INIT_API(hipDeviceCanAccessPeer, canAccessPeer, deviceId, peerDeviceId);
amd::Device* device = nullptr;
amd::Device* peer_device = nullptr;
if (canAccessPeer == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
/* Peer cannot be self */
if (deviceId == peerDeviceId) {
*canAccessPeer = 0;
return HIP_RETURN(hipSuccess);
}
/* Cannot exceed the max number of devices */
if (static_cast<size_t>(deviceId) >= g_devices.size()
|| static_cast<size_t>(peerDeviceId) >= g_devices.size()) {
return HIP_RETURN(hipErrorInvalidValue);
}
device = g_devices[deviceId]->devices()[0];
peer_device = g_devices[peerDeviceId]->devices()[0];
*canAccessPeer = static_cast<int>(std::find(device->p2pDevices_.begin(),
device->p2pDevices_.end(), as_cl(peer_device))
!= device->p2pDevices_.end());
return HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
HIP_RETURN(hipSuccess);
}
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
HIP_RETURN(hipSuccess);
}
hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
size_t sizeBytes) {
HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes);
HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice));
}
hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice,
size_t sizeBytes, hipStream_t stream) {
HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream);
HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
}
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
HIP_RETURN(hipSuccess);
}
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
HIP_RETURN(hipSuccess);
}
+993
View File
@@ -0,0 +1,993 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
#include "platform/program.hpp"
#include "platform/runtime.hpp"
#include <unordered_map>
#include "elfio.hpp"
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
thread_local std::stack<ihipExec_t> execStack_;
PlatformState* PlatformState::platform_ = new PlatformState();
struct __CudaFatBinaryWrapper {
unsigned int magic;
unsigned int version;
void* binary;
void* dummy1;
};
#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-"
struct __ClangOffloadBundleDesc {
uint64_t offset;
uint64_t size;
uint64_t tripleSize;
const char triple[1];
};
struct __ClangOffloadBundleHeader {
const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1];
uint64_t numBundles;
__ClangOffloadBundleDesc desc[1];
};
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
hipModule_t hmod, const char* name);
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
hipDeviceptr_t* dptr, size_t* bytes);
static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
const char* device_name) {
// Workaround for device name mismatch.
// Device name may contain feature strings delimited by '+', e.g.
// gfx900+xnack. Currently HIP-Clang does not include feature strings
// in code object target id in fat binary. Therefore drop the feature
// strings from device name before comparing it with code object target id.
std::string short_name(device_name);
auto feature_loc = short_name.find('+');
if (feature_loc != std::string::npos) {
short_name.erase(feature_loc);
}
return codeobj_target_id == short_name;
}
// Extracts code objects from fat binary in data for device names given in devices.
// Returns true if code objects are extracted successfully.
bool __hipExtractCodeObjectFromFatBinary(const void* data,
const std::vector<const char*>& devices,
std::vector<std::pair<const void*, size_t>>& code_objs)
{
std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
return false;
}
code_objs.resize(devices.size());
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
const auto* desc = &obheader->desc[0];
unsigned num_code_objs = 0;
for (uint64_t i = 0; i < obheader->numBundles; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
std::string triple(desc->triple, sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1);
if (triple.compare(HIP_AMDGCN_AMDHSA_TRIPLE))
continue;
std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE),
desc->tripleSize - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE));
const void *image = reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(obheader) + desc->offset);
size_t size = desc->size;
for (size_t dev = 0; dev < devices.size(); ++dev) {
const char* name = devices[dev];
if (!isCompatibleCodeObject(target, name)) {
continue;
}
code_objs[dev] = std::make_pair(image, size);
num_code_objs++;
}
}
if (num_code_objs == devices.size())
return true;
else
return false;
}
extern "C" std::vector<std::pair<hipModule_t, bool>>* __hipRegisterFatBinary(const void* data)
{
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
return nullptr;
}
return PlatformState::instance().addFatBinary(fbwrapper->binary);
}
void PlatformState::digestFatBinary(const void* data, std::vector<std::pair<hipModule_t, bool>>& programs)
{
if (programs.size() > 0) {
return;
}
std::vector<std::pair<const void*, size_t>> code_objs;
std::vector<const char*> devices;
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
amd::Context* ctx = g_devices[dev];
devices.push_back(ctx->devices()[0]->info().name_);
}
if (!__hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) {
return;
}
programs.resize(g_devices.size());
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
amd::Context* ctx = g_devices[dev];
amd::Program* program = new amd::Program(*ctx);
if (program == nullptr) {
return;
}
if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) {
programs.at(dev) = std::make_pair(reinterpret_cast<hipModule_t>(as_cl(program)) , false);
}
}
}
void PlatformState::init()
{
amd::ScopedLock lock(lock_);
if(initialized_ || g_devices.empty()) {
return;
}
initialized_ = true;
for (auto& it : modules_) {
digestFatBinary(it.first, it.second);
}
for (auto& it : functions_) {
it.second.functions.resize(g_devices.size());
}
for (auto& it : vars_) {
it.second.rvars.resize(g_devices.size());
}
}
std::vector< std::pair<hipModule_t, bool> >* PlatformState::unregisterVar(hipModule_t hmod) {
amd::ScopedLock lock(lock_);
std::vector< std::pair<hipModule_t, bool> >* rmodules = nullptr;
auto it = vars_.begin();
while (it != vars_.end()) {
DeviceVar& dvar = it->second;
if ((*dvar.modules)[0].first == hmod) {
rmodules = dvar.modules;
if (dvar.dyn_undef) {
texture<float, hipTextureType1D, hipReadModeElementType>* tex_hptr
= reinterpret_cast<texture<float, hipTextureType1D, hipReadModeElementType> *>(dvar.shadowVptr);
delete tex_hptr;
}
vars_.erase(it++);
} else {
++it;
}
}
return rmodules;
}
PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
DeviceVar* dvar = nullptr;
if (hmod != nullptr) {
// If module is provided, then get the var only from that module
auto var_range = vars_.equal_range(hostVar);
for (auto it = var_range.first; it != var_range.second; ++it) {
if ((*it->second.modules)[deviceId].first == hmod) {
dvar = &(it->second);
break;
}
}
} else {
// If var count is < 2, return the var
if (vars_.count(hostVar) < 2) {
auto it = vars_.find(hostVar);
dvar = ((it == vars_.end()) ? nullptr : &(it->second));
} else {
// If var count is > 2, return the original var,
// if original var count != 1, return vars_.end()/Invalid
size_t orig_global_count = 0;
auto var_range = vars_.equal_range(hostVar);
for (auto it = var_range.first; it != var_range.second; ++it) {
// when dyn_undef is set, it is a shadow var
if (it->second.dyn_undef == false) {
++orig_global_count;
dvar = &(it->second);
}
}
dvar = ((orig_global_count == 1) ? dvar : nullptr);
}
}
return dvar;
}
void PlatformState::registerVar(const void* hostvar,
const DeviceVar& rvar) {
amd::ScopedLock lock(lock_);
vars_.insert(std::make_pair(std::string(reinterpret_cast<const char*>(hostvar)), rvar));
}
void PlatformState::registerFunction(const void* hostFunction,
const DeviceFunction& func) {
amd::ScopedLock lock(lock_);
functions_.insert(std::make_pair(hostFunction, func));
}
bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) {
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
const auto it = dev_program->kernels().find(std::string(func_name));
if (it == dev_program->kernels().cend()) {
return false;
}
const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo();
func_attr->localSizeBytes = wginfo->localMemSize_;
func_attr->sharedSizeBytes = wginfo->size_;
func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_;
func_attr->numRegs = wginfo->usedVGPRs_;
return true;
}
bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod,
void** var_addr, size_t* var_size) {
DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod);
if (dvar != nullptr) {
*var_addr = dvar->shadowVptr;
*var_size = dvar->size;
return true;
} else {
return false;
}
}
bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr,
size_t* var_size) {
return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast<hipModule_t>(program),
var_addr, var_size);
}
hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) {
amd::ScopedLock lock(lock_);
const auto it = functions_.find(hostFunction);
if (it != functions_.cend()) {
PlatformState::DeviceFunction& devFunc = it->second;
if (devFunc.functions[deviceId] == 0) {
hipModule_t module = (*devFunc.modules)[deviceId].first;
if (!(*devFunc.modules)[deviceId].second) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
program->setVarInfoCallBack(&getSvarInfo);
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
return nullptr;
}
(*devFunc.modules)[deviceId].second = true;
}
hipFunction_t function = nullptr;
if (hipSuccess == hipModuleGetFunction(&function, module, devFunc.deviceName.c_str()) &&
function != nullptr) {
devFunc.functions[deviceId] = function;
}
else {
// tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
// " device %d\n", deviceName, deviceId);
}
}
return devFunc.functions[deviceId];
}
return nullptr;
}
bool PlatformState::getFuncAttr(const void* hostFunction,
hipFuncAttributes* func_attr) {
if (func_attr == nullptr) {
return false;
}
const auto it = functions_.find(hostFunction);
if (it == functions_.cend()) {
return false;
}
PlatformState::DeviceFunction& devFunc = it->second;
int deviceId = ihipGetDevice();
/* If module has not been initialized yet, build the kernel now*/
if (!(*devFunc.modules)[deviceId].second) {
if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) {
return false;
}
}
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*devFunc.modules)[deviceId].first));
if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) {
return false;
}
return true;
}
bool PlatformState::getTexRef(const char* hostVar, textureReference** texRef) {
amd::ScopedLock lock(lock_);
DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), nullptr);
if (dvar == nullptr) {
return false;
}
if (!dvar->dyn_undef) {
return false;
}
*texRef = reinterpret_cast<textureReference *>(dvar->shadowVptr);
return true;
}
bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod,
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
amd::ScopedLock lock(lock_);
DeviceVar* dvar = findVar(std::string(reinterpret_cast<const char*>(hostVar)), deviceId, hmod);
if (dvar != nullptr) {
if (dvar->rvars[deviceId].getdeviceptr() == nullptr) {
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
amd::Memory* amd_mem_obj = nullptr;
if (!(*dvar->modules)[deviceId].second) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*dvar->modules)[deviceId].first));
program->setVarInfoCallBack(&getSvarInfo);
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
return false;
}
(*dvar->modules)[deviceId].second = true;
}
if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first,
&amd_mem_obj, &device_ptr, &sym_size))
&& (device_ptr != nullptr)) {
dvar->rvars[deviceId].size_ = sym_size;
dvar->rvars[deviceId].devicePtr_ = device_ptr;
dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj;
amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj);
} else {
LogError("[HIP] __hipRegisterVar cannot find kernel for device \n");
}
}
*size_ptr = dvar->rvars[deviceId].getvarsize();
*dev_ptr = dvar->rvars[deviceId].getdeviceptr();
return true;
} else {
return false;
}
}
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
auto& arguments = execStack_.top().arguments_;
if (arguments.size() < offset + size) {
arguments.resize(offset + size);
}
::memcpy(&arguments[offset], arg, size);
}
void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
hipStream_t stream) {
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
}
void PlatformState::popExec(ihipExec_t& exec) {
exec = std::move(execStack_.top());
execStack_.pop();
}
extern "C" void __hipRegisterFunction(
std::vector<std::pair<hipModule_t,bool> >* modules,
const void* hostFunction,
char* deviceFunction,
const char* deviceName,
unsigned int threadLimit,
uint3* tid,
uint3* bid,
dim3* blockDim,
dim3* gridDim,
int* wSize)
{
PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector<hipFunction_t>{g_devices.size()}};
PlatformState::instance().registerFunction(hostFunction, func);
// for (size_t i = 0; i < g_devices.size(); ++i) {
// PlatformState::instance().getFunc(hostFunction, i);
// }
}
// Registers a device-side global variable.
// For each global variable in device code, there is a corresponding shadow
// global variable in host code. The shadow host variable is used to keep
// track of the value of the device side global variable between kernel
// executions.
extern "C" void __hipRegisterVar(
std::vector<std::pair<hipModule_t,bool> >* modules, // The device modules containing code object
char* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int ext, // Whether this variable is external
int size, // Size of the variable
int constant, // Whether this variable is constant
int global) // Unknown, always 0
{
PlatformState::DeviceVar dvar{var, std::string{ hostVar }, static_cast<size_t>(size), modules,
std::vector<PlatformState::RegisteredVar>{g_devices.size()}, false };
PlatformState::instance().registerVar(hostVar, dvar);
}
extern "C" void __hipUnregisterFatBinary(std::vector< std::pair<hipModule_t, bool> >* modules)
{
HIP_INIT();
std::for_each(modules->begin(), modules->end(), [](std::pair<hipModule_t, bool> module){
if (module.first != nullptr) {
as_amd(reinterpret_cast<cl_program>(module.first))->release();
}
});
if (modules->size() > 0) {
PlatformState::instance().unregisterVar((*modules)[0].first);
}
PlatformState::instance().removeFatBinary(modules);
}
extern "C" hipError_t hipConfigureCall(
dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream)
{
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t __hipPushCallConfiguration(
dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream)
{
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
dim3 *blockDim,
size_t *sharedMem,
hipStream_t *stream) {
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
ihipExec_t exec;
PlatformState::instance().popExec(exec);
*gridDim = exec.gridDim_;
*blockDim = exec.blockDim_;
*sharedMem = exec.sharedMem_;
*stream = exec.hStream_;
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t hipSetupArgument(
const void *arg,
size_t size,
size_t offset)
{
HIP_INIT_API(NONE, arg, size, offset);
PlatformState::instance().setupArgument(arg, size, offset);
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
{
HIP_INIT_API(NONE, hostFunction);
int deviceId = ihipGetDevice();
if (deviceId == -1) {
HIP_RETURN(hipErrorNoDevice);
}
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
if (func == nullptr) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
ihipExec_t exec;
PlatformState::instance().popExec(exec);
size_t size = exec.arguments_.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
HIP_RETURN(hipModuleLaunchKernel(func,
exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
exec.sharedMem_, exec.hStream_, nullptr, extra));
}
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream)
{
HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes,
stream);
int deviceId = ihipGetDevice();
if (deviceId == -1) {
HIP_RETURN(hipErrorNoDevice);
}
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
if (func == nullptr) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z,
blockDim.x, blockDim.y, blockDim.z,
sharedMemBytes, stream, args, nullptr));
}
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
size_t size = 0;
if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr,
devPtr, &size)) {
HIP_RETURN(hipErrorInvalidSymbol);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) {
hipDeviceptr_t devPtr = nullptr;
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr,
&devPtr, sizePtr)) {
HIP_RETURN(hipErrorInvalidSymbol);
}
HIP_RETURN(hipSuccess);
}
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
hipDeviceptr_t* dptr, size_t* bytes)
{
HIP_INIT();
amd::Program* program = nullptr;
device::Program* dev_program = nullptr;
/* Get Device Program pointer*/
program = as_amd(reinterpret_cast<cl_program>(hmod));
dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
if (dev_program == nullptr) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
/* Find the global Symbols */
if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) {
HIP_RETURN(hipErrorInvalidSymbol);
}
HIP_RETURN(hipSuccess);
}
namespace hip_impl {
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
hipFunction_t f,
int blockSize,
size_t dynamicSMemSize)
{
HIP_INIT_API(NONE, f, blockSize, dynamicSMemSize);
int deviceId = ihipGetDevice();
// FIXME: Function may not be a device function and may have been obtaiend via
// hipModuleGetFunction and thus not in the functions_ map. Check the map
// else interpret as a hip::Function for now.
hipFunction_t func = PlatformState::instance().getFunc(f, deviceId);
if (func == nullptr) {
func = f;
}
hip::Function* function = hip::Function::asFunction(func);
if (function == nullptr) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
amd::Kernel* kernel = function->function_;
if (!kernel) {
HIP_RETURN(hipErrorOutOfMemory);
}
if (blockSize == 0) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Device* device = hip::getCurrentContext()->devices()[0];
const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel->getDeviceKernel(*device)->workGroupInfo();
// Find threads accupancy per CU => simd_per_cu * GPR usage
constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
size_t VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4);
size_t GprWaves;
if (wrkGrpInfo->usedSGPRs_ > 0) {
const size_t maxSGPRs = (device->info().gfxipVersion_ < 800) ? 512 : 800;
size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
GprWaves = std::min(VgprWaves, SgprWaves);
}
else {
GprWaves = VgprWaves;
}
size_t alu_accupancy = device->info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves);
alu_accupancy *= wrkGrpInfo->wavefrontSize_;
// Calculate blocks occupancy per CU
*numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_);
size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize;
if (total_used_lds != 0) {
// Calculate LDS occupancy per CU. lds_per_cu / (static_lsd + dynamic_lds)
int lds_occupancy = static_cast<int>(device->info().localMemSize_ / total_used_lds);
*numBlocks = std::min(*numBlocks, lds_occupancy);
}
HIP_RETURN(hipSuccess);
}
}
extern "C" {
// FIXME: Need to replace `uint32_t` with `int` finally.
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks,
hipFunction_t f,
uint32_t blockSize,
size_t dynamicSMemSize)
{
int NB;
hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize);
*numBlocks = NB;
HIP_RETURN(Ret);
}
// FIXME: Need to replace `uint32_t` with `int` finally.
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(uint32_t* numBlocks,
hipFunction_t f,
uint32_t blockSize,
size_t dynamicSMemSize,
unsigned int flags)
{
int NB;
hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize);
*numBlocks = NB;
HIP_RETURN(Ret);
}
}
#if defined(ATI_OS_LINUX)
namespace hip_impl {
struct dl_phdr_info {
ELFIO::Elf64_Addr dlpi_addr;
const char *dlpi_name;
const ELFIO::Elf64_Phdr *dlpi_phdr;
ELFIO::Elf64_Half dlpi_phnum;
};
extern "C" int dl_iterate_phdr(
int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data
);
struct Symbol {
std::string name;
ELFIO::Elf64_Addr value = 0;
ELFIO::Elf_Xword size = 0;
ELFIO::Elf_Half sect_idx = 0;
uint8_t bind = 0;
uint8_t type = 0;
uint8_t other = 0;
};
inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) {
assert(idx < section.get_symbols_num());
Symbol r;
section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other);
return r;
}
template <typename P>
inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) {
const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p));
return it != reader.sections.end() ? *it : nullptr;
}
std::vector<std::pair<uintptr_t, std::string>> function_names_for(const ELFIO::elfio& reader,
ELFIO::section* symtab) {
std::vector<std::pair<uintptr_t, std::string>> r;
ELFIO::symbol_section_accessor symbols{reader, symtab};
for (auto i = 0u; i != symbols.get_symbols_num(); ++i) {
auto tmp = read_symbol(symbols, i);
if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) {
r.emplace_back(tmp.value, tmp.name);
}
}
return r;
}
const std::vector<std::pair<uintptr_t, std::string>>& function_names_for_process() {
static constexpr const char self[] = "/proc/self/exe";
static std::vector<std::pair<uintptr_t, std::string>> r;
static std::once_flag f;
std::call_once(f, []() {
ELFIO::elfio reader;
if (reader.load(self)) {
const auto it = find_section_if(
reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; });
if (it) r = function_names_for(reader, it);
}
});
return r;
}
const std::unordered_map<uintptr_t, std::string>& function_names()
{
static std::unordered_map<uintptr_t, std::string> r{
function_names_for_process().cbegin(),
function_names_for_process().cend()};
static std::once_flag f;
std::call_once(f, []() {
dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) {
ELFIO::elfio reader;
if (reader.load(info->dlpi_name)) {
const auto it = find_section_if(
reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; });
if (it) {
auto n = function_names_for(reader, it);
for (auto&& f : n) f.first += info->dlpi_addr;
r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end()));
}
}
return 0;
},
nullptr);
});
return r;
}
std::vector<char> bundles_for_process() {
static constexpr const char self[] = "/proc/self/exe";
static constexpr const char kernel_section[] = ".kernel";
std::vector<char> r;
ELFIO::elfio reader;
if (reader.load(self)) {
auto it = find_section_if(
reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; });
if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size());
}
return r;
}
const std::vector<hipModule_t>& modules() {
static std::vector<hipModule_t> r;
static std::once_flag f;
std::call_once(f, []() {
static std::vector<std::vector<char>> bundles{bundles_for_process()};
dl_iterate_phdr(
[](dl_phdr_info* info, std::size_t, void*) {
ELFIO::elfio tmp;
if (tmp.load(info->dlpi_name)) {
const auto it = find_section_if(
tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; });
if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size());
}
return 0;
},
nullptr);
for (auto&& bundle : bundles) {
if (bundle.empty()) {
continue;
}
std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR))
continue;
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(&bundle[0]);
const auto* desc = &obheader->desc[0];
for (uint64_t i = 0; i < obheader->numBundles; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1);
if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE))
continue;
std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE),
desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE));
if (isCompatibleCodeObject(target, hip::getCurrentContext()->devices()[0]->info().name_)) {
hipModule_t module;
if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(obheader) + desc->offset)))
r.push_back(module);
break;
}
}
}
});
return r;
}
const std::unordered_map<uintptr_t, hipFunction_t>& functions()
{
static std::unordered_map<uintptr_t, hipFunction_t> r;
static std::once_flag f;
std::call_once(f, []() {
for (auto&& function : function_names()) {
for (auto&& module : modules()) {
hipFunction_t f;
if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) {
r[function.first] = f;
}
}
}
});
return r;
}
void hipLaunchKernelGGLImpl(
uintptr_t function_address,
const dim3& numBlocks,
const dim3& dimBlocks,
uint32_t sharedMemBytes,
hipStream_t stream,
void** kernarg)
{
HIP_INIT();
const auto it = functions().find(function_address);
if (it == functions().cend())
assert(0);
hipModuleLaunchKernel(it->second,
numBlocks.x, numBlocks.y, numBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z,
sharedMemBytes, stream, nullptr, kernarg);
}
void hipLaunchCooperativeKernelGGLImpl(
uintptr_t function_address,
const dim3& numBlocks,
const dim3& dimBlocks,
uint32_t sharedMemBytes,
hipStream_t stream,
void** kernarg)
{
HIP_INIT();
hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address),
numBlocks, dimBlocks, kernarg, sharedMemBytes, stream);
}
}
#endif // defined(ATI_OS_LINUX)
// conversion routines between float and half precision
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
// half float, the f16 is in the low 16 bits of the input argument
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
u = (a & 0x7fff) != 0 ? v : u;
return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
}
// float half with nearest even rounding
// The lower 16 bits of the result is the bit pattern for the f16
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
std::uint32_t u = f32_as_u32(a);
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
std::uint32_t n = ((std::uint32_t)e << 12) | m;
std::uint32_t s = (u >> 16) & 0x8000;
int b = clamp_int(1-e, 0, 13);
std::uint32_t d = (0x1000 | m) >> b;
d |= (d << b) != (0x1000 | m);
std::uint32_t v = e < 1 ? d : n;
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
v = e > 30 ? 0x7c00 : v;
v = e == 143 ? i : v;
return s | v;
}
extern "C" float __gnu_h2f_ieee(unsigned short h){
return __convert_half_to_float((std::uint32_t) h);
}
extern "C" unsigned short __gnu_f2h_ieee(float f){
return (unsigned short)__convert_float_to_half(f);
}
+252
View File
@@ -0,0 +1,252 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_SRC_HIP_PROF_API_H
#define HIP_SRC_HIP_PROF_API_H
#include <atomic>
#include <iostream>
#include <mutex>
#if USE_PROF_API
#include "hip/hcc_detail/hip_prof_str.h"
#include "platform/prof_protocol.h"
// HIP API callbacks spawner object macro
#define HIP_CB_SPAWNER_OBJECT(CB_ID) \
api_callbacks_spawner_t<HIP_API_ID_##CB_ID> __api_tracer; \
{ \
hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \
if (api_data != NULL) { \
hip_api_data_t& api_data_ref = *api_data; \
INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \
__api_tracer.call(); \
} \
}
static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API;
typedef activity_record_t hip_api_record_t;
typedef activity_rtapi_callback_t hip_api_callback_t;
typedef activity_sync_callback_t hip_act_callback_t;
class api_callbacks_table_t {
public:
typedef std::mutex mutex_t;
typedef hip_api_record_t record_t;
typedef hip_api_callback_t fun_t;
typedef hip_act_callback_t act_t;
// HIP API callbacks table
struct hip_cb_table_entry_t {
volatile std::atomic<bool> sync;
volatile std::atomic<uint32_t> sem;
act_t act;
void* a_arg;
fun_t fun;
void* arg;
};
struct hip_cb_table_t {
hip_cb_table_entry_t arr[HIP_API_ID_NUMBER];
};
api_callbacks_table_t() {
memset(&callbacks_table_, 0, sizeof(callbacks_table_));
}
bool set_activity(uint32_t id, act_t fun, void* arg) {
std::lock_guard<mutex_t> lock(mutex_);
bool ret = true;
if (id < HIP_API_ID_NUMBER) {
cb_sync(id);
callbacks_table_.arr[id].act = fun;
callbacks_table_.arr[id].a_arg = arg;
enabled_ = true;
cb_release(id);
} else {
ret = false;
}
return ret;
}
bool set_callback(uint32_t id, fun_t fun, void* arg) {
std::lock_guard<mutex_t> lock(mutex_);
bool ret = true;
if (id < HIP_API_ID_NUMBER) {
cb_sync(id);
callbacks_table_.arr[id].fun = fun;
callbacks_table_.arr[id].arg = arg;
enabled_ = true;
cb_release(id);
} else {
ret = false;
}
return ret;
}
void set_enabled(const bool& enabled) {
enabled_ = enabled;
}
inline hip_cb_table_entry_t& entry(const uint32_t& id) {
return callbacks_table_.arr[id];
}
inline void sem_sync(const uint32_t& id) {
sem_increment(id);
if (entry(id).sync.load() == true) sync_wait(id);
}
inline void sem_release(const uint32_t& id) {
sem_decrement(id);
}
inline bool is_enabled() const {
return enabled_;
}
private:
inline void cb_sync(const uint32_t& id) {
entry(id).sync.store(true);
while (entry(id).sem.load() != 0) {}
}
inline void cb_release(const uint32_t& id) {
entry(id).sync.store(false);
}
inline void sem_increment(const uint32_t& id) {
const uint32_t prev = entry(id).sem.fetch_add(1);
if (prev == UINT32_MAX) {
std::cerr << "sem overflow id = " << id << std::endl << std::flush;
abort();
}
}
inline void sem_decrement(const uint32_t& id) {
const uint32_t prev = entry(id).sem.fetch_sub(1);
if (prev == 0) {
std::cerr << "sem corrupted id = " << id << std::endl << std::flush;
abort();
}
}
void sync_wait(const uint32_t& id) {
sem_decrement(id);
while (entry(id).sync.load() == true) {}
sem_increment(id);
}
mutex_t mutex_;
hip_cb_table_t callbacks_table_;
bool enabled_;
};
extern api_callbacks_table_t callbacks_table;
template <int cid_>
class api_callbacks_spawner_t {
public:
api_callbacks_spawner_t() :
api_data_(NULL)
{
if (!is_enabled()) return;
if (cid_ >= HIP_API_ID_NUMBER) {
fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_);
abort();
}
callbacks_table.sem_sync(cid_);
hip_act_callback_t act = entry(cid_).act;
if (act != NULL) api_data_ = (hip_api_data_t*) act(cid_, NULL, NULL, NULL);
}
void call() {
hip_api_callback_t fun = entry(cid_).fun;
void* arg = entry(cid_).arg;
if (fun != NULL) {
fun(HIP_DOMAIN_ID, cid_, api_data_, arg);
api_data_->phase = ACTIVITY_API_PHASE_EXIT;
}
}
~api_callbacks_spawner_t() {
if (!is_enabled()) return;
if (api_data_ != NULL) {
hip_api_callback_t fun = entry(cid_).fun;
void* arg = entry(cid_).arg;
hip_act_callback_t act = entry(cid_).act;
void* a_arg = entry(cid_).a_arg;
if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg);
if (act != NULL) act(cid_, NULL, NULL, a_arg);
}
callbacks_table.sem_release(cid_);
}
hip_api_data_t* get_api_data_ptr() {
return api_data_;
}
bool is_enabled() const {
return callbacks_table.is_enabled();
}
private:
inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) {
return callbacks_table.entry(id);
}
hip_api_data_t* api_data_;
};
template <>
class api_callbacks_spawner_t<HIP_API_ID_NUMBER> {
public:
api_callbacks_spawner_t() {}
void call() {}
hip_api_data_t* get_api_data_ptr() { return NULL; }
bool is_enabled() const { return false; }
};
#else
#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0)
class api_callbacks_table_t {
public:
typedef void* act_t;
typedef void* fun_t;
bool set_activity(uint32_t id, act_t fun, void* arg) { return false; }
bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; }
};
#endif
#endif // HIP_SRC_HIP_PROF_API_H
+42
View File
@@ -0,0 +1,42 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
hipError_t hipProfilerStart() {
HIP_INIT_API(hipProfilerStart);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipProfilerStop() {
HIP_INIT_API(hipProfilerStop);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
+377
View File
@@ -0,0 +1,377 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hiprtc_internal.hpp"
#include <hip/hiprtc.h>
#include "platform/program.hpp"
extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer,
size_t *length, int *status);
namespace hiprtc {
thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS;
}
class ProgramState {
amd::Monitor lock_;
private:
static ProgramState* programState_;
ProgramState() : lock_("Guards program state") {}
~ProgramState() {}
public:
std::unordered_map<amd::Program*,
std::pair<std::vector<std::string>, std::vector<std::string>>> progHeaders_;
std::map<std::string, std::pair<std::string, std::string>> nameExpresssion_;
static ProgramState& instance();
void createProgramHeaders(amd::Program* program, int numHeaders,
const char** headers, const char** headerNames);
void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames);
uint32_t addNameExpression(const char* name_expression);
char* getLoweredName(const char* name_expression);
};
ProgramState* ProgramState::programState_ = nullptr;
ProgramState& ProgramState::instance() {
if (programState_ == nullptr) {
programState_ = new ProgramState;
}
return *programState_;
}
void ProgramState::createProgramHeaders(amd::Program* program, int numHeaders,
const char** headers, const char** headerNames) {
amd::ScopedLock lock(lock_);
std::vector<std::string> vHeaderNames;
std::vector<std::string> vHeaders;
for (auto i = 0; i != numHeaders; ++i) {
vHeaders.emplace_back(headers[i]);
vHeaderNames.emplace_back(headerNames[i]);
progHeaders_[program] = std::make_pair(std::move(vHeaders), std::move(vHeaderNames));
}
}
void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders,
char** headers, char ** headerNames) {
amd::ScopedLock lock(lock_);
const auto it = progHeaders_.find(program);
if (it != progHeaders_.cend()) {
*numHeaders = it->second.first.size();
*headers = reinterpret_cast<char*>(it->second.first.data());
*headerNames = reinterpret_cast<char*>(it->second.second.data());
}
}
uint32_t ProgramState::addNameExpression(const char* name_expression) {
amd::ScopedLock lock(lock_);
// Strip clean of any '(' or ')' or '&'
std::string strippedName(name_expression);
if (strippedName.back() == ')') {
strippedName.pop_back();
strippedName.erase(0, strippedName.find('('));
}
if (strippedName.front() == '&') {
strippedName.erase(0, 1);
}
auto it = nameExpresssion_.find(name_expression);
if (it == nameExpresssion_.end()) {
nameExpresssion_.insert(std::pair<std::string, std::pair<std::string, std::string>>
(name_expression, std::make_pair(strippedName,"")));
}
return nameExpresssion_.size();
}
namespace hip_impl {
char* demangle(const char* loweredName) {
#ifdef ATI_OS_LINUX
if (!loweredName) {
return nullptr;
}
int status = 0;
char* demangledName = __cxa_demangle(loweredName, nullptr, nullptr, &status);
if (status != 0) {
return nullptr;
}
return demangledName;
#else
return nullptr;
#endif
}
} // hip_impl
static std::string handleMangledName(std::string name) {
std::string loweredName;
char* demangled = hip_impl::demangle(name.c_str());
loweredName.assign(demangled == nullptr ? std::string() : demangled);
free(demangled);
if (loweredName.empty()) {
return name;
}
if (loweredName.find(".kd") != std::string::npos) {
return {};
}
if (loweredName.find("void ") == 0) {
loweredName.erase(0, strlen("void "));
}
auto dx{loweredName.find_first_of("(<")};
if (dx == std::string::npos) {
return loweredName;
}
if (loweredName[dx] == '<') {
uint32_t count = 1;
do {
++dx;
count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? -1 : 0);
} while (count);
loweredName.erase(++dx);
} else {
loweredName.erase(dx);
}
return loweredName;
}
const char* hiprtcGetErrorString(hiprtcResult x) {
switch (x) {
case HIPRTC_SUCCESS:
return "HIPRTC_SUCCESS";
case HIPRTC_ERROR_OUT_OF_MEMORY:
return "HIPRTC_ERROR_OUT_OF_MEMORY";
case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE:
return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE";
case HIPRTC_ERROR_INVALID_INPUT:
return "HIPRTC_ERROR_INVALID_INPUT";
case HIPRTC_ERROR_INVALID_PROGRAM:
return "HIPRTC_ERROR_INVALID_PROGRAM";
case HIPRTC_ERROR_INVALID_OPTION:
return "HIPRTC_ERROR_INVALID_OPTION";
case HIPRTC_ERROR_COMPILATION:
return "HIPRTC_ERROR_COMPILATION";
case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE:
return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE";
case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION:
return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION";
case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION:
return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION";
case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID:
return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID";
case HIPRTC_ERROR_INTERNAL_ERROR:
return "HIPRTC_ERROR_INTERNAL_ERROR";
default:
throw std::logic_error{"Invalid HIPRTC result."};
};
}
hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name,
int numHeaders, const char** headers, const char** headerNames) {
HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames);
if (prog == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM);
}
if (numHeaders < 0) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
if (numHeaders && (headers == nullptr || headerNames == nullptr)) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
amd::Program* program = new amd::Program(*hip::getCurrentContext(), src, amd::Program::HIP);
if (program == NULL) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0])) {
program->release();
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
}
ProgramState::instance().createProgramHeaders(program, numHeaders, headers, headerNames);
*prog = reinterpret_cast<hiprtcProgram>(as_cl(program));
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) {
// FIXME[skudchad] Add headers to amd::Program::build and device::Program::build,
// pass the saved from ProgramState to amd::Program::build
HIPRTC_INIT_API(prog, numOptions, options);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
std::ostringstream ostrstr;
std::vector<const char*> oarr(&options[0], &options[numOptions]);
std::copy(oarr.begin(), oarr.end(), std::ostream_iterator<std::string>(ostrstr, " "));
std::vector<amd::Device*> devices{hip::getCurrentContext()->devices()[0]};
if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) {
HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION);
}
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) {
HIPRTC_INIT_API(prog, name_expression);
if (name_expression == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
uint32_t id = ProgramState::instance().addNameExpression(name_expression);
const auto var{"__hiprtc_" + std::to_string(id)};
const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name_expression + ';'};
program->appendToSource(code.c_str());
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression,
const char** loweredName) {
HIPRTC_INIT_API(prog, name_expression, loweredName);
if (name_expression == nullptr || loweredName == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
auto it = ProgramState::instance().nameExpresssion_.find(name_expression);
if (it == ProgramState::instance().nameExpresssion_.end()) {
return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID;
}
std::string strippedName = it->second.first;
std::vector<std::string> mangledNames;
if (!dev_program->getLoweredNames(&mangledNames)) {
HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION);
}
for (auto &name : mangledNames) {
std::string demangledName = handleMangledName(name);
if (demangledName == strippedName) {
it->second.second.assign(name);
}
}
*loweredName = it->second.second.c_str();
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) {
HIPRTC_INIT_API(prog);
if (prog == NULL) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
// Release program. hiprtcProgram is a double pointer so free *prog
amd::Program* program = as_amd(reinterpret_cast<cl_program>(*prog));
program->release();
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) {
HIPRTC_INIT_API(prog, binaryMem);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
const device::Program::binary_t& binary =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary();
::memcpy(binaryMem, binary.first, binary.second);
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) {
HIPRTC_INIT_API(prog, binarySizeRet);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
*binarySizeRet =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary().second;
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) {
HIPRTC_INIT_API(prog, dst);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
const device::Program* devProgram =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
auto log = program->programLog() + devProgram->buildLog().c_str();
log.copy(dst, log.size());
dst[log.size()] = '\0';
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) {
HIPRTC_INIT_API(prog, logSizeRet);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
const device::Program* devProgram =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
auto log = program->programLog() + devProgram->buildLog().c_str();
*logSizeRet = log.size() + 1;
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
+263
View File
@@ -0,0 +1,263 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
#include "hip_event.hpp"
#include "thread/monitor.hpp"
static amd::Monitor streamSetLock("Guards global stream set");
static std::unordered_set<hip::Stream*> streamSet;
// Internal structure for stream callback handler
class StreamCallback {
public:
StreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
amd::Command* command)
: stream_(stream), callBack_(callback),
userData_(userData), command_(command) {
};
hipStream_t stream_;
hipStreamCallback_t callBack_;
void* userData_;
amd::Command* command_;
};
namespace hip {
void syncStreams() {
amd::ScopedLock lock(streamSetLock);
for (const auto& it : streamSet) {
it->finish();
}
}
Stream::Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f) :
queue(nullptr), device(dev), context(ctx), priority(p), flags(f) {}
void Stream::create() {
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
queue = new amd::HostQueue(*context, *device, properties,
amd::CommandQueue::RealTimeDisabled, priority);
assert(queue != nullptr);
queue->create();
}
amd::HostQueue* Stream::asHostQueue() {
if (queue == nullptr) {
create();
}
return queue;
}
void Stream::destroy() {
if (queue != nullptr) {
queue->release();
queue = nullptr;
}
}
void Stream::finish() {
if (queue != nullptr) {
queue->finish();
}
}
};
void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) {
hipError_t status = hipSuccess;
StreamCallback* cbo = reinterpret_cast<StreamCallback*>(user_data);
cbo->callBack_(cbo->stream_, status, cbo->userData_);
cbo->command_->release();
delete cbo;
}
static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) {
amd::Device* device = hip::getCurrentContext()->devices()[0];
hip::Stream* hStream = new hip::Stream(device, hip::getCurrentContext(), priority, flags);
if (hStream == nullptr) {
return hipErrorOutOfMemory;
}
if (!(flags & hipStreamNonBlocking)) {
hip::syncStreams();
{
amd::ScopedLock lock(streamSetLock);
streamSet.insert(hStream);
}
}
*stream = reinterpret_cast<hipStream_t>(hStream);
ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream);
return hipSuccess;
}
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) {
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal));
}
hipError_t hipStreamCreate(hipStream_t *stream) {
HIP_INIT_API(hipStreamCreate, stream);
HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal));
}
hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority);
if (priority > static_cast<int>(amd::CommandQueue::Priority::High)) {
priority = static_cast<int>(amd::CommandQueue::Priority::High);
} else if (priority < static_cast<int>(amd::CommandQueue::Priority::Normal)) {
priority = static_cast<int>(amd::CommandQueue::Priority::Normal);
}
return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast<amd::CommandQueue::Priority>(priority)));
}
hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority);
if (leastPriority != nullptr) {
*leastPriority = static_cast<int>(amd::CommandQueue::Priority::Normal);
}
if (greatestPriority != nullptr) {
// Only report one kind of priority for now.
*greatestPriority = static_cast<int>(amd::CommandQueue::Priority::Normal);
}
return HIP_RETURN(hipSuccess);
}
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
HIP_INIT_API(hipStreamGetFlags, stream, flags);
hip::Stream* hStream = reinterpret_cast<hip::Stream*>(stream);
if(flags != nullptr && hStream != nullptr) {
*flags = hStream->flags;
} else {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipStreamSynchronize(hipStream_t stream) {
HIP_INIT_API(hipStreamSynchronize, stream);
amd::HostQueue* hostQueue = hip::getQueue(stream);
hostQueue->finish();
HIP_RETURN(hipSuccess);
}
hipError_t hipStreamDestroy(hipStream_t stream) {
HIP_INIT_API(hipStreamDestroy, stream);
if (stream == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
}
amd::ScopedLock lock(streamSetLock);
hip::Stream* hStream = reinterpret_cast<hip::Stream*>(stream);
hStream->destroy();
streamSet.erase(hStream);
delete hStream;
HIP_RETURN(hipSuccess);
}
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
HIP_INIT_API(hipStreamWaitEvent, stream, event, flags);
amd::HostQueue* queue;
if (stream == nullptr) {
queue = hip::getNullStream();
} else {
queue = reinterpret_cast<hip::Stream*>(stream)->asHostQueue();
}
if (event == nullptr) {
HIP_RETURN(hipErrorInvalidHandle);
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
return HIP_RETURN(e->streamWait(queue, flags));
}
hipError_t hipStreamQuery(hipStream_t stream) {
HIP_INIT_API(hipStreamQuery, stream);
amd::HostQueue* hostQueue;
if (stream == nullptr) {
hostQueue = hip::getNullStream();
} else {
hostQueue = reinterpret_cast<hip::Stream*>(stream)->asHostQueue();
}
amd::Command* command = hostQueue->getLastQueuedCommand(false);
if (command == nullptr) {
HIP_RETURN(hipSuccess);
}
amd::Event& event = command->event();
if (command->type() != 0) {
event.notifyCmdQueue();
}
HIP_RETURN((command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady);
}
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
unsigned int flags) {
HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags);
amd::HostQueue* hostQueue = reinterpret_cast<hip::Stream*>
(stream)->asHostQueue();
amd::Command* command = hostQueue->getLastQueuedCommand(true);
amd::Event& event = command->event();
StreamCallback* cbo = new StreamCallback(stream, callback, userData, command);
if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast<void*>(cbo))) {
command->release();
return hipErrorInvalidHandle;
}
event.notifyCmdQueue();
HIP_RETURN(hipSuccess);
}
+96
View File
@@ -0,0 +1,96 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include "hip_internal.hpp"
#include <hip/hcc_detail/hip_surface_types.h>
namespace hip {
static amd::Monitor surfaceLock("Guards surface objects");
struct hipSurface {
hipSurface(const hipResourceDesc* pResDesc): array(nullptr)
{
memcpy(&resDesc, pResDesc, sizeof(hipResourceDesc));
}
hipArray* array;
hipResourceDesc resDesc;
};
static std::unordered_map<hipSurfaceObject_t, hipSurface*> surfaceHash;
};
using namespace hip;
hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
const hipResourceDesc* pResDesc) {
HIP_INIT_API(NONE, pSurfObject, pResDesc);
hipSurface* pSurface = new hipSurface(pResDesc);
assert(pSurface != nullptr);
switch (pResDesc->resType) {
case hipResourceTypeArray:
pSurface->array = pResDesc->res.array.array;
break;
default:
break;
}
hipSurfaceObject_t surfObj;
hipError_t err = hipMalloc(reinterpret_cast<void**>(&surfObj), sizeof(hipArray));
if (err != hipSuccess) {
delete pSurface;
HIP_RETURN(hipErrorOutOfMemory);
}
err = hipMemcpy(reinterpret_cast<void*>(surfObj), reinterpret_cast<void*>(pResDesc->res.array.array), sizeof(hipArray),
hipMemcpyHostToDevice);
if (err != hipSuccess) {
delete pSurface;
hipFree(reinterpret_cast<void*>(surfObj));
HIP_RETURN(err);
}
*pSurfObject = surfObj;
amd::ScopedLock lock(surfaceLock);
surfaceHash[*pSurfObject] = pSurface;
HIP_RETURN(hipSuccess);
}
hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
HIP_INIT_API(NONE, surfaceObject);
amd::ScopedLock lock(surfaceLock);
hipSurface* pSurface = surfaceHash[surfaceObject];
if (pSurface != nullptr) {
delete pSurface;
surfaceHash.erase(surfaceObject);
HIP_RETURN(hipFree(reinterpret_cast<void*>(surfaceObject)));
}
HIP_RETURN(hipErrorInvalidValue);
}
+808
View File
@@ -0,0 +1,808 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime.h>
#include <hip/hcc_detail/texture_types.h>
#include "hip_internal.hpp"
#include "platform/sampler.hpp"
namespace hip {
struct TextureObject {
uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD];
uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD];
amd::Image* image;
amd::Sampler* sampler;
hipResourceDesc resDesc;
};
};
void getDrvChannelOrderAndType(const enum hipArray_Format Format, unsigned int NumChannels,
cl_channel_order* channelOrder,
cl_channel_type* channelType) {
switch (Format) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
*channelType = CL_UNSIGNED_INT8;
break;
case HIP_AD_FORMAT_UNSIGNED_INT16:
*channelType = CL_UNSIGNED_INT16;
break;
case HIP_AD_FORMAT_UNSIGNED_INT32:
*channelType = CL_UNSIGNED_INT32;
break;
case HIP_AD_FORMAT_SIGNED_INT8:
*channelType = CL_SIGNED_INT8;
break;
case HIP_AD_FORMAT_SIGNED_INT16:
*channelType = CL_SIGNED_INT16;
break;
case HIP_AD_FORMAT_SIGNED_INT32:
*channelType = CL_SIGNED_INT32;
break;
case HIP_AD_FORMAT_HALF:
*channelType = CL_HALF_FLOAT;
break;
case HIP_AD_FORMAT_FLOAT:
*channelType = CL_FLOAT;
break;
default:
break;
}
if (NumChannels == 4) {
*channelOrder = CL_RGBA;
} else if (NumChannels == 2) {
*channelOrder = CL_RG;
} else if (NumChannels == 1) {
*channelOrder = CL_R;
}
}
void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc) {
memset(desc, 0x00, sizeof(hipChannelFormatDesc));
switch (channelType) {
case CL_SIGNED_INT8:
case CL_SIGNED_INT16:
case CL_SIGNED_INT32:
desc->f = hipChannelFormatKindSigned;
break;
case CL_UNSIGNED_INT8:
case CL_UNSIGNED_INT16:
case CL_UNSIGNED_INT32:
desc->f = hipChannelFormatKindUnsigned;
break;
case CL_HALF_FLOAT:
case CL_FLOAT:
desc->f = hipChannelFormatKindFloat;
break;
default:
desc->f = hipChannelFormatKindNone;
break;
}
switch (channelType) {
case CL_SIGNED_INT8:
case CL_UNSIGNED_INT8:
desc->x = 8;
break;
case CL_SIGNED_INT16:
case CL_UNSIGNED_INT16:
case CL_HALF_FLOAT:
desc->x = 16;
break;
case CL_SIGNED_INT32:
case CL_UNSIGNED_INT32:
case CL_FLOAT:
desc->x = 32;
break;
default:
desc->x = 0;
break;
}
}
void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode,
cl_channel_order* channelOrder, cl_channel_type* channelType) {
if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) {
*channelOrder = CL_RGBA;
} else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) {
*channelOrder = CL_RGB;
} else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) {
*channelOrder = CL_RG;
} else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) {
*channelOrder = CL_R;
} else {
}
switch (desc.f) {
case hipChannelFormatKindUnsigned:
switch (desc.x) {
case 32:
*channelType = CL_UNSIGNED_INT32;
break;
case 16:
*channelType = readMode == hipReadModeNormalizedFloat
? CL_UNORM_INT16
: CL_UNSIGNED_INT16;
break;
case 8:
*channelType = readMode == hipReadModeNormalizedFloat
? CL_UNORM_INT8
: CL_UNSIGNED_INT8;
break;
default:
*channelType = CL_UNSIGNED_INT32;
}
break;
case hipChannelFormatKindSigned:
switch (desc.x) {
case 32:
*channelType = CL_SIGNED_INT32;
break;
case 16:
*channelType = readMode == hipReadModeNormalizedFloat
? CL_SNORM_INT16
: CL_SIGNED_INT16;
break;
case 8:
*channelType = readMode == hipReadModeNormalizedFloat
? CL_SNORM_INT8
: CL_SIGNED_INT8;
break;
default:
*channelType = CL_SIGNED_INT32;
}
break;
case hipChannelFormatKindFloat:
switch (desc.x) {
case 32:
*channelType = CL_FLOAT;
break;
case 16:
*channelType = CL_HALF_FLOAT;
break;
case 8:
break;
default:
*channelType = CL_FLOAT;
}
break;
case hipChannelFormatKindNone:
default:
break;
}
}
void getByteSizeFromChannelFormatKind(enum hipChannelFormatKind channelFormatKind, size_t* byteSize) {
switch (channelFormatKind)
{
case hipChannelFormatKindSigned:
*byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
*byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
*byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
*byteSize = sizeof(size_t);
break;
default:
*byteSize = 1;
break;
}
}
amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode,
enum hipTextureFilterMode filterMode, int normalizedCoords) {
#ifndef CL_FILTER_NONE
#define CL_FILTER_NONE 0x1142
#endif
uint32_t filter_mode = CL_FILTER_NONE;
switch (filterMode) {
case hipFilterModePoint:
filter_mode = CL_FILTER_NEAREST;
break;
case hipFilterModeLinear:
filter_mode = CL_FILTER_LINEAR;
break;
}
uint32_t address_mode = CL_ADDRESS_NONE;
switch (addressMode) {
case hipAddressModeWrap:
address_mode = CL_ADDRESS_REPEAT;
break;
case hipAddressModeClamp:
address_mode = CL_ADDRESS_CLAMP;
break;
case hipAddressModeMirror:
address_mode = CL_ADDRESS_MIRRORED_REPEAT;
break;
case hipAddressModeBorder:
address_mode = CL_ADDRESS_CLAMP_TO_EDGE;
break;
}
amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentContext(),
normalizedCoords == CL_TRUE,
address_mode, filter_mode, CL_FILTER_NONE, 0.f, CL_MAXFLOAT);
if (sampler == nullptr) {
return nullptr;
}
if (!sampler->create()) {
delete sampler;
return nullptr;
}
return sampler;
}
hip::TextureObject* ihipCreateTextureObject(const hipResourceDesc& resDesc, amd::Image& image, amd::Sampler& sampler) {
hip::TextureObject* texture;
ihipMalloc(reinterpret_cast<void**>(&texture), sizeof(hip::TextureObject), CL_MEM_SVM_FINE_GRAIN_BUFFER);
if (texture == nullptr) {
return nullptr;
}
device::Memory* imageMem = image.getDeviceMemory(*hip::getCurrentContext()->devices()[0]);
memcpy(texture->imageSRD, imageMem->cpuSrd(), sizeof(uint32_t)*HIP_IMAGE_OBJECT_SIZE_DWORD);
texture->image = &image;
device::Sampler* devSampler = sampler.getDeviceSampler(*hip::getCurrentContext()->devices()[0]);
memcpy(texture->samplerSRD, devSampler->hwState(), sizeof(uint32_t)*HIP_SAMPLER_OBJECT_SIZE_DWORD);
texture->sampler = &sampler;
memcpy(&texture->resDesc, &resDesc, sizeof(hipResourceDesc));
return texture;
}
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
const hipTextureDesc* pTexDesc,
const hipResourceViewDesc* pResViewDesc) {
HIP_INIT_API(NONE, pTexObject, pResDesc, pTexDesc, pResViewDesc);
amd::Device* device = hip::getCurrentContext()->devices()[0];
if (!device->info().imageSupport_) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Image* image = nullptr;
cl_image_format image_format;
getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode,
&image_format.image_channel_order, &image_format.image_channel_data_type);
const amd::Image::Format imageFormat(image_format);
amd::Memory* memory = nullptr;
size_t offset = 0;
cl_mem_object_type clType;
switch (pResDesc->resType) {
case hipResourceTypeArray:
{
memory = getMemoryObject(pResDesc->res.array.array->data, offset);
getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode,
&image_format.image_channel_order, &image_format.image_channel_data_type);
const amd::Image::Format imageFormat(image_format);
switch (pResDesc->res.array.array->type) {
case hipArrayLayered:
case hipArrayCubemap:
assert(0);
break;
case hipArraySurfaceLoadStore:
case hipArrayTextureGather:
case hipArrayDefault:
default:
switch(pResDesc->res.array.array->textureType) {
case hipTextureType3D:
clType = CL_MEM_OBJECT_IMAGE3D;
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
clType, memory->getMemFlags(), imageFormat,
pResDesc->res.array.array->width, pResDesc->res.array.array->height,
pResDesc->res.array.array->depth, 0, 0);
break;
case hipTextureType2D:
clType = CL_MEM_OBJECT_IMAGE2D;
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
clType, memory->getMemFlags(), imageFormat,
pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0);
break;
default:
break;
}
break;
}
}
break;
case hipResourceTypeMipmappedArray:
assert(0);
break;
case hipResourceTypeLinear:
{
assert(pResViewDesc == nullptr);
memory = getMemoryObject(pResDesc->res.linear.devPtr, offset);
getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode,
&image_format.image_channel_order, &image_format.image_channel_data_type);
const amd::Image::Format imageFormat(image_format);
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat,
pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1,
pResDesc->res.linear.sizeInBytes, 0);
}
break;
case hipResourceTypePitch2D:
assert(pResViewDesc == nullptr);
memory = getMemoryObject(pResDesc->res.pitch2D.devPtr, offset);
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat,
pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1,
pResDesc->res.pitch2D.pitchInBytes, 0);
break;
default: HIP_RETURN(hipErrorInvalidValue);
}
if (!image->create()) {
delete image;
HIP_RETURN(hipErrorOutOfMemory);
}
amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords);
*pTexObject = reinterpret_cast<hipTextureObject_t>(ihipCreateTextureObject(*pResDesc, *image, *sampler));
HIP_RETURN(hipSuccess);
}
void ihipDestroyTextureObject(hip::TextureObject* texture) {
texture->image->release();
texture->sampler->release();
hipFree(texture);
}
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
HIP_INIT_API(NONE, textureObject);
hip::TextureObject* texture = reinterpret_cast<hip::TextureObject*>(textureObject);
ihipDestroyTextureObject(texture);
HIP_RETURN(hipSuccess);
}
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
hipTextureObject_t textureObject) {
HIP_INIT_API(NONE, pResDesc, textureObject);
hip::TextureObject* texture = reinterpret_cast<hip::TextureObject*>(textureObject);
if (pResDesc != nullptr && texture != nullptr) {
memcpy(pResDesc, &(texture->resDesc), sizeof(hipResourceDesc));
}
HIP_RETURN(hipErrorInvalidValue);
}
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
hipTextureObject_t textureObject) {
HIP_INIT_API(NONE, pResViewDesc, textureObject);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
hipTextureObject_t textureObject) {
HIP_INIT_API(NONE, pTexDesc, textureObject);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t ihipBindTexture(cl_mem_object_type type,
size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc& desc, size_t width, size_t height,
size_t pitch) {
if (tex == nullptr) {
return hipErrorInvalidImage;
}
if (hip::getCurrentContext()) {
cl_image_format image_format;
size_t byteSize;
size_t rowPitch = 0;
size_t depth = 0;
size_t slicePitch = 0;
getChannelOrderAndType(desc, hipReadModeElementType,
&image_format.image_channel_order, &image_format.image_channel_data_type);
getByteSizeFromChannelFormatKind(desc.f, &byteSize);
const amd::Image::Format imageFormat(image_format);
amd::Memory* memory = getMemoryObject(devPtr, *offset);
switch (type) {
case CL_MEM_OBJECT_IMAGE3D:
rowPitch = width * byteSize;
depth = pitch;
slicePitch = rowPitch * height;
break;
case CL_MEM_OBJECT_IMAGE2D:
default:
rowPitch = pitch;
depth = 1;
slicePitch = 0;
break;
}
amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
type, memory->getMemFlags(), imageFormat, width, height, depth, rowPitch, slicePitch);
if (!image->create()) {
delete image;
return hipErrorOutOfMemory;
}
*offset = 0;
if (tex->textureObject) {
ihipDestroyTextureObject(reinterpret_cast<hip::TextureObject*>(tex->textureObject));
}
amd::Sampler* sampler = fillSamplerDescriptor(tex->addressMode[0], tex->filterMode, tex->normalized);
hipResourceDesc resDesc;
memset(&resDesc, 0, sizeof(hipResourceDesc));
switch (type) {
case CL_MEM_OBJECT_IMAGE1D:
resDesc.resType = hipResourceTypeLinear;
resDesc.res.linear.devPtr = const_cast<void*>(devPtr);
resDesc.res.linear.desc = desc;
resDesc.res.linear.sizeInBytes = image->getSize();
break;
case CL_MEM_OBJECT_IMAGE2D:
resDesc.resType = hipResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = const_cast<void*>(devPtr);
resDesc.res.pitch2D.desc = desc;
resDesc.res.pitch2D.width = width;
resDesc.res.pitch2D.height = height;
resDesc.res.pitch2D.pitchInBytes = pitch;
break;
case CL_MEM_OBJECT_IMAGE3D:
resDesc.resType = hipResourceTypeArray;
resDesc.res.array.array = (hipArray*)malloc(sizeof(hipArray));
resDesc.res.array.array->desc = desc;
resDesc.res.array.array->width = width;
resDesc.res.array.array->height = height;
resDesc.res.array.array->depth = depth;
resDesc.res.array.array->Format = tex->format;
resDesc.res.array.array->NumChannels = tex->numChannels;
resDesc.res.array.array->isDrv = false;
resDesc.res.array.array->textureType = hipTextureType3D;
resDesc.res.array.array->data = const_cast<void*>(devPtr);
break;
default:
resDesc.resType = hipResourceTypeArray;
resDesc.res.array.array = nullptr;
break;
}
tex->textureObject = reinterpret_cast<hipTextureObject_t>(ihipCreateTextureObject(resDesc, *image, *sampler));
if(type == CL_MEM_OBJECT_IMAGE3D) {
free(resDesc.res.array.array);
}
memset(&resDesc, 0, sizeof(hipResourceDesc));
return hipSuccess;
}
return hipErrorInvalidValue;
}
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t size) {
HIP_INIT_API(NONE, offset, tex, devPtr, desc, size);
if (desc == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
cl_image_format image_format;
getChannelOrderAndType(*desc, hipReadModeElementType,
&image_format.image_channel_order, &image_format.image_channel_data_type);
const amd::Image::Format imageFormat(image_format);
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size / imageFormat.getElementSize(), 1, size));
}
hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t width, size_t height,
size_t pitch) {
HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch);
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, *desc, width, height, pitch));
}
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
const hipChannelFormatDesc* desc) {
HIP_INIT_API(NONE, tex, array, desc);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t size, textureReference* tex) {
HIP_INIT_API(NONE, dim, readMode, offset, devPtr, size, tex);
assert(1 == dim);
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size, 1, 0));
}
hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode,
hipArray_const_t array,
const struct hipChannelFormatDesc& desc,
textureReference* tex) {
HIP_INIT_API(NONE, dim, readMode, &desc, array, tex);
cl_mem_object_type clType;
size_t offset = 0;
switch (dim) {
case 1:
clType = CL_MEM_OBJECT_IMAGE1D;
break;
case 2:
clType = CL_MEM_OBJECT_IMAGE2D;
break;
case 3:
case hipTextureType2DLayered:
clType = CL_MEM_OBJECT_IMAGE3D;
break;
default:
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, desc, array->width,
array->height, array->depth));
}
hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
hipMipmappedArray_const_t mipmappedArray,
const hipChannelFormatDesc* desc) {
HIP_INIT_API(NONE, tex, mipmappedArray, desc);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
ihipDestroyTextureObject(reinterpret_cast<hip::TextureObject*>(textureObject));
return hipSuccess;
}
hipError_t hipUnbindTexture(const textureReference* tex) {
HIP_INIT_API(NONE, tex);
ihipDestroyTextureObject(reinterpret_cast<hip::TextureObject*>(tex->textureObject));
HIP_RETURN(hipSuccess);
}
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) {
HIP_INIT_API(NONE, desc, array);
if (desc != nullptr) {
*desc = array->desc;
}
HIP_RETURN(hipSuccess);
}
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) {
HIP_INIT_API(NONE, offset, tex);
if ((offset == nullptr) || (tex == nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
}
*offset = 0;
HIP_RETURN(hipSuccess);
}
hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) {
HIP_INIT_API(NONE, tex, symbol);
assert(0 && "Unimplemented");
HIP_RETURN(hipErrorNotSupported);
}
hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) {
HIP_INIT_API(NONE, tex, fmt, NumPackedComponents);
if (tex == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}
tex->format = fmt;
tex->numChannels = NumPackedComponents;
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) {
HIP_INIT_API(NONE, tex, flags);
if (tex == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}
tex->normalized = flags;
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) {
HIP_INIT_API(NONE, tex, fm);
if (tex == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}
tex->filterMode = fm;
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) {
HIP_INIT_API(NONE, am, &tex, dim);
if ((am == nullptr) || (dim >= 3)) {
HIP_RETURN(hipErrorInvalidValue);
}
*am = tex.addressMode[dim];
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) {
HIP_INIT_API(NONE, tex, dim, am);
if (tex == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}
tex->addressMode[dim] = am;
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) {
HIP_INIT_API(NONE, array, &tex);
hip::TextureObject* texture = nullptr;
if ((array == nullptr) || (*array == nullptr)) {
HIP_RETURN(hipErrorInvalidImage);
}
texture = reinterpret_cast<hip::TextureObject *>(tex.textureObject);
if(hipResourceTypeArray != texture->resDesc.resType){
HIP_RETURN(hipErrorInvalidValue);
}
if (texture->resDesc.res.array.array == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
**array = *(texture->resDesc.res.array.array);
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) {
HIP_INIT_API(NONE, tex, array, flags);
size_t offset = 0;
cl_mem_object_type clType;
if ((tex == nullptr) || (array == nullptr)) {
HIP_RETURN(hipErrorInvalidImage);
}
switch(array->textureType) {
case hipTextureType3D:
clType = CL_MEM_OBJECT_IMAGE3D;
break;
case hipTextureType2D:
clType = CL_MEM_OBJECT_IMAGE2D;
break;
case hipTextureType1D:
clType = CL_MEM_OBJECT_IMAGE1D;
break;
default:
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, array->desc, array->width,
array->height, array->depth));
}
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) {
HIP_INIT_API(NONE, dev_ptr, &tex);
hip::TextureObject* texture = nullptr;
device::Memory* dev_mem = nullptr;
texture = reinterpret_cast<hip::TextureObject *>(tex.textureObject);
if ((texture == nullptr) || (texture->image == nullptr)) {
HIP_RETURN(hipErrorInvalidImage);
}
dev_mem = texture->image->getDeviceMemory(*hip::getCurrentContext()->devices()[0]);
if (dev_mem == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}
*dev_ptr = reinterpret_cast<void*>(dev_mem->virtualAddress());
HIP_RETURN(hipSuccess);
}
hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
size_t size) {
HIP_INIT_API(NONE, offset, tex, devPtr, size);
if (tex == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}
cl_image_format image_format;
getDrvChannelOrderAndType(tex->format, tex->numChannels,
&image_format.image_channel_order, &image_format.image_channel_data_type);
const amd::Image::Format imageFormat(image_format);
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, tex->channelDesc, size / imageFormat.getElementSize(), 1, size));
}
hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
hipDeviceptr_t devPtr, size_t pitch) {
HIP_INIT_API(NONE, tex, desc, devPtr, pitch);
if (desc == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
size_t offset;
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, tex->channelDesc, desc->Width, desc->Height, pitch));
}
+44
View File
@@ -0,0 +1,44 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIPRTC_SRC_HIP_INTERNAL_H
#define HIPRTC_SRC_HIP_INTERNAL_H
#include "hip_internal.hpp"
// This macro should be called at the beginning of every HIP RTC API.
#define HIPRTC_INIT_API(...) \
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \
amd::Thread* thread = amd::Thread::current(); \
if (!VDI_CHECK_THREAD(thread)) { \
HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \
} \
HIP_INIT();
#define HIPRTC_RETURN(ret) \
hiprtc::g_lastRtcError = ret; \
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \
hiprtcGetErrorString(hiprtc::g_lastRtcError)); \
return hiprtc::g_lastRtcError;
#endif // HIPRTC_SRC_HIP_INTERNAL_H
+247
View File
@@ -0,0 +1,247 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <iostream>
#include <iomanip>
#include <sstream>
#include <string>
//---
// Helper functions to convert HIP function arguments into strings.
// Handles POD data types as well as enumerations (ie hipMemcpyKind).
// The implementation uses C++11 variadic templates and template specialization.
// The hipMemcpyKind example below is a good example that shows how to implement conversion for a
// new HSA type.
// Handy macro to convert an enumeration to a stringified version of same:
#define CASE_STR(x) \
case x: \
return #x;
inline const char* ihipErrorString(hipError_t hip_error) {
switch (hip_error) {
CASE_STR(hipSuccess);
CASE_STR(hipErrorOutOfMemory);
CASE_STR(hipErrorNotInitialized);
CASE_STR(hipErrorDeinitialized);
CASE_STR(hipErrorProfilerDisabled);
CASE_STR(hipErrorProfilerNotInitialized);
CASE_STR(hipErrorProfilerAlreadyStarted);
CASE_STR(hipErrorProfilerAlreadyStopped);
CASE_STR(hipErrorInvalidImage);
CASE_STR(hipErrorInvalidContext);
CASE_STR(hipErrorContextAlreadyCurrent);
CASE_STR(hipErrorMapFailed);
CASE_STR(hipErrorUnmapFailed);
CASE_STR(hipErrorArrayIsMapped);
CASE_STR(hipErrorAlreadyMapped);
CASE_STR(hipErrorNoBinaryForGpu);
CASE_STR(hipErrorAlreadyAcquired);
CASE_STR(hipErrorNotMapped);
CASE_STR(hipErrorNotMappedAsArray);
CASE_STR(hipErrorNotMappedAsPointer);
CASE_STR(hipErrorECCNotCorrectable);
CASE_STR(hipErrorUnsupportedLimit);
CASE_STR(hipErrorContextAlreadyInUse);
CASE_STR(hipErrorPeerAccessUnsupported);
CASE_STR(hipErrorInvalidKernelFile);
CASE_STR(hipErrorInvalidGraphicsContext);
CASE_STR(hipErrorInvalidSource);
CASE_STR(hipErrorFileNotFound);
CASE_STR(hipErrorSharedObjectSymbolNotFound);
CASE_STR(hipErrorSharedObjectInitFailed);
CASE_STR(hipErrorOperatingSystem);
CASE_STR(hipErrorSetOnActiveProcess);
CASE_STR(hipErrorInvalidHandle);
CASE_STR(hipErrorNotFound);
CASE_STR(hipErrorIllegalAddress);
CASE_STR(hipErrorMissingConfiguration);
CASE_STR(hipErrorLaunchFailure);
CASE_STR(hipErrorPriorLaunchFailure);
CASE_STR(hipErrorLaunchTimeOut);
CASE_STR(hipErrorLaunchOutOfResources);
CASE_STR(hipErrorInvalidDeviceFunction);
CASE_STR(hipErrorInvalidConfiguration);
CASE_STR(hipErrorInvalidDevice);
CASE_STR(hipErrorInvalidValue);
CASE_STR(hipErrorInvalidDevicePointer);
CASE_STR(hipErrorInvalidMemcpyDirection);
CASE_STR(hipErrorUnknown);
CASE_STR(hipErrorNotReady);
CASE_STR(hipErrorNoDevice);
CASE_STR(hipErrorPeerAccessAlreadyEnabled);
CASE_STR(hipErrorPeerAccessNotEnabled);
CASE_STR(hipErrorRuntimeMemory);
CASE_STR(hipErrorRuntimeOther);
CASE_STR(hipErrorHostMemoryAlreadyRegistered);
CASE_STR(hipErrorHostMemoryNotRegistered);
CASE_STR(hipErrorTbd);
default:
return "hipErrorUnknown";
};
};
// Building block functions:
template <typename T>
inline std::string ToHexString(T v) {
std::ostringstream ss;
ss << "0x" << std::hex << v;
return ss.str();
};
template <typename T>
inline std::string ToString(T* v) {
std::ostringstream ss;
if (v == NULL) {
ss << "char array:<null>";
} else {
ss << v;
}
return ss.str();
};
template <typename T>
inline std::string ToString(T** v) {
std::ostringstream ss;
if (v == NULL) {
ss << "char array:<null>";
} else {
ss << v;
}
return ss.str();
};
//---
// Template overloads for ToString to handle specific types
// This is the default which works for most types:
template <typename T>
inline std::string ToString(T v) {
std::ostringstream ss;
ss << v;
return ss.str();
};
template <>
inline std::string ToString(hipFunction_t v) {
std::ostringstream ss;
ss << "0x" << std::hex << static_cast<void*>(v);
return ss.str();
};
// hipEvent_t specialization. TODO - maybe add an event ID for debug?
template <>
inline std::string ToString(hipEvent_t v) {
std::ostringstream ss;
ss << "event:" << std::hex << static_cast<void*>(v);
return ss.str();
};
// hipStream_t
template <>
inline std::string ToString(hipStream_t v) {
std::ostringstream ss;
if (v == NULL) {
ss << "stream:<null>";
} else {
ss << "stream:" << std::hex << static_cast<void*>(v);
}
return ss.str();
};
// hipCtx_t
template <>
inline std::string ToString(hipCtx_t v) {
std::ostringstream ss;
if (v == NULL) {
ss << "context:<null>";
} else {
ss << "context:" << std::hex << static_cast<void*>(v);
}
return ss.str();
};
// hipPitchedPtr
template <>
inline std::string ToString(hipPitchedPtr v) {
std::ostringstream ss;
ss << "pitchPtr:" << std::hex << static_cast<void*>(v.ptr);
return ss.str();
};
// hipMemcpyKind specialization
template <>
inline std::string ToString(hipMemcpyKind v) {
switch (v) {
CASE_STR(hipMemcpyHostToHost);
CASE_STR(hipMemcpyHostToDevice);
CASE_STR(hipMemcpyDeviceToHost);
CASE_STR(hipMemcpyDeviceToDevice);
CASE_STR(hipMemcpyDefault);
default:
return ToHexString(v);
};
};
template <>
inline std::string ToString(hipFuncCache_t v) {
switch (v) {
CASE_STR(hipFuncCachePreferNone);
CASE_STR(hipFuncCachePreferShared);
CASE_STR(hipFuncCachePreferL1);
CASE_STR(hipFuncCachePreferEqual);
default:
return ToHexString(v);
};
};
template <>
inline std::string ToString(hipSharedMemConfig v) {
switch (v) {
CASE_STR(hipSharedMemBankSizeDefault);
CASE_STR(hipSharedMemBankSizeFourByte);
CASE_STR(hipSharedMemBankSizeEightByte);
default:
return ToHexString(v);
};
};
template <>
inline std::string ToString(hipError_t v) {
return ihipErrorString(v);
};
// Catch empty arguments case
inline std::string ToString() { return (""); }
//---
// C++11 variadic template - peels off first argument, converts to string, and calls itself again to
// peel the next arg. Strings are automatically separated by comma+space.
template <typename T, typename... Args>
inline std::string ToString(T first, Args... args) {
return ToString(first) + ", " + ToString(args...);
}