Merge HIP/VDI branch 'amd-staging' into lmoriche/amd-master-next
Change-Id: Iabaab4e72815ba483a1330ec6a1130f2b86676f0
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "vdi_common.hpp"
|
||||
#include <icd/loader/icd_dispatch.h>
|
||||
|
||||
KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0};
|
||||
amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_};
|
||||
|
||||
RUNTIME_ENTRY(cl_int, clGetDeviceIDs,
|
||||
(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
|
||||
cl_device_id* devices, cl_uint* num_devices)) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
RUNTIME_EXIT
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "platform/activity.hpp"
|
||||
|
||||
extern "C" void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg) {
|
||||
activity_prof::CallbacksTable::init(reinterpret_cast<activity_prof::id_callback_fun_t>(id_callback),
|
||||
reinterpret_cast<activity_prof::callback_fun_t>(op_callback),
|
||||
arg);
|
||||
}
|
||||
|
||||
extern "C" bool hipEnableActivityCallback(unsigned op, bool enable) {
|
||||
return activity_prof::CallbacksTable::SetEnabled(op, enable);
|
||||
}
|
||||
|
||||
extern "C" const char* hipGetCmdName(unsigned op) {
|
||||
return getOclCommandKindString(static_cast<uint32_t>(op));
|
||||
}
|
||||
@@ -0,0 +1,382 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/runtime.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "utils/versions.hpp"
|
||||
|
||||
std::vector<amd::Context*> g_devices;
|
||||
|
||||
namespace hip {
|
||||
|
||||
thread_local amd::Context* g_context = nullptr;
|
||||
thread_local std::stack<amd::Context*> g_ctxtStack;
|
||||
thread_local hipError_t g_lastError = hipSuccess;
|
||||
std::once_flag g_ihipInitialized;
|
||||
amd::Context* host_context = nullptr;
|
||||
|
||||
std::map<amd::Context*, amd::HostQueue*> g_nullStreams;
|
||||
|
||||
void init() {
|
||||
if (!amd::Runtime::initialized()) {
|
||||
amd::IS_HIP = true;
|
||||
GPU_NUM_MEM_DEPENDENCY = 0;
|
||||
amd::Runtime::init();
|
||||
}
|
||||
|
||||
const std::vector<amd::Device*>& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
|
||||
for (unsigned int i=0; i<devices.size(); i++) {
|
||||
const std::vector<amd::Device*> device(1, devices[i]);
|
||||
amd::Context* context = new amd::Context(device, amd::Context::Info());
|
||||
if (!context) return;
|
||||
|
||||
if (context && CL_SUCCESS != context->create(nullptr)) {
|
||||
context->release();
|
||||
} else {
|
||||
g_devices.push_back(context);
|
||||
}
|
||||
}
|
||||
|
||||
host_context = new amd::Context(devices, amd::Context::Info());
|
||||
if (!host_context) return;
|
||||
|
||||
if (host_context && CL_SUCCESS != host_context->create(nullptr)) {
|
||||
host_context->release();
|
||||
}
|
||||
|
||||
PlatformState::instance().init();
|
||||
}
|
||||
|
||||
amd::Context* getCurrentContext() {
|
||||
return g_context;
|
||||
}
|
||||
|
||||
void setCurrentContext(unsigned int index) {
|
||||
assert(index<g_devices.size());
|
||||
g_context = g_devices[index];
|
||||
}
|
||||
|
||||
amd::HostQueue* getQueue(hipStream_t stream) {
|
||||
if (stream == nullptr) {
|
||||
syncStreams();
|
||||
return getNullStream();
|
||||
} else {
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
if ((s->flags & hipStreamNonBlocking) == 0) {
|
||||
getNullStream()->finish();
|
||||
}
|
||||
return s->asHostQueue();
|
||||
}
|
||||
}
|
||||
|
||||
amd::HostQueue* getNullStream(amd::Context& context) {
|
||||
auto stream = g_nullStreams.find(&context);
|
||||
if (stream == g_nullStreams.end()) {
|
||||
amd::Device* device = context.devices()[0];
|
||||
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
|
||||
amd::HostQueue* queue = new amd::HostQueue(context, *device, properties,
|
||||
amd::CommandQueue::RealTimeDisabled,
|
||||
amd::CommandQueue::Priority::Normal);
|
||||
g_nullStreams[&context] = queue;
|
||||
return queue;
|
||||
}
|
||||
return stream->second;
|
||||
}
|
||||
|
||||
amd::HostQueue* getNullStream() {
|
||||
amd::Context* context = getCurrentContext();
|
||||
return context ? getNullStream(*context) : nullptr;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
using namespace hip;
|
||||
|
||||
hipError_t hipInit(unsigned int flags) {
|
||||
HIP_INIT_API(hipInit, flags);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) {
|
||||
HIP_INIT_API(hipCtxCreate, ctx, flags, device);
|
||||
|
||||
if (static_cast<size_t>(device) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*ctx = reinterpret_cast<hipCtx_t>(g_devices[device]);
|
||||
|
||||
// Increment ref count for device primary context
|
||||
g_devices[device]->retain();
|
||||
g_ctxtStack.push(g_devices[device]);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxSetCurrent, ctx);
|
||||
|
||||
if (ctx == nullptr) {
|
||||
if(!g_ctxtStack.empty()) {
|
||||
g_ctxtStack.pop();
|
||||
}
|
||||
} else {
|
||||
hip::g_context = reinterpret_cast<amd::Context*>(as_amd(ctx));
|
||||
if(!g_ctxtStack.empty()) {
|
||||
g_ctxtStack.pop();
|
||||
}
|
||||
g_ctxtStack.push(hip::getCurrentContext());
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
|
||||
HIP_INIT_API(hipCtxGetCurrent, ctx);
|
||||
|
||||
*ctx = reinterpret_cast<hipCtx_t>(hip::getCurrentContext());
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
|
||||
HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig);
|
||||
|
||||
*pConfig = hipSharedMemBankSizeFourByte;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipRuntimeGetVersion(int *runtimeVersion) {
|
||||
HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion);
|
||||
|
||||
if (!runtimeVersion) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*runtimeVersion = AMD_PLATFORM_BUILD_NUMBER;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxDestroy(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxDestroy, ctx);
|
||||
|
||||
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
|
||||
if (amdContext == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// Release last tracked command
|
||||
hip::getNullStream()->setLastQueuedCommand(nullptr);
|
||||
|
||||
// Need to remove the ctx of calling thread if its the top one
|
||||
if (!g_ctxtStack.empty() && g_ctxtStack.top() == amdContext) {
|
||||
g_ctxtStack.pop();
|
||||
}
|
||||
|
||||
// Remove context from global context list
|
||||
for (unsigned int i = 0; i < g_devices.size(); i++) {
|
||||
if (g_devices[i] == amdContext) {
|
||||
// Decrement ref count for device primary context
|
||||
amdContext->release();
|
||||
}
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
|
||||
HIP_INIT_API(hipCtxPopCurrent, ctx);
|
||||
|
||||
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
|
||||
if (amdContext == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidContext);
|
||||
}
|
||||
|
||||
if (!g_ctxtStack.empty()) {
|
||||
amdContext = g_ctxtStack.top();
|
||||
g_ctxtStack.pop();
|
||||
} else {
|
||||
HIP_RETURN(hipErrorInvalidContext);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxPushCurrent, ctx);
|
||||
|
||||
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
|
||||
if (amdContext == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidContext);
|
||||
}
|
||||
|
||||
hip::g_context = amdContext;
|
||||
g_ctxtStack.push(hip::getCurrentContext());
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDriverGetVersion(int* driverVersion) {
|
||||
HIP_INIT_API(hipDriverGetVersion, driverVersion);
|
||||
|
||||
auto* deviceHandle = g_devices[0]->devices()[0];
|
||||
const auto& info = deviceHandle->info();
|
||||
|
||||
if (driverVersion) {
|
||||
*driverVersion = AMD_PLATFORM_BUILD_NUMBER * 100 +
|
||||
AMD_PLATFORM_REVISION_NUMBER;
|
||||
} else {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetDevice(hipDevice_t* device) {
|
||||
HIP_INIT_API(hipCtxGetDevice, device);
|
||||
|
||||
if (device != nullptr) {
|
||||
for (unsigned int i = 0; i < g_devices.size(); i++) {
|
||||
if (g_devices[i] == hip::getCurrentContext()) {
|
||||
*device = static_cast<hipDevice_t>(i);
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipErrorInvalidContext);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
|
||||
HIP_INIT_API(hipCtxGetApiVersion, apiVersion);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) {
|
||||
HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) {
|
||||
HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
|
||||
HIP_INIT_API(hipCtxSetSharedMemConfig, config);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSynchronize(void) {
|
||||
HIP_INIT_API(hipCtxSynchronize, 1);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetFlags(unsigned int* flags) {
|
||||
HIP_INIT_API(hipCtxGetFlags, flags);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active);
|
||||
|
||||
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if (flags != nullptr) {
|
||||
*flags = 0;
|
||||
}
|
||||
|
||||
if (active != nullptr) {
|
||||
*active = (g_devices[dev] == hip::getCurrentContext())? 1 : 0;
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxRelease, dev);
|
||||
|
||||
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev);
|
||||
|
||||
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
if (pctx == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*pctx = reinterpret_cast<hipCtx_t>(g_devices[dev]);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxReset, dev);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags);
|
||||
|
||||
if (static_cast<unsigned int>(dev) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
} else {
|
||||
HIP_RETURN(hipErrorContextAlreadyInUse);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,234 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) {
|
||||
HIP_INIT_API(hipDeviceGet, device, deviceId);
|
||||
|
||||
if (device != nullptr) {
|
||||
*device = deviceId;
|
||||
} else {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
};
|
||||
|
||||
hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) {
|
||||
|
||||
HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig);
|
||||
|
||||
// No way to set cache config yet.
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) {
|
||||
|
||||
HIP_INIT_API(hipDeviceTotalMem, bytes, device);
|
||||
|
||||
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if (bytes == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
const auto& info = deviceHandle->info();
|
||||
|
||||
*bytes = info.globalMemSize_;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) {
|
||||
|
||||
HIP_INIT_API(hipDeviceComputeCapability, major, minor, device);
|
||||
|
||||
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if (major == nullptr || minor == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
const auto& info = deviceHandle->info();
|
||||
*major = info.gfxipVersion_ / 100;
|
||||
*minor = info.gfxipVersion_ % 100;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetCount(int* count) {
|
||||
HIP_INIT_API(NONE, count);
|
||||
|
||||
HIP_RETURN(ihipDeviceGetCount(count));
|
||||
}
|
||||
|
||||
hipError_t ihipDeviceGetCount(int* count) {
|
||||
if (count == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
// Get all available devices
|
||||
*count = g_devices.size();
|
||||
|
||||
if (*count < 1) {
|
||||
return hipErrorNoDevice;
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) {
|
||||
|
||||
HIP_INIT_API(hipDeviceGetName, (void*)name, len, device);
|
||||
|
||||
if (device < 0 || static_cast<size_t>(device) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if (name == nullptr || len <= 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
const auto& info = deviceHandle->info();
|
||||
const auto nameLen = ::strlen(info.boardName_);
|
||||
|
||||
// Make sure that the size of `dest` is big enough to hold `src` including
|
||||
// trailing zero byte
|
||||
if (nameLen > (cl_uint)(len - 1)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
::strncpy(name, info.boardName_, (nameLen + 1));
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceProperties ( hipDeviceProp_t* props, hipDevice_t device ) {
|
||||
HIP_INIT_API(hipGetDeviceProperties, props, device);
|
||||
|
||||
if (props == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
if (unsigned(device) >= g_devices.size()) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
auto* deviceHandle = g_devices[device]->devices()[0];
|
||||
|
||||
hipDeviceProp_t deviceProps = {0};
|
||||
|
||||
const auto& info = deviceHandle->info();
|
||||
::strncpy(deviceProps.name, info.boardName_, 128);
|
||||
deviceProps.totalGlobalMem = info.globalMemSize_;
|
||||
deviceProps.sharedMemPerBlock = info.localMemSizePerCU_;
|
||||
deviceProps.regsPerBlock = info.availableSGPRs_;
|
||||
deviceProps.warpSize = info.wavefrontWidth_;
|
||||
deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_;
|
||||
deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0];
|
||||
deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1];
|
||||
deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2];
|
||||
deviceProps.maxGridSize[0] = INT32_MAX;
|
||||
deviceProps.maxGridSize[1] = INT32_MAX;
|
||||
deviceProps.maxGridSize[2] = INT32_MAX;
|
||||
deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000;
|
||||
deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000;
|
||||
deviceProps.memoryBusWidth = info.globalMemChannels_ * 32;
|
||||
deviceProps.totalConstMem = info.maxConstantBufferSize_;
|
||||
deviceProps.major = info.gfxipVersion_ / 100;
|
||||
deviceProps.minor = info.gfxipVersion_ % 100;
|
||||
deviceProps.multiProcessorCount = info.maxComputeUnits_;
|
||||
deviceProps.l2CacheSize = info.l2CacheSize_;
|
||||
deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_;
|
||||
deviceProps.computeMode = 0;
|
||||
deviceProps.clockInstructionRate = info.timeStampFrequency_;
|
||||
deviceProps.arch.hasGlobalInt32Atomics = 1;
|
||||
deviceProps.arch.hasGlobalFloatAtomicExch = 1;
|
||||
deviceProps.arch.hasSharedInt32Atomics = 1;
|
||||
deviceProps.arch.hasSharedFloatAtomicExch = 1;
|
||||
deviceProps.arch.hasFloatAtomicAdd = 0;
|
||||
deviceProps.arch.hasGlobalInt64Atomics = 1;
|
||||
deviceProps.arch.hasSharedInt64Atomics = 1;
|
||||
deviceProps.arch.hasDoubles = 1;
|
||||
deviceProps.arch.hasWarpVote = 0;
|
||||
deviceProps.arch.hasWarpBallot = 0;
|
||||
deviceProps.arch.hasWarpShuffle = 0;
|
||||
deviceProps.arch.hasFunnelShift = 0;
|
||||
deviceProps.arch.hasThreadFenceSystem = 1;
|
||||
deviceProps.arch.hasSyncThreadsExt = 0;
|
||||
deviceProps.arch.hasSurfaceFuncs = 0;
|
||||
deviceProps.arch.has3dGrid = 1;
|
||||
deviceProps.arch.hasDynamicParallelism = 0;
|
||||
deviceProps.concurrentKernels = 1;
|
||||
deviceProps.pciDomainID = info.deviceTopology_.pcie.function;
|
||||
deviceProps.pciBusID = info.deviceTopology_.pcie.bus;
|
||||
deviceProps.pciDeviceID = info.deviceTopology_.pcie.device;
|
||||
deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_;
|
||||
//deviceProps.isMultiGpuBoard = info.;
|
||||
deviceProps.canMapHostMemory = 1;
|
||||
deviceProps.gcnArch = info.gfxipVersion_;
|
||||
deviceProps.cooperativeLaunch = info.cooperativeGroups_;
|
||||
deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_;
|
||||
|
||||
deviceProps.maxTexture1D = info.imageMaxBufferSize_;
|
||||
deviceProps.maxTexture2D[0] = info.image2DMaxWidth_;
|
||||
deviceProps.maxTexture2D[1] = info.image2DMaxHeight_;
|
||||
deviceProps.maxTexture3D[0] = info.image3DMaxWidth_;
|
||||
deviceProps.maxTexture3D[1] = info.image3DMaxHeight_;
|
||||
deviceProps.maxTexture3D[2] = info.image3DMaxDepth_;
|
||||
deviceProps.hdpMemFlushCntl = nullptr;
|
||||
deviceProps.hdpRegFlushCntl = nullptr;
|
||||
|
||||
deviceProps.memPitch = info.maxMemAllocSize_;
|
||||
deviceProps.textureAlignment = std::max(info.imageBaseAddressAlignment_, info.imagePitchAlignment_);
|
||||
deviceProps.kernelExecTimeoutEnabled = 0;
|
||||
deviceProps.ECCEnabled = info.errorCorrectionSupport_? 1:0;
|
||||
|
||||
*props = deviceProps;
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator* acc) {
|
||||
HIP_INIT_API(NONE, deviceId, acc);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** av) {
|
||||
HIP_INIT_API(NONE, stream, av);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
@@ -0,0 +1,543 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) {
|
||||
|
||||
HIP_INIT_API(hipChooseDevice, device, properties);
|
||||
|
||||
if (device == nullptr || properties == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*device = 0;
|
||||
cl_uint maxMatchedCount = 0;
|
||||
int count = 0;
|
||||
ihipDeviceGetCount(&count);
|
||||
|
||||
for (cl_int i = 0; i< count; ++i) {
|
||||
hipDeviceProp_t currentProp = {0};
|
||||
cl_uint validPropCount = 0;
|
||||
cl_uint matchedCount = 0;
|
||||
hipError_t err = hipGetDeviceProperties(¤tProp, i);
|
||||
if (properties->major != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.major >= properties->major) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if (properties->minor != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.minor >= properties->minor) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->totalGlobalMem != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.totalGlobalMem >= properties->totalGlobalMem) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->sharedMemPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->maxThreadsPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->totalConstMem != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.totalConstMem >= properties->totalConstMem ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->multiProcessorCount != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.multiProcessorCount >=
|
||||
properties->multiProcessorCount ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->maxThreadsPerMultiProcessor != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.maxThreadsPerMultiProcessor >=
|
||||
properties->maxThreadsPerMultiProcessor ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->memoryClockRate != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.memoryClockRate >= properties->memoryClockRate ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->memoryBusWidth != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->l2CacheSize != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.l2CacheSize >= properties->l2CacheSize ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->regsPerBlock != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.regsPerBlock >= properties->regsPerBlock ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->maxSharedMemoryPerMultiProcessor != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.maxSharedMemoryPerMultiProcessor >=
|
||||
properties->maxSharedMemoryPerMultiProcessor ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(properties->warpSize != 0) {
|
||||
validPropCount++;
|
||||
if(currentProp.warpSize >= properties->warpSize ) {
|
||||
matchedCount++;
|
||||
}
|
||||
}
|
||||
if(validPropCount == matchedCount) {
|
||||
*device = matchedCount > maxMatchedCount ? i : *device;
|
||||
maxMatchedCount = std::max(matchedCount, maxMatchedCount);
|
||||
}
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
|
||||
|
||||
HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device);
|
||||
|
||||
if (pi == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
ihipDeviceGetCount(&count);
|
||||
if (device < 0 || device >= count) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
//FIXME: should we cache the props, or just select from deviceHandle->info_?
|
||||
hipDeviceProp_t prop = {0};
|
||||
hipError_t err = hipGetDeviceProperties(&prop, device);
|
||||
if (err != hipSuccess) {
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
|
||||
switch (attr) {
|
||||
case hipDeviceAttributeMaxThreadsPerBlock:
|
||||
*pi = prop.maxThreadsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimX:
|
||||
*pi = prop.maxThreadsDim[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimY:
|
||||
*pi = prop.maxThreadsDim[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimZ:
|
||||
*pi = prop.maxThreadsDim[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimX:
|
||||
*pi = prop.maxGridSize[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimY:
|
||||
*pi = prop.maxGridSize[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimZ:
|
||||
*pi = prop.maxGridSize[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerBlock:
|
||||
*pi = prop.sharedMemPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeTotalConstantMemory:
|
||||
*pi = prop.totalConstMem;
|
||||
break;
|
||||
case hipDeviceAttributeWarpSize:
|
||||
*pi = prop.warpSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxRegistersPerBlock:
|
||||
*pi = prop.regsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop.clockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop.memoryClockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop.memoryBusWidth;
|
||||
break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop.multiProcessorCount;
|
||||
break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
*pi = prop.computeMode;
|
||||
break;
|
||||
case hipDeviceAttributeL2CacheSize:
|
||||
*pi = prop.l2CacheSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
|
||||
*pi = prop.maxThreadsPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMajor:
|
||||
*pi = prop.major;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMinor:
|
||||
*pi = prop.minor;
|
||||
break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
*pi = prop.pciBusID;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
*pi = prop.concurrentKernels;
|
||||
break;
|
||||
case hipDeviceAttributePciDeviceId:
|
||||
*pi = prop.pciDeviceID;
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
|
||||
*pi = prop.maxSharedMemoryPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeIsMultiGpuBoard:
|
||||
*pi = prop.isMultiGpuBoard;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeLaunch:
|
||||
*pi = prop.cooperativeLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
|
||||
*pi = prop.cooperativeMultiDeviceLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture1DWidth:
|
||||
*pi = prop.maxTexture1D;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DWidth:
|
||||
*pi = prop.maxTexture2D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DHeight:
|
||||
*pi = prop.maxTexture2D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DWidth:
|
||||
*pi = prop.maxTexture3D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DHeight:
|
||||
*pi = prop.maxTexture3D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DDepth:
|
||||
*pi = prop.maxTexture3D[2];
|
||||
break;
|
||||
case hipDeviceAttributeHdpMemFlushCntl:
|
||||
*reinterpret_cast<unsigned int**>(pi) = prop.hdpMemFlushCntl;
|
||||
break;
|
||||
case hipDeviceAttributeHdpRegFlushCntl:
|
||||
*reinterpret_cast<unsigned int**>(pi) = prop.hdpRegFlushCntl;
|
||||
break;
|
||||
case hipDeviceAttributeMaxPitch:
|
||||
*pi = prop.memPitch;
|
||||
break;
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
*pi = prop.textureAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeKernelExecTimeout:
|
||||
*pi = prop.kernelExecTimeoutEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCanMapHostMemory:
|
||||
*pi = prop.canMapHostMemory;
|
||||
break;
|
||||
case hipDeviceAttributeEccEnabled:
|
||||
*pi = prop.ECCEnabled;
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) {
|
||||
|
||||
HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr);
|
||||
|
||||
if (device == nullptr || pciBusIdstr == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
int pciBusID = -1;
|
||||
int pciDeviceID = -1;
|
||||
int pciDomainID = -1;
|
||||
|
||||
if (sscanf (pciBusIdstr, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID) == 0x3) {
|
||||
int count = 0;
|
||||
ihipDeviceGetCount(&count);
|
||||
for (cl_int i = 0; i < count; i++) {
|
||||
int pi = 0;
|
||||
hipDevice_t dev;
|
||||
hipDeviceGet(&dev, i);
|
||||
hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, dev);
|
||||
|
||||
if (pciBusID == pi) {
|
||||
*device = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) {
|
||||
HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig);
|
||||
|
||||
if(cacheConfig == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*cacheConfig = hipFuncCache_t();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) {
|
||||
|
||||
HIP_INIT_API(hipDeviceGetLimit, pValue, limit);
|
||||
|
||||
if(pValue == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
if(limit == hipLimitMallocHeapSize) {
|
||||
hipDeviceProp_t prop;
|
||||
hipGetDeviceProperties(&prop, ihipGetDevice());
|
||||
|
||||
*pValue = prop.totalGlobalMem;
|
||||
HIP_RETURN(hipSuccess);
|
||||
} else {
|
||||
HIP_RETURN(hipErrorUnsupportedLimit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
hipError_t hipDeviceGetP2PAttribute ( int* value, hipDeviceP2PAttr attr, int srcDevice, int dstDevice ) {
|
||||
assert(0);
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
**/
|
||||
|
||||
hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) {
|
||||
|
||||
HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device);
|
||||
|
||||
int count;
|
||||
ihipDeviceGetCount(&count);
|
||||
if (device < 0 || device > count) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if (pciBusId == nullptr || len < 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
hipDeviceProp_t prop;
|
||||
hipGetDeviceProperties(&prop, device);
|
||||
|
||||
snprintf (pciBusId, len, "%04x:%02x:%02x.0",
|
||||
prop.pciDomainID,
|
||||
prop.pciBusID,
|
||||
prop.pciDeviceID);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) {
|
||||
HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig);
|
||||
|
||||
*pConfig = hipSharedMemBankSizeFourByte;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceReset ( void ) {
|
||||
HIP_INIT_API(hipDeviceReset);
|
||||
|
||||
/* FIXME */
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) {
|
||||
HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig);
|
||||
|
||||
// No way to set cache config yet.
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) {
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
|
||||
HIP_INIT_API(hipDeviceSetSharedMemConfig, config);
|
||||
|
||||
// No way to set cache config yet.
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSynchronize ( void ) {
|
||||
HIP_INIT_API(hipDeviceSynchronize);
|
||||
|
||||
hip::syncStreams();
|
||||
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
|
||||
if (!queue) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
|
||||
queue->finish();
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
int ihipGetDevice() {
|
||||
for (unsigned int i = 0; i < g_devices.size(); i++) {
|
||||
if (g_devices[i] == hip::getCurrentContext()) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
hipError_t hipGetDevice ( int* deviceId ) {
|
||||
HIP_INIT_API(hipGetDevice, deviceId);
|
||||
|
||||
if (deviceId != nullptr) {
|
||||
int dev = ihipGetDevice();
|
||||
if (dev == -1) {
|
||||
HIP_RETURN(hipErrorNoDevice);
|
||||
}
|
||||
*deviceId = dev;
|
||||
HIP_RETURN(hipSuccess);
|
||||
} else {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceCount ( int* count ) {
|
||||
HIP_INIT_API(hipGetDeviceCount, count);
|
||||
|
||||
HIP_RETURN(ihipDeviceGetCount(count));
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceFlags ( unsigned int* flags ) {
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipIpcGetEventHandle ( hipIpcEventHandle_t* handle, hipEvent_t event ) {
|
||||
HIP_INIT_API(NONE, handle, event);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipIpcOpenEventHandle ( hipEvent_t* event, hipIpcEventHandle_t handle ) {
|
||||
HIP_INIT_API(NONE, event, handle);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipSetDevice ( int device ) {
|
||||
HIP_INIT_API(hipSetDevice, device);
|
||||
|
||||
if (static_cast<unsigned int>(device) < g_devices.size()) {
|
||||
hip::setCurrentContext(device);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
hipError_t hipSetDeviceFlags ( unsigned int flags ) {
|
||||
HIP_INIT_API(hipSetDeviceFlags, flags);
|
||||
|
||||
/* FIXME */
|
||||
/* Not all of Ctx may be implemented */
|
||||
|
||||
unsigned supportedFlags =
|
||||
hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax;
|
||||
|
||||
if (flags & (~supportedFlags)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipSetValidDevices ( int* device_arr, int len ) {
|
||||
HIP_INIT_API(NONE, device_arr, len);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) {
|
||||
HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount);
|
||||
|
||||
amd::Device* amd_dev_obj1 = nullptr;
|
||||
amd::Device* amd_dev_obj2 = nullptr;
|
||||
const int numDevices = static_cast<int>(g_devices.size());
|
||||
|
||||
if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) {
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
if ((linktype == nullptr) || (hopcount == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd_dev_obj1 = g_devices[device1]->devices()[0];
|
||||
amd_dev_obj2 = g_devices[device2]->devices()[0];
|
||||
|
||||
if (!amd_dev_obj1->findLinkTypeAndHopCount(amd_dev_obj2, linktype, hopcount)) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipGetLastError()
|
||||
{
|
||||
HIP_INIT_API(hipGetLastError);
|
||||
hipError_t err = hip::g_lastError;
|
||||
hip::g_lastError = hipSuccess;
|
||||
return err;
|
||||
}
|
||||
|
||||
hipError_t hipPeekAtLastError()
|
||||
{
|
||||
HIP_INIT_API(hipPeekAtLastError);
|
||||
hipError_t err = hip::g_lastError;
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
|
||||
const char *hipGetErrorName(hipError_t hip_error)
|
||||
{
|
||||
switch (hip_error) {
|
||||
case hipSuccess:
|
||||
return "hipSuccess";
|
||||
case hipErrorOutOfMemory:
|
||||
return "hipErrorOutOfMemory";
|
||||
case hipErrorNotInitialized:
|
||||
return "hipErrorNotInitialized";
|
||||
case hipErrorDeinitialized:
|
||||
return "hipErrorDeinitialized";
|
||||
case hipErrorProfilerDisabled:
|
||||
return "hipErrorProfilerDisabled";
|
||||
case hipErrorProfilerNotInitialized:
|
||||
return "hipErrorProfilerNotInitialized";
|
||||
case hipErrorProfilerAlreadyStarted:
|
||||
return "hipErrorProfilerAlreadyStarted";
|
||||
case hipErrorProfilerAlreadyStopped:
|
||||
return "hipErrorProfilerAlreadyStopped";
|
||||
case hipErrorInvalidImage:
|
||||
return "hipErrorInvalidImage";
|
||||
case hipErrorInvalidContext:
|
||||
return "hipErrorInvalidContext";
|
||||
case hipErrorContextAlreadyCurrent:
|
||||
return "hipErrorContextAlreadyCurrent";
|
||||
case hipErrorMapFailed:
|
||||
return "hipErrorMapFailed";
|
||||
case hipErrorUnmapFailed:
|
||||
return "hipErrorUnmapFailed";
|
||||
case hipErrorArrayIsMapped:
|
||||
return "hipErrorArrayIsMapped";
|
||||
case hipErrorAlreadyMapped:
|
||||
return "hipErrorAlreadyMapped";
|
||||
case hipErrorNoBinaryForGpu:
|
||||
return "hipErrorNoBinaryForGpu";
|
||||
case hipErrorAlreadyAcquired:
|
||||
return "hipErrorAlreadyAcquired";
|
||||
case hipErrorNotMapped:
|
||||
return "hipErrorNotMapped";
|
||||
case hipErrorNotMappedAsArray:
|
||||
return "hipErrorNotMappedAsArray";
|
||||
case hipErrorNotMappedAsPointer:
|
||||
return "hipErrorNotMappedAsPointer";
|
||||
case hipErrorECCNotCorrectable:
|
||||
return "hipErrorECCNotCorrectable";
|
||||
case hipErrorUnsupportedLimit:
|
||||
return "hipErrorUnsupportedLimit";
|
||||
case hipErrorContextAlreadyInUse:
|
||||
return "hipErrorContextAlreadyInUse";
|
||||
case hipErrorPeerAccessUnsupported:
|
||||
return "hipErrorPeerAccessUnsupported";
|
||||
case hipErrorInvalidKernelFile:
|
||||
return "hipErrorInvalidKernelFile";
|
||||
case hipErrorInvalidGraphicsContext:
|
||||
return "hipErrorInvalidGraphicsContext";
|
||||
case hipErrorInvalidSource:
|
||||
return "hipErrorInvalidSource";
|
||||
case hipErrorFileNotFound:
|
||||
return "hipErrorFileNotFound";
|
||||
case hipErrorSharedObjectSymbolNotFound:
|
||||
return "hipErrorSharedObjectSymbolNotFound";
|
||||
case hipErrorSharedObjectInitFailed:
|
||||
return "hipErrorSharedObjectInitFailed";
|
||||
case hipErrorOperatingSystem:
|
||||
return "hipErrorOperatingSystem";
|
||||
case hipErrorSetOnActiveProcess:
|
||||
return "hipErrorSetOnActiveProcess";
|
||||
case hipErrorInvalidHandle:
|
||||
return "hipErrorInvalidHandle";
|
||||
case hipErrorNotFound:
|
||||
return "hipErrorNotFound";
|
||||
case hipErrorIllegalAddress:
|
||||
return "hipErrorIllegalAddress";
|
||||
case hipErrorMissingConfiguration:
|
||||
return "hipErrorMissingConfiguration";
|
||||
case hipErrorLaunchFailure:
|
||||
return "hipErrorLaunchFailure";
|
||||
case hipErrorPriorLaunchFailure:
|
||||
return "hipErrorPriorLaunchFailure";
|
||||
case hipErrorLaunchTimeOut:
|
||||
return "hipErrorLaunchTimeOut";
|
||||
case hipErrorLaunchOutOfResources:
|
||||
return "hipErrorLaunchOutOfResources";
|
||||
case hipErrorInvalidDeviceFunction:
|
||||
return "hipErrorInvalidDeviceFunction";
|
||||
case hipErrorInvalidConfiguration:
|
||||
return "hipErrorInvalidConfiguration";
|
||||
case hipErrorInvalidDevice:
|
||||
return "hipErrorInvalidDevice";
|
||||
case hipErrorInvalidValue:
|
||||
return "hipErrorInvalidValue";
|
||||
case hipErrorInvalidDevicePointer:
|
||||
return "hipErrorInvalidDevicePointer";
|
||||
case hipErrorInvalidMemcpyDirection:
|
||||
return "hipErrorInvalidMemcpyDirection";
|
||||
case hipErrorUnknown:
|
||||
return "hipErrorUnknown";
|
||||
case hipErrorNotReady:
|
||||
return "hipErrorNotReady";
|
||||
case hipErrorNoDevice:
|
||||
return "hipErrorNoDevice";
|
||||
case hipErrorPeerAccessAlreadyEnabled:
|
||||
return "hipErrorPeerAccessAlreadyEnabled";
|
||||
case hipErrorPeerAccessNotEnabled:
|
||||
return "hipErrorPeerAccessNotEnabled";
|
||||
case hipErrorRuntimeMemory:
|
||||
return "hipErrorRuntimeMemory";
|
||||
case hipErrorRuntimeOther:
|
||||
return "hipErrorRuntimeOther";
|
||||
case hipErrorHostMemoryAlreadyRegistered:
|
||||
return "hipErrorHostMemoryAlreadyRegistered";
|
||||
case hipErrorHostMemoryNotRegistered:
|
||||
return "hipErrorHostMemoryNotRegistered";
|
||||
case hipErrorTbd:
|
||||
return "hipErrorTbd";
|
||||
default:
|
||||
return "hipErrorUnknown";
|
||||
};
|
||||
}
|
||||
|
||||
const char *hipGetErrorString(hipError_t hip_error)
|
||||
{
|
||||
return hipGetErrorName(hip_error);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_event.hpp"
|
||||
|
||||
namespace hip {
|
||||
|
||||
bool Event::ready() {
|
||||
event_->notifyCmdQueue();
|
||||
|
||||
return (event_->status() == CL_COMPLETE);
|
||||
}
|
||||
|
||||
hipError_t Event::query() {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if (event_ == nullptr) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
return ready() ? hipSuccess : hipErrorNotReady;
|
||||
}
|
||||
|
||||
hipError_t Event::synchronize() {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if (event_ == nullptr) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
event_->awaitCompletion();
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t Event::elapsedTime(Event& eStop, float& ms) {
|
||||
amd::ScopedLock startLock(lock_);
|
||||
|
||||
if (this == &eStop) {
|
||||
if (event_ == nullptr) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
if (flags & hipEventDisableTiming) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
if (!ready()) {
|
||||
return hipErrorNotReady;
|
||||
}
|
||||
|
||||
ms = 0.f;
|
||||
return hipSuccess;
|
||||
}
|
||||
amd::ScopedLock stopLock(eStop.lock_);
|
||||
|
||||
if (event_ == nullptr ||
|
||||
eStop.event_ == nullptr) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
if ((flags | eStop.flags) & hipEventDisableTiming) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
if (!ready() || !eStop.ready()) {
|
||||
return hipErrorNotReady;
|
||||
}
|
||||
|
||||
if (event_ != eStop.event_) {
|
||||
ms = static_cast<float>(static_cast<int64_t>(eStop.event_->profilingInfo().end_ -
|
||||
event_->profilingInfo().start_))/1000000.f;
|
||||
} else {
|
||||
ms = 0.f;
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t Event::streamWait(amd::HostQueue* hostQueue, uint flags) {
|
||||
if (stream_ == hostQueue) return hipSuccess;
|
||||
|
||||
amd::ScopedLock lock(lock_);
|
||||
bool retain = false;
|
||||
|
||||
if (event_ == nullptr) {
|
||||
event_ = stream_->getLastQueuedCommand(true);
|
||||
retain = true;
|
||||
}
|
||||
|
||||
if (!event_->notifyCmdQueue()) {
|
||||
return hipErrorLaunchOutOfResources;
|
||||
}
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
eventWaitList.push_back(event_);
|
||||
|
||||
amd::Command* command = new amd::Marker(*hostQueue, false, eventWaitList);
|
||||
if (command == NULL) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
command->enqueue();
|
||||
command->release();
|
||||
|
||||
if (retain) {
|
||||
event_->release();
|
||||
event_ = nullptr;
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
void Event::addMarker(amd::HostQueue* queue, amd::Command* command) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
stream_ = queue;
|
||||
|
||||
if (event_ != nullptr) {
|
||||
event_->release();
|
||||
}
|
||||
|
||||
event_ = &command->event();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) {
|
||||
if (event == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming |
|
||||
hipEventReleaseToDevice | hipEventReleaseToSystem;
|
||||
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
|
||||
|
||||
const bool illegalFlags =
|
||||
(flags & ~supportedFlags) || // can't set any unsupported flags.
|
||||
(flags & releaseFlags) == releaseFlags; // can't set both release flags
|
||||
|
||||
if (!illegalFlags) {
|
||||
hip::Event* e = new hip::Event(flags);
|
||||
|
||||
if (e == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
*event = reinterpret_cast<hipEvent_t>(e);
|
||||
} else {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t ihipEventQuery(hipEvent_t event) {
|
||||
if (event == nullptr) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event);
|
||||
|
||||
return e->query();
|
||||
}
|
||||
|
||||
hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) {
|
||||
HIP_INIT_API(hipEventCreateWithFlags, event, flags);
|
||||
|
||||
HIP_RETURN(ihipEventCreateWithFlags(event, flags));
|
||||
}
|
||||
|
||||
hipError_t hipEventCreate(hipEvent_t* event) {
|
||||
HIP_INIT_API(hipEventCreate, event);
|
||||
|
||||
HIP_RETURN(ihipEventCreateWithFlags(event, 0));
|
||||
}
|
||||
|
||||
hipError_t hipEventDestroy(hipEvent_t event) {
|
||||
HIP_INIT_API(hipEventDestroy, event);
|
||||
|
||||
if (event == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
delete reinterpret_cast<hip::Event*>(event);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) {
|
||||
HIP_INIT_API(hipEventElapsedTime, ms, start, stop);
|
||||
|
||||
if (start == nullptr || stop == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
if (ms == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
hip::Event* eStart = reinterpret_cast<hip::Event*>(start);
|
||||
hip::Event* eStop = reinterpret_cast<hip::Event*>(stop);
|
||||
|
||||
HIP_RETURN(eStart->elapsedTime(*eStop, *ms));
|
||||
}
|
||||
|
||||
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
|
||||
HIP_INIT_API(hipEventRecord, event, stream);
|
||||
|
||||
if (event == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event);
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
|
||||
amd::Command* command = queue->getLastQueuedCommand(true);
|
||||
|
||||
if (command == nullptr) {
|
||||
command = new amd::Marker(*queue, false);
|
||||
command->enqueue();
|
||||
}
|
||||
|
||||
e->addMarker(queue, command);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipEventSynchronize(hipEvent_t event) {
|
||||
HIP_INIT_API(hipEventSynchronize, event);
|
||||
|
||||
if (event == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event);
|
||||
|
||||
HIP_RETURN(e->synchronize());
|
||||
}
|
||||
|
||||
hipError_t hipEventQuery(hipEvent_t event) {
|
||||
HIP_INIT_API(hipEventQuery, event);
|
||||
|
||||
HIP_RETURN(ihipEventQuery(event));
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_EVENT_H
|
||||
#define HIP_EVENT_H
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
|
||||
namespace hip {
|
||||
|
||||
class TimerMarker: public amd::Marker {
|
||||
public:
|
||||
TimerMarker(amd::HostQueue& queue) : amd::Marker(queue, false) {
|
||||
profilingInfo_.enabled_ = true;
|
||||
profilingInfo_.callback_ = nullptr;
|
||||
profilingInfo_.start_ = profilingInfo_.end_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
class Event {
|
||||
public:
|
||||
Event(unsigned int flags) : flags(flags), lock_("hipEvent_t"), stream_(getNullStream()), event_(nullptr) {}
|
||||
~Event() {
|
||||
if (event_ != nullptr) {
|
||||
event_->release();
|
||||
}
|
||||
}
|
||||
unsigned int flags;
|
||||
|
||||
hipError_t query();
|
||||
hipError_t synchronize();
|
||||
hipError_t elapsedTime(Event& stop, float& ms);
|
||||
hipError_t streamWait(amd::HostQueue* queue, uint flags);
|
||||
|
||||
void addMarker(amd::HostQueue* queue, amd::Command* command);
|
||||
|
||||
private:
|
||||
amd::Monitor lock_;
|
||||
amd::HostQueue* stream_;
|
||||
amd::Event* event_;
|
||||
|
||||
bool ready();
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif // HIP_EVEMT_H
|
||||
@@ -0,0 +1,219 @@
|
||||
EXPORTS
|
||||
hipChooseDevice
|
||||
hipCtxCreate
|
||||
hipCtxDestroy
|
||||
hipCtxDisablePeerAccess
|
||||
hipCtxEnablePeerAccess
|
||||
hipCtxGetApiVersion
|
||||
hipCtxGetCacheConfig
|
||||
hipCtxGetCurrent
|
||||
hipCtxGetDevice
|
||||
hipCtxGetFlags
|
||||
hipCtxGetSharedMemConfig
|
||||
hipCtxPopCurrent
|
||||
hipCtxPushCurrent
|
||||
hipCtxSetCacheConfig
|
||||
hipCtxSetCurrent
|
||||
hipCtxSetSharedMemConfig
|
||||
hipCtxSynchronize
|
||||
hipDeviceCanAccessPeer
|
||||
hipDeviceComputeCapability
|
||||
hipDeviceDisablePeerAccess
|
||||
hipDeviceEnablePeerAccess
|
||||
hipDeviceGet
|
||||
hipDeviceGetAttribute
|
||||
hipDeviceGetByPCIBusId
|
||||
hipDeviceGetCacheConfig
|
||||
hipDeviceGetStreamPriorityRange
|
||||
hipDeviceGetLimit
|
||||
hipDeviceGetName
|
||||
hipDeviceGetPCIBusId
|
||||
hipDeviceGetSharedMemConfig
|
||||
hipDevicePrimaryCtxGetState
|
||||
hipDevicePrimaryCtxRelease
|
||||
hipDevicePrimaryCtxReset
|
||||
hipDevicePrimaryCtxRetain
|
||||
hipDevicePrimaryCtxSetFlags
|
||||
hipDeviceReset
|
||||
hipDeviceSetCacheConfig
|
||||
hipDeviceSetSharedMemConfig
|
||||
hipDeviceSynchronize
|
||||
hipDeviceTotalMem
|
||||
hipDriverGetVersion
|
||||
hipEventCreate
|
||||
hipEventCreateWithFlags
|
||||
hipEventDestroy
|
||||
hipEventElapsedTime
|
||||
hipEventQuery
|
||||
hipEventRecord
|
||||
hipEventSynchronize
|
||||
hipExtGetLinkTypeAndHopCount
|
||||
hipExtLaunchMultiKernelMultiDevice
|
||||
hipExtMallocWithFlags
|
||||
hipExtModuleLaunchKernel
|
||||
hipFree
|
||||
hipFreeArray
|
||||
hipFuncSetCacheConfig
|
||||
hipGetDevice
|
||||
hipGetDeviceCount
|
||||
hipGetDeviceProperties
|
||||
hipGetErrorName
|
||||
hipGetErrorString
|
||||
hipGetLastError
|
||||
hipMemAllocHost
|
||||
hipHostAlloc
|
||||
hipHostFree
|
||||
hipHostGetDevicePointer
|
||||
hipHostGetFlags
|
||||
hipHostMalloc
|
||||
hipHostRegister
|
||||
hipHostUnregister
|
||||
hipInit
|
||||
hipIpcCloseMemHandle
|
||||
hipIpcGetMemHandle
|
||||
hipIpcOpenMemHandle
|
||||
hipMalloc
|
||||
hipMalloc3D
|
||||
hipMalloc3DArray
|
||||
hipMallocManaged
|
||||
hipArrayCreate
|
||||
hipArray3DCreate
|
||||
hipMallocArray
|
||||
hipMemAllocPitch
|
||||
hipMallocPitch
|
||||
hipMemcpy
|
||||
hipMemcpyWithStream
|
||||
hipMemcpyParam2D
|
||||
hipMemcpy2D
|
||||
hipMemcpy2DAsync
|
||||
hipMemcpy2DToArray
|
||||
hipMemcpy3D
|
||||
hipMemcpy3DAsync
|
||||
hipMemcpyAsync
|
||||
hipMemcpyDtoD
|
||||
hipMemcpyDtoDAsync
|
||||
hipMemcpyDtoH
|
||||
hipMemcpyDtoHAsync
|
||||
hipMemcpyFromSymbol
|
||||
hipMemcpyFromSymbolAsync
|
||||
hipMemcpyHtoD
|
||||
hipMemcpyHtoDAsync
|
||||
hipMemcpyPeer
|
||||
hipMemcpyPeerAsync
|
||||
hipMemcpyToArray
|
||||
hipMemcpyFromArray
|
||||
hipMemcpyToSymbol
|
||||
hipMemcpyToSymbolAsync
|
||||
hipMemGetAddressRange
|
||||
hipGetSymbolAddress
|
||||
hipGetSymbolSize
|
||||
hipMemGetInfo
|
||||
hipMemPtrGetInfo
|
||||
hipMemset
|
||||
hipMemsetAsync
|
||||
hipMemsetD8
|
||||
hipMemsetD8Async
|
||||
hipMemsetD16
|
||||
hipMemsetD16Async
|
||||
hipMemsetD32
|
||||
hipMemsetD32Async
|
||||
hipMemset2D
|
||||
hipMemset2DAsync
|
||||
hipMemset3D
|
||||
hipMemset3DAsync
|
||||
hipModuleGetFunction
|
||||
hipModuleGetGlobal
|
||||
hipModuleGetTexRef
|
||||
hipModuleLaunchKernel
|
||||
hipModuleLaunchKernelExt
|
||||
hipLaunchCooperativeKernel
|
||||
hipLaunchCooperativeKernelMultiDevice
|
||||
hipHccModuleLaunchKernel
|
||||
hipModuleLoad
|
||||
hipModuleLoadData
|
||||
hipModuleLoadDataEx
|
||||
hipModuleUnload
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
|
||||
hipFuncGetAttributes
|
||||
hipPeekAtLastError
|
||||
hipPointerGetAttributes
|
||||
hipProfilerStart
|
||||
hipProfilerStop
|
||||
hipRuntimeGetVersion
|
||||
hipSetDevice
|
||||
hipSetDeviceFlags
|
||||
hipStreamAddCallback
|
||||
hipStreamCreate
|
||||
hipStreamCreateWithFlags
|
||||
hipStreamCreateWithPriority
|
||||
hipStreamDestroy
|
||||
hipStreamGetFlags
|
||||
hipStreamQuery
|
||||
hipStreamSynchronize
|
||||
hipStreamWaitEvent
|
||||
__hipPopCallConfiguration
|
||||
__hipPushCallConfiguration
|
||||
__hipRegisterFatBinary
|
||||
__hipRegisterFunction
|
||||
__hipRegisterVar
|
||||
__hipUnregisterFatBinary
|
||||
__gnu_h2f_ieee
|
||||
__gnu_f2h_ieee
|
||||
hipConfigureCall
|
||||
hipSetupArgument
|
||||
hipLaunchByPtr
|
||||
hipLaunchKernel
|
||||
hipRegisterApiCallback
|
||||
hipRemoveApiCallback
|
||||
hipRegisterActivityCallback
|
||||
hipRemoveActivityCallback
|
||||
hipApiName
|
||||
hipKernelNameRef
|
||||
hipCreateTextureObject
|
||||
hipDestroyTextureObject
|
||||
hipGetTextureObjectResourceDesc
|
||||
hipGetTextureObjectResourceViewDesc
|
||||
hipGetTextureObjectTextureDesc
|
||||
hipBindTexture
|
||||
hipBindTexture2D
|
||||
hipBindTextureToArray
|
||||
hipBindTextureToMipmappedArray
|
||||
hipUnbindTexture
|
||||
ihipUnbindTextureImpl
|
||||
hipGetChannelDesc
|
||||
hipGetTextureAlignmentOffset
|
||||
hipGetTextureReference
|
||||
hipTexRefSetFormat
|
||||
hipTexRefSetFlags
|
||||
hipTexRefSetFilterMode
|
||||
hipTexRefSetAddressMode
|
||||
hipTexRefSetArray
|
||||
hipTexRefSetAddress
|
||||
hipTexRefSetAddress2D
|
||||
hipTexRefGetAddress
|
||||
hipTexRefGetAddressMode
|
||||
hipTexRefGetArray
|
||||
hipTexRefSetArray
|
||||
ihipBindTextureImpl
|
||||
ihipBindTextureToArrayImpl
|
||||
hipCreateChannelDesc
|
||||
hipProfilerStart
|
||||
hipProfilerStop
|
||||
hipHccGetAccelerator
|
||||
hipHccGetAcceleratorView
|
||||
hipCreateSurfaceObject
|
||||
hipDestroySurfaceObject
|
||||
hipInitActivityCallback
|
||||
hipEnableActivityCallback
|
||||
hipGetCmdName
|
||||
hiprtcAddNameExpression
|
||||
hiprtcCompileProgram
|
||||
hiprtcCreateProgram
|
||||
hiprtcDestroyProgram
|
||||
hiprtcGetLoweredName
|
||||
hiprtcGetProgramLog
|
||||
hiprtcGetProgramLogSize
|
||||
hiprtcGetCode
|
||||
hiprtcGetCodeSize
|
||||
hiprtcGetErrorString
|
||||
@@ -0,0 +1,239 @@
|
||||
HIP_1.0 {
|
||||
global:
|
||||
hipChooseDevice;
|
||||
hipCtxCreate;
|
||||
hipCtxDestroy;
|
||||
hipCtxDisablePeerAccess;
|
||||
hipCtxEnablePeerAccess;
|
||||
hipCtxGetApiVersion;
|
||||
hipCtxGetCacheConfig;
|
||||
hipCtxGetCurrent;
|
||||
hipCtxGetDevice;
|
||||
hipCtxGetFlags;
|
||||
hipCtxGetSharedMemConfig;
|
||||
hipCtxPopCurrent;
|
||||
hipCtxPushCurrent;
|
||||
hipCtxSetCacheConfig;
|
||||
hipCtxSetCurrent;
|
||||
hipCtxSetSharedMemConfig;
|
||||
hipCtxSynchronize;
|
||||
hipDeviceCanAccessPeer;
|
||||
hipDeviceComputeCapability;
|
||||
hipDeviceDisablePeerAccess;
|
||||
hipDeviceEnablePeerAccess;
|
||||
hipDeviceGet;
|
||||
hipDeviceGetAttribute;
|
||||
hipDeviceGetByPCIBusId;
|
||||
hipDeviceGetCacheConfig;
|
||||
hipDeviceGetStreamPriorityRange;
|
||||
hipDeviceGetLimit;
|
||||
hipDeviceGetName;
|
||||
hipDeviceGetPCIBusId;
|
||||
hipDeviceGetSharedMemConfig;
|
||||
hipDevicePrimaryCtxGetState;
|
||||
hipDevicePrimaryCtxRelease;
|
||||
hipDevicePrimaryCtxReset;
|
||||
hipDevicePrimaryCtxRetain;
|
||||
hipDevicePrimaryCtxSetFlags;
|
||||
hipDeviceReset;
|
||||
hipDeviceSetCacheConfig;
|
||||
hipDeviceSetSharedMemConfig;
|
||||
hipDeviceSynchronize;
|
||||
hipDeviceTotalMem;
|
||||
hipDriverGetVersion;
|
||||
hipEventCreate;
|
||||
hipEventCreateWithFlags;
|
||||
hipEventDestroy;
|
||||
hipEventElapsedTime;
|
||||
hipEventQuery;
|
||||
hipEventRecord;
|
||||
hipEventSynchronize;
|
||||
hipExtGetLinkTypeAndHopCount;
|
||||
hipExtLaunchMultiKernelMultiDevice;
|
||||
hipExtMallocWithFlags;
|
||||
hipExtModuleLaunchKernel;
|
||||
hipFree;
|
||||
hipFreeArray;
|
||||
hipFuncSetCacheConfig;
|
||||
hipGetDevice;
|
||||
hipGetDeviceCount;
|
||||
hipGetDeviceProperties;
|
||||
hipGetErrorName;
|
||||
hipGetErrorString;
|
||||
hipGetLastError;
|
||||
hipMemAllocHost;
|
||||
hipHostAlloc;
|
||||
hipHostFree;
|
||||
hipHostGetDevicePointer;
|
||||
hipHostGetFlags;
|
||||
hipHostMalloc;
|
||||
hipHostRegister;
|
||||
hipHostUnregister;
|
||||
hipInit;
|
||||
hipIpcCloseMemHandle;
|
||||
hipIpcGetMemHandle;
|
||||
hipIpcOpenMemHandle;
|
||||
hipMalloc;
|
||||
hipMalloc3D;
|
||||
hipMalloc3DArray;
|
||||
hipMallocManaged;
|
||||
hipArrayCreate;
|
||||
hipArray3DCreate;
|
||||
hipMallocArray;
|
||||
hipMallocPitch;
|
||||
hipMemAllocPitch;
|
||||
hipMemcpy;
|
||||
hipMemcpyWithStream;
|
||||
hipMemcpyParam2D;
|
||||
hipMemcpy2D;
|
||||
hipMemcpy2DAsync;
|
||||
hipMemcpy2DToArray;
|
||||
hipMemcpy3D;
|
||||
hipMemcpy3DAsync;
|
||||
hipMemcpyAsync;
|
||||
hipMemcpyDtoD;
|
||||
hipMemcpyDtoDAsync;
|
||||
hipMemcpyDtoH;
|
||||
hipMemcpyDtoHAsync;
|
||||
hipMemcpyFromSymbol;
|
||||
hipMemcpyFromSymbolAsync;
|
||||
hipMemcpyHtoD;
|
||||
hipMemcpyHtoDAsync;
|
||||
hipMemcpyPeer;
|
||||
hipMemcpyPeerAsync;
|
||||
hipMemcpyToArray;
|
||||
hipMemcpyFromArray;
|
||||
hipMemcpyToSymbol;
|
||||
hipMemcpyToSymbolAsync;
|
||||
hipMemGetAddressRange;
|
||||
hipGetSymbolAddress;
|
||||
hipGetSymbolSize;
|
||||
hipMemGetInfo;
|
||||
hipMemPtrGetInfo;
|
||||
hipMemset;
|
||||
hipMemsetAsync;
|
||||
hipMemsetD8;
|
||||
hipMemsetD8Async;
|
||||
hipMemsetD16;
|
||||
hipMemsetD16Async;
|
||||
hipMemsetD32;
|
||||
hipMemsetD32Async;
|
||||
hipMemset2D;
|
||||
hipMemset2DAsync;
|
||||
hipMemset3D;
|
||||
hipMemset3DAsync;
|
||||
hipModuleGetFunction;
|
||||
hipModuleGetGlobal;
|
||||
hipModuleGetTexRef;
|
||||
hipModuleLaunchKernel;
|
||||
hipModuleLaunchKernelExt;
|
||||
hipLaunchCooperativeKernel;
|
||||
hipLaunchCooperativeKernelMultiDevice;
|
||||
hipModuleLoad;
|
||||
hipModuleLoadData;
|
||||
hipModuleLoadDataEx;
|
||||
hipModuleUnload;
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessor;
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
|
||||
hipFuncGetAttributes;
|
||||
hipPeekAtLastError;
|
||||
hipPointerGetAttributes;
|
||||
hipProfilerStart;
|
||||
hipProfilerStop;
|
||||
hipRuntimeGetVersion;
|
||||
hipSetDevice;
|
||||
hipSetDeviceFlags;
|
||||
hipStreamAddCallback;
|
||||
hipStreamCreate;
|
||||
hipStreamCreateWithFlags;
|
||||
hipStreamCreateWithPriority;
|
||||
hipStreamDestroy;
|
||||
hipStreamGetFlags;
|
||||
hipStreamQuery;
|
||||
hipStreamSynchronize;
|
||||
hipStreamWaitEvent;
|
||||
__hipPopCallConfiguration;
|
||||
__hipPushCallConfiguration;
|
||||
__hipRegisterFatBinary;
|
||||
__hipRegisterFunction;
|
||||
__hipRegisterVar;
|
||||
__hipUnregisterFatBinary;
|
||||
__gnu_h2f_ieee;
|
||||
__gnu_f2h_ieee;
|
||||
hipConfigureCall;
|
||||
hipSetupArgument;
|
||||
hipLaunchByPtr;
|
||||
hipLaunchKernel;
|
||||
hipRegisterApiCallback;
|
||||
hipRemoveApiCallback;
|
||||
hipRegisterActivityCallback;
|
||||
hipRemoveActivityCallback;
|
||||
hipApiName;
|
||||
hipKernelNameRef;
|
||||
hipProfilerStart;
|
||||
hipProfilerStop;
|
||||
hiprtcCompileProgram;
|
||||
hiprtcCreateProgram;
|
||||
hiprtcDestroyProgram;
|
||||
hiprtcGetLoweredName;
|
||||
hiprtcGetProgramLog;
|
||||
hiprtcGetProgramLogSize;
|
||||
hiprtcGetCode;
|
||||
hiprtcGetCodeSize;
|
||||
hiprtcGetErrorString;
|
||||
extern "C++" {
|
||||
hip_impl::hipLaunchKernelGGLImpl*;
|
||||
hip_impl::demangle*;
|
||||
hipCreateTextureObject*;
|
||||
hipDestroyTextureObject*;
|
||||
hipGetTextureObjectResourceDesc*;
|
||||
hipGetTextureObjectResourceViewDesc*;
|
||||
hipGetTextureObjectTextureDesc*;
|
||||
hipBindTexture*;
|
||||
hipBindTexture2D*;
|
||||
hipBindTextureToArray*;
|
||||
hipBindTextureToMipmappedArray*;
|
||||
hipUnbindTexture*;
|
||||
ihipUnbindTextureImpl*;
|
||||
hipGetChannelDesc*;
|
||||
hipGetTextureAlignmentOffset*;
|
||||
hipGetTextureReference*;
|
||||
hipTexRefSetFormat*;
|
||||
hipTexRefSetFlags*;
|
||||
hipTexRefSetFilterMode*;
|
||||
hipTexRefSetAddressMode*;
|
||||
hipTexRefSetArray*;
|
||||
hipTexRefSetAddress*;
|
||||
hipTexRefSetAddress2D*;
|
||||
hipTexRefGetAddress*;
|
||||
hipTexRefGetAddressMode*;
|
||||
hipTexRefGetArray*;
|
||||
hipTexRefSetArray*;
|
||||
hipCreateChannelDesc*;
|
||||
ihipBindTextureImpl*;
|
||||
ihipBindTextureToArrayImpl*;
|
||||
hipHccGetAccelerator*;
|
||||
hipHccGetAcceleratorView*;
|
||||
hipCreateSurfaceObject*;
|
||||
hipDestroySurfaceObject*;
|
||||
hipHccModuleLaunchKernel*;
|
||||
hipExtModuleLaunchKernel*;
|
||||
hipInitActivityCallback*;
|
||||
hipEnableActivityCallback*;
|
||||
hipGetCmdName*;
|
||||
hiprtcAddNameExpression*;
|
||||
hiprtcCompileProgram*;
|
||||
hiprtcCreateProgram*;
|
||||
hiprtcDestroyProgram*;
|
||||
hiprtcGetLoweredName*;
|
||||
hiprtcGetProgramLog*;
|
||||
hiprtcGetProgramLogSize*;
|
||||
hiprtcGetCode*;
|
||||
hiprtcGetCodeSize*;
|
||||
hiprtcGetErrorString*;
|
||||
hiprtcVersion*;
|
||||
hiprtcGetTypeName*;
|
||||
};
|
||||
local:
|
||||
*;
|
||||
};
|
||||
@@ -0,0 +1,75 @@
|
||||
#define STR(__macro__) #__macro__
|
||||
#define XSTR(__macro__) STR(__macro__)
|
||||
|
||||
#if defined(_DEBUG)
|
||||
#define DEBUG_ONLY(x) x
|
||||
#else
|
||||
#define DEBUG_ONLY(x)
|
||||
#endif
|
||||
|
||||
#define VERSION_PREFIX_MAJOR 2
|
||||
#define VERSION_PREFIX_MINOR 0
|
||||
|
||||
|
||||
#define APSTUDIO_READONLY_SYMBOLS
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Generated from the TEXTINCLUDE 2 resource.
|
||||
//
|
||||
#include "winresrc.h"
|
||||
#include "utils/versions.hpp"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#undef APSTUDIO_READONLY_SYMBOLS
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// English (U.S.) resources
|
||||
|
||||
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
|
||||
#ifdef _WIN32
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
#pragma code_page(1252)
|
||||
#endif //_WIN32
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Version
|
||||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
|
||||
PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
#else
|
||||
FILEFLAGS 0x0L
|
||||
#endif
|
||||
FILEOS 0x40004L
|
||||
FILETYPE 0x2L
|
||||
FILESUBTYPE 0x0L
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "040904b0"
|
||||
BEGIN
|
||||
VALUE "Comments", " \0"
|
||||
VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
|
||||
VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0"
|
||||
VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
|
||||
VALUE "InternalName", "OpenCL"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0"
|
||||
VALUE "OriginalFilename", "OpenCL.dll"
|
||||
VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0"
|
||||
VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER)
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x409, 1200
|
||||
END
|
||||
END
|
||||
|
||||
#endif // English (U.S.) resources
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_prof_api.h"
|
||||
|
||||
// HIP API callback/activity
|
||||
|
||||
api_callbacks_table_t callbacks_table;
|
||||
|
||||
extern const std::string& FunctionName(const hipFunction_t f);
|
||||
const char* hipKernelNameRef(const hipFunction_t f) { return FunctionName(f).c_str(); }
|
||||
|
||||
hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
|
||||
return callbacks_table.set_callback(id, reinterpret_cast<api_callbacks_table_t::fun_t>(fun), arg) ?
|
||||
hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipRemoveApiCallback(uint32_t id) {
|
||||
return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) {
|
||||
return callbacks_table.set_activity(id, reinterpret_cast<api_callbacks_table_t::act_t>(fun), arg) ?
|
||||
hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipRemoveActivityCallback(uint32_t id) {
|
||||
return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipEnableTracing(bool enabled) {
|
||||
callbacks_table.set_enabled(enabled);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
const char* hipApiName(uint32_t id) {
|
||||
return hip_api_name(id);
|
||||
}
|
||||
@@ -0,0 +1,230 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_SRC_HIP_INTERNAL_H
|
||||
#define HIP_SRC_HIP_INTERNAL_H
|
||||
|
||||
#include "vdi_common.hpp"
|
||||
#include "hip_prof_api.h"
|
||||
#include "trace_helper.h"
|
||||
#include "utils/debug.hpp"
|
||||
#include <unordered_set>
|
||||
#include <thread>
|
||||
#include <stack>
|
||||
#include <mutex>
|
||||
#include <iterator>
|
||||
|
||||
|
||||
/*! IHIP IPC MEMORY Structure */
|
||||
#define IHIP_IPC_MEM_HANDLE_SIZE 32
|
||||
#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(28,24)
|
||||
|
||||
typedef struct ihipIpcMemHandle_st {
|
||||
char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr
|
||||
size_t psize;
|
||||
char reserved[IHIP_IPC_MEM_RESERVED_SIZE];
|
||||
} ihipIpcMemHandle_t;
|
||||
|
||||
#define HIP_INIT() \
|
||||
std::call_once(hip::g_ihipInitialized, hip::init); \
|
||||
if (hip::g_context == nullptr && g_devices.size() > 0) { \
|
||||
hip::g_context = g_devices[0]; \
|
||||
}
|
||||
|
||||
// This macro should be called at the beginning of every HIP API.
|
||||
#define HIP_INIT_API(cid, ...) \
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \
|
||||
amd::Thread* thread = amd::Thread::current(); \
|
||||
if (!VDI_CHECK_THREAD(thread)) { \
|
||||
HIP_RETURN(hipErrorOutOfMemory); \
|
||||
} \
|
||||
HIP_INIT() \
|
||||
HIP_CB_SPAWNER_OBJECT(cid);
|
||||
|
||||
#define HIP_RETURN(ret) \
|
||||
hip::g_lastError = ret; \
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, hipGetErrorName(hip::g_lastError)); \
|
||||
return hip::g_lastError;
|
||||
|
||||
namespace hc {
|
||||
class accelerator;
|
||||
class accelerator_view;
|
||||
};
|
||||
|
||||
namespace hip {
|
||||
extern std::once_flag g_ihipInitialized;
|
||||
extern thread_local amd::Context* g_context;
|
||||
extern thread_local hipError_t g_lastError;
|
||||
extern amd::Context* host_context;
|
||||
|
||||
extern void init();
|
||||
|
||||
extern amd::Context* getCurrentContext();
|
||||
extern void setCurrentContext(unsigned int index);
|
||||
|
||||
extern amd::HostQueue* getQueue(hipStream_t s);
|
||||
extern amd::HostQueue* getNullStream(amd::Context&);
|
||||
extern amd::HostQueue* getNullStream();
|
||||
extern void syncStreams();
|
||||
|
||||
|
||||
struct Function {
|
||||
amd::Kernel* function_;
|
||||
amd::Monitor lock_;
|
||||
|
||||
Function(amd::Kernel* f) : function_(f), lock_("function lock") {}
|
||||
hipFunction_t asHipFunction() { return reinterpret_cast<hipFunction_t>(this); }
|
||||
|
||||
static Function* asFunction(hipFunction_t f) { return reinterpret_cast<Function*>(f); }
|
||||
};
|
||||
|
||||
struct Stream {
|
||||
amd::HostQueue* queue;
|
||||
|
||||
amd::Device* device;
|
||||
amd::Context* context;
|
||||
amd::CommandQueue::Priority priority;
|
||||
unsigned int flags;
|
||||
|
||||
Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f);
|
||||
void create();
|
||||
amd::HostQueue* asHostQueue();
|
||||
void destroy();
|
||||
void finish();
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
struct ihipExec_t {
|
||||
dim3 gridDim_;
|
||||
dim3 blockDim_;
|
||||
size_t sharedMem_;
|
||||
hipStream_t hStream_;
|
||||
std::vector<char> arguments_;
|
||||
};
|
||||
|
||||
class PlatformState {
|
||||
amd::Monitor lock_{"Guards global function map"};
|
||||
|
||||
std::unordered_map<const void*, std::vector<std::pair<hipModule_t, bool>>> modules_;
|
||||
bool initialized_{false};
|
||||
|
||||
void digestFatBinary(const void* data, std::vector<std::pair<hipModule_t, bool>>& programs);
|
||||
public:
|
||||
void init();
|
||||
std::vector<std::pair<hipModule_t, bool>>* addFatBinary(const void*data)
|
||||
{
|
||||
if (initialized_) {
|
||||
digestFatBinary(data, modules_[data]);
|
||||
}
|
||||
return &modules_[data];
|
||||
}
|
||||
void removeFatBinary(std::vector<std::pair<hipModule_t, bool>>* module)
|
||||
{
|
||||
for (auto& mod : modules_) {
|
||||
if (&mod.second == module) {
|
||||
modules_.erase(&mod);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct RegisteredVar {
|
||||
public:
|
||||
RegisteredVar(): size_(0), devicePtr_(nullptr), amd_mem_obj_(nullptr) {}
|
||||
~RegisteredVar() {}
|
||||
|
||||
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
|
||||
size_t getvarsize() const { return size_; };
|
||||
|
||||
size_t size_; // Size of the variable
|
||||
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
|
||||
amd::Memory* amd_mem_obj_;
|
||||
};
|
||||
|
||||
struct DeviceFunction {
|
||||
std::string deviceName;
|
||||
std::vector< std::pair< hipModule_t, bool > >* modules;
|
||||
std::vector<hipFunction_t> functions;
|
||||
};
|
||||
struct DeviceVar {
|
||||
void* shadowVptr;
|
||||
std::string hostVar;
|
||||
size_t size;
|
||||
std::vector< std::pair< hipModule_t, bool > >* modules;
|
||||
std::vector<RegisteredVar> rvars;
|
||||
bool dyn_undef;
|
||||
};
|
||||
private:
|
||||
std::unordered_map<const void*, DeviceFunction > functions_;
|
||||
std::unordered_multimap<std::string, DeviceVar > vars_;
|
||||
|
||||
static PlatformState* platform_;
|
||||
|
||||
PlatformState() {}
|
||||
~PlatformState() {}
|
||||
public:
|
||||
static PlatformState& instance() {
|
||||
return *platform_;
|
||||
}
|
||||
|
||||
std::vector< std::pair<hipModule_t, bool> >* unregisterVar(hipModule_t hmod);
|
||||
|
||||
|
||||
PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod);
|
||||
void registerVar(const void* hostvar, const DeviceVar& var);
|
||||
void registerFunction(const void* hostFunction, const DeviceFunction& func);
|
||||
|
||||
hipFunction_t getFunc(const void* hostFunction, int deviceId);
|
||||
bool getFuncAttr(const void* hostFunction, hipFuncAttributes* func_attr);
|
||||
bool getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod,
|
||||
hipDeviceptr_t* dev_ptr, size_t* size_ptr);
|
||||
bool getTexRef(const char* hostVar, textureReference** texRef);
|
||||
|
||||
bool getShadowVarInfo(std::string var_name, hipModule_t hmod,
|
||||
void** var_addr, size_t* var_size);
|
||||
void setupArgument(const void *arg, size_t size, size_t offset);
|
||||
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
|
||||
|
||||
void popExec(ihipExec_t& exec);
|
||||
};
|
||||
|
||||
extern std::vector<amd::Context*> g_devices;
|
||||
extern hipError_t ihipDeviceGetCount(int* count);
|
||||
extern int ihipGetDevice();
|
||||
extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags);
|
||||
extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset);
|
||||
extern bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr,
|
||||
size_t* var_size);
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const dim3& s) {
|
||||
os << '{';
|
||||
os << s.x;
|
||||
os << ',';
|
||||
os << s.y;
|
||||
os << ',';
|
||||
os << s.z;
|
||||
os << '}';
|
||||
return os;
|
||||
}
|
||||
|
||||
#endif // HIP_SRC_HIP_INTERNAL_H
|
||||
+1714
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,534 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <libelf.h>
|
||||
#include <fstream>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "hip_event.hpp"
|
||||
|
||||
hipError_t ihipModuleLoadData(hipModule_t *module, const void *image);
|
||||
|
||||
const std::string& FunctionName(const hipFunction_t f)
|
||||
{
|
||||
return hip::Function::asFunction(f)->function_->name();
|
||||
}
|
||||
|
||||
static uint64_t ElfSize(const void *emi)
|
||||
{
|
||||
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
|
||||
const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
|
||||
|
||||
uint64_t max_offset = ehdr->e_shoff;
|
||||
uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum;
|
||||
|
||||
for (uint16_t i=0; i < ehdr->e_shnum; ++i){
|
||||
uint64_t cur_offset = static_cast<uint64_t>(shdr[i].sh_offset);
|
||||
if (max_offset < cur_offset) {
|
||||
max_offset = cur_offset;
|
||||
total_size = max_offset;
|
||||
if(SHT_NOBITS != shdr[i].sh_type) {
|
||||
total_size += static_cast<uint64_t>(shdr[i].sh_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
return total_size;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t* module, const char* fname)
|
||||
{
|
||||
HIP_INIT_API(hipModuleLoad, module, fname);
|
||||
|
||||
if (!fname) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
std::ifstream file(fname, std::ios::binary);
|
||||
|
||||
if (!file.is_open()) {
|
||||
HIP_RETURN(hipErrorFileNotFound);
|
||||
}
|
||||
|
||||
std::vector<char> tmp{std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{}};
|
||||
|
||||
HIP_RETURN(ihipModuleLoadData(module, tmp.data()));
|
||||
}
|
||||
|
||||
bool ihipModuleUnregisterGlobal(hipModule_t hmod) {
|
||||
std::vector< std::pair<hipModule_t, bool> >* modules =
|
||||
PlatformState::instance().unregisterVar(hmod);
|
||||
if (modules != nullptr) {
|
||||
delete modules;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t hmod)
|
||||
{
|
||||
HIP_INIT_API(hipModuleUnload, hmod);
|
||||
|
||||
if (hmod == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
|
||||
|
||||
if(!ihipModuleUnregisterGlobal(hmod)) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
|
||||
program->release();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
HIP_INIT_API(hipModuleLoadData, module, image);
|
||||
|
||||
HIP_RETURN(ihipModuleLoadData(module, image));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
|
||||
unsigned int numOptions, hipJitOption* options,
|
||||
void** optionsValues)
|
||||
{
|
||||
/* TODO: Pass options to Program */
|
||||
HIP_INIT_API(hipModuleLoadData, module, image);
|
||||
|
||||
HIP_RETURN(ihipModuleLoadData(module, image));
|
||||
}
|
||||
|
||||
extern bool __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
const std::vector<const char*>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
|
||||
bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) {
|
||||
|
||||
std::vector<std::string> undef_vars;
|
||||
device::Program* dev_program
|
||||
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto it = undef_vars.begin(); it != undef_vars.end(); ++it) {
|
||||
auto modules = new std::vector<std::pair<hipModule_t, bool> >(g_devices.size());
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
modules->at(dev) = std::make_pair(*module, true);
|
||||
}
|
||||
|
||||
texture<float, hipTextureType1D, hipReadModeElementType>* tex_hptr
|
||||
= new texture<float, hipTextureType1D, hipReadModeElementType>();
|
||||
memset(tex_hptr, 0x00, sizeof(texture<float, hipTextureType1D, hipReadModeElementType>));
|
||||
|
||||
PlatformState::DeviceVar dvar{ reinterpret_cast<char*>(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules,
|
||||
std::vector<PlatformState::RegisteredVar>{ g_devices.size()}, true };
|
||||
PlatformState::instance().registerVar(it->c_str(), dvar);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) {
|
||||
|
||||
size_t var_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
std::vector<std::string> var_names;
|
||||
|
||||
device::Program* dev_program
|
||||
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
if (!dev_program->getGlobalVarFromCodeObj(&var_names)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto it = var_names.begin(); it != var_names.end(); ++it) {
|
||||
auto modules = new std::vector<std::pair<hipModule_t, bool> >(g_devices.size());
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
modules->at(dev) = std::make_pair(*module, true);
|
||||
}
|
||||
|
||||
PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules,
|
||||
std::vector<PlatformState::RegisteredVar>{ g_devices.size()}, false };
|
||||
PlatformState::instance().registerVar(it->c_str(), dvar);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
hipError_t ihipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs))
|
||||
image = code_objs[0].first;
|
||||
|
||||
amd::Program* program = new amd::Program(*hip::getCurrentContext());
|
||||
if (program == NULL) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
program->setVarInfoCallBack(&getSvarInfo);
|
||||
|
||||
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) {
|
||||
return hipErrorInvalidKernelFile;
|
||||
}
|
||||
|
||||
*module = reinterpret_cast<hipModule_t>(as_cl(program));
|
||||
|
||||
if (!ihipModuleRegisterGlobal(program, module)) {
|
||||
return hipErrorSharedObjectSymbolNotFound;
|
||||
}
|
||||
|
||||
if (!ihipModuleRegisterUndefined(program, module)) {
|
||||
return hipErrorSharedObjectSymbolNotFound;
|
||||
}
|
||||
|
||||
if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) {
|
||||
return hipErrorSharedObjectInitFailed;
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name)
|
||||
{
|
||||
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
|
||||
|
||||
const amd::Symbol* symbol = program->findSymbol(name);
|
||||
if (!symbol) {
|
||||
HIP_RETURN(hipErrorNotFound);
|
||||
}
|
||||
|
||||
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name);
|
||||
if (!kernel) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
|
||||
hip::Function* f = new hip::Function(kernel);
|
||||
*hfunc = f->asHipFunction();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name)
|
||||
{
|
||||
HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name);
|
||||
|
||||
/* Get address and size for the global symbol */
|
||||
if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod,
|
||||
dptr, bytes)) {
|
||||
HIP_RETURN(hipErrorNotFound);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
||||
{
|
||||
HIP_INIT_API(hipFuncGetAttributes, attr, func);
|
||||
|
||||
if (!PlatformState::instance().getFuncAttr(func, attr)) {
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
|
||||
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
|
||||
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0) {
|
||||
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params);
|
||||
|
||||
hip::Function* function = hip::Function::asFunction(f);
|
||||
amd::Kernel* kernel = function->function_;
|
||||
|
||||
amd::ScopedLock lock(function->lock_);
|
||||
|
||||
hip::Event* eStart = reinterpret_cast<hip::Event*>(startEvent);
|
||||
hip::Event* eStop = reinterpret_cast<hip::Event*>(stopEvent);
|
||||
amd::HostQueue* queue = hip::getQueue(hStream);
|
||||
const amd::Device& device = queue->vdev()->device();
|
||||
|
||||
if ((params & amd::NDRangeKernelCommand::CooperativeGroups) &&
|
||||
!device.info().cooperativeGroups_) {
|
||||
return hipErrorLaunchFailure;
|
||||
}
|
||||
if ((params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) &&
|
||||
!device.info().cooperativeMultiDeviceGroups_) {
|
||||
return hipErrorLaunchFailure;
|
||||
}
|
||||
if (!queue) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
size_t globalWorkOffset[3] = {0};
|
||||
size_t globalWorkSize[3] = { gridDimX, gridDimY, gridDimZ };
|
||||
size_t localWorkSize[3] = { blockDimX, blockDimY, blockDimZ };
|
||||
amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
amd::Command::EventWaitList waitList;
|
||||
|
||||
address kernargs = nullptr;
|
||||
|
||||
// 'extra' is a struct that contains the following info: {
|
||||
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
|
||||
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
|
||||
// HIP_LAUNCH_PARAM_END }
|
||||
if (extra != nullptr) {
|
||||
if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER ||
|
||||
extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || extra[4] != HIP_LAUNCH_PARAM_END) {
|
||||
return hipErrorNotInitialized;
|
||||
}
|
||||
kernargs = reinterpret_cast<address>(extra[1]);
|
||||
}
|
||||
|
||||
const amd::KernelSignature& signature = kernel->signature();
|
||||
for (size_t i = 0; i < signature.numParameters(); ++i) {
|
||||
const amd::KernelParameterDescriptor& desc = signature.at(i);
|
||||
if (kernelParams == nullptr) {
|
||||
assert(kernargs != nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
|
||||
desc.type_ == T_POINTER/*svmBound*/);
|
||||
} else {
|
||||
assert(extra == nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER/*svmBound*/);
|
||||
}
|
||||
}
|
||||
|
||||
if(startEvent != nullptr) {
|
||||
amd::Command* startCommand = new hip::TimerMarker(*queue);
|
||||
startCommand->enqueue();
|
||||
eStart->addMarker(queue, startCommand);
|
||||
}
|
||||
|
||||
amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(
|
||||
*queue, waitList, *kernel, ndrange, sharedMemBytes,
|
||||
params, gridId, numGrids, prevGridSum, allGridSum, firstDevice);
|
||||
if (!command) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
// Capture the kernel arguments
|
||||
if (CL_SUCCESS != command->captureAndValidate()) {
|
||||
delete command;
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
command->enqueue();
|
||||
|
||||
if(stopEvent != nullptr) {
|
||||
eStop->addMarker(queue, command);
|
||||
command->retain();
|
||||
}
|
||||
|
||||
command->release();
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX * blockDimX, gridDimY * blockDimY, gridDimZ * blockDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr));
|
||||
}
|
||||
|
||||
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent, flags);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX,
|
||||
uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t gridDimX,
|
||||
uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernel(const void* f,
|
||||
dim3 gridDim, dim3 blockDim,
|
||||
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
|
||||
sharedMemBytes, hStream);
|
||||
|
||||
int deviceId = ihipGetDevice();
|
||||
hipFunction_t func = PlatformState::instance().getFunc(f, deviceId);
|
||||
if (func == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(func, gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z,
|
||||
blockDim.x, blockDim.y, blockDim.z,
|
||||
sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0,
|
||||
amd::NDRangeKernelCommand::CooperativeGroups));
|
||||
}
|
||||
|
||||
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags, uint32_t extFlags)
|
||||
{
|
||||
int numActiveGPUs = 0;
|
||||
ihipDeviceGetCount(&numActiveGPUs);
|
||||
|
||||
if ((numDevices > numActiveGPUs) || (launchParamsList == nullptr)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t result = hipErrorUnknown;
|
||||
uint64_t allGridSize = 0;
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
const hipLaunchParams& launch = launchParamsList[i];
|
||||
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
|
||||
}
|
||||
uint64_t prevGridSize = 0;
|
||||
uint32_t firstDevice = 0;
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
const hipLaunchParams& launch = launchParamsList[i];
|
||||
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.stream)->asHostQueue();
|
||||
hipFunction_t func = nullptr;
|
||||
// The order of devices in the launch may not match the order in the global array
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
// Find the matching device and request the kernel function
|
||||
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
|
||||
func = PlatformState::instance().getFunc(launch.func, dev);
|
||||
// Save VDI index of the first device in the launch
|
||||
if (i == 0) {
|
||||
firstDevice = queue->vdev()->device().index();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (func == nullptr) {
|
||||
result = hipErrorInvalidDeviceFunction;
|
||||
HIP_RETURN(result);
|
||||
}
|
||||
|
||||
result = ihipModuleLaunchKernel(func,
|
||||
launch.gridDim.x * launch.blockDim.x,
|
||||
launch.gridDim.y * launch.blockDim.y,
|
||||
launch.gridDim.z * launch.blockDim.z,
|
||||
launch.blockDim.x, launch.blockDim.y, launch.blockDim.z,
|
||||
launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
|
||||
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
|
||||
if (result != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
prevGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags);
|
||||
|
||||
return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags,
|
||||
(amd::NDRangeKernelCommand::CooperativeGroups |
|
||||
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups));
|
||||
}
|
||||
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags) {
|
||||
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
|
||||
|
||||
return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0);
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) {
|
||||
HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name);
|
||||
|
||||
/* input args check */
|
||||
if ((texRef == nullptr) || (name == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
/* Get address and size for the global symbol */
|
||||
if (!PlatformState::instance().getTexRef(name, texRef)) {
|
||||
HIP_RETURN(hipErrorNotFound);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
|
||||
HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx,
|
||||
size_t sizeBytes) {
|
||||
HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice,
|
||||
size_t sizeBytes, hipStream_t stream) {
|
||||
HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) {
|
||||
HIP_INIT_API(hipDeviceCanAccessPeer, canAccessPeer, deviceId, peerDeviceId);
|
||||
|
||||
amd::Device* device = nullptr;
|
||||
amd::Device* peer_device = nullptr;
|
||||
|
||||
if (canAccessPeer == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
/* Peer cannot be self */
|
||||
if (deviceId == peerDeviceId) {
|
||||
*canAccessPeer = 0;
|
||||
return HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
/* Cannot exceed the max number of devices */
|
||||
if (static_cast<size_t>(deviceId) >= g_devices.size()
|
||||
|| static_cast<size_t>(peerDeviceId) >= g_devices.size()) {
|
||||
return HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
device = g_devices[deviceId]->devices()[0];
|
||||
peer_device = g_devices[peerDeviceId]->devices()[0];
|
||||
|
||||
*canAccessPeer = static_cast<int>(std::find(device->p2pDevices_.begin(),
|
||||
device->p2pDevices_.end(), as_cl(peer_device))
|
||||
!= device->p2pDevices_.end());
|
||||
|
||||
return HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
|
||||
HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
|
||||
HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
|
||||
size_t sizeBytes) {
|
||||
HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes);
|
||||
|
||||
HIP_RETURN(hipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice,
|
||||
size_t sizeBytes, hipStream_t stream) {
|
||||
HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream);
|
||||
|
||||
HIP_RETURN(hipMemcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
|
||||
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
|
||||
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
|
||||
HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
@@ -0,0 +1,993 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/runtime.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
#include "elfio.hpp"
|
||||
|
||||
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
||||
|
||||
thread_local std::stack<ihipExec_t> execStack_;
|
||||
PlatformState* PlatformState::platform_ = new PlatformState();
|
||||
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
unsigned int version;
|
||||
void* binary;
|
||||
void* dummy1;
|
||||
};
|
||||
|
||||
#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
|
||||
#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
|
||||
#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-"
|
||||
|
||||
struct __ClangOffloadBundleDesc {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
uint64_t tripleSize;
|
||||
const char triple[1];
|
||||
};
|
||||
|
||||
struct __ClangOffloadBundleHeader {
|
||||
const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1];
|
||||
uint64_t numBundles;
|
||||
__ClangOffloadBundleDesc desc[1];
|
||||
};
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
||||
hipModule_t hmod, const char* name);
|
||||
|
||||
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
||||
hipDeviceptr_t* dptr, size_t* bytes);
|
||||
|
||||
static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
|
||||
const char* device_name) {
|
||||
// Workaround for device name mismatch.
|
||||
// Device name may contain feature strings delimited by '+', e.g.
|
||||
// gfx900+xnack. Currently HIP-Clang does not include feature strings
|
||||
// in code object target id in fat binary. Therefore drop the feature
|
||||
// strings from device name before comparing it with code object target id.
|
||||
std::string short_name(device_name);
|
||||
auto feature_loc = short_name.find('+');
|
||||
if (feature_loc != std::string::npos) {
|
||||
short_name.erase(feature_loc);
|
||||
}
|
||||
return codeobj_target_id == short_name;
|
||||
}
|
||||
|
||||
// Extracts code objects from fat binary in data for device names given in devices.
|
||||
// Returns true if code objects are extracted successfully.
|
||||
bool __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
const std::vector<const char*>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs)
|
||||
{
|
||||
std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
|
||||
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
code_objs.resize(devices.size());
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
unsigned num_code_objs = 0;
|
||||
for (uint64_t i = 0; i < obheader->numBundles; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
|
||||
|
||||
std::string triple(desc->triple, sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1);
|
||||
if (triple.compare(HIP_AMDGCN_AMDHSA_TRIPLE))
|
||||
continue;
|
||||
|
||||
std::string target(desc->triple + sizeof(HIP_AMDGCN_AMDHSA_TRIPLE),
|
||||
desc->tripleSize - sizeof(HIP_AMDGCN_AMDHSA_TRIPLE));
|
||||
|
||||
const void *image = reinterpret_cast<const void*>(
|
||||
reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
size_t size = desc->size;
|
||||
|
||||
for (size_t dev = 0; dev < devices.size(); ++dev) {
|
||||
const char* name = devices[dev];
|
||||
|
||||
if (!isCompatibleCodeObject(target, name)) {
|
||||
continue;
|
||||
}
|
||||
code_objs[dev] = std::make_pair(image, size);
|
||||
num_code_objs++;
|
||||
}
|
||||
}
|
||||
if (num_code_objs == devices.size())
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
extern "C" std::vector<std::pair<hipModule_t, bool>>* __hipRegisterFatBinary(const void* data)
|
||||
{
|
||||
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
|
||||
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return PlatformState::instance().addFatBinary(fbwrapper->binary);
|
||||
}
|
||||
|
||||
void PlatformState::digestFatBinary(const void* data, std::vector<std::pair<hipModule_t, bool>>& programs)
|
||||
{
|
||||
if (programs.size() > 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
std::vector<const char*> devices;
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
amd::Context* ctx = g_devices[dev];
|
||||
devices.push_back(ctx->devices()[0]->info().name_);
|
||||
}
|
||||
|
||||
if (!__hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) {
|
||||
return;
|
||||
}
|
||||
|
||||
programs.resize(g_devices.size());
|
||||
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
amd::Context* ctx = g_devices[dev];
|
||||
amd::Program* program = new amd::Program(*ctx);
|
||||
if (program == nullptr) {
|
||||
return;
|
||||
}
|
||||
if (CL_SUCCESS == program->addDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) {
|
||||
programs.at(dev) = std::make_pair(reinterpret_cast<hipModule_t>(as_cl(program)) , false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PlatformState::init()
|
||||
{
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if(initialized_ || g_devices.empty()) {
|
||||
return;
|
||||
}
|
||||
initialized_ = true;
|
||||
|
||||
for (auto& it : modules_) {
|
||||
digestFatBinary(it.first, it.second);
|
||||
}
|
||||
for (auto& it : functions_) {
|
||||
it.second.functions.resize(g_devices.size());
|
||||
}
|
||||
for (auto& it : vars_) {
|
||||
it.second.rvars.resize(g_devices.size());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector< std::pair<hipModule_t, bool> >* PlatformState::unregisterVar(hipModule_t hmod) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
std::vector< std::pair<hipModule_t, bool> >* rmodules = nullptr;
|
||||
auto it = vars_.begin();
|
||||
while (it != vars_.end()) {
|
||||
DeviceVar& dvar = it->second;
|
||||
if ((*dvar.modules)[0].first == hmod) {
|
||||
rmodules = dvar.modules;
|
||||
if (dvar.dyn_undef) {
|
||||
texture<float, hipTextureType1D, hipReadModeElementType>* tex_hptr
|
||||
= reinterpret_cast<texture<float, hipTextureType1D, hipReadModeElementType> *>(dvar.shadowVptr);
|
||||
delete tex_hptr;
|
||||
}
|
||||
vars_.erase(it++);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
return rmodules;
|
||||
}
|
||||
|
||||
PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
|
||||
DeviceVar* dvar = nullptr;
|
||||
if (hmod != nullptr) {
|
||||
// If module is provided, then get the var only from that module
|
||||
auto var_range = vars_.equal_range(hostVar);
|
||||
for (auto it = var_range.first; it != var_range.second; ++it) {
|
||||
if ((*it->second.modules)[deviceId].first == hmod) {
|
||||
dvar = &(it->second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If var count is < 2, return the var
|
||||
if (vars_.count(hostVar) < 2) {
|
||||
auto it = vars_.find(hostVar);
|
||||
dvar = ((it == vars_.end()) ? nullptr : &(it->second));
|
||||
} else {
|
||||
// If var count is > 2, return the original var,
|
||||
// if original var count != 1, return vars_.end()/Invalid
|
||||
size_t orig_global_count = 0;
|
||||
auto var_range = vars_.equal_range(hostVar);
|
||||
for (auto it = var_range.first; it != var_range.second; ++it) {
|
||||
// when dyn_undef is set, it is a shadow var
|
||||
if (it->second.dyn_undef == false) {
|
||||
++orig_global_count;
|
||||
dvar = &(it->second);
|
||||
}
|
||||
}
|
||||
dvar = ((orig_global_count == 1) ? dvar : nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
return dvar;
|
||||
}
|
||||
|
||||
void PlatformState::registerVar(const void* hostvar,
|
||||
const DeviceVar& rvar) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
vars_.insert(std::make_pair(std::string(reinterpret_cast<const char*>(hostvar)), rvar));
|
||||
}
|
||||
|
||||
void PlatformState::registerFunction(const void* hostFunction,
|
||||
const DeviceFunction& func) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
functions_.insert(std::make_pair(hostFunction, func));
|
||||
}
|
||||
|
||||
bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) {
|
||||
device::Program* dev_program
|
||||
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
const auto it = dev_program->kernels().find(std::string(func_name));
|
||||
if (it == dev_program->kernels().cend()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const device::Kernel::WorkGroupInfo* wginfo = it->second->workGroupInfo();
|
||||
func_attr->localSizeBytes = wginfo->localMemSize_;
|
||||
func_attr->sharedSizeBytes = wginfo->size_;
|
||||
func_attr->maxThreadsPerBlock = wginfo->wavefrontSize_;
|
||||
func_attr->numRegs = wginfo->usedVGPRs_;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod,
|
||||
void** var_addr, size_t* var_size) {
|
||||
DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod);
|
||||
if (dvar != nullptr) {
|
||||
*var_addr = dvar->shadowVptr;
|
||||
*var_size = dvar->size;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr,
|
||||
size_t* var_size) {
|
||||
return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast<hipModule_t>(program),
|
||||
var_addr, var_size);
|
||||
}
|
||||
|
||||
hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
const auto it = functions_.find(hostFunction);
|
||||
if (it != functions_.cend()) {
|
||||
PlatformState::DeviceFunction& devFunc = it->second;
|
||||
if (devFunc.functions[deviceId] == 0) {
|
||||
hipModule_t module = (*devFunc.modules)[deviceId].first;
|
||||
if (!(*devFunc.modules)[deviceId].second) {
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
|
||||
program->setVarInfoCallBack(&getSvarInfo);
|
||||
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
|
||||
return nullptr;
|
||||
}
|
||||
(*devFunc.modules)[deviceId].second = true;
|
||||
}
|
||||
hipFunction_t function = nullptr;
|
||||
if (hipSuccess == hipModuleGetFunction(&function, module, devFunc.deviceName.c_str()) &&
|
||||
function != nullptr) {
|
||||
devFunc.functions[deviceId] = function;
|
||||
}
|
||||
else {
|
||||
// tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
|
||||
// " device %d\n", deviceName, deviceId);
|
||||
}
|
||||
}
|
||||
return devFunc.functions[deviceId];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool PlatformState::getFuncAttr(const void* hostFunction,
|
||||
hipFuncAttributes* func_attr) {
|
||||
if (func_attr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto it = functions_.find(hostFunction);
|
||||
if (it == functions_.cend()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PlatformState::DeviceFunction& devFunc = it->second;
|
||||
int deviceId = ihipGetDevice();
|
||||
|
||||
/* If module has not been initialized yet, build the kernel now*/
|
||||
if (!(*devFunc.modules)[deviceId].second) {
|
||||
if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*devFunc.modules)[deviceId].first));
|
||||
if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PlatformState::getTexRef(const char* hostVar, textureReference** texRef) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), nullptr);
|
||||
if (dvar == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!dvar->dyn_undef) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*texRef = reinterpret_cast<textureReference *>(dvar->shadowVptr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, hipModule_t hmod,
|
||||
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
DeviceVar* dvar = findVar(std::string(reinterpret_cast<const char*>(hostVar)), deviceId, hmod);
|
||||
if (dvar != nullptr) {
|
||||
if (dvar->rvars[deviceId].getdeviceptr() == nullptr) {
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
amd::Memory* amd_mem_obj = nullptr;
|
||||
|
||||
if (!(*dvar->modules)[deviceId].second) {
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*dvar->modules)[deviceId].first));
|
||||
program->setVarInfoCallBack(&getSvarInfo);
|
||||
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
|
||||
return false;
|
||||
}
|
||||
(*dvar->modules)[deviceId].second = true;
|
||||
}
|
||||
if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first,
|
||||
&amd_mem_obj, &device_ptr, &sym_size))
|
||||
&& (device_ptr != nullptr)) {
|
||||
dvar->rvars[deviceId].size_ = sym_size;
|
||||
dvar->rvars[deviceId].devicePtr_ = device_ptr;
|
||||
dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj;
|
||||
amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj);
|
||||
} else {
|
||||
LogError("[HIP] __hipRegisterVar cannot find kernel for device \n");
|
||||
}
|
||||
}
|
||||
*size_ptr = dvar->rvars[deviceId].getvarsize();
|
||||
*dev_ptr = dvar->rvars[deviceId].getdeviceptr();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
|
||||
auto& arguments = execStack_.top().arguments_;
|
||||
|
||||
if (arguments.size() < offset + size) {
|
||||
arguments.resize(offset + size);
|
||||
}
|
||||
|
||||
::memcpy(&arguments[offset], arg, size);
|
||||
}
|
||||
|
||||
void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
||||
hipStream_t stream) {
|
||||
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
||||
}
|
||||
|
||||
void PlatformState::popExec(ihipExec_t& exec) {
|
||||
exec = std::move(execStack_.top());
|
||||
execStack_.pop();
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterFunction(
|
||||
std::vector<std::pair<hipModule_t,bool> >* modules,
|
||||
const void* hostFunction,
|
||||
char* deviceFunction,
|
||||
const char* deviceName,
|
||||
unsigned int threadLimit,
|
||||
uint3* tid,
|
||||
uint3* bid,
|
||||
dim3* blockDim,
|
||||
dim3* gridDim,
|
||||
int* wSize)
|
||||
{
|
||||
PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector<hipFunction_t>{g_devices.size()}};
|
||||
PlatformState::instance().registerFunction(hostFunction, func);
|
||||
// for (size_t i = 0; i < g_devices.size(); ++i) {
|
||||
// PlatformState::instance().getFunc(hostFunction, i);
|
||||
// }
|
||||
}
|
||||
|
||||
// Registers a device-side global variable.
|
||||
// For each global variable in device code, there is a corresponding shadow
|
||||
// global variable in host code. The shadow host variable is used to keep
|
||||
// track of the value of the device side global variable between kernel
|
||||
// executions.
|
||||
extern "C" void __hipRegisterVar(
|
||||
std::vector<std::pair<hipModule_t,bool> >* modules, // The device modules containing code object
|
||||
char* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int ext, // Whether this variable is external
|
||||
int size, // Size of the variable
|
||||
int constant, // Whether this variable is constant
|
||||
int global) // Unknown, always 0
|
||||
{
|
||||
PlatformState::DeviceVar dvar{var, std::string{ hostVar }, static_cast<size_t>(size), modules,
|
||||
std::vector<PlatformState::RegisteredVar>{g_devices.size()}, false };
|
||||
|
||||
PlatformState::instance().registerVar(hostVar, dvar);
|
||||
}
|
||||
|
||||
extern "C" void __hipUnregisterFatBinary(std::vector< std::pair<hipModule_t, bool> >* modules)
|
||||
{
|
||||
HIP_INIT();
|
||||
|
||||
std::for_each(modules->begin(), modules->end(), [](std::pair<hipModule_t, bool> module){
|
||||
if (module.first != nullptr) {
|
||||
as_amd(reinterpret_cast<cl_program>(module.first))->release();
|
||||
}
|
||||
});
|
||||
if (modules->size() > 0) {
|
||||
PlatformState::instance().unregisterVar((*modules)[0].first);
|
||||
}
|
||||
PlatformState::instance().removeFatBinary(modules);
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipConfigureCall(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t __hipPushCallConfiguration(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
|
||||
dim3 *blockDim,
|
||||
size_t *sharedMem,
|
||||
hipStream_t *stream) {
|
||||
HIP_INIT_API(NONE, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
ihipExec_t exec;
|
||||
PlatformState::instance().popExec(exec);
|
||||
*gridDim = exec.gridDim_;
|
||||
*blockDim = exec.blockDim_;
|
||||
*sharedMem = exec.sharedMem_;
|
||||
*stream = exec.hStream_;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipSetupArgument(
|
||||
const void *arg,
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
HIP_INIT_API(NONE, arg, size, offset);
|
||||
|
||||
PlatformState::instance().setupArgument(arg, size, offset);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
{
|
||||
HIP_INIT_API(NONE, hostFunction);
|
||||
|
||||
int deviceId = ihipGetDevice();
|
||||
if (deviceId == -1) {
|
||||
HIP_RETURN(hipErrorNoDevice);
|
||||
}
|
||||
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
|
||||
if (func == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
|
||||
ihipExec_t exec;
|
||||
PlatformState::instance().popExec(exec);
|
||||
|
||||
size_t size = exec.arguments_.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
HIP_RETURN(hipModuleLaunchKernel(func,
|
||||
exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
|
||||
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
|
||||
exec.sharedMem_, exec.hStream_, nullptr, extra));
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(NONE, hostFunction, gridDim, blockDim, args, sharedMemBytes,
|
||||
stream);
|
||||
|
||||
int deviceId = ihipGetDevice();
|
||||
if (deviceId == -1) {
|
||||
HIP_RETURN(hipErrorNoDevice);
|
||||
}
|
||||
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
|
||||
if (func == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z,
|
||||
blockDim.x, blockDim.y, blockDim.z,
|
||||
sharedMemBytes, stream, args, nullptr));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
|
||||
size_t size = 0;
|
||||
if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr,
|
||||
devPtr, &size)) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) {
|
||||
hipDeviceptr_t devPtr = nullptr;
|
||||
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), nullptr,
|
||||
&devPtr, sizePtr)) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
||||
hipDeviceptr_t* dptr, size_t* bytes)
|
||||
{
|
||||
HIP_INIT();
|
||||
|
||||
amd::Program* program = nullptr;
|
||||
device::Program* dev_program = nullptr;
|
||||
|
||||
/* Get Device Program pointer*/
|
||||
program = as_amd(reinterpret_cast<cl_program>(hmod));
|
||||
dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
if (dev_program == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
/* Find the global Symbols */
|
||||
if(!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
||||
hipFunction_t f,
|
||||
int blockSize,
|
||||
size_t dynamicSMemSize)
|
||||
{
|
||||
HIP_INIT_API(NONE, f, blockSize, dynamicSMemSize);
|
||||
int deviceId = ihipGetDevice();
|
||||
// FIXME: Function may not be a device function and may have been obtaiend via
|
||||
// hipModuleGetFunction and thus not in the functions_ map. Check the map
|
||||
// else interpret as a hip::Function for now.
|
||||
hipFunction_t func = PlatformState::instance().getFunc(f, deviceId);
|
||||
if (func == nullptr) {
|
||||
func = f;
|
||||
}
|
||||
hip::Function* function = hip::Function::asFunction(func);
|
||||
if (function == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
amd::Kernel* kernel = function->function_;
|
||||
if (!kernel) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
if (blockSize == 0) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
amd::Device* device = hip::getCurrentContext()->devices()[0];
|
||||
const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel->getDeviceKernel(*device)->workGroupInfo();
|
||||
|
||||
// Find threads accupancy per CU => simd_per_cu * GPR usage
|
||||
constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
|
||||
size_t VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4);
|
||||
|
||||
size_t GprWaves;
|
||||
if (wrkGrpInfo->usedSGPRs_ > 0) {
|
||||
const size_t maxSGPRs = (device->info().gfxipVersion_ < 800) ? 512 : 800;
|
||||
size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
|
||||
GprWaves = std::min(VgprWaves, SgprWaves);
|
||||
}
|
||||
else {
|
||||
GprWaves = VgprWaves;
|
||||
}
|
||||
|
||||
size_t alu_accupancy = device->info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves);
|
||||
alu_accupancy *= wrkGrpInfo->wavefrontSize_;
|
||||
// Calculate blocks occupancy per CU
|
||||
*numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_);
|
||||
|
||||
size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize;
|
||||
if (total_used_lds != 0) {
|
||||
// Calculate LDS occupancy per CU. lds_per_cu / (static_lsd + dynamic_lds)
|
||||
int lds_occupancy = static_cast<int>(device->info().localMemSize_ / total_used_lds);
|
||||
*numBlocks = std::min(*numBlocks, lds_occupancy);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
// FIXME: Need to replace `uint32_t` with `int` finally.
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks,
|
||||
hipFunction_t f,
|
||||
uint32_t blockSize,
|
||||
size_t dynamicSMemSize)
|
||||
{
|
||||
int NB;
|
||||
hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize);
|
||||
*numBlocks = NB;
|
||||
HIP_RETURN(Ret);
|
||||
}
|
||||
|
||||
// FIXME: Need to replace `uint32_t` with `int` finally.
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(uint32_t* numBlocks,
|
||||
hipFunction_t f,
|
||||
uint32_t blockSize,
|
||||
size_t dynamicSMemSize,
|
||||
unsigned int flags)
|
||||
{
|
||||
int NB;
|
||||
hipError_t Ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&NB, f, blockSize, dynamicSMemSize);
|
||||
*numBlocks = NB;
|
||||
HIP_RETURN(Ret);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if defined(ATI_OS_LINUX)
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
struct dl_phdr_info {
|
||||
ELFIO::Elf64_Addr dlpi_addr;
|
||||
const char *dlpi_name;
|
||||
const ELFIO::Elf64_Phdr *dlpi_phdr;
|
||||
ELFIO::Elf64_Half dlpi_phnum;
|
||||
};
|
||||
|
||||
extern "C" int dl_iterate_phdr(
|
||||
int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data
|
||||
);
|
||||
|
||||
struct Symbol {
|
||||
std::string name;
|
||||
ELFIO::Elf64_Addr value = 0;
|
||||
ELFIO::Elf_Xword size = 0;
|
||||
ELFIO::Elf_Half sect_idx = 0;
|
||||
uint8_t bind = 0;
|
||||
uint8_t type = 0;
|
||||
uint8_t other = 0;
|
||||
};
|
||||
|
||||
inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) {
|
||||
assert(idx < section.get_symbols_num());
|
||||
|
||||
Symbol r;
|
||||
section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) {
|
||||
const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p));
|
||||
|
||||
return it != reader.sections.end() ? *it : nullptr;
|
||||
}
|
||||
|
||||
std::vector<std::pair<uintptr_t, std::string>> function_names_for(const ELFIO::elfio& reader,
|
||||
ELFIO::section* symtab) {
|
||||
std::vector<std::pair<uintptr_t, std::string>> r;
|
||||
ELFIO::symbol_section_accessor symbols{reader, symtab};
|
||||
|
||||
for (auto i = 0u; i != symbols.get_symbols_num(); ++i) {
|
||||
auto tmp = read_symbol(symbols, i);
|
||||
|
||||
if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) {
|
||||
r.emplace_back(tmp.value, tmp.name);
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const std::vector<std::pair<uintptr_t, std::string>>& function_names_for_process() {
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
|
||||
static std::vector<std::pair<uintptr_t, std::string>> r;
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
ELFIO::elfio reader;
|
||||
|
||||
if (reader.load(self)) {
|
||||
const auto it = find_section_if(
|
||||
reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; });
|
||||
|
||||
if (it) r = function_names_for(reader, it);
|
||||
}
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
const std::unordered_map<uintptr_t, std::string>& function_names()
|
||||
{
|
||||
static std::unordered_map<uintptr_t, std::string> r{
|
||||
function_names_for_process().cbegin(),
|
||||
function_names_for_process().cend()};
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) {
|
||||
ELFIO::elfio reader;
|
||||
|
||||
if (reader.load(info->dlpi_name)) {
|
||||
const auto it = find_section_if(
|
||||
reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; });
|
||||
|
||||
if (it) {
|
||||
auto n = function_names_for(reader, it);
|
||||
|
||||
for (auto&& f : n) f.first += info->dlpi_addr;
|
||||
|
||||
r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end()));
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
},
|
||||
nullptr);
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
std::vector<char> bundles_for_process() {
|
||||
static constexpr const char self[] = "/proc/self/exe";
|
||||
static constexpr const char kernel_section[] = ".kernel";
|
||||
std::vector<char> r;
|
||||
|
||||
ELFIO::elfio reader;
|
||||
|
||||
if (reader.load(self)) {
|
||||
auto it = find_section_if(
|
||||
reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; });
|
||||
|
||||
if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size());
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const std::vector<hipModule_t>& modules() {
|
||||
static std::vector<hipModule_t> r;
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
static std::vector<std::vector<char>> bundles{bundles_for_process()};
|
||||
|
||||
dl_iterate_phdr(
|
||||
[](dl_phdr_info* info, std::size_t, void*) {
|
||||
ELFIO::elfio tmp;
|
||||
if (tmp.load(info->dlpi_name)) {
|
||||
const auto it = find_section_if(
|
||||
tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; });
|
||||
|
||||
if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size());
|
||||
}
|
||||
return 0;
|
||||
},
|
||||
nullptr);
|
||||
|
||||
for (auto&& bundle : bundles) {
|
||||
if (bundle.empty()) {
|
||||
continue;
|
||||
}
|
||||
std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
|
||||
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR))
|
||||
continue;
|
||||
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(&bundle[0]);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
for (uint64_t i = 0; i < obheader->numBundles; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
|
||||
|
||||
std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1);
|
||||
if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE))
|
||||
continue;
|
||||
|
||||
std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE),
|
||||
desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE));
|
||||
|
||||
if (isCompatibleCodeObject(target, hip::getCurrentContext()->devices()[0]->info().name_)) {
|
||||
hipModule_t module;
|
||||
if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast<const void*>(
|
||||
reinterpret_cast<uintptr_t>(obheader) + desc->offset)))
|
||||
r.push_back(module);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
const std::unordered_map<uintptr_t, hipFunction_t>& functions()
|
||||
{
|
||||
static std::unordered_map<uintptr_t, hipFunction_t> r;
|
||||
static std::once_flag f;
|
||||
|
||||
std::call_once(f, []() {
|
||||
for (auto&& function : function_names()) {
|
||||
for (auto&& module : modules()) {
|
||||
hipFunction_t f;
|
||||
if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) {
|
||||
r[function.first] = f;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
void hipLaunchKernelGGLImpl(
|
||||
uintptr_t function_address,
|
||||
const dim3& numBlocks,
|
||||
const dim3& dimBlocks,
|
||||
uint32_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
void** kernarg)
|
||||
{
|
||||
HIP_INIT();
|
||||
|
||||
const auto it = functions().find(function_address);
|
||||
if (it == functions().cend())
|
||||
assert(0);
|
||||
|
||||
hipModuleLaunchKernel(it->second,
|
||||
numBlocks.x, numBlocks.y, numBlocks.z,
|
||||
dimBlocks.x, dimBlocks.y, dimBlocks.z,
|
||||
sharedMemBytes, stream, nullptr, kernarg);
|
||||
}
|
||||
|
||||
void hipLaunchCooperativeKernelGGLImpl(
|
||||
uintptr_t function_address,
|
||||
const dim3& numBlocks,
|
||||
const dim3& dimBlocks,
|
||||
uint32_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
void** kernarg)
|
||||
{
|
||||
HIP_INIT();
|
||||
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address),
|
||||
numBlocks, dimBlocks, kernarg, sharedMemBytes, stream);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // defined(ATI_OS_LINUX)
|
||||
|
||||
// conversion routines between float and half precision
|
||||
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
|
||||
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
|
||||
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
|
||||
|
||||
// half float, the f16 is in the low 16 bits of the input argument
|
||||
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
|
||||
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
|
||||
std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
|
||||
u = (a & 0x7fff) != 0 ? v : u;
|
||||
return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
|
||||
}
|
||||
|
||||
// float half with nearest even rounding
|
||||
// The lower 16 bits of the result is the bit pattern for the f16
|
||||
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
||||
std::uint32_t u = f32_as_u32(a);
|
||||
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
|
||||
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
|
||||
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
|
||||
std::uint32_t n = ((std::uint32_t)e << 12) | m;
|
||||
std::uint32_t s = (u >> 16) & 0x8000;
|
||||
int b = clamp_int(1-e, 0, 13);
|
||||
std::uint32_t d = (0x1000 | m) >> b;
|
||||
d |= (d << b) != (0x1000 | m);
|
||||
std::uint32_t v = e < 1 ? d : n;
|
||||
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
|
||||
v = e > 30 ? 0x7c00 : v;
|
||||
v = e == 143 ? i : v;
|
||||
return s | v;
|
||||
}
|
||||
|
||||
extern "C" float __gnu_h2f_ieee(unsigned short h){
|
||||
return __convert_half_to_float((std::uint32_t) h);
|
||||
}
|
||||
|
||||
extern "C" unsigned short __gnu_f2h_ieee(float f){
|
||||
return (unsigned short)__convert_float_to_half(f);
|
||||
}
|
||||
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_SRC_HIP_PROF_API_H
|
||||
#define HIP_SRC_HIP_PROF_API_H
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
#if USE_PROF_API
|
||||
#include "hip/hcc_detail/hip_prof_str.h"
|
||||
#include "platform/prof_protocol.h"
|
||||
|
||||
// HIP API callbacks spawner object macro
|
||||
#define HIP_CB_SPAWNER_OBJECT(CB_ID) \
|
||||
api_callbacks_spawner_t<HIP_API_ID_##CB_ID> __api_tracer; \
|
||||
{ \
|
||||
hip_api_data_t* api_data = __api_tracer.get_api_data_ptr(); \
|
||||
if (api_data != NULL) { \
|
||||
hip_api_data_t& api_data_ref = *api_data; \
|
||||
INIT_CB_ARGS_DATA(CB_ID, api_data_ref); \
|
||||
__api_tracer.call(); \
|
||||
} \
|
||||
}
|
||||
|
||||
static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API;
|
||||
typedef activity_record_t hip_api_record_t;
|
||||
typedef activity_rtapi_callback_t hip_api_callback_t;
|
||||
typedef activity_sync_callback_t hip_act_callback_t;
|
||||
|
||||
class api_callbacks_table_t {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
typedef hip_api_record_t record_t;
|
||||
typedef hip_api_callback_t fun_t;
|
||||
typedef hip_act_callback_t act_t;
|
||||
|
||||
// HIP API callbacks table
|
||||
struct hip_cb_table_entry_t {
|
||||
volatile std::atomic<bool> sync;
|
||||
volatile std::atomic<uint32_t> sem;
|
||||
act_t act;
|
||||
void* a_arg;
|
||||
fun_t fun;
|
||||
void* arg;
|
||||
};
|
||||
|
||||
struct hip_cb_table_t {
|
||||
hip_cb_table_entry_t arr[HIP_API_ID_NUMBER];
|
||||
};
|
||||
|
||||
api_callbacks_table_t() {
|
||||
memset(&callbacks_table_, 0, sizeof(callbacks_table_));
|
||||
}
|
||||
|
||||
bool set_activity(uint32_t id, act_t fun, void* arg) {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
bool ret = true;
|
||||
|
||||
if (id < HIP_API_ID_NUMBER) {
|
||||
cb_sync(id);
|
||||
callbacks_table_.arr[id].act = fun;
|
||||
callbacks_table_.arr[id].a_arg = arg;
|
||||
enabled_ = true;
|
||||
cb_release(id);
|
||||
} else {
|
||||
ret = false;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool set_callback(uint32_t id, fun_t fun, void* arg) {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
bool ret = true;
|
||||
|
||||
if (id < HIP_API_ID_NUMBER) {
|
||||
cb_sync(id);
|
||||
callbacks_table_.arr[id].fun = fun;
|
||||
callbacks_table_.arr[id].arg = arg;
|
||||
enabled_ = true;
|
||||
cb_release(id);
|
||||
} else {
|
||||
ret = false;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void set_enabled(const bool& enabled) {
|
||||
enabled_ = enabled;
|
||||
}
|
||||
|
||||
inline hip_cb_table_entry_t& entry(const uint32_t& id) {
|
||||
return callbacks_table_.arr[id];
|
||||
}
|
||||
|
||||
inline void sem_sync(const uint32_t& id) {
|
||||
sem_increment(id);
|
||||
if (entry(id).sync.load() == true) sync_wait(id);
|
||||
}
|
||||
|
||||
inline void sem_release(const uint32_t& id) {
|
||||
sem_decrement(id);
|
||||
}
|
||||
|
||||
inline bool is_enabled() const {
|
||||
return enabled_;
|
||||
}
|
||||
|
||||
private:
|
||||
inline void cb_sync(const uint32_t& id) {
|
||||
entry(id).sync.store(true);
|
||||
while (entry(id).sem.load() != 0) {}
|
||||
}
|
||||
|
||||
inline void cb_release(const uint32_t& id) {
|
||||
entry(id).sync.store(false);
|
||||
}
|
||||
|
||||
inline void sem_increment(const uint32_t& id) {
|
||||
const uint32_t prev = entry(id).sem.fetch_add(1);
|
||||
if (prev == UINT32_MAX) {
|
||||
std::cerr << "sem overflow id = " << id << std::endl << std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
inline void sem_decrement(const uint32_t& id) {
|
||||
const uint32_t prev = entry(id).sem.fetch_sub(1);
|
||||
if (prev == 0) {
|
||||
std::cerr << "sem corrupted id = " << id << std::endl << std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void sync_wait(const uint32_t& id) {
|
||||
sem_decrement(id);
|
||||
while (entry(id).sync.load() == true) {}
|
||||
sem_increment(id);
|
||||
}
|
||||
|
||||
mutex_t mutex_;
|
||||
hip_cb_table_t callbacks_table_;
|
||||
bool enabled_;
|
||||
};
|
||||
|
||||
extern api_callbacks_table_t callbacks_table;
|
||||
|
||||
template <int cid_>
|
||||
class api_callbacks_spawner_t {
|
||||
public:
|
||||
api_callbacks_spawner_t() :
|
||||
api_data_(NULL)
|
||||
{
|
||||
if (!is_enabled()) return;
|
||||
|
||||
if (cid_ >= HIP_API_ID_NUMBER) {
|
||||
fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_);
|
||||
abort();
|
||||
}
|
||||
callbacks_table.sem_sync(cid_);
|
||||
|
||||
hip_act_callback_t act = entry(cid_).act;
|
||||
if (act != NULL) api_data_ = (hip_api_data_t*) act(cid_, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
void call() {
|
||||
hip_api_callback_t fun = entry(cid_).fun;
|
||||
void* arg = entry(cid_).arg;
|
||||
if (fun != NULL) {
|
||||
fun(HIP_DOMAIN_ID, cid_, api_data_, arg);
|
||||
api_data_->phase = ACTIVITY_API_PHASE_EXIT;
|
||||
}
|
||||
}
|
||||
|
||||
~api_callbacks_spawner_t() {
|
||||
if (!is_enabled()) return;
|
||||
|
||||
if (api_data_ != NULL) {
|
||||
hip_api_callback_t fun = entry(cid_).fun;
|
||||
void* arg = entry(cid_).arg;
|
||||
hip_act_callback_t act = entry(cid_).act;
|
||||
void* a_arg = entry(cid_).a_arg;
|
||||
if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, api_data_, arg);
|
||||
if (act != NULL) act(cid_, NULL, NULL, a_arg);
|
||||
}
|
||||
|
||||
callbacks_table.sem_release(cid_);
|
||||
}
|
||||
|
||||
hip_api_data_t* get_api_data_ptr() {
|
||||
return api_data_;
|
||||
}
|
||||
|
||||
bool is_enabled() const {
|
||||
return callbacks_table.is_enabled();
|
||||
}
|
||||
|
||||
private:
|
||||
inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) {
|
||||
return callbacks_table.entry(id);
|
||||
}
|
||||
|
||||
hip_api_data_t* api_data_;
|
||||
};
|
||||
|
||||
template <>
|
||||
class api_callbacks_spawner_t<HIP_API_ID_NUMBER> {
|
||||
public:
|
||||
api_callbacks_spawner_t() {}
|
||||
void call() {}
|
||||
hip_api_data_t* get_api_data_ptr() { return NULL; }
|
||||
bool is_enabled() const { return false; }
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0)
|
||||
|
||||
class api_callbacks_table_t {
|
||||
public:
|
||||
typedef void* act_t;
|
||||
typedef void* fun_t;
|
||||
bool set_activity(uint32_t id, act_t fun, void* arg) { return false; }
|
||||
bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // HIP_SRC_HIP_PROF_API_H
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
hipError_t hipProfilerStart() {
|
||||
HIP_INIT_API(hipProfilerStart);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipProfilerStop() {
|
||||
HIP_INIT_API(hipProfilerStop);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
+377
@@ -0,0 +1,377 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hiprtc_internal.hpp"
|
||||
#include <hip/hiprtc.h>
|
||||
#include "platform/program.hpp"
|
||||
|
||||
extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer,
|
||||
size_t *length, int *status);
|
||||
|
||||
|
||||
namespace hiprtc {
|
||||
thread_local hiprtcResult g_lastRtcError = HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
class ProgramState {
|
||||
amd::Monitor lock_;
|
||||
private:
|
||||
static ProgramState* programState_;
|
||||
|
||||
ProgramState() : lock_("Guards program state") {}
|
||||
~ProgramState() {}
|
||||
public:
|
||||
std::unordered_map<amd::Program*,
|
||||
std::pair<std::vector<std::string>, std::vector<std::string>>> progHeaders_;
|
||||
|
||||
std::map<std::string, std::pair<std::string, std::string>> nameExpresssion_;
|
||||
|
||||
static ProgramState& instance();
|
||||
void createProgramHeaders(amd::Program* program, int numHeaders,
|
||||
const char** headers, const char** headerNames);
|
||||
void getProgramHeaders(amd::Program* program, int* numHeaders, char** headers, char ** headerNames);
|
||||
uint32_t addNameExpression(const char* name_expression);
|
||||
char* getLoweredName(const char* name_expression);
|
||||
};
|
||||
|
||||
ProgramState* ProgramState::programState_ = nullptr;
|
||||
|
||||
ProgramState& ProgramState::instance() {
|
||||
if (programState_ == nullptr) {
|
||||
programState_ = new ProgramState;
|
||||
}
|
||||
return *programState_;
|
||||
}
|
||||
|
||||
void ProgramState::createProgramHeaders(amd::Program* program, int numHeaders,
|
||||
const char** headers, const char** headerNames) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
std::vector<std::string> vHeaderNames;
|
||||
std::vector<std::string> vHeaders;
|
||||
for (auto i = 0; i != numHeaders; ++i) {
|
||||
vHeaders.emplace_back(headers[i]);
|
||||
vHeaderNames.emplace_back(headerNames[i]);
|
||||
progHeaders_[program] = std::make_pair(std::move(vHeaders), std::move(vHeaderNames));
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramState::getProgramHeaders(amd::Program* program, int* numHeaders,
|
||||
char** headers, char ** headerNames) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
const auto it = progHeaders_.find(program);
|
||||
if (it != progHeaders_.cend()) {
|
||||
*numHeaders = it->second.first.size();
|
||||
*headers = reinterpret_cast<char*>(it->second.first.data());
|
||||
*headerNames = reinterpret_cast<char*>(it->second.second.data());
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t ProgramState::addNameExpression(const char* name_expression) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
// Strip clean of any '(' or ')' or '&'
|
||||
std::string strippedName(name_expression);
|
||||
if (strippedName.back() == ')') {
|
||||
strippedName.pop_back();
|
||||
strippedName.erase(0, strippedName.find('('));
|
||||
}
|
||||
if (strippedName.front() == '&') {
|
||||
strippedName.erase(0, 1);
|
||||
}
|
||||
auto it = nameExpresssion_.find(name_expression);
|
||||
if (it == nameExpresssion_.end()) {
|
||||
nameExpresssion_.insert(std::pair<std::string, std::pair<std::string, std::string>>
|
||||
(name_expression, std::make_pair(strippedName,"")));
|
||||
}
|
||||
return nameExpresssion_.size();
|
||||
}
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
char* demangle(const char* loweredName) {
|
||||
#ifdef ATI_OS_LINUX
|
||||
if (!loweredName) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int status = 0;
|
||||
char* demangledName = __cxa_demangle(loweredName, nullptr, nullptr, &status);
|
||||
if (status != 0) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return demangledName;
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
} // hip_impl
|
||||
|
||||
static std::string handleMangledName(std::string name) {
|
||||
std::string loweredName;
|
||||
char* demangled = hip_impl::demangle(name.c_str());
|
||||
loweredName.assign(demangled == nullptr ? std::string() : demangled);
|
||||
free(demangled);
|
||||
|
||||
if (loweredName.empty()) {
|
||||
return name;
|
||||
}
|
||||
|
||||
if (loweredName.find(".kd") != std::string::npos) {
|
||||
return {};
|
||||
}
|
||||
|
||||
if (loweredName.find("void ") == 0) {
|
||||
loweredName.erase(0, strlen("void "));
|
||||
}
|
||||
|
||||
auto dx{loweredName.find_first_of("(<")};
|
||||
|
||||
if (dx == std::string::npos) {
|
||||
return loweredName;
|
||||
}
|
||||
|
||||
if (loweredName[dx] == '<') {
|
||||
uint32_t count = 1;
|
||||
do {
|
||||
++dx;
|
||||
count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? -1 : 0);
|
||||
} while (count);
|
||||
|
||||
loweredName.erase(++dx);
|
||||
} else {
|
||||
loweredName.erase(dx);
|
||||
}
|
||||
|
||||
return loweredName;
|
||||
}
|
||||
|
||||
|
||||
const char* hiprtcGetErrorString(hiprtcResult x) {
|
||||
switch (x) {
|
||||
case HIPRTC_SUCCESS:
|
||||
return "HIPRTC_SUCCESS";
|
||||
case HIPRTC_ERROR_OUT_OF_MEMORY:
|
||||
return "HIPRTC_ERROR_OUT_OF_MEMORY";
|
||||
case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE:
|
||||
return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE";
|
||||
case HIPRTC_ERROR_INVALID_INPUT:
|
||||
return "HIPRTC_ERROR_INVALID_INPUT";
|
||||
case HIPRTC_ERROR_INVALID_PROGRAM:
|
||||
return "HIPRTC_ERROR_INVALID_PROGRAM";
|
||||
case HIPRTC_ERROR_INVALID_OPTION:
|
||||
return "HIPRTC_ERROR_INVALID_OPTION";
|
||||
case HIPRTC_ERROR_COMPILATION:
|
||||
return "HIPRTC_ERROR_COMPILATION";
|
||||
case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE:
|
||||
return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE";
|
||||
case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION:
|
||||
return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION";
|
||||
case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION:
|
||||
return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION";
|
||||
case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID:
|
||||
return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID";
|
||||
case HIPRTC_ERROR_INTERNAL_ERROR:
|
||||
return "HIPRTC_ERROR_INTERNAL_ERROR";
|
||||
default:
|
||||
throw std::logic_error{"Invalid HIPRTC result."};
|
||||
};
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name,
|
||||
int numHeaders, const char** headers, const char** headerNames) {
|
||||
HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames);
|
||||
|
||||
if (prog == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM);
|
||||
}
|
||||
if (numHeaders < 0) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
if (numHeaders && (headers == nullptr || headerNames == nullptr)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
amd::Program* program = new amd::Program(*hip::getCurrentContext(), src, amd::Program::HIP);
|
||||
if (program == NULL) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0])) {
|
||||
program->release();
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
|
||||
}
|
||||
|
||||
ProgramState::instance().createProgramHeaders(program, numHeaders, headers, headerNames);
|
||||
|
||||
*prog = reinterpret_cast<hiprtcProgram>(as_cl(program));
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) {
|
||||
|
||||
// FIXME[skudchad] Add headers to amd::Program::build and device::Program::build,
|
||||
// pass the saved from ProgramState to amd::Program::build
|
||||
HIPRTC_INIT_API(prog, numOptions, options);
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
|
||||
std::ostringstream ostrstr;
|
||||
std::vector<const char*> oarr(&options[0], &options[numOptions]);
|
||||
std::copy(oarr.begin(), oarr.end(), std::ostream_iterator<std::string>(ostrstr, " "));
|
||||
|
||||
std::vector<amd::Device*> devices{hip::getCurrentContext()->devices()[0]};
|
||||
if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION);
|
||||
}
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) {
|
||||
HIPRTC_INIT_API(prog, name_expression);
|
||||
|
||||
if (name_expression == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
|
||||
uint32_t id = ProgramState::instance().addNameExpression(name_expression);
|
||||
|
||||
const auto var{"__hiprtc_" + std::to_string(id)};
|
||||
const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name_expression + ';'};
|
||||
|
||||
program->appendToSource(code.c_str());
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression,
|
||||
const char** loweredName) {
|
||||
HIPRTC_INIT_API(prog, name_expression, loweredName);
|
||||
|
||||
if (name_expression == nullptr || loweredName == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
|
||||
device::Program* dev_program
|
||||
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
auto it = ProgramState::instance().nameExpresssion_.find(name_expression);
|
||||
if (it == ProgramState::instance().nameExpresssion_.end()) {
|
||||
return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID;
|
||||
}
|
||||
|
||||
std::string strippedName = it->second.first;
|
||||
std::vector<std::string> mangledNames;
|
||||
|
||||
if (!dev_program->getLoweredNames(&mangledNames)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION);
|
||||
}
|
||||
|
||||
for (auto &name : mangledNames) {
|
||||
std::string demangledName = handleMangledName(name);
|
||||
if (demangledName == strippedName) {
|
||||
it->second.second.assign(name);
|
||||
}
|
||||
}
|
||||
|
||||
*loweredName = it->second.second.c_str();
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) {
|
||||
HIPRTC_INIT_API(prog);
|
||||
|
||||
if (prog == NULL) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
// Release program. hiprtcProgram is a double pointer so free *prog
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(*prog));
|
||||
|
||||
program->release();
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) {
|
||||
HIPRTC_INIT_API(prog, binaryMem);
|
||||
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
const device::Program::binary_t& binary =
|
||||
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary();
|
||||
|
||||
::memcpy(binaryMem, binary.first, binary.second);
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) {
|
||||
|
||||
HIPRTC_INIT_API(prog, binarySizeRet);
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
|
||||
*binarySizeRet =
|
||||
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary().second;
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) {
|
||||
|
||||
HIPRTC_INIT_API(prog, dst);
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
const device::Program* devProgram =
|
||||
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
auto log = program->programLog() + devProgram->buildLog().c_str();
|
||||
|
||||
log.copy(dst, log.size());
|
||||
dst[log.size()] = '\0';
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) {
|
||||
|
||||
HIPRTC_INIT_API(prog, logSizeRet);
|
||||
|
||||
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
|
||||
const device::Program* devProgram =
|
||||
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
auto log = program->programLog() + devProgram->buildLog().c_str();
|
||||
|
||||
*logSizeRet = log.size() + 1;
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
@@ -0,0 +1,263 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "hip_internal.hpp"
|
||||
#include "hip_event.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
|
||||
static amd::Monitor streamSetLock("Guards global stream set");
|
||||
static std::unordered_set<hip::Stream*> streamSet;
|
||||
|
||||
// Internal structure for stream callback handler
|
||||
class StreamCallback {
|
||||
public:
|
||||
StreamCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
|
||||
amd::Command* command)
|
||||
: stream_(stream), callBack_(callback),
|
||||
userData_(userData), command_(command) {
|
||||
};
|
||||
hipStream_t stream_;
|
||||
hipStreamCallback_t callBack_;
|
||||
void* userData_;
|
||||
amd::Command* command_;
|
||||
};
|
||||
|
||||
namespace hip {
|
||||
|
||||
void syncStreams() {
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
|
||||
for (const auto& it : streamSet) {
|
||||
it->finish();
|
||||
}
|
||||
}
|
||||
|
||||
Stream::Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f) :
|
||||
queue(nullptr), device(dev), context(ctx), priority(p), flags(f) {}
|
||||
|
||||
void Stream::create() {
|
||||
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
|
||||
queue = new amd::HostQueue(*context, *device, properties,
|
||||
amd::CommandQueue::RealTimeDisabled, priority);
|
||||
assert(queue != nullptr);
|
||||
queue->create();
|
||||
}
|
||||
|
||||
amd::HostQueue* Stream::asHostQueue() {
|
||||
if (queue == nullptr) {
|
||||
create();
|
||||
}
|
||||
return queue;
|
||||
}
|
||||
|
||||
void Stream::destroy() {
|
||||
if (queue != nullptr) {
|
||||
queue->release();
|
||||
queue = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void Stream::finish() {
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) {
|
||||
|
||||
hipError_t status = hipSuccess;
|
||||
StreamCallback* cbo = reinterpret_cast<StreamCallback*>(user_data);
|
||||
cbo->callBack_(cbo->stream_, status, cbo->userData_);
|
||||
cbo->command_->release();
|
||||
delete cbo;
|
||||
}
|
||||
|
||||
static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) {
|
||||
amd::Device* device = hip::getCurrentContext()->devices()[0];
|
||||
|
||||
hip::Stream* hStream = new hip::Stream(device, hip::getCurrentContext(), priority, flags);
|
||||
|
||||
if (hStream == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
if (!(flags & hipStreamNonBlocking)) {
|
||||
hip::syncStreams();
|
||||
|
||||
{
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
streamSet.insert(hStream);
|
||||
}
|
||||
}
|
||||
|
||||
*stream = reinterpret_cast<hipStream_t>(hStream);
|
||||
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream);
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) {
|
||||
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
|
||||
|
||||
HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal));
|
||||
}
|
||||
|
||||
hipError_t hipStreamCreate(hipStream_t *stream) {
|
||||
HIP_INIT_API(hipStreamCreate, stream);
|
||||
|
||||
HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal));
|
||||
}
|
||||
|
||||
hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
|
||||
HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority);
|
||||
|
||||
if (priority > static_cast<int>(amd::CommandQueue::Priority::High)) {
|
||||
priority = static_cast<int>(amd::CommandQueue::Priority::High);
|
||||
} else if (priority < static_cast<int>(amd::CommandQueue::Priority::Normal)) {
|
||||
priority = static_cast<int>(amd::CommandQueue::Priority::Normal);
|
||||
}
|
||||
|
||||
return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast<amd::CommandQueue::Priority>(priority)));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
|
||||
HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority);
|
||||
|
||||
if (leastPriority != nullptr) {
|
||||
*leastPriority = static_cast<int>(amd::CommandQueue::Priority::Normal);
|
||||
}
|
||||
if (greatestPriority != nullptr) {
|
||||
// Only report one kind of priority for now.
|
||||
*greatestPriority = static_cast<int>(amd::CommandQueue::Priority::Normal);
|
||||
}
|
||||
return HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) {
|
||||
HIP_INIT_API(hipStreamGetFlags, stream, flags);
|
||||
|
||||
hip::Stream* hStream = reinterpret_cast<hip::Stream*>(stream);
|
||||
|
||||
if(flags != nullptr && hStream != nullptr) {
|
||||
*flags = hStream->flags;
|
||||
} else {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream) {
|
||||
HIP_INIT_API(hipStreamSynchronize, stream);
|
||||
|
||||
amd::HostQueue* hostQueue = hip::getQueue(stream);
|
||||
hostQueue->finish();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipStreamDestroy(hipStream_t stream) {
|
||||
HIP_INIT_API(hipStreamDestroy, stream);
|
||||
|
||||
if (stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
|
||||
hip::Stream* hStream = reinterpret_cast<hip::Stream*>(stream);
|
||||
|
||||
hStream->destroy();
|
||||
streamSet.erase(hStream);
|
||||
|
||||
delete hStream;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
|
||||
HIP_INIT_API(hipStreamWaitEvent, stream, event, flags);
|
||||
|
||||
amd::HostQueue* queue;
|
||||
|
||||
if (stream == nullptr) {
|
||||
queue = hip::getNullStream();
|
||||
} else {
|
||||
queue = reinterpret_cast<hip::Stream*>(stream)->asHostQueue();
|
||||
}
|
||||
|
||||
if (event == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event);
|
||||
|
||||
return HIP_RETURN(e->streamWait(queue, flags));
|
||||
}
|
||||
|
||||
hipError_t hipStreamQuery(hipStream_t stream) {
|
||||
HIP_INIT_API(hipStreamQuery, stream);
|
||||
|
||||
amd::HostQueue* hostQueue;
|
||||
if (stream == nullptr) {
|
||||
hostQueue = hip::getNullStream();
|
||||
} else {
|
||||
hostQueue = reinterpret_cast<hip::Stream*>(stream)->asHostQueue();
|
||||
}
|
||||
|
||||
amd::Command* command = hostQueue->getLastQueuedCommand(false);
|
||||
if (command == nullptr) {
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
amd::Event& event = command->event();
|
||||
if (command->type() != 0) {
|
||||
event.notifyCmdQueue();
|
||||
}
|
||||
HIP_RETURN((command->status() == CL_COMPLETE) ? hipSuccess : hipErrorNotReady);
|
||||
}
|
||||
|
||||
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
|
||||
unsigned int flags) {
|
||||
HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags);
|
||||
|
||||
amd::HostQueue* hostQueue = reinterpret_cast<hip::Stream*>
|
||||
(stream)->asHostQueue();
|
||||
amd::Command* command = hostQueue->getLastQueuedCommand(true);
|
||||
amd::Event& event = command->event();
|
||||
StreamCallback* cbo = new StreamCallback(stream, callback, userData, command);
|
||||
|
||||
if(!event.setCallback(CL_COMPLETE, ihipStreamCallback, reinterpret_cast<void*>(cbo))) {
|
||||
command->release();
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
|
||||
event.notifyCmdQueue();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
#include <hip/hcc_detail/hip_surface_types.h>
|
||||
|
||||
namespace hip {
|
||||
|
||||
static amd::Monitor surfaceLock("Guards surface objects");
|
||||
|
||||
struct hipSurface {
|
||||
hipSurface(const hipResourceDesc* pResDesc): array(nullptr)
|
||||
{
|
||||
memcpy(&resDesc, pResDesc, sizeof(hipResourceDesc));
|
||||
}
|
||||
|
||||
hipArray* array;
|
||||
hipResourceDesc resDesc;
|
||||
};
|
||||
|
||||
static std::unordered_map<hipSurfaceObject_t, hipSurface*> surfaceHash;
|
||||
|
||||
};
|
||||
|
||||
using namespace hip;
|
||||
|
||||
hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
|
||||
const hipResourceDesc* pResDesc) {
|
||||
HIP_INIT_API(NONE, pSurfObject, pResDesc);
|
||||
|
||||
hipSurface* pSurface = new hipSurface(pResDesc);
|
||||
assert(pSurface != nullptr);
|
||||
|
||||
switch (pResDesc->resType) {
|
||||
case hipResourceTypeArray:
|
||||
pSurface->array = pResDesc->res.array.array;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
hipSurfaceObject_t surfObj;
|
||||
hipError_t err = hipMalloc(reinterpret_cast<void**>(&surfObj), sizeof(hipArray));
|
||||
if (err != hipSuccess) {
|
||||
delete pSurface;
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
err = hipMemcpy(reinterpret_cast<void*>(surfObj), reinterpret_cast<void*>(pResDesc->res.array.array), sizeof(hipArray),
|
||||
hipMemcpyHostToDevice);
|
||||
if (err != hipSuccess) {
|
||||
delete pSurface;
|
||||
hipFree(reinterpret_cast<void*>(surfObj));
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
*pSurfObject = surfObj;
|
||||
|
||||
amd::ScopedLock lock(surfaceLock);
|
||||
surfaceHash[*pSurfObject] = pSurface;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
|
||||
HIP_INIT_API(NONE, surfaceObject);
|
||||
|
||||
amd::ScopedLock lock(surfaceLock);
|
||||
hipSurface* pSurface = surfaceHash[surfaceObject];
|
||||
if (pSurface != nullptr) {
|
||||
delete pSurface;
|
||||
surfaceHash.erase(surfaceObject);
|
||||
HIP_RETURN(hipFree(reinterpret_cast<void*>(surfaceObject)));
|
||||
}
|
||||
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -0,0 +1,808 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hcc_detail/texture_types.h>
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/sampler.hpp"
|
||||
|
||||
namespace hip {
|
||||
struct TextureObject {
|
||||
uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD];
|
||||
uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD];
|
||||
amd::Image* image;
|
||||
amd::Sampler* sampler;
|
||||
hipResourceDesc resDesc;
|
||||
};
|
||||
};
|
||||
|
||||
void getDrvChannelOrderAndType(const enum hipArray_Format Format, unsigned int NumChannels,
|
||||
cl_channel_order* channelOrder,
|
||||
cl_channel_type* channelType) {
|
||||
switch (Format) {
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT8:
|
||||
*channelType = CL_UNSIGNED_INT8;
|
||||
break;
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT16:
|
||||
*channelType = CL_UNSIGNED_INT16;
|
||||
break;
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT32:
|
||||
*channelType = CL_UNSIGNED_INT32;
|
||||
break;
|
||||
case HIP_AD_FORMAT_SIGNED_INT8:
|
||||
*channelType = CL_SIGNED_INT8;
|
||||
break;
|
||||
case HIP_AD_FORMAT_SIGNED_INT16:
|
||||
*channelType = CL_SIGNED_INT16;
|
||||
break;
|
||||
case HIP_AD_FORMAT_SIGNED_INT32:
|
||||
*channelType = CL_SIGNED_INT32;
|
||||
break;
|
||||
case HIP_AD_FORMAT_HALF:
|
||||
*channelType = CL_HALF_FLOAT;
|
||||
break;
|
||||
case HIP_AD_FORMAT_FLOAT:
|
||||
*channelType = CL_FLOAT;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (NumChannels == 4) {
|
||||
*channelOrder = CL_RGBA;
|
||||
} else if (NumChannels == 2) {
|
||||
*channelOrder = CL_RG;
|
||||
} else if (NumChannels == 1) {
|
||||
*channelOrder = CL_R;
|
||||
}
|
||||
}
|
||||
|
||||
void setDescFromChannelType(cl_channel_type channelType, hipChannelFormatDesc* desc) {
|
||||
|
||||
memset(desc, 0x00, sizeof(hipChannelFormatDesc));
|
||||
|
||||
switch (channelType) {
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_SIGNED_INT32:
|
||||
desc->f = hipChannelFormatKindSigned;
|
||||
break;
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_UNSIGNED_INT32:
|
||||
desc->f = hipChannelFormatKindUnsigned;
|
||||
break;
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_FLOAT:
|
||||
desc->f = hipChannelFormatKindFloat;
|
||||
break;
|
||||
default:
|
||||
desc->f = hipChannelFormatKindNone;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (channelType) {
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
desc->x = 8;
|
||||
break;
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_HALF_FLOAT:
|
||||
desc->x = 16;
|
||||
break;
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_FLOAT:
|
||||
desc->x = 32;
|
||||
break;
|
||||
default:
|
||||
desc->x = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode,
|
||||
cl_channel_order* channelOrder, cl_channel_type* channelType) {
|
||||
if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) {
|
||||
*channelOrder = CL_RGBA;
|
||||
} else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) {
|
||||
*channelOrder = CL_RGB;
|
||||
} else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) {
|
||||
*channelOrder = CL_RG;
|
||||
} else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) {
|
||||
*channelOrder = CL_R;
|
||||
} else {
|
||||
}
|
||||
|
||||
switch (desc.f) {
|
||||
case hipChannelFormatKindUnsigned:
|
||||
switch (desc.x) {
|
||||
case 32:
|
||||
*channelType = CL_UNSIGNED_INT32;
|
||||
break;
|
||||
case 16:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? CL_UNORM_INT16
|
||||
: CL_UNSIGNED_INT16;
|
||||
break;
|
||||
case 8:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? CL_UNORM_INT8
|
||||
: CL_UNSIGNED_INT8;
|
||||
break;
|
||||
default:
|
||||
*channelType = CL_UNSIGNED_INT32;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindSigned:
|
||||
switch (desc.x) {
|
||||
case 32:
|
||||
*channelType = CL_SIGNED_INT32;
|
||||
break;
|
||||
case 16:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? CL_SNORM_INT16
|
||||
: CL_SIGNED_INT16;
|
||||
break;
|
||||
case 8:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? CL_SNORM_INT8
|
||||
: CL_SIGNED_INT8;
|
||||
break;
|
||||
default:
|
||||
*channelType = CL_SIGNED_INT32;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
switch (desc.x) {
|
||||
case 32:
|
||||
*channelType = CL_FLOAT;
|
||||
break;
|
||||
case 16:
|
||||
*channelType = CL_HALF_FLOAT;
|
||||
break;
|
||||
case 8:
|
||||
break;
|
||||
default:
|
||||
*channelType = CL_FLOAT;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void getByteSizeFromChannelFormatKind(enum hipChannelFormatKind channelFormatKind, size_t* byteSize) {
|
||||
switch (channelFormatKind)
|
||||
{
|
||||
case hipChannelFormatKindSigned:
|
||||
*byteSize = sizeof(int);
|
||||
break;
|
||||
case hipChannelFormatKindUnsigned:
|
||||
*byteSize = sizeof(unsigned int);
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
*byteSize = sizeof(float);
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
*byteSize = sizeof(size_t);
|
||||
break;
|
||||
default:
|
||||
*byteSize = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode,
|
||||
enum hipTextureFilterMode filterMode, int normalizedCoords) {
|
||||
#ifndef CL_FILTER_NONE
|
||||
#define CL_FILTER_NONE 0x1142
|
||||
#endif
|
||||
uint32_t filter_mode = CL_FILTER_NONE;
|
||||
switch (filterMode) {
|
||||
case hipFilterModePoint:
|
||||
filter_mode = CL_FILTER_NEAREST;
|
||||
break;
|
||||
case hipFilterModeLinear:
|
||||
filter_mode = CL_FILTER_LINEAR;
|
||||
break;
|
||||
}
|
||||
|
||||
uint32_t address_mode = CL_ADDRESS_NONE;
|
||||
switch (addressMode) {
|
||||
case hipAddressModeWrap:
|
||||
address_mode = CL_ADDRESS_REPEAT;
|
||||
break;
|
||||
case hipAddressModeClamp:
|
||||
address_mode = CL_ADDRESS_CLAMP;
|
||||
break;
|
||||
case hipAddressModeMirror:
|
||||
address_mode = CL_ADDRESS_MIRRORED_REPEAT;
|
||||
break;
|
||||
case hipAddressModeBorder:
|
||||
address_mode = CL_ADDRESS_CLAMP_TO_EDGE;
|
||||
break;
|
||||
}
|
||||
amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentContext(),
|
||||
normalizedCoords == CL_TRUE,
|
||||
address_mode, filter_mode, CL_FILTER_NONE, 0.f, CL_MAXFLOAT);
|
||||
if (sampler == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!sampler->create()) {
|
||||
delete sampler;
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
hip::TextureObject* ihipCreateTextureObject(const hipResourceDesc& resDesc, amd::Image& image, amd::Sampler& sampler) {
|
||||
hip::TextureObject* texture;
|
||||
ihipMalloc(reinterpret_cast<void**>(&texture), sizeof(hip::TextureObject), CL_MEM_SVM_FINE_GRAIN_BUFFER);
|
||||
|
||||
if (texture == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
device::Memory* imageMem = image.getDeviceMemory(*hip::getCurrentContext()->devices()[0]);
|
||||
memcpy(texture->imageSRD, imageMem->cpuSrd(), sizeof(uint32_t)*HIP_IMAGE_OBJECT_SIZE_DWORD);
|
||||
texture->image = ℑ
|
||||
|
||||
device::Sampler* devSampler = sampler.getDeviceSampler(*hip::getCurrentContext()->devices()[0]);
|
||||
memcpy(texture->samplerSRD, devSampler->hwState(), sizeof(uint32_t)*HIP_SAMPLER_OBJECT_SIZE_DWORD);
|
||||
texture->sampler = &sampler;
|
||||
|
||||
memcpy(&texture->resDesc, &resDesc, sizeof(hipResourceDesc));
|
||||
|
||||
return texture;
|
||||
}
|
||||
|
||||
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
|
||||
const hipTextureDesc* pTexDesc,
|
||||
const hipResourceViewDesc* pResViewDesc) {
|
||||
HIP_INIT_API(NONE, pTexObject, pResDesc, pTexDesc, pResViewDesc);
|
||||
|
||||
amd::Device* device = hip::getCurrentContext()->devices()[0];
|
||||
|
||||
if (!device->info().imageSupport_) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Image* image = nullptr;
|
||||
|
||||
cl_image_format image_format;
|
||||
getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode,
|
||||
&image_format.image_channel_order, &image_format.image_channel_data_type);
|
||||
|
||||
const amd::Image::Format imageFormat(image_format);
|
||||
|
||||
amd::Memory* memory = nullptr;
|
||||
size_t offset = 0;
|
||||
cl_mem_object_type clType;
|
||||
|
||||
switch (pResDesc->resType) {
|
||||
case hipResourceTypeArray:
|
||||
{
|
||||
memory = getMemoryObject(pResDesc->res.array.array->data, offset);
|
||||
|
||||
getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode,
|
||||
&image_format.image_channel_order, &image_format.image_channel_data_type);
|
||||
const amd::Image::Format imageFormat(image_format);
|
||||
switch (pResDesc->res.array.array->type) {
|
||||
case hipArrayLayered:
|
||||
case hipArrayCubemap:
|
||||
assert(0);
|
||||
break;
|
||||
case hipArraySurfaceLoadStore:
|
||||
case hipArrayTextureGather:
|
||||
case hipArrayDefault:
|
||||
default:
|
||||
switch(pResDesc->res.array.array->textureType) {
|
||||
case hipTextureType3D:
|
||||
clType = CL_MEM_OBJECT_IMAGE3D;
|
||||
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
|
||||
clType, memory->getMemFlags(), imageFormat,
|
||||
pResDesc->res.array.array->width, pResDesc->res.array.array->height,
|
||||
pResDesc->res.array.array->depth, 0, 0);
|
||||
break;
|
||||
case hipTextureType2D:
|
||||
clType = CL_MEM_OBJECT_IMAGE2D;
|
||||
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
|
||||
clType, memory->getMemFlags(), imageFormat,
|
||||
pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case hipResourceTypeMipmappedArray:
|
||||
assert(0);
|
||||
break;
|
||||
case hipResourceTypeLinear:
|
||||
{
|
||||
assert(pResViewDesc == nullptr);
|
||||
memory = getMemoryObject(pResDesc->res.linear.devPtr, offset);
|
||||
|
||||
getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode,
|
||||
&image_format.image_channel_order, &image_format.image_channel_data_type);
|
||||
const amd::Image::Format imageFormat(image_format);
|
||||
|
||||
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
|
||||
CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat,
|
||||
pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1,
|
||||
pResDesc->res.linear.sizeInBytes, 0);
|
||||
}
|
||||
break;
|
||||
case hipResourceTypePitch2D:
|
||||
assert(pResViewDesc == nullptr);
|
||||
memory = getMemoryObject(pResDesc->res.pitch2D.devPtr, offset);
|
||||
|
||||
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
|
||||
CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat,
|
||||
pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1,
|
||||
pResDesc->res.pitch2D.pitchInBytes, 0);
|
||||
break;
|
||||
default: HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
if (!image->create()) {
|
||||
delete image;
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
|
||||
amd::Sampler* sampler = fillSamplerDescriptor(pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords);
|
||||
|
||||
*pTexObject = reinterpret_cast<hipTextureObject_t>(ihipCreateTextureObject(*pResDesc, *image, *sampler));
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
void ihipDestroyTextureObject(hip::TextureObject* texture) {
|
||||
texture->image->release();
|
||||
texture->sampler->release();
|
||||
|
||||
hipFree(texture);
|
||||
}
|
||||
|
||||
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(NONE, textureObject);
|
||||
|
||||
hip::TextureObject* texture = reinterpret_cast<hip::TextureObject*>(textureObject);
|
||||
|
||||
ihipDestroyTextureObject(texture);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
|
||||
hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(NONE, pResDesc, textureObject);
|
||||
|
||||
hip::TextureObject* texture = reinterpret_cast<hip::TextureObject*>(textureObject);
|
||||
|
||||
if (pResDesc != nullptr && texture != nullptr) {
|
||||
memcpy(pResDesc, &(texture->resDesc), sizeof(hipResourceDesc));
|
||||
}
|
||||
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
|
||||
hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(NONE, pResViewDesc, textureObject);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
|
||||
hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(NONE, pTexDesc, textureObject);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t ihipBindTexture(cl_mem_object_type type,
|
||||
size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc& desc, size_t width, size_t height,
|
||||
size_t pitch) {
|
||||
if (tex == nullptr) {
|
||||
return hipErrorInvalidImage;
|
||||
}
|
||||
if (hip::getCurrentContext()) {
|
||||
cl_image_format image_format;
|
||||
size_t byteSize;
|
||||
size_t rowPitch = 0;
|
||||
size_t depth = 0;
|
||||
size_t slicePitch = 0;
|
||||
|
||||
getChannelOrderAndType(desc, hipReadModeElementType,
|
||||
&image_format.image_channel_order, &image_format.image_channel_data_type);
|
||||
getByteSizeFromChannelFormatKind(desc.f, &byteSize);
|
||||
const amd::Image::Format imageFormat(image_format);
|
||||
amd::Memory* memory = getMemoryObject(devPtr, *offset);
|
||||
|
||||
switch (type) {
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
rowPitch = width * byteSize;
|
||||
depth = pitch;
|
||||
slicePitch = rowPitch * height;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
default:
|
||||
rowPitch = pitch;
|
||||
depth = 1;
|
||||
slicePitch = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
|
||||
type, memory->getMemFlags(), imageFormat, width, height, depth, rowPitch, slicePitch);
|
||||
if (!image->create()) {
|
||||
delete image;
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
*offset = 0;
|
||||
if (tex->textureObject) {
|
||||
ihipDestroyTextureObject(reinterpret_cast<hip::TextureObject*>(tex->textureObject));
|
||||
}
|
||||
amd::Sampler* sampler = fillSamplerDescriptor(tex->addressMode[0], tex->filterMode, tex->normalized);
|
||||
|
||||
hipResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(hipResourceDesc));
|
||||
switch (type) {
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
resDesc.resType = hipResourceTypeLinear;
|
||||
resDesc.res.linear.devPtr = const_cast<void*>(devPtr);
|
||||
resDesc.res.linear.desc = desc;
|
||||
resDesc.res.linear.sizeInBytes = image->getSize();
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
resDesc.resType = hipResourceTypePitch2D;
|
||||
resDesc.res.pitch2D.devPtr = const_cast<void*>(devPtr);
|
||||
resDesc.res.pitch2D.desc = desc;
|
||||
resDesc.res.pitch2D.width = width;
|
||||
resDesc.res.pitch2D.height = height;
|
||||
resDesc.res.pitch2D.pitchInBytes = pitch;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
resDesc.resType = hipResourceTypeArray;
|
||||
resDesc.res.array.array = (hipArray*)malloc(sizeof(hipArray));
|
||||
resDesc.res.array.array->desc = desc;
|
||||
resDesc.res.array.array->width = width;
|
||||
resDesc.res.array.array->height = height;
|
||||
resDesc.res.array.array->depth = depth;
|
||||
resDesc.res.array.array->Format = tex->format;
|
||||
resDesc.res.array.array->NumChannels = tex->numChannels;
|
||||
resDesc.res.array.array->isDrv = false;
|
||||
resDesc.res.array.array->textureType = hipTextureType3D;
|
||||
resDesc.res.array.array->data = const_cast<void*>(devPtr);
|
||||
break;
|
||||
default:
|
||||
resDesc.resType = hipResourceTypeArray;
|
||||
resDesc.res.array.array = nullptr;
|
||||
break;
|
||||
}
|
||||
|
||||
tex->textureObject = reinterpret_cast<hipTextureObject_t>(ihipCreateTextureObject(resDesc, *image, *sampler));
|
||||
if(type == CL_MEM_OBJECT_IMAGE3D) {
|
||||
free(resDesc.res.array.array);
|
||||
}
|
||||
memset(&resDesc, 0, sizeof(hipResourceDesc));
|
||||
return hipSuccess;
|
||||
}
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t size) {
|
||||
HIP_INIT_API(NONE, offset, tex, devPtr, desc, size);
|
||||
|
||||
if (desc == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
cl_image_format image_format;
|
||||
getChannelOrderAndType(*desc, hipReadModeElementType,
|
||||
&image_format.image_channel_order, &image_format.image_channel_data_type);
|
||||
const amd::Image::Format imageFormat(image_format);
|
||||
|
||||
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size / imageFormat.getElementSize(), 1, size));
|
||||
}
|
||||
|
||||
hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t width, size_t height,
|
||||
size_t pitch) {
|
||||
HIP_INIT_API(NONE, offset, tex, devPtr, desc, width, height, pitch);
|
||||
|
||||
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, offset, tex, devPtr, *desc, width, height, pitch));
|
||||
}
|
||||
|
||||
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
|
||||
const hipChannelFormatDesc* desc) {
|
||||
HIP_INIT_API(NONE, tex, array, desc);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t ihipBindTextureImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t size, textureReference* tex) {
|
||||
HIP_INIT_API(NONE, dim, readMode, offset, devPtr, size, tex);
|
||||
|
||||
assert(1 == dim);
|
||||
|
||||
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, *desc, size, 1, 0));
|
||||
}
|
||||
|
||||
hipError_t ihipBindTextureToArrayImpl(TlsData* tls, int dim, enum hipTextureReadMode readMode,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
textureReference* tex) {
|
||||
HIP_INIT_API(NONE, dim, readMode, &desc, array, tex);
|
||||
|
||||
cl_mem_object_type clType;
|
||||
size_t offset = 0;
|
||||
|
||||
switch (dim) {
|
||||
case 1:
|
||||
clType = CL_MEM_OBJECT_IMAGE1D;
|
||||
break;
|
||||
case 2:
|
||||
clType = CL_MEM_OBJECT_IMAGE2D;
|
||||
break;
|
||||
case 3:
|
||||
case hipTextureType2DLayered:
|
||||
clType = CL_MEM_OBJECT_IMAGE3D;
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, desc, array->width,
|
||||
array->height, array->depth));
|
||||
}
|
||||
|
||||
hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc* desc) {
|
||||
HIP_INIT_API(NONE, tex, mipmappedArray, desc);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
|
||||
|
||||
ihipDestroyTextureObject(reinterpret_cast<hip::TextureObject*>(textureObject));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipUnbindTexture(const textureReference* tex) {
|
||||
HIP_INIT_API(NONE, tex);
|
||||
|
||||
ihipDestroyTextureObject(reinterpret_cast<hip::TextureObject*>(tex->textureObject));
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) {
|
||||
HIP_INIT_API(NONE, desc, array);
|
||||
|
||||
if (desc != nullptr) {
|
||||
*desc = array->desc;
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) {
|
||||
HIP_INIT_API(NONE, offset, tex);
|
||||
|
||||
if ((offset == nullptr) || (tex == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*offset = 0;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) {
|
||||
HIP_INIT_API(NONE, tex, symbol);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) {
|
||||
HIP_INIT_API(NONE, tex, fmt, NumPackedComponents);
|
||||
|
||||
if (tex == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
tex->format = fmt;
|
||||
tex->numChannels = NumPackedComponents;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) {
|
||||
HIP_INIT_API(NONE, tex, flags);
|
||||
|
||||
if (tex == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
tex->normalized = flags;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) {
|
||||
HIP_INIT_API(NONE, tex, fm);
|
||||
|
||||
if (tex == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
tex->filterMode = fm;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) {
|
||||
HIP_INIT_API(NONE, am, &tex, dim);
|
||||
|
||||
if ((am == nullptr) || (dim >= 3)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*am = tex.addressMode[dim];
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) {
|
||||
HIP_INIT_API(NONE, tex, dim, am);
|
||||
|
||||
if (tex == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
tex->addressMode[dim] = am;
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) {
|
||||
HIP_INIT_API(NONE, array, &tex);
|
||||
|
||||
hip::TextureObject* texture = nullptr;
|
||||
|
||||
if ((array == nullptr) || (*array == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
texture = reinterpret_cast<hip::TextureObject *>(tex.textureObject);
|
||||
if(hipResourceTypeArray != texture->resDesc.resType){
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
if (texture->resDesc.res.array.array == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
**array = *(texture->resDesc.res.array.array);
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) {
|
||||
HIP_INIT_API(NONE, tex, array, flags);
|
||||
|
||||
size_t offset = 0;
|
||||
cl_mem_object_type clType;
|
||||
|
||||
if ((tex == nullptr) || (array == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
switch(array->textureType) {
|
||||
case hipTextureType3D:
|
||||
clType = CL_MEM_OBJECT_IMAGE3D;
|
||||
break;
|
||||
case hipTextureType2D:
|
||||
clType = CL_MEM_OBJECT_IMAGE2D;
|
||||
break;
|
||||
case hipTextureType1D:
|
||||
clType = CL_MEM_OBJECT_IMAGE1D;
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN(ihipBindTexture(clType, &offset, tex, array->data, array->desc, array->width,
|
||||
array->height, array->depth));
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) {
|
||||
HIP_INIT_API(NONE, dev_ptr, &tex);
|
||||
|
||||
hip::TextureObject* texture = nullptr;
|
||||
device::Memory* dev_mem = nullptr;
|
||||
|
||||
texture = reinterpret_cast<hip::TextureObject *>(tex.textureObject);
|
||||
if ((texture == nullptr) || (texture->image == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
dev_mem = texture->image->getDeviceMemory(*hip::getCurrentContext()->devices()[0]);
|
||||
if (dev_mem == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
*dev_ptr = reinterpret_cast<void*>(dev_mem->virtualAddress());
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
|
||||
size_t size) {
|
||||
HIP_INIT_API(NONE, offset, tex, devPtr, size);
|
||||
|
||||
if (tex == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidImage);
|
||||
}
|
||||
|
||||
cl_image_format image_format;
|
||||
getDrvChannelOrderAndType(tex->format, tex->numChannels,
|
||||
&image_format.image_channel_order, &image_format.image_channel_data_type);
|
||||
const amd::Image::Format imageFormat(image_format);
|
||||
|
||||
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE1D, offset, tex, devPtr, tex->channelDesc, size / imageFormat.getElementSize(), 1, size));
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
|
||||
hipDeviceptr_t devPtr, size_t pitch) {
|
||||
HIP_INIT_API(NONE, tex, desc, devPtr, pitch);
|
||||
|
||||
if (desc == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
size_t offset;
|
||||
HIP_RETURN(ihipBindTexture(CL_MEM_OBJECT_IMAGE2D, &offset, tex, devPtr, tex->channelDesc, desc->Width, desc->Height, pitch));
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIPRTC_SRC_HIP_INTERNAL_H
|
||||
#define HIPRTC_SRC_HIP_INTERNAL_H
|
||||
|
||||
#include "hip_internal.hpp"
|
||||
|
||||
// This macro should be called at the beginning of every HIP RTC API.
|
||||
#define HIPRTC_INIT_API(...) \
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s ( %s )", std::this_thread::get_id(), __func__, ToString( __VA_ARGS__ ).c_str()); \
|
||||
amd::Thread* thread = amd::Thread::current(); \
|
||||
if (!VDI_CHECK_THREAD(thread)) { \
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \
|
||||
} \
|
||||
HIP_INIT();
|
||||
|
||||
#define HIPRTC_RETURN(ret) \
|
||||
hiprtc::g_lastRtcError = ret; \
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_API, "[%zx] %s: Returned %s", std::this_thread::get_id(), __func__, \
|
||||
hiprtcGetErrorString(hiprtc::g_lastRtcError)); \
|
||||
return hiprtc::g_lastRtcError;
|
||||
|
||||
|
||||
#endif // HIPRTC_SRC_HIP_INTERNAL_H
|
||||
@@ -0,0 +1,247 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
//---
|
||||
// Helper functions to convert HIP function arguments into strings.
|
||||
// Handles POD data types as well as enumerations (ie hipMemcpyKind).
|
||||
// The implementation uses C++11 variadic templates and template specialization.
|
||||
// The hipMemcpyKind example below is a good example that shows how to implement conversion for a
|
||||
// new HSA type.
|
||||
|
||||
|
||||
// Handy macro to convert an enumeration to a stringified version of same:
|
||||
#define CASE_STR(x) \
|
||||
case x: \
|
||||
return #x;
|
||||
|
||||
inline const char* ihipErrorString(hipError_t hip_error) {
|
||||
switch (hip_error) {
|
||||
CASE_STR(hipSuccess);
|
||||
CASE_STR(hipErrorOutOfMemory);
|
||||
CASE_STR(hipErrorNotInitialized);
|
||||
CASE_STR(hipErrorDeinitialized);
|
||||
CASE_STR(hipErrorProfilerDisabled);
|
||||
CASE_STR(hipErrorProfilerNotInitialized);
|
||||
CASE_STR(hipErrorProfilerAlreadyStarted);
|
||||
CASE_STR(hipErrorProfilerAlreadyStopped);
|
||||
CASE_STR(hipErrorInvalidImage);
|
||||
CASE_STR(hipErrorInvalidContext);
|
||||
CASE_STR(hipErrorContextAlreadyCurrent);
|
||||
CASE_STR(hipErrorMapFailed);
|
||||
CASE_STR(hipErrorUnmapFailed);
|
||||
CASE_STR(hipErrorArrayIsMapped);
|
||||
CASE_STR(hipErrorAlreadyMapped);
|
||||
CASE_STR(hipErrorNoBinaryForGpu);
|
||||
CASE_STR(hipErrorAlreadyAcquired);
|
||||
CASE_STR(hipErrorNotMapped);
|
||||
CASE_STR(hipErrorNotMappedAsArray);
|
||||
CASE_STR(hipErrorNotMappedAsPointer);
|
||||
CASE_STR(hipErrorECCNotCorrectable);
|
||||
CASE_STR(hipErrorUnsupportedLimit);
|
||||
CASE_STR(hipErrorContextAlreadyInUse);
|
||||
CASE_STR(hipErrorPeerAccessUnsupported);
|
||||
CASE_STR(hipErrorInvalidKernelFile);
|
||||
CASE_STR(hipErrorInvalidGraphicsContext);
|
||||
CASE_STR(hipErrorInvalidSource);
|
||||
CASE_STR(hipErrorFileNotFound);
|
||||
CASE_STR(hipErrorSharedObjectSymbolNotFound);
|
||||
CASE_STR(hipErrorSharedObjectInitFailed);
|
||||
CASE_STR(hipErrorOperatingSystem);
|
||||
CASE_STR(hipErrorSetOnActiveProcess);
|
||||
CASE_STR(hipErrorInvalidHandle);
|
||||
CASE_STR(hipErrorNotFound);
|
||||
CASE_STR(hipErrorIllegalAddress);
|
||||
CASE_STR(hipErrorMissingConfiguration);
|
||||
CASE_STR(hipErrorLaunchFailure);
|
||||
CASE_STR(hipErrorPriorLaunchFailure);
|
||||
CASE_STR(hipErrorLaunchTimeOut);
|
||||
CASE_STR(hipErrorLaunchOutOfResources);
|
||||
CASE_STR(hipErrorInvalidDeviceFunction);
|
||||
CASE_STR(hipErrorInvalidConfiguration);
|
||||
CASE_STR(hipErrorInvalidDevice);
|
||||
CASE_STR(hipErrorInvalidValue);
|
||||
CASE_STR(hipErrorInvalidDevicePointer);
|
||||
CASE_STR(hipErrorInvalidMemcpyDirection);
|
||||
CASE_STR(hipErrorUnknown);
|
||||
CASE_STR(hipErrorNotReady);
|
||||
CASE_STR(hipErrorNoDevice);
|
||||
CASE_STR(hipErrorPeerAccessAlreadyEnabled);
|
||||
CASE_STR(hipErrorPeerAccessNotEnabled);
|
||||
CASE_STR(hipErrorRuntimeMemory);
|
||||
CASE_STR(hipErrorRuntimeOther);
|
||||
CASE_STR(hipErrorHostMemoryAlreadyRegistered);
|
||||
CASE_STR(hipErrorHostMemoryNotRegistered);
|
||||
CASE_STR(hipErrorTbd);
|
||||
default:
|
||||
return "hipErrorUnknown";
|
||||
};
|
||||
};
|
||||
|
||||
// Building block functions:
|
||||
template <typename T>
|
||||
inline std::string ToHexString(T v) {
|
||||
std::ostringstream ss;
|
||||
ss << "0x" << std::hex << v;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline std::string ToString(T* v) {
|
||||
std::ostringstream ss;
|
||||
if (v == NULL) {
|
||||
ss << "char array:<null>";
|
||||
} else {
|
||||
ss << v;
|
||||
}
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline std::string ToString(T** v) {
|
||||
std::ostringstream ss;
|
||||
if (v == NULL) {
|
||||
ss << "char array:<null>";
|
||||
} else {
|
||||
ss << v;
|
||||
}
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
//---
|
||||
// Template overloads for ToString to handle specific types
|
||||
|
||||
// This is the default which works for most types:
|
||||
template <typename T>
|
||||
inline std::string ToString(T v) {
|
||||
std::ostringstream ss;
|
||||
ss << v;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
template <>
|
||||
inline std::string ToString(hipFunction_t v) {
|
||||
std::ostringstream ss;
|
||||
ss << "0x" << std::hex << static_cast<void*>(v);
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
// hipEvent_t specialization. TODO - maybe add an event ID for debug?
|
||||
template <>
|
||||
inline std::string ToString(hipEvent_t v) {
|
||||
std::ostringstream ss;
|
||||
ss << "event:" << std::hex << static_cast<void*>(v);
|
||||
return ss.str();
|
||||
};
|
||||
// hipStream_t
|
||||
template <>
|
||||
inline std::string ToString(hipStream_t v) {
|
||||
std::ostringstream ss;
|
||||
if (v == NULL) {
|
||||
ss << "stream:<null>";
|
||||
} else {
|
||||
ss << "stream:" << std::hex << static_cast<void*>(v);
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
// hipCtx_t
|
||||
template <>
|
||||
inline std::string ToString(hipCtx_t v) {
|
||||
std::ostringstream ss;
|
||||
if (v == NULL) {
|
||||
ss << "context:<null>";
|
||||
} else {
|
||||
ss << "context:" << std::hex << static_cast<void*>(v);
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
// hipPitchedPtr
|
||||
template <>
|
||||
inline std::string ToString(hipPitchedPtr v) {
|
||||
std::ostringstream ss;
|
||||
ss << "pitchPtr:" << std::hex << static_cast<void*>(v.ptr);
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
// hipMemcpyKind specialization
|
||||
template <>
|
||||
inline std::string ToString(hipMemcpyKind v) {
|
||||
switch (v) {
|
||||
CASE_STR(hipMemcpyHostToHost);
|
||||
CASE_STR(hipMemcpyHostToDevice);
|
||||
CASE_STR(hipMemcpyDeviceToHost);
|
||||
CASE_STR(hipMemcpyDeviceToDevice);
|
||||
CASE_STR(hipMemcpyDefault);
|
||||
default:
|
||||
return ToHexString(v);
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
inline std::string ToString(hipFuncCache_t v) {
|
||||
switch (v) {
|
||||
CASE_STR(hipFuncCachePreferNone);
|
||||
CASE_STR(hipFuncCachePreferShared);
|
||||
CASE_STR(hipFuncCachePreferL1);
|
||||
CASE_STR(hipFuncCachePreferEqual);
|
||||
default:
|
||||
return ToHexString(v);
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
inline std::string ToString(hipSharedMemConfig v) {
|
||||
switch (v) {
|
||||
CASE_STR(hipSharedMemBankSizeDefault);
|
||||
CASE_STR(hipSharedMemBankSizeFourByte);
|
||||
CASE_STR(hipSharedMemBankSizeEightByte);
|
||||
default:
|
||||
return ToHexString(v);
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
inline std::string ToString(hipError_t v) {
|
||||
return ihipErrorString(v);
|
||||
};
|
||||
|
||||
// Catch empty arguments case
|
||||
inline std::string ToString() { return (""); }
|
||||
|
||||
|
||||
//---
|
||||
// C++11 variadic template - peels off first argument, converts to string, and calls itself again to
|
||||
// peel the next arg. Strings are automatically separated by comma+space.
|
||||
template <typename T, typename... Args>
|
||||
inline std::string ToString(T first, Args... args) {
|
||||
return ToString(first) + ", " + ToString(args...);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user