2020-02-04 08:45:01 -08:00
|
|
|
/* Copyright (c) 2015-present Advanced Micro Devices, Inc.
|
|
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
|
all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
THE SOFTWARE. */
|
2018-03-01 22:57:20 -05:00
|
|
|
|
|
|
|
|
#include <hip/hip_runtime.h>
|
2020-04-06 10:57:03 -04:00
|
|
|
#include <hip/hcc_detail/texture_types.h>
|
2018-03-01 22:57:20 -05:00
|
|
|
#include "hip_internal.hpp"
|
|
|
|
|
#include "platform/program.hpp"
|
|
|
|
|
#include "platform/runtime.hpp"
|
|
|
|
|
|
2018-04-27 21:21:31 -04:00
|
|
|
#include <unordered_map>
|
|
|
|
|
#include "elfio.hpp"
|
|
|
|
|
|
2018-05-18 14:34:14 -04:00
|
|
|
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
thread_local std::stack<ihipExec_t> execStack_;
|
2020-04-06 09:58:35 -04:00
|
|
|
PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
|
2019-03-18 18:44:55 -04:00
|
|
|
|
2018-03-01 22:57:20 -05:00
|
|
|
struct __CudaFatBinaryWrapper {
|
|
|
|
|
unsigned int magic;
|
|
|
|
|
unsigned int version;
|
|
|
|
|
void* binary;
|
|
|
|
|
void* dummy1;
|
|
|
|
|
};
|
|
|
|
|
|
2018-03-28 19:23:57 -04:00
|
|
|
#define CLANG_OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
|
2018-05-18 14:34:14 -04:00
|
|
|
#define HIP_AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
|
2018-05-02 19:14:52 -04:00
|
|
|
#define HCC_AMDGCN_AMDHSA_TRIPLE "hcc-amdgcn-amd-amdhsa-"
|
2018-03-28 19:23:57 -04:00
|
|
|
|
|
|
|
|
struct __ClangOffloadBundleDesc {
|
|
|
|
|
uint64_t offset;
|
|
|
|
|
uint64_t size;
|
|
|
|
|
uint64_t tripleSize;
|
|
|
|
|
const char triple[1];
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct __ClangOffloadBundleHeader {
|
|
|
|
|
const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1];
|
|
|
|
|
uint64_t numBundles;
|
|
|
|
|
__ClangOffloadBundleDesc desc[1];
|
|
|
|
|
};
|
|
|
|
|
|
2019-03-19 11:31:24 -04:00
|
|
|
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
|
|
|
|
hipModule_t hmod, const char* name);
|
|
|
|
|
|
2019-04-04 18:22:40 -04:00
|
|
|
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
|
|
|
|
hipDeviceptr_t* dptr, size_t* bytes);
|
|
|
|
|
|
2019-05-09 14:19:54 -04:00
|
|
|
static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
|
|
|
|
|
const char* device_name) {
|
2019-06-28 08:06:23 -04:00
|
|
|
// Workaround for device name mismatch.
|
|
|
|
|
// Device name may contain feature strings delimited by '+', e.g.
|
|
|
|
|
// gfx900+xnack. Currently HIP-Clang does not include feature strings
|
|
|
|
|
// in code object target id in fat binary. Therefore drop the feature
|
|
|
|
|
// strings from device name before comparing it with code object target id.
|
|
|
|
|
std::string short_name(device_name);
|
|
|
|
|
auto feature_loc = short_name.find('+');
|
|
|
|
|
if (feature_loc != std::string::npos) {
|
|
|
|
|
short_name.erase(feature_loc);
|
|
|
|
|
}
|
|
|
|
|
return codeobj_target_id == short_name;
|
2019-05-09 14:19:54 -04:00
|
|
|
}
|
|
|
|
|
|
2019-05-27 20:11:08 -04:00
|
|
|
// Extracts code objects from fat binary in data for device names given in devices.
|
|
|
|
|
// Returns true if code objects are extracted successfully.
|
2020-02-19 19:31:10 -05:00
|
|
|
hipError_t __hipExtractCodeObjectFromFatBinary(const void* data,
|
2019-05-27 20:11:08 -04:00
|
|
|
const std::vector<const char*>& devices,
|
|
|
|
|
std::vector<std::pair<const void*, size_t>>& code_objs)
|
2018-03-28 19:23:57 -04:00
|
|
|
{
|
2019-05-27 20:11:08 -04:00
|
|
|
std::string magic((const char*)data, sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
|
2018-04-27 21:21:31 -04:00
|
|
|
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
|
2020-02-19 19:31:10 -05:00
|
|
|
return hipErrorInvalidKernelFile;
|
2018-04-27 21:21:31 -04:00
|
|
|
}
|
|
|
|
|
|
2019-05-27 20:11:08 -04:00
|
|
|
code_objs.resize(devices.size());
|
|
|
|
|
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
2018-04-27 21:21:31 -04:00
|
|
|
const auto* desc = &obheader->desc[0];
|
2019-05-27 20:11:08 -04:00
|
|
|
unsigned num_code_objs = 0;
|
2018-03-28 19:23:57 -04:00
|
|
|
for (uint64_t i = 0; i < obheader->numBundles; ++i,
|
|
|
|
|
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
|
|
|
|
|
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
|
|
|
|
|
|
2020-02-21 14:37:41 -05:00
|
|
|
std::size_t offset = 0;
|
2020-02-06 13:56:41 -05:00
|
|
|
if (!std::strncmp(desc->triple, HIP_AMDGCN_AMDHSA_TRIPLE,
|
|
|
|
|
sizeof(HIP_AMDGCN_AMDHSA_TRIPLE) - 1)) {
|
|
|
|
|
offset = sizeof(HIP_AMDGCN_AMDHSA_TRIPLE); //For code objects created by CLang
|
|
|
|
|
} else if (!std::strncmp(desc->triple, HCC_AMDGCN_AMDHSA_TRIPLE,
|
|
|
|
|
sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1)) {
|
|
|
|
|
offset = sizeof(HCC_AMDGCN_AMDHSA_TRIPLE); //For code objects created by Hcc
|
|
|
|
|
} else {
|
2018-03-28 19:23:57 -04:00
|
|
|
continue;
|
2020-02-06 13:56:41 -05:00
|
|
|
}
|
|
|
|
|
std::string target(desc->triple + offset, desc->tripleSize - offset);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
|
|
|
|
const void *image = reinterpret_cast<const void*>(
|
|
|
|
|
reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
|
|
|
|
size_t size = desc->size;
|
|
|
|
|
|
2019-05-27 20:11:08 -04:00
|
|
|
for (size_t dev = 0; dev < devices.size(); ++dev) {
|
|
|
|
|
const char* name = devices[dev];
|
2019-02-01 13:43:50 -05:00
|
|
|
|
2019-05-27 20:11:08 -04:00
|
|
|
if (!isCompatibleCodeObject(target, name)) {
|
2019-03-27 12:53:17 -04:00
|
|
|
continue;
|
2019-02-01 13:43:50 -05:00
|
|
|
}
|
2019-05-27 20:11:08 -04:00
|
|
|
code_objs[dev] = std::make_pair(image, size);
|
|
|
|
|
num_code_objs++;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-05-12 15:51:52 -07:00
|
|
|
if (num_code_objs == devices.size()) {
|
2020-02-19 19:31:10 -05:00
|
|
|
return hipSuccess;
|
2020-05-12 15:51:52 -07:00
|
|
|
} else {
|
2020-05-28 15:11:55 -04:00
|
|
|
fatal("hipErrorNoBinaryForGpu: Coudn't find binary for current devices!");
|
2020-02-19 19:31:10 -05:00
|
|
|
return hipErrorNoBinaryForGpu;
|
2020-05-12 15:51:52 -07:00
|
|
|
}
|
2019-05-27 20:11:08 -04:00
|
|
|
}
|
2019-02-01 13:43:50 -05:00
|
|
|
|
2019-12-11 03:11:19 -05:00
|
|
|
extern "C" std::vector<std::pair<hipModule_t, bool>>* __hipRegisterFatBinary(const void* data)
|
2019-05-27 20:11:08 -04:00
|
|
|
{
|
|
|
|
|
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
|
|
|
|
|
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ",
|
|
|
|
|
fbwrapper->magic, fbwrapper->version);
|
2019-05-27 20:11:08 -04:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-11 03:11:19 -05:00
|
|
|
return PlatformState::instance().addFatBinary(fbwrapper->binary);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PlatformState::digestFatBinary(const void* data, std::vector<std::pair<hipModule_t, bool>>& programs)
|
|
|
|
|
{
|
2019-12-11 19:31:20 -05:00
|
|
|
if (programs.size() > 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2019-08-22 12:41:42 -04:00
|
|
|
std::vector<std::pair<const void*, size_t>> code_objs;
|
2019-12-11 03:11:19 -05:00
|
|
|
std::vector<const char*> devices;
|
2019-08-22 12:41:42 -04:00
|
|
|
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
2020-02-18 12:36:12 -08:00
|
|
|
devices.push_back(g_devices[dev]->devices()[0]->info().name_);
|
2019-12-11 03:11:19 -05:00
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2020-02-19 19:31:10 -05:00
|
|
|
if (hipSuccess != __hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) {
|
2019-12-11 03:11:19 -05:00
|
|
|
return;
|
2019-08-22 12:41:42 -04:00
|
|
|
}
|
|
|
|
|
|
2019-12-11 03:11:19 -05:00
|
|
|
programs.resize(g_devices.size());
|
|
|
|
|
|
2019-08-22 12:41:42 -04:00
|
|
|
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
2020-02-18 12:36:12 -08:00
|
|
|
amd::Context* ctx = g_devices[dev]->asContext();
|
2019-08-22 12:41:42 -04:00
|
|
|
amd::Program* program = new amd::Program(*ctx);
|
|
|
|
|
if (program == nullptr) {
|
2019-12-11 03:11:19 -05:00
|
|
|
return;
|
2019-08-22 12:41:42 -04:00
|
|
|
}
|
2020-04-28 22:41:45 -07:00
|
|
|
if (CL_SUCCESS == program->addDeviceProgram(
|
|
|
|
|
*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second, false)) {
|
2019-12-11 03:11:19 -05:00
|
|
|
programs.at(dev) = std::make_pair(reinterpret_cast<hipModule_t>(as_cl(program)) , false);
|
2019-08-22 12:41:42 -04:00
|
|
|
}
|
|
|
|
|
}
|
2019-12-11 03:11:19 -05:00
|
|
|
}
|
2019-08-22 12:41:42 -04:00
|
|
|
|
2019-12-11 03:11:19 -05:00
|
|
|
void PlatformState::init()
|
|
|
|
|
{
|
2019-12-11 19:31:20 -05:00
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
2019-12-11 03:11:19 -05:00
|
|
|
if(initialized_ || g_devices.empty()) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
initialized_ = true;
|
|
|
|
|
|
|
|
|
|
for (auto& it : modules_) {
|
|
|
|
|
digestFatBinary(it.first, it.second);
|
|
|
|
|
}
|
|
|
|
|
for (auto& it : functions_) {
|
|
|
|
|
it.second.functions.resize(g_devices.size());
|
|
|
|
|
}
|
|
|
|
|
for (auto& it : vars_) {
|
|
|
|
|
it.second.rvars.resize(g_devices.size());
|
|
|
|
|
}
|
2019-08-22 12:41:42 -04:00
|
|
|
}
|
|
|
|
|
|
2020-03-18 13:43:58 -04:00
|
|
|
bool PlatformState::unregisterFunc(hipModule_t hmod) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
2020-03-30 11:32:27 -04:00
|
|
|
auto mod_it = module_map_.find(hmod);
|
|
|
|
|
if (mod_it != module_map_.cend()) {
|
|
|
|
|
PlatformState::Module* mod_ptr = mod_it->second;
|
|
|
|
|
if(mod_ptr != nullptr) {
|
|
|
|
|
for (auto func_it = mod_ptr->functions_.begin(); func_it != mod_ptr->functions_.end(); ++func_it) {
|
|
|
|
|
PlatformState::DeviceFunction &devFunc = func_it->second;
|
|
|
|
|
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
|
|
|
|
if (devFunc.functions[dev] != 0) {
|
|
|
|
|
hip::Function* f = reinterpret_cast<hip::Function*>(devFunc.functions[dev]);
|
|
|
|
|
delete f;
|
|
|
|
|
}
|
2020-03-18 13:43:58 -04:00
|
|
|
}
|
2020-03-30 11:32:27 -04:00
|
|
|
delete devFunc.modules;
|
2020-03-18 13:43:58 -04:00
|
|
|
}
|
2020-03-30 11:32:27 -04:00
|
|
|
delete mod_ptr;
|
2020-03-18 13:43:58 -04:00
|
|
|
}
|
2020-05-04 18:59:49 -04:00
|
|
|
module_map_.erase(mod_it);
|
2020-03-18 13:43:58 -04:00
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-08-26 14:32:48 -04:00
|
|
|
std::vector< std::pair<hipModule_t, bool> >* PlatformState::unregisterVar(hipModule_t hmod) {
|
2019-08-07 12:45:50 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
2019-08-26 14:32:48 -04:00
|
|
|
std::vector< std::pair<hipModule_t, bool> >* rmodules = nullptr;
|
2019-07-23 20:09:59 -04:00
|
|
|
auto it = vars_.begin();
|
|
|
|
|
while (it != vars_.end()) {
|
|
|
|
|
DeviceVar& dvar = it->second;
|
2019-08-22 12:41:42 -04:00
|
|
|
if ((*dvar.modules)[0].first == hmod) {
|
2019-08-26 14:32:48 -04:00
|
|
|
rmodules = dvar.modules;
|
2020-04-06 10:57:03 -04:00
|
|
|
if (dvar.shadowAllocated) {
|
2019-09-27 18:00:49 -04:00
|
|
|
texture<float, hipTextureType1D, hipReadModeElementType>* tex_hptr
|
|
|
|
|
= reinterpret_cast<texture<float, hipTextureType1D, hipReadModeElementType> *>(dvar.shadowVptr);
|
|
|
|
|
delete tex_hptr;
|
|
|
|
|
}
|
2020-05-13 00:26:19 -07:00
|
|
|
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
|
|
|
|
if (dvar.rvars[dev].getdeviceptr()) {
|
|
|
|
|
amd::MemObjMap::RemoveMemObj(dvar.rvars[dev].getdeviceptr());
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-07-23 20:09:59 -04:00
|
|
|
vars_.erase(it++);
|
|
|
|
|
} else {
|
|
|
|
|
++it;
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-08-26 14:32:48 -04:00
|
|
|
return rmodules;
|
2019-07-23 20:09:59 -04:00
|
|
|
}
|
|
|
|
|
|
2019-11-04 10:13:20 -05:00
|
|
|
PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
|
|
|
|
|
DeviceVar* dvar = nullptr;
|
|
|
|
|
if (hmod != nullptr) {
|
|
|
|
|
// If module is provided, then get the var only from that module
|
|
|
|
|
auto var_range = vars_.equal_range(hostVar);
|
|
|
|
|
for (auto it = var_range.first; it != var_range.second; ++it) {
|
|
|
|
|
if ((*it->second.modules)[deviceId].first == hmod) {
|
|
|
|
|
dvar = &(it->second);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// If var count is < 2, return the var
|
|
|
|
|
if (vars_.count(hostVar) < 2) {
|
|
|
|
|
auto it = vars_.find(hostVar);
|
|
|
|
|
dvar = ((it == vars_.end()) ? nullptr : &(it->second));
|
|
|
|
|
} else {
|
|
|
|
|
// If var count is > 2, return the original var,
|
|
|
|
|
// if original var count != 1, return vars_.end()/Invalid
|
|
|
|
|
size_t orig_global_count = 0;
|
|
|
|
|
auto var_range = vars_.equal_range(hostVar);
|
|
|
|
|
for (auto it = var_range.first; it != var_range.second; ++it) {
|
|
|
|
|
// when dyn_undef is set, it is a shadow var
|
|
|
|
|
if (it->second.dyn_undef == false) {
|
|
|
|
|
++orig_global_count;
|
|
|
|
|
dvar = &(it->second);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
dvar = ((orig_global_count == 1) ? dvar : nullptr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dvar;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-24 22:18:23 -04:00
|
|
|
bool PlatformState::findSymbol(const void *hostVar,
|
|
|
|
|
hipModule_t &hmod, std::string &symbolName) {
|
2020-03-04 16:05:37 -05:00
|
|
|
auto it = symbols_.find(hostVar);
|
|
|
|
|
if (it != symbols_.end()) {
|
2020-04-24 22:18:23 -04:00
|
|
|
hmod = it->second.first;
|
|
|
|
|
symbolName = it->second.second;
|
2020-03-04 16:05:37 -05:00
|
|
|
return true;
|
|
|
|
|
}
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Could not find the Symbol: %s \n", symbolName.c_str());
|
2020-03-04 16:05:37 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-24 22:18:23 -04:00
|
|
|
void PlatformState::registerVarSym(const void* hostVar, hipModule_t hmod, const char* symbolName) {
|
2020-03-04 16:05:37 -05:00
|
|
|
amd::ScopedLock lock(lock_);
|
2020-04-24 22:18:23 -04:00
|
|
|
symbols_.insert(std::make_pair(hostVar, std::make_pair(hmod, std::string(symbolName))));
|
2020-03-04 16:05:37 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PlatformState::registerVar(const char* hostvar,
|
2019-04-22 15:19:24 -04:00
|
|
|
const DeviceVar& rvar) {
|
2019-03-18 18:44:55 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
2020-04-02 12:43:08 -04:00
|
|
|
vars_.insert(std::make_pair(std::string(hostvar), rvar));
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
2018-08-02 12:33:55 -04:00
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
void PlatformState::registerFunction(const void* hostFunction,
|
2019-04-22 15:19:24 -04:00
|
|
|
const DeviceFunction& func) {
|
2019-03-18 18:44:55 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
2019-11-21 19:33:05 -05:00
|
|
|
functions_.insert(std::make_pair(hostFunction, func));
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
2018-08-02 12:33:55 -04:00
|
|
|
|
2019-04-26 15:15:48 -04:00
|
|
|
bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) {
|
|
|
|
|
device::Program* dev_program
|
2020-02-18 12:36:12 -08:00
|
|
|
= program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
|
2019-04-26 15:15:48 -04:00
|
|
|
|
|
|
|
|
const auto it = dev_program->kernels().find(std::string(func_name));
|
|
|
|
|
if (it == dev_program->kernels().cend()) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Could not find the function %s \n", func_name);
|
2019-04-26 15:15:48 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-12 09:14:58 -04:00
|
|
|
const device::Kernel* kernel = it->second;
|
|
|
|
|
const device::Kernel::WorkGroupInfo* wginfo = kernel->workGroupInfo();
|
|
|
|
|
func_attr->sharedSizeBytes = static_cast<int>(wginfo->localMemSize_);
|
|
|
|
|
func_attr->binaryVersion = static_cast<int>(kernel->signature().version());
|
|
|
|
|
func_attr->cacheModeCA = 0;
|
|
|
|
|
func_attr->constSizeBytes = 0;
|
2020-05-07 19:12:10 -04:00
|
|
|
func_attr->localSizeBytes = wginfo->privateMemSize_;
|
2020-05-12 09:14:58 -04:00
|
|
|
func_attr->maxDynamicSharedSizeBytes = static_cast<int>(wginfo->availableLDSSize_
|
|
|
|
|
- wginfo->localMemSize_);
|
|
|
|
|
|
|
|
|
|
func_attr->maxThreadsPerBlock = static_cast<int>(wginfo->size_);
|
|
|
|
|
func_attr->numRegs = static_cast<int>(wginfo->usedVGPRs_);
|
|
|
|
|
func_attr->preferredShmemCarveout = 0;
|
|
|
|
|
func_attr->ptxVersion = 30;
|
2019-04-26 15:15:48 -04:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-04 10:13:20 -05:00
|
|
|
bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod,
|
|
|
|
|
void** var_addr, size_t* var_size) {
|
|
|
|
|
DeviceVar* dvar = findVar(var_name, ihipGetDevice(), hmod);
|
|
|
|
|
if (dvar != nullptr) {
|
|
|
|
|
*var_addr = dvar->shadowVptr;
|
|
|
|
|
*var_size = dvar->size;
|
2019-08-11 18:53:11 -04:00
|
|
|
return true;
|
|
|
|
|
} else {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find Var name: %s in module: 0x%x \n", var_name.c_str(), hmod);
|
2019-08-11 18:53:11 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool CL_CALLBACK getSvarInfo(cl_program program, std::string var_name, void** var_addr,
|
|
|
|
|
size_t* var_size) {
|
2019-11-04 10:13:20 -05:00
|
|
|
return PlatformState::instance().getShadowVarInfo(var_name, reinterpret_cast<hipModule_t>(program),
|
|
|
|
|
var_addr, var_size);
|
2019-08-11 18:53:11 -04:00
|
|
|
}
|
|
|
|
|
|
2020-03-30 11:32:27 -04:00
|
|
|
bool PlatformState::registerModFuncs(std::vector<std::string>& func_names, hipModule_t* module) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
PlatformState::Module* mod_ptr = new PlatformState::Module(*module);
|
|
|
|
|
|
|
|
|
|
for (auto it = func_names.begin(); it != func_names.end(); ++it) {
|
|
|
|
|
auto modules = new std::vector<std::pair<hipModule_t, bool> >(g_devices.size());
|
|
|
|
|
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
|
|
|
|
modules->at(dev) = std::make_pair(*module, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PlatformState::DeviceFunction dfunc{*it, modules,
|
|
|
|
|
std::vector<hipFunction_t>(g_devices.size(), 0)};
|
|
|
|
|
mod_ptr->functions_.insert(std::make_pair(*it, dfunc));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
module_map_.insert(std::make_pair(*module, mod_ptr));
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-18 13:43:58 -04:00
|
|
|
bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
2020-03-30 11:32:27 -04:00
|
|
|
|
|
|
|
|
auto mod_it = module_map_.find(hmod);
|
|
|
|
|
if (mod_it != module_map_.cend()) {
|
2020-05-04 18:59:49 -04:00
|
|
|
assert(mod_it->second != nullptr);
|
2020-03-30 11:32:27 -04:00
|
|
|
auto func_it = mod_it->second->functions_.find(name);
|
|
|
|
|
if (func_it != mod_it->second->functions_.cend()) {
|
|
|
|
|
PlatformState::DeviceFunction& devFunc = func_it->second;
|
2020-03-18 13:43:58 -04:00
|
|
|
if (devFunc.functions[ihipGetDevice()] == 0) {
|
|
|
|
|
if(!createFunc(&devFunc.functions[ihipGetDevice()], hmod, name)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Could not create a function: %s at module: 0x%x \n", name, hmod);
|
2020-03-18 13:43:58 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*hfunc = devFunc.functions[ihipGetDevice()];
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find module: 0x%x in PlatformState Module Map \n", hmod);
|
2020-03-18 13:43:58 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool PlatformState::createFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
|
|
|
|
|
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
|
|
|
|
|
|
|
|
|
|
const amd::Symbol* symbol = program->findSymbol(name);
|
|
|
|
|
if (!symbol) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find Symbol with name: %s \n", name);
|
2020-03-18 13:43:58 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name);
|
|
|
|
|
if (!kernel) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Could not create a new kernel with name: %s \n", name);
|
2020-03-18 13:43:58 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hip::Function* f = new hip::Function(kernel);
|
2020-04-13 22:51:46 -04:00
|
|
|
if (!f) {
|
|
|
|
|
DevLogPrintfError("Could not create a new function with name: %s \n", name);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-18 13:43:58 -04:00
|
|
|
*hfunc = f->asHipFunction();
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
2019-11-21 19:33:05 -05:00
|
|
|
const auto it = functions_.find(hostFunction);
|
2019-03-18 18:44:55 -04:00
|
|
|
if (it != functions_.cend()) {
|
2019-04-22 15:19:24 -04:00
|
|
|
PlatformState::DeviceFunction& devFunc = it->second;
|
|
|
|
|
if (devFunc.functions[deviceId] == 0) {
|
|
|
|
|
hipModule_t module = (*devFunc.modules)[deviceId].first;
|
|
|
|
|
if (!(*devFunc.modules)[deviceId].second) {
|
|
|
|
|
amd::Program* program = as_amd(reinterpret_cast<cl_program>(module));
|
2019-08-11 18:53:11 -04:00
|
|
|
program->setVarInfoCallBack(&getSvarInfo);
|
2019-04-22 15:19:24 -04:00
|
|
|
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Build error for module: 0x%x at device: %u \n", module, deviceId);
|
2019-04-22 15:19:24 -04:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
(*devFunc.modules)[deviceId].second = true;
|
|
|
|
|
}
|
|
|
|
|
hipFunction_t function = nullptr;
|
2020-03-18 13:43:58 -04:00
|
|
|
if (createFunc(&function, module, devFunc.deviceName.c_str()) &&
|
2019-04-22 15:19:24 -04:00
|
|
|
function != nullptr) {
|
|
|
|
|
devFunc.functions[deviceId] = function;
|
2020-05-27 18:02:47 -04:00
|
|
|
} else {
|
|
|
|
|
DevLogPrintfError("__hipRegisterFunction cannot find kernel %s for device %d\n",
|
|
|
|
|
devFunc.deviceName.c_str(), deviceId);
|
|
|
|
|
return nullptr;
|
2019-04-22 15:19:24 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return devFunc.functions[deviceId];
|
2018-08-02 12:33:55 -04:00
|
|
|
}
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find function: 0x%x in PlatformState \n", hostFunction);
|
2019-04-22 15:19:24 -04:00
|
|
|
return nullptr;
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2019-04-26 15:15:48 -04:00
|
|
|
bool PlatformState::getFuncAttr(const void* hostFunction,
|
|
|
|
|
hipFuncAttributes* func_attr) {
|
|
|
|
|
if (func_attr == nullptr) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-21 19:33:05 -05:00
|
|
|
const auto it = functions_.find(hostFunction);
|
2019-04-26 15:15:48 -04:00
|
|
|
if (it == functions_.cend()) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find hostFunction 0x%x \n", hostFunction);
|
2019-04-26 15:15:48 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PlatformState::DeviceFunction& devFunc = it->second;
|
|
|
|
|
int deviceId = ihipGetDevice();
|
|
|
|
|
|
|
|
|
|
/* If module has not been initialized yet, build the kernel now*/
|
|
|
|
|
if (!(*devFunc.modules)[deviceId].second) {
|
|
|
|
|
if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot get hostFunction: 0x%x for deviceId:%d \n", hostFunction, deviceId);
|
2019-04-26 15:15:48 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*devFunc.modules)[deviceId].first));
|
|
|
|
|
if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot get Func attributes for function: %s \n",
|
|
|
|
|
devFunc.deviceName.c_str());
|
2019-04-26 15:15:48 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-18 11:20:50 -05:00
|
|
|
bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) {
|
2019-09-27 18:00:49 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
2020-02-18 11:20:50 -05:00
|
|
|
DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), hmod);
|
2019-11-04 10:13:20 -05:00
|
|
|
if (dvar == nullptr) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find var:%s for creating texture reference at module: 0x%x \n",
|
|
|
|
|
hostVar, hmod);
|
2019-09-27 18:00:49 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-06 10:57:03 -04:00
|
|
|
switch (dvar->kind) {
|
|
|
|
|
case PlatformState::DVK_Variable:
|
|
|
|
|
// TODO: Need to define a target-specific symbol info to indicate the device
|
|
|
|
|
// variable kind, i.e. regular variable, texture or surface.
|
|
|
|
|
// Before that, have to assume the specified variable is a texture or
|
|
|
|
|
// surface reference variable.
|
|
|
|
|
dvar->kind = DVK_Texture;
|
|
|
|
|
// FALL THROUGH
|
|
|
|
|
case PlatformState::DVK_Texture:
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
// If it's already used as non-texture variable, bail out.
|
2019-09-27 18:00:49 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-06 10:57:03 -04:00
|
|
|
if (!dvar->shadowVptr) {
|
|
|
|
|
dvar->shadowVptr = new texture<char>{};
|
|
|
|
|
dvar->shadowAllocated = true;
|
|
|
|
|
}
|
|
|
|
|
*texRef = reinterpret_cast<textureReference *>(dvar->shadowVptr);
|
2020-04-24 22:18:23 -04:00
|
|
|
registerVarSym(dvar->shadowVptr, hmod, hostVar);
|
2020-03-18 12:23:11 -04:00
|
|
|
|
2019-09-27 18:00:49 -04:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-04 16:05:37 -05:00
|
|
|
bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t hmod,
|
2019-03-18 18:44:55 -04:00
|
|
|
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
2020-04-02 12:43:08 -04:00
|
|
|
DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod);
|
2019-11-04 10:13:20 -05:00
|
|
|
if (dvar != nullptr) {
|
|
|
|
|
if (dvar->rvars[deviceId].getdeviceptr() == nullptr) {
|
2019-04-22 15:19:24 -04:00
|
|
|
size_t sym_size = 0;
|
|
|
|
|
hipDeviceptr_t device_ptr = nullptr;
|
|
|
|
|
amd::Memory* amd_mem_obj = nullptr;
|
|
|
|
|
|
2019-11-04 10:13:20 -05:00
|
|
|
if (!(*dvar->modules)[deviceId].second) {
|
|
|
|
|
amd::Program* program = as_amd(reinterpret_cast<cl_program>((*dvar->modules)[deviceId].first));
|
2019-08-11 18:53:11 -04:00
|
|
|
program->setVarInfoCallBack(&getSvarInfo);
|
2019-04-22 15:19:24 -04:00
|
|
|
if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Build Failure for module: 0x%x \n", hmod);
|
2019-04-22 15:19:24 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
2019-11-04 10:13:20 -05:00
|
|
|
(*dvar->modules)[deviceId].second = true;
|
2019-04-22 15:19:24 -04:00
|
|
|
}
|
2019-11-04 10:13:20 -05:00
|
|
|
if((hipSuccess == ihipCreateGlobalVarObj(dvar->hostVar.c_str(), (*dvar->modules)[deviceId].first,
|
2019-04-22 15:19:24 -04:00
|
|
|
&amd_mem_obj, &device_ptr, &sym_size))
|
|
|
|
|
&& (device_ptr != nullptr)) {
|
2019-11-04 10:13:20 -05:00
|
|
|
dvar->rvars[deviceId].size_ = sym_size;
|
|
|
|
|
dvar->rvars[deviceId].devicePtr_ = device_ptr;
|
|
|
|
|
dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj;
|
2019-04-22 15:19:24 -04:00
|
|
|
amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj);
|
|
|
|
|
} else {
|
2020-05-27 18:02:47 -04:00
|
|
|
DevLogPrintfError("__hipRegisterVar cannot find Var: %s for deviceId: 0x%x \n",
|
|
|
|
|
dvar->hostVar.c_str(), deviceId);
|
|
|
|
|
return false;
|
2019-04-22 15:19:24 -04:00
|
|
|
}
|
|
|
|
|
}
|
2019-11-04 10:13:20 -05:00
|
|
|
*size_ptr = dvar->rvars[deviceId].getvarsize();
|
|
|
|
|
*dev_ptr = dvar->rvars[deviceId].getdeviceptr();
|
2019-03-18 18:44:55 -04:00
|
|
|
return true;
|
|
|
|
|
} else {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Could not find global var: %s at module:0x%x \n", hostVar, hmod);
|
2019-03-18 18:44:55 -04:00
|
|
|
return false;
|
2018-08-02 12:33:55 -04:00
|
|
|
}
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
2018-08-02 12:33:55 -04:00
|
|
|
|
2020-04-06 10:57:03 -04:00
|
|
|
bool PlatformState::getGlobalVarFromSymbol(const void* hostVar, int deviceId,
|
|
|
|
|
hipDeviceptr_t* dev_ptr,
|
|
|
|
|
size_t* size_ptr) {
|
2020-04-24 22:18:23 -04:00
|
|
|
hipModule_t hmod;
|
2020-04-06 10:57:03 -04:00
|
|
|
std::string symbolName;
|
2020-04-24 22:18:23 -04:00
|
|
|
if (!PlatformState::instance().findSymbol(hostVar, hmod, symbolName)) {
|
2020-04-06 10:57:03 -04:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return PlatformState::instance().getGlobalVar(symbolName.c_str(),
|
2020-04-24 22:18:23 -04:00
|
|
|
ihipGetDevice(), hmod,
|
2020-04-06 10:57:03 -04:00
|
|
|
dev_ptr, size_ptr);
|
|
|
|
|
}
|
|
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
|
|
|
|
|
auto& arguments = execStack_.top().arguments_;
|
2018-08-02 12:33:55 -04:00
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
if (arguments.size() < offset + size) {
|
|
|
|
|
arguments.resize(offset + size);
|
2018-08-02 12:33:55 -04:00
|
|
|
}
|
|
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
::memcpy(&arguments[offset], arg, size);
|
|
|
|
|
}
|
2018-08-02 12:33:55 -04:00
|
|
|
|
2019-03-18 18:44:55 -04:00
|
|
|
void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
|
|
|
|
hipStream_t stream) {
|
|
|
|
|
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PlatformState::popExec(ihipExec_t& exec) {
|
|
|
|
|
exec = std::move(execStack_.top());
|
|
|
|
|
execStack_.pop();
|
|
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2020-05-15 16:47:26 -07:00
|
|
|
namespace {
|
|
|
|
|
const int HIP_ENABLE_DEFERRED_LOADING{[] () {
|
|
|
|
|
char *var = getenv("HIP_ENABLE_DEFERRED_LOADING");
|
|
|
|
|
return var ? atoi(var) : 1;
|
|
|
|
|
}()};
|
|
|
|
|
} /* namespace */
|
|
|
|
|
|
2018-03-02 17:55:48 -05:00
|
|
|
extern "C" void __hipRegisterFunction(
|
2019-04-22 15:19:24 -04:00
|
|
|
std::vector<std::pair<hipModule_t,bool> >* modules,
|
2018-03-01 22:57:20 -05:00
|
|
|
const void* hostFunction,
|
|
|
|
|
char* deviceFunction,
|
|
|
|
|
const char* deviceName,
|
|
|
|
|
unsigned int threadLimit,
|
|
|
|
|
uint3* tid,
|
|
|
|
|
uint3* bid,
|
|
|
|
|
dim3* blockDim,
|
|
|
|
|
dim3* gridDim,
|
|
|
|
|
int* wSize)
|
|
|
|
|
{
|
2020-03-30 11:32:27 -04:00
|
|
|
PlatformState::DeviceFunction func{ std::string{deviceName}, modules, std::vector<hipFunction_t>{g_devices.size()}};
|
2019-04-22 15:19:24 -04:00
|
|
|
PlatformState::instance().registerFunction(hostFunction, func);
|
2020-05-15 16:47:26 -07:00
|
|
|
if (!HIP_ENABLE_DEFERRED_LOADING) {
|
2020-05-14 00:18:32 -04:00
|
|
|
HIP_INIT();
|
|
|
|
|
for (size_t i = 0; i < g_devices.size(); ++i) {
|
|
|
|
|
PlatformState::instance().getFunc(hostFunction, i);
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2018-07-24 17:14:35 -04:00
|
|
|
// Registers a device-side global variable.
|
|
|
|
|
// For each global variable in device code, there is a corresponding shadow
|
|
|
|
|
// global variable in host code. The shadow host variable is used to keep
|
|
|
|
|
// track of the value of the device side global variable between kernel
|
|
|
|
|
// executions.
|
2018-03-02 17:55:48 -05:00
|
|
|
extern "C" void __hipRegisterVar(
|
2019-04-22 15:19:24 -04:00
|
|
|
std::vector<std::pair<hipModule_t,bool> >* modules, // The device modules containing code object
|
2020-04-02 13:16:07 -04:00
|
|
|
void* var, // The shadow variable in host code
|
2018-07-24 17:14:35 -04:00
|
|
|
char* hostVar, // Variable name in host code
|
|
|
|
|
char* deviceVar, // Variable name in device code
|
|
|
|
|
int ext, // Whether this variable is external
|
2020-04-02 13:16:07 -04:00
|
|
|
size_t size, // Size of the variable
|
2018-07-24 17:14:35 -04:00
|
|
|
int constant, // Whether this variable is constant
|
|
|
|
|
int global) // Unknown, always 0
|
2018-03-01 22:57:20 -05:00
|
|
|
{
|
2020-04-06 10:57:03 -04:00
|
|
|
PlatformState::DeviceVar dvar{PlatformState::DVK_Variable,
|
|
|
|
|
var,
|
|
|
|
|
std::string{hostVar},
|
|
|
|
|
size,
|
|
|
|
|
modules,
|
|
|
|
|
std::vector<PlatformState::RegisteredVar>{g_devices.size()},
|
|
|
|
|
false,
|
|
|
|
|
/*type*/ 0,
|
|
|
|
|
/*norm*/ 0};
|
|
|
|
|
|
|
|
|
|
PlatformState::instance().registerVar(hostVar, dvar);
|
2020-04-24 22:18:23 -04:00
|
|
|
PlatformState::instance().registerVarSym(var, nullptr, deviceVar);
|
2020-04-06 10:57:03 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern "C" void __hipRegisterSurface(std::vector<std::pair<hipModule_t, bool>>*
|
|
|
|
|
modules, // The device modules containing code object
|
|
|
|
|
void* var, // The shadow variable in host code
|
|
|
|
|
char* hostVar, // Variable name in host code
|
|
|
|
|
char* deviceVar, // Variable name in device code
|
|
|
|
|
int type, int ext) {
|
|
|
|
|
PlatformState::DeviceVar dvar{PlatformState::DVK_Surface,
|
|
|
|
|
var,
|
|
|
|
|
std::string{hostVar},
|
|
|
|
|
sizeof(surfaceReference), // Copy whole surfaceReference
|
|
|
|
|
modules,
|
|
|
|
|
std::vector<PlatformState::RegisteredVar>{g_devices.size()},
|
|
|
|
|
false,
|
|
|
|
|
type,
|
|
|
|
|
/*norm*/ 0};
|
|
|
|
|
PlatformState::instance().registerVar(hostVar, dvar);
|
2020-04-24 22:18:23 -04:00
|
|
|
PlatformState::instance().registerVarSym(var, nullptr, deviceVar);
|
2020-04-06 10:57:03 -04:00
|
|
|
}
|
2019-03-18 18:44:55 -04:00
|
|
|
|
2020-04-06 10:57:03 -04:00
|
|
|
extern "C" void __hipRegisterTexture(std::vector<std::pair<hipModule_t, bool>>*
|
|
|
|
|
modules, // The device modules containing code object
|
|
|
|
|
void* var, // The shadow variable in host code
|
|
|
|
|
char* hostVar, // Variable name in host code
|
|
|
|
|
char* deviceVar, // Variable name in device code
|
|
|
|
|
int type, int norm, int ext) {
|
|
|
|
|
PlatformState::DeviceVar dvar{PlatformState::DVK_Texture,
|
|
|
|
|
var,
|
|
|
|
|
std::string{hostVar},
|
|
|
|
|
sizeof(textureReference), // Copy whole textureReference so far.
|
|
|
|
|
modules,
|
|
|
|
|
std::vector<PlatformState::RegisteredVar>{g_devices.size()},
|
|
|
|
|
false,
|
|
|
|
|
type,
|
|
|
|
|
norm};
|
|
|
|
|
PlatformState::instance().registerVar(hostVar, dvar);
|
2020-04-24 22:18:23 -04:00
|
|
|
PlatformState::instance().registerVarSym(var, nullptr, deviceVar);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2019-04-22 15:19:24 -04:00
|
|
|
extern "C" void __hipUnregisterFatBinary(std::vector< std::pair<hipModule_t, bool> >* modules)
|
2018-03-01 22:57:20 -05:00
|
|
|
{
|
2018-03-28 19:23:57 -04:00
|
|
|
HIP_INIT();
|
2019-02-01 13:43:50 -05:00
|
|
|
|
2019-08-22 12:41:42 -04:00
|
|
|
std::for_each(modules->begin(), modules->end(), [](std::pair<hipModule_t, bool> module){
|
|
|
|
|
if (module.first != nullptr) {
|
|
|
|
|
as_amd(reinterpret_cast<cl_program>(module.first))->release();
|
|
|
|
|
}
|
|
|
|
|
});
|
2019-12-17 20:18:36 -05:00
|
|
|
if (modules->size() > 0) {
|
|
|
|
|
PlatformState::instance().unregisterVar((*modules)[0].first);
|
|
|
|
|
}
|
2019-12-11 03:11:19 -05:00
|
|
|
PlatformState::instance().removeFatBinary(modules);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-02 17:55:48 -05:00
|
|
|
extern "C" hipError_t hipConfigureCall(
|
2018-03-01 22:57:20 -05:00
|
|
|
dim3 gridDim,
|
|
|
|
|
dim3 blockDim,
|
|
|
|
|
size_t sharedMem,
|
|
|
|
|
hipStream_t stream)
|
|
|
|
|
{
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
2018-08-02 12:33:55 -04:00
|
|
|
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-08-14 18:54:13 -04:00
|
|
|
HIP_RETURN(hipSuccess);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2019-09-24 16:58:14 -04:00
|
|
|
extern "C" hipError_t __hipPushCallConfiguration(
|
|
|
|
|
dim3 gridDim,
|
|
|
|
|
dim3 blockDim,
|
|
|
|
|
size_t sharedMem,
|
|
|
|
|
hipStream_t stream)
|
|
|
|
|
{
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
2019-09-24 16:58:14 -04:00
|
|
|
|
|
|
|
|
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
|
|
|
|
|
dim3 *blockDim,
|
|
|
|
|
size_t *sharedMem,
|
|
|
|
|
hipStream_t *stream) {
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
2019-09-24 16:58:14 -04:00
|
|
|
|
|
|
|
|
ihipExec_t exec;
|
|
|
|
|
PlatformState::instance().popExec(exec);
|
|
|
|
|
*gridDim = exec.gridDim_;
|
|
|
|
|
*blockDim = exec.blockDim_;
|
|
|
|
|
*sharedMem = exec.sharedMem_;
|
|
|
|
|
*stream = exec.hStream_;
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
2018-03-02 17:55:48 -05:00
|
|
|
extern "C" hipError_t hipSetupArgument(
|
2018-03-01 22:57:20 -05:00
|
|
|
const void *arg,
|
|
|
|
|
size_t size,
|
|
|
|
|
size_t offset)
|
|
|
|
|
{
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipSetupArgument, arg, size, offset);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
2018-08-02 12:33:55 -04:00
|
|
|
PlatformState::instance().setupArgument(arg, size, offset);
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-08-14 18:54:13 -04:00
|
|
|
HIP_RETURN(hipSuccess);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2018-03-02 17:55:48 -05:00
|
|
|
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
|
2018-03-01 22:57:20 -05:00
|
|
|
{
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipLaunchByPtr, hostFunction);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
2020-02-13 10:44:10 -08:00
|
|
|
ihipExec_t exec;
|
|
|
|
|
PlatformState::instance().popExec(exec);
|
|
|
|
|
|
|
|
|
|
hip::Stream* stream = reinterpret_cast<hip::Stream*>(exec.hStream_);
|
2020-04-23 16:54:48 -04:00
|
|
|
int deviceId = (stream != nullptr)? stream->DeviceId() : ihipGetDevice();
|
2019-12-17 20:18:36 -05:00
|
|
|
if (deviceId == -1) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Wrong DeviceId: %d \n", deviceId);
|
2019-12-17 20:18:36 -05:00
|
|
|
HIP_RETURN(hipErrorNoDevice);
|
|
|
|
|
}
|
2019-02-01 13:43:50 -05:00
|
|
|
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
|
2018-08-14 18:54:13 -04:00
|
|
|
if (func == nullptr) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Could not retrieve hostFunction: 0x%x \n", hostFunction);
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2018-08-14 18:54:13 -04:00
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-08-27 18:46:34 -04:00
|
|
|
size_t size = exec.arguments_.size();
|
2018-03-01 22:57:20 -05:00
|
|
|
void *extra[] = {
|
2018-08-02 18:00:05 -04:00
|
|
|
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
|
2018-08-27 18:46:34 -04:00
|
|
|
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
2018-03-01 22:57:20 -05:00
|
|
|
HIP_LAUNCH_PARAM_END
|
|
|
|
|
};
|
|
|
|
|
|
2019-02-01 13:43:50 -05:00
|
|
|
HIP_RETURN(hipModuleLaunchKernel(func,
|
2018-08-02 18:00:05 -04:00
|
|
|
exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
|
|
|
|
|
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
|
2018-08-14 18:54:13 -04:00
|
|
|
exec.sharedMem_, exec.hStream_, nullptr, extra));
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
2018-04-13 18:19:28 -04:00
|
|
|
|
2020-03-04 16:05:37 -05:00
|
|
|
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) {
|
|
|
|
|
HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol);
|
|
|
|
|
|
2020-04-24 22:18:23 -04:00
|
|
|
hipModule_t hmod;
|
2020-03-04 16:05:37 -05:00
|
|
|
std::string symbolName;
|
2020-04-24 22:18:23 -04:00
|
|
|
if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str());
|
2020-03-04 16:05:37 -05:00
|
|
|
HIP_RETURN(hipErrorInvalidSymbol);
|
|
|
|
|
}
|
2019-03-18 18:44:55 -04:00
|
|
|
size_t size = 0;
|
2020-04-24 22:18:23 -04:00
|
|
|
if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod,
|
2019-11-04 10:13:20 -05:00
|
|
|
devPtr, &size)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n",
|
|
|
|
|
symbolName.c_str(), ihipGetDevice());
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidSymbol);
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-04 16:05:37 -05:00
|
|
|
hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) {
|
|
|
|
|
HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol);
|
|
|
|
|
|
2020-04-24 22:18:23 -04:00
|
|
|
hipModule_t hmod;
|
2020-03-04 16:05:37 -05:00
|
|
|
std::string symbolName;
|
2020-04-24 22:18:23 -04:00
|
|
|
if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str());
|
2020-03-04 16:05:37 -05:00
|
|
|
HIP_RETURN(hipErrorInvalidSymbol);
|
|
|
|
|
}
|
2019-03-18 18:44:55 -04:00
|
|
|
hipDeviceptr_t devPtr = nullptr;
|
2020-04-24 22:18:23 -04:00
|
|
|
if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod,
|
2019-11-04 10:13:20 -05:00
|
|
|
&devPtr, sizePtr)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n",
|
|
|
|
|
symbolName.c_str(), ihipGetDevice());
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidSymbol);
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-04 10:13:20 -05:00
|
|
|
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
|
|
|
|
hipDeviceptr_t* dptr, size_t* bytes)
|
2019-04-04 18:22:40 -04:00
|
|
|
{
|
|
|
|
|
HIP_INIT();
|
|
|
|
|
|
|
|
|
|
amd::Program* program = nullptr;
|
|
|
|
|
device::Program* dev_program = nullptr;
|
|
|
|
|
|
|
|
|
|
/* Get Device Program pointer*/
|
|
|
|
|
program = as_amd(reinterpret_cast<cl_program>(hmod));
|
2020-02-18 12:36:12 -08:00
|
|
|
dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
|
2019-04-04 18:22:40 -04:00
|
|
|
|
|
|
|
|
if (dev_program == nullptr) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot get Device Function for module: 0x%x \n", hmod);
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2019-04-04 18:22:40 -04:00
|
|
|
}
|
|
|
|
|
/* Find the global Symbols */
|
2020-04-03 12:13:12 -04:00
|
|
|
if (!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot create Global Var obj for symbol: %s \n", name);
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidSymbol);
|
2019-04-04 18:22:40 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2019-06-12 10:00:38 -04:00
|
|
|
namespace hip_impl {
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize,
|
|
|
|
|
const amd::Device& device, hipFunction_t func, int inputBlockSize,
|
2020-04-03 12:13:12 -04:00
|
|
|
size_t dynamicSMemSize, bool bCalcPotentialBlkSz)
|
2019-06-12 10:00:38 -04:00
|
|
|
{
|
|
|
|
|
hip::Function* function = hip::Function::asFunction(func);
|
2020-04-03 12:13:12 -04:00
|
|
|
const amd::Kernel& kernel = *function->function_;
|
|
|
|
|
|
|
|
|
|
const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel.getDeviceKernel(device)->workGroupInfo();
|
2020-05-27 14:39:30 -05:00
|
|
|
if (bCalcPotentialBlkSz == false) {
|
|
|
|
|
if (inputBlockSize == 0) {
|
2020-04-03 12:13:12 -04:00
|
|
|
return hipErrorInvalidValue;
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
2020-05-27 14:39:30 -05:00
|
|
|
*bestBlockSize = 0;
|
|
|
|
|
// Make sure the requested block size is smaller than max supported
|
|
|
|
|
if (inputBlockSize > int(device.info().maxWorkGroupSize_)) {
|
|
|
|
|
*maxBlocksPerCU = 0;
|
|
|
|
|
*numBlocksPerGrid = 0;
|
|
|
|
|
return hipSuccess;
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
|
|
|
|
}
|
2020-05-27 14:39:30 -05:00
|
|
|
else {
|
2020-05-28 12:18:09 -05:00
|
|
|
if (inputBlockSize > int(device.info().maxWorkGroupSize_) ||
|
2020-05-27 14:39:30 -05:00
|
|
|
inputBlockSize == 0) {
|
|
|
|
|
// The user wrote the kernel to work with a workgroup size
|
|
|
|
|
// bigger than this hardware can support. Or they do not care
|
|
|
|
|
// about the size So just assume its maximum size is
|
|
|
|
|
// constrained by hardware
|
|
|
|
|
inputBlockSize = device.info().maxWorkGroupSize_;
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
2020-05-27 14:39:30 -05:00
|
|
|
// Find wave occupancy per CU => simd_per_cu * GPR usage
|
2019-06-12 10:00:38 -04:00
|
|
|
constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
|
2020-03-30 09:10:16 -04:00
|
|
|
size_t VgprWaves = MaxWavesPerSimd;
|
|
|
|
|
if (wrkGrpInfo->usedVGPRs_ > 0) {
|
|
|
|
|
VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4);
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
|
2020-03-30 09:10:16 -04:00
|
|
|
size_t GprWaves = VgprWaves;
|
2019-08-01 16:40:24 -04:00
|
|
|
if (wrkGrpInfo->usedSGPRs_ > 0) {
|
2020-05-27 14:39:30 -05:00
|
|
|
size_t maxSGPRs;
|
|
|
|
|
if (device.info().gfxipVersion_ < 800) {
|
|
|
|
|
maxSGPRs = 512;
|
|
|
|
|
}
|
|
|
|
|
else if (device.info().gfxipVersion_ < 1000) {
|
|
|
|
|
maxSGPRs = 800;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves
|
|
|
|
|
}
|
|
|
|
|
const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
|
2019-08-01 16:40:24 -04:00
|
|
|
GprWaves = std::min(VgprWaves, SgprWaves);
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-27 14:39:30 -05:00
|
|
|
const size_t alu_occupancy = device.info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves);
|
|
|
|
|
const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;
|
2019-06-12 10:00:38 -04:00
|
|
|
|
2020-05-27 14:39:30 -05:00
|
|
|
int lds_occupancy_wgs = INT_MAX;
|
|
|
|
|
const size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize;
|
2019-06-12 10:00:38 -04:00
|
|
|
if (total_used_lds != 0) {
|
2020-05-27 14:39:30 -05:00
|
|
|
lds_occupancy_wgs = static_cast<int>(device.info().localMemSize_ / total_used_lds);
|
|
|
|
|
}
|
|
|
|
|
// Calculate how many blocks of inputBlockSize we can fit per CU
|
|
|
|
|
// Need to align with hardware wavefront size. If they want 65 threads, but
|
|
|
|
|
// waves are 64, then we need 128 threads per block.
|
|
|
|
|
// So this calculates how many blocks we can fit.
|
|
|
|
|
*maxBlocksPerCU = alu_limited_threads / amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_);
|
|
|
|
|
// Unless those blocks are further constrained by LDS size.
|
|
|
|
|
*maxBlocksPerCU = std::min(*maxBlocksPerCU, lds_occupancy_wgs);
|
|
|
|
|
|
|
|
|
|
// Some callers of this function want to return the block size, in threads, that
|
|
|
|
|
// leads to the maximum occupancy. In that case, inputBlockSize is the maximum
|
|
|
|
|
// workgroup size the user wants to allow, or that the hardware can allow.
|
|
|
|
|
// It is either the number of threads that we are limited to due to occupancy, or
|
|
|
|
|
// the maximum available block size for this kernel, which could have come from the
|
|
|
|
|
// user. e.g., if the user indicates the maximum block size is 64 threads, but we
|
|
|
|
|
// calculate that 128 threads can fit in each CU, we have to give up and return 64.
|
|
|
|
|
*bestBlockSize = std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_));
|
|
|
|
|
// If the best block size is smaller than the block size used to fit the maximum,
|
|
|
|
|
// then we need to make the grid bigger for full occupancy.
|
|
|
|
|
const int bestBlocksPerCU = alu_limited_threads / (*bestBlockSize);
|
|
|
|
|
// Unless those blocks are further constrained by LDS size.
|
|
|
|
|
*numBlocksPerGrid = device.info().maxComputeUnits_ * std::min(bestBlocksPerCU, lds_occupancy_wgs);
|
2019-06-12 10:00:38 -04:00
|
|
|
|
2020-04-03 12:13:12 -04:00
|
|
|
return hipSuccess;
|
2019-06-12 10:00:38 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-30 01:19:24 -04:00
|
|
|
extern "C" {
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
|
|
|
|
const void* f, size_t dynSharedMemPerBlk,
|
|
|
|
|
int blockSizeLimit)
|
2020-03-11 19:06:22 +05:30
|
|
|
{
|
2020-04-03 12:13:12 -04:00
|
|
|
HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
|
|
|
|
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice());
|
|
|
|
|
if (func == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit, dynSharedMemPerBlk,true);
|
2020-04-23 21:42:06 +05:30
|
|
|
if (ret == hipSuccess) {
|
2020-05-27 14:39:30 -05:00
|
|
|
*blockSize = best_block_size;
|
|
|
|
|
*gridSize = max_blocks_per_grid;
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
2020-04-23 21:42:06 +05:30
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
|
|
|
|
hipFunction_t f, size_t dynSharedMemPerBlk,
|
|
|
|
|
int blockSizeLimit)
|
|
|
|
|
{
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
|
|
|
|
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true);
|
2020-04-23 21:42:06 +05:30
|
|
|
if (ret == hipSuccess) {
|
2020-05-27 14:39:30 -05:00
|
|
|
*blockSize = best_block_size;
|
|
|
|
|
*gridSize = max_blocks_per_grid;
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
|
|
|
|
|
hipFunction_t f, size_t dynSharedMemPerBlk,
|
|
|
|
|
int blockSizeLimit, unsigned int flags)
|
|
|
|
|
{
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags);
|
|
|
|
|
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int best_block_size = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true);
|
2020-04-03 12:13:12 -04:00
|
|
|
if (ret == hipSuccess) {
|
2020-05-27 14:39:30 -05:00
|
|
|
*blockSize = best_block_size;
|
|
|
|
|
*gridSize = max_blocks_per_grid;
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
HIP_RETURN(ret);
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
|
|
|
|
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
|
|
|
|
hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
|
|
|
|
|
{
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk);
|
|
|
|
|
if (numBlocks == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false);
|
2020-04-23 21:42:06 +05:30
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
|
|
|
|
|
hipFunction_t f, int blockSize,
|
|
|
|
|
size_t dynSharedMemPerBlk, unsigned int flags)
|
|
|
|
|
{
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags);
|
|
|
|
|
if (numBlocks == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false);
|
2020-04-23 21:42:06 +05:30
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
|
|
|
|
const void* f, int blockSize, size_t dynamicSMemSize)
|
2019-06-12 10:00:38 -04:00
|
|
|
{
|
2020-04-03 12:13:12 -04:00
|
|
|
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize);
|
|
|
|
|
if (numBlocks == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice());
|
|
|
|
|
if (func == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false);
|
2020-04-03 12:13:12 -04:00
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
2019-06-12 10:00:38 -04:00
|
|
|
}
|
|
|
|
|
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
|
|
|
|
|
const void* f,
|
|
|
|
|
int blockSize, size_t dynamicSMemSize, unsigned int flags)
|
2019-06-12 10:00:38 -04:00
|
|
|
{
|
2020-04-03 12:13:12 -04:00
|
|
|
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags);
|
|
|
|
|
if (numBlocks == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice());
|
|
|
|
|
if (func == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2020-05-27 14:39:30 -05:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false);
|
2020-04-03 12:13:12 -04:00
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
2019-10-30 01:19:24 -04:00
|
|
|
}
|
2019-06-12 10:00:38 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2018-04-13 18:19:28 -04:00
|
|
|
#if defined(ATI_OS_LINUX)
|
|
|
|
|
|
|
|
|
|
namespace hip_impl {
|
|
|
|
|
|
2018-04-27 21:21:31 -04:00
|
|
|
struct dl_phdr_info {
|
|
|
|
|
ELFIO::Elf64_Addr dlpi_addr;
|
|
|
|
|
const char *dlpi_name;
|
|
|
|
|
const ELFIO::Elf64_Phdr *dlpi_phdr;
|
|
|
|
|
ELFIO::Elf64_Half dlpi_phnum;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
extern "C" int dl_iterate_phdr(
|
|
|
|
|
int (*callback) (struct dl_phdr_info *info, size_t size, void *data), void *data
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
struct Symbol {
|
|
|
|
|
std::string name;
|
|
|
|
|
ELFIO::Elf64_Addr value = 0;
|
|
|
|
|
ELFIO::Elf_Xword size = 0;
|
|
|
|
|
ELFIO::Elf_Half sect_idx = 0;
|
|
|
|
|
uint8_t bind = 0;
|
|
|
|
|
uint8_t type = 0;
|
|
|
|
|
uint8_t other = 0;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
inline Symbol read_symbol(const ELFIO::symbol_section_accessor& section, unsigned int idx) {
|
|
|
|
|
assert(idx < section.get_symbols_num());
|
|
|
|
|
|
|
|
|
|
Symbol r;
|
|
|
|
|
section.get_symbol(idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other);
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename P>
|
|
|
|
|
inline ELFIO::section* find_section_if(ELFIO::elfio& reader, P p) {
|
|
|
|
|
const auto it = find_if(reader.sections.begin(), reader.sections.end(), std::move(p));
|
|
|
|
|
|
|
|
|
|
return it != reader.sections.end() ? *it : nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<std::pair<uintptr_t, std::string>> function_names_for(const ELFIO::elfio& reader,
|
|
|
|
|
ELFIO::section* symtab) {
|
|
|
|
|
std::vector<std::pair<uintptr_t, std::string>> r;
|
|
|
|
|
ELFIO::symbol_section_accessor symbols{reader, symtab};
|
|
|
|
|
|
|
|
|
|
for (auto i = 0u; i != symbols.get_symbols_num(); ++i) {
|
|
|
|
|
auto tmp = read_symbol(symbols, i);
|
|
|
|
|
|
|
|
|
|
if (tmp.type == STT_FUNC && tmp.sect_idx != SHN_UNDEF && !tmp.name.empty()) {
|
|
|
|
|
r.emplace_back(tmp.value, tmp.name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const std::vector<std::pair<uintptr_t, std::string>>& function_names_for_process() {
|
|
|
|
|
static constexpr const char self[] = "/proc/self/exe";
|
|
|
|
|
|
|
|
|
|
static std::vector<std::pair<uintptr_t, std::string>> r;
|
|
|
|
|
static std::once_flag f;
|
|
|
|
|
|
|
|
|
|
std::call_once(f, []() {
|
|
|
|
|
ELFIO::elfio reader;
|
|
|
|
|
|
|
|
|
|
if (reader.load(self)) {
|
|
|
|
|
const auto it = find_section_if(
|
|
|
|
|
reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; });
|
|
|
|
|
|
|
|
|
|
if (it) r = function_names_for(reader, it);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const std::unordered_map<uintptr_t, std::string>& function_names()
|
|
|
|
|
{
|
|
|
|
|
static std::unordered_map<uintptr_t, std::string> r{
|
|
|
|
|
function_names_for_process().cbegin(),
|
|
|
|
|
function_names_for_process().cend()};
|
|
|
|
|
static std::once_flag f;
|
|
|
|
|
|
|
|
|
|
std::call_once(f, []() {
|
|
|
|
|
dl_iterate_phdr([](dl_phdr_info* info, size_t, void*) {
|
|
|
|
|
ELFIO::elfio reader;
|
|
|
|
|
|
|
|
|
|
if (reader.load(info->dlpi_name)) {
|
|
|
|
|
const auto it = find_section_if(
|
|
|
|
|
reader, [](const ELFIO::section* x) { return x->get_type() == SHT_SYMTAB; });
|
|
|
|
|
|
|
|
|
|
if (it) {
|
|
|
|
|
auto n = function_names_for(reader, it);
|
|
|
|
|
|
|
|
|
|
for (auto&& f : n) f.first += info->dlpi_addr;
|
|
|
|
|
|
|
|
|
|
r.insert(make_move_iterator(n.begin()), make_move_iterator(n.end()));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
},
|
|
|
|
|
nullptr);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<char> bundles_for_process() {
|
|
|
|
|
static constexpr const char self[] = "/proc/self/exe";
|
|
|
|
|
static constexpr const char kernel_section[] = ".kernel";
|
|
|
|
|
std::vector<char> r;
|
|
|
|
|
|
|
|
|
|
ELFIO::elfio reader;
|
|
|
|
|
|
|
|
|
|
if (reader.load(self)) {
|
|
|
|
|
auto it = find_section_if(
|
|
|
|
|
reader, [](const ELFIO::section* x) { return x->get_name() == kernel_section; });
|
|
|
|
|
|
|
|
|
|
if (it) r.insert(r.end(), it->get_data(), it->get_data() + it->get_size());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const std::vector<hipModule_t>& modules() {
|
|
|
|
|
static std::vector<hipModule_t> r;
|
|
|
|
|
static std::once_flag f;
|
|
|
|
|
|
|
|
|
|
std::call_once(f, []() {
|
|
|
|
|
static std::vector<std::vector<char>> bundles{bundles_for_process()};
|
|
|
|
|
|
|
|
|
|
dl_iterate_phdr(
|
|
|
|
|
[](dl_phdr_info* info, std::size_t, void*) {
|
|
|
|
|
ELFIO::elfio tmp;
|
|
|
|
|
if (tmp.load(info->dlpi_name)) {
|
|
|
|
|
const auto it = find_section_if(
|
|
|
|
|
tmp, [](const ELFIO::section* x) { return x->get_name() == ".kernel"; });
|
|
|
|
|
|
|
|
|
|
if (it) bundles.emplace_back(it->get_data(), it->get_data() + it->get_size());
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
},
|
|
|
|
|
nullptr);
|
|
|
|
|
|
|
|
|
|
for (auto&& bundle : bundles) {
|
2018-11-13 18:10:36 -05:00
|
|
|
if (bundle.empty()) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2018-04-27 21:21:31 -04:00
|
|
|
std::string magic(&bundle[0], sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) - 1);
|
|
|
|
|
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(&bundle[0]);
|
|
|
|
|
const auto* desc = &obheader->desc[0];
|
|
|
|
|
for (uint64_t i = 0; i < obheader->numBundles; ++i,
|
|
|
|
|
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
|
|
|
|
|
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
|
|
|
|
|
|
|
|
|
|
std::string triple(desc->triple, sizeof(HCC_AMDGCN_AMDHSA_TRIPLE) - 1);
|
|
|
|
|
if (triple.compare(HCC_AMDGCN_AMDHSA_TRIPLE))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE),
|
|
|
|
|
desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE));
|
|
|
|
|
|
2020-02-18 12:36:12 -08:00
|
|
|
if (isCompatibleCodeObject(target, hip::getCurrentDevice()->devices()[0]->info().name_)) {
|
2018-04-27 21:21:31 -04:00
|
|
|
hipModule_t module;
|
|
|
|
|
if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast<const void*>(
|
|
|
|
|
reinterpret_cast<uintptr_t>(obheader) + desc->offset)))
|
|
|
|
|
r.push_back(module);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const std::unordered_map<uintptr_t, hipFunction_t>& functions()
|
|
|
|
|
{
|
|
|
|
|
static std::unordered_map<uintptr_t, hipFunction_t> r;
|
|
|
|
|
static std::once_flag f;
|
|
|
|
|
|
|
|
|
|
std::call_once(f, []() {
|
|
|
|
|
for (auto&& function : function_names()) {
|
|
|
|
|
for (auto&& module : modules()) {
|
|
|
|
|
hipFunction_t f;
|
2019-12-12 16:01:14 -05:00
|
|
|
if (hipSuccess == hipModuleGetFunction(&f, module, function.second.c_str())) {
|
2018-04-27 21:21:31 -04:00
|
|
|
r[function.first] = f;
|
2019-12-12 16:01:14 -05:00
|
|
|
}
|
2018-04-27 21:21:31 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2018-04-13 18:19:28 -04:00
|
|
|
void hipLaunchKernelGGLImpl(
|
|
|
|
|
uintptr_t function_address,
|
|
|
|
|
const dim3& numBlocks,
|
|
|
|
|
const dim3& dimBlocks,
|
|
|
|
|
uint32_t sharedMemBytes,
|
|
|
|
|
hipStream_t stream,
|
2018-04-27 21:21:31 -04:00
|
|
|
void** kernarg)
|
|
|
|
|
{
|
|
|
|
|
HIP_INIT();
|
|
|
|
|
|
|
|
|
|
const auto it = functions().find(function_address);
|
|
|
|
|
if (it == functions().cend())
|
2018-04-29 15:04:45 -04:00
|
|
|
assert(0);
|
2018-04-22 21:17:27 -04:00
|
|
|
|
2018-04-27 21:21:31 -04:00
|
|
|
hipModuleLaunchKernel(it->second,
|
|
|
|
|
numBlocks.x, numBlocks.y, numBlocks.z,
|
|
|
|
|
dimBlocks.x, dimBlocks.y, dimBlocks.z,
|
|
|
|
|
sharedMemBytes, stream, nullptr, kernarg);
|
2018-04-13 18:19:28 -04:00
|
|
|
}
|
|
|
|
|
|
2019-06-12 10:00:38 -04:00
|
|
|
void hipLaunchCooperativeKernelGGLImpl(
|
|
|
|
|
uintptr_t function_address,
|
|
|
|
|
const dim3& numBlocks,
|
|
|
|
|
const dim3& dimBlocks,
|
|
|
|
|
uint32_t sharedMemBytes,
|
|
|
|
|
hipStream_t stream,
|
|
|
|
|
void** kernarg)
|
|
|
|
|
{
|
|
|
|
|
HIP_INIT();
|
|
|
|
|
|
|
|
|
|
hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address),
|
|
|
|
|
numBlocks, dimBlocks, kernarg, sharedMemBytes, stream);
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-13 18:19:28 -04:00
|
|
|
}
|
|
|
|
|
|
2019-10-16 17:47:55 -04:00
|
|
|
#endif // defined(ATI_OS_LINUX)
|
|
|
|
|
|
2020-02-06 13:56:41 -05:00
|
|
|
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
|
|
|
|
dim3 gridDim,
|
|
|
|
|
dim3 blockDim,
|
|
|
|
|
void** args,
|
|
|
|
|
size_t sharedMemBytes,
|
|
|
|
|
hipStream_t stream)
|
|
|
|
|
{
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes,
|
2020-02-06 13:56:41 -05:00
|
|
|
stream);
|
|
|
|
|
|
2020-02-13 10:44:10 -08:00
|
|
|
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
2020-04-23 16:54:48 -04:00
|
|
|
int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice();
|
2020-02-06 13:56:41 -05:00
|
|
|
if (deviceId == -1) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Wrong Device Id: %d \n", deviceId);
|
2020-02-06 13:56:41 -05:00
|
|
|
HIP_RETURN(hipErrorNoDevice);
|
|
|
|
|
}
|
|
|
|
|
hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId);
|
|
|
|
|
if (func == nullptr) {
|
|
|
|
|
#ifdef ATI_OS_LINUX
|
|
|
|
|
const auto it = hip_impl::functions().find(reinterpret_cast<uintptr_t>(hostFunction));
|
|
|
|
|
if (it == hip_impl::functions().cend()) {
|
2020-04-13 22:51:46 -04:00
|
|
|
DevLogPrintfError("Cannot find function: 0x%x \n", hostFunction);
|
2020-02-06 13:56:41 -05:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
|
|
|
|
}
|
|
|
|
|
func = it->second;
|
|
|
|
|
#else
|
|
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipModuleLaunchKernel(func, gridDim.x, gridDim.y, gridDim.z,
|
|
|
|
|
blockDim.x, blockDim.y, blockDim.z,
|
|
|
|
|
sharedMemBytes, stream, args, nullptr));
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-03 01:15:15 -04:00
|
|
|
// conversion routines between float and half precision
|
|
|
|
|
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
|
|
|
|
|
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
|
|
|
|
|
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
|
|
|
|
|
|
2018-08-14 18:54:13 -04:00
|
|
|
// half float, the f16 is in the low 16 bits of the input argument
|
2018-08-03 01:15:15 -04:00
|
|
|
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
|
|
|
|
|
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
|
2019-05-06 17:20:00 -04:00
|
|
|
std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
|
2018-08-03 01:15:15 -04:00
|
|
|
u = (a & 0x7fff) != 0 ? v : u;
|
2019-05-06 17:20:00 -04:00
|
|
|
return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
|
2018-08-03 01:15:15 -04:00
|
|
|
}
|
|
|
|
|
|
2018-08-14 18:54:13 -04:00
|
|
|
// float half with nearest even rounding
|
2018-08-03 01:15:15 -04:00
|
|
|
// The lower 16 bits of the result is the bit pattern for the f16
|
|
|
|
|
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
|
|
|
|
std::uint32_t u = f32_as_u32(a);
|
|
|
|
|
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
|
|
|
|
|
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
|
|
|
|
|
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
|
|
|
|
|
std::uint32_t n = ((std::uint32_t)e << 12) | m;
|
|
|
|
|
std::uint32_t s = (u >> 16) & 0x8000;
|
|
|
|
|
int b = clamp_int(1-e, 0, 13);
|
|
|
|
|
std::uint32_t d = (0x1000 | m) >> b;
|
|
|
|
|
d |= (d << b) != (0x1000 | m);
|
|
|
|
|
std::uint32_t v = e < 1 ? d : n;
|
|
|
|
|
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
|
|
|
|
|
v = e > 30 ? 0x7c00 : v;
|
|
|
|
|
v = e == 143 ? i : v;
|
|
|
|
|
return s | v;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern "C" float __gnu_h2f_ieee(unsigned short h){
|
|
|
|
|
return __convert_half_to_float((std::uint32_t) h);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
extern "C" unsigned short __gnu_f2h_ieee(float f){
|
|
|
|
|
return (unsigned short)__convert_float_to_half(f);
|
|
|
|
|
}
|