파일
rocm-systems/src/hip_module.cpp
T

601 라인
22 KiB
C++
Raw 일반 보기 히스토리

2016-08-17 10:36:28 -05:00
/*
2017-03-31 12:11:34 -05:00
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
2016-10-15 22:55:22 +05:30
2016-08-17 10:36:28 -05:00
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
2016-10-15 22:55:22 +05:30
2016-08-17 10:36:28 -05:00
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2016-10-15 22:55:22 +05:30
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2016-08-17 10:36:28 -05:00
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
2019-03-06 14:01:44 +02:00
#include "hip/hcc_detail/elfio/elfio.hpp"
#include "hip/hcc_detail/hsa_helpers.hpp"
#include "hip/hcc_detail/program_state.hpp"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include <hsa/amd_hsa_kernel_code.h>
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <map>
#include <memory>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
2017-12-28 16:15:45 +05:30
#include "../include/hip/hcc_detail/code_object_bundle.hpp"
2018-11-13 22:28:00 -05:00
#include "hip_fatbin.h"
2018-03-12 11:29:03 +05:30
// TODO Use Pool APIs from HCC to get memory regions.
using namespace ELFIO;
using namespace std;
// calculate MD5 checksum
inline std::string checksum(size_t size, const char *source) {
// FNV-1a hashing, 64-bit version
const uint64_t FNV_prime = 0x100000001b3;
const uint64_t FNV_basis = 0xcbf29ce484222325;
uint64_t hash = FNV_basis;
const char *str = static_cast<const char *>(source);
for (auto i = 0; i < size; ++i) {
hash ^= *str++;
hash *= FNV_prime;
}
return std::to_string(hash);
}
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
2018-03-12 11:29:03 +05:30
assert(Align != 0u && "Align can't be 0.");
Skew %= Align;
return (Value + Align - 1 - Skew) / Align * Align + Skew;
}
2018-03-12 11:29:03 +05:30
struct ihipKernArgInfo {
vector<uint32_t> Size;
vector<uint32_t> Align;
vector<string> ArgType;
vector<string> ArgName;
uint32_t totalSize;
};
map<string, ihipKernArgInfo> kernelArguments;
2017-02-09 17:22:55 -06:00
2018-03-12 11:29:03 +05:30
struct ihipModuleSymbol_t {
uint64_t _object{}; // The kernel object.
amd_kernel_code_t const* _header{};
2018-03-12 11:29:03 +05:30
string _name; // TODO - review for performance cost. Name is just used for debug.
2017-02-09 17:22:55 -06:00
};
template <>
2018-03-12 11:29:03 +05:30
string ToString(hipFunction_t v) {
2017-02-09 17:22:55 -06:00
std::ostringstream ss;
ss << "0x" << std::hex << v->_object;
return ss.str();
};
2019-03-14 22:43:52 -05:00
const std::string& FunctionSymbol(const hipFunction_t f) { return f->_name; };
2017-02-09 17:22:55 -06:00
2018-03-12 11:29:03 +05:30
#define CHECK_HSA(hsaStatus, hipStatus) \
if (hsaStatus != HSA_STATUS_SUCCESS) { \
return hipStatus; \
}
2016-12-17 07:21:15 -06:00
2018-03-12 11:29:03 +05:30
#define CHECKLOG_HSA(hsaStatus, hipStatus) \
if (hsaStatus != HSA_STATUS_SUCCESS) { \
return ihipLogStatus(hipStatus); \
}
2016-12-17 07:19:22 -06:00
2018-03-12 11:29:03 +05:30
hipError_t hipModuleUnload(hipModule_t hmod) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleUnload, hmod);
2017-10-30 20:18:41 +00:00
// TODO - improve this synchronization so it is thread-safe.
// Currently we want for all inflight activity to complete, but don't prevent another
// thread from launching new kernels before we finish this operation.
ihipSynchronize();
2018-03-12 11:29:03 +05:30
delete hmod; // The ihipModule_t dtor will clean everything up.
hmod = nullptr;
return ihipLogStatus(hipSuccess);
}
2018-03-12 11:29:03 +05:30
hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
auto ctx = ihipGetTlsDefaultCtx();
hipError_t ret = hipSuccess;
2018-03-12 11:29:03 +05:30
if (ctx == nullptr) {
ret = hipErrorInvalidDevice;
2018-03-12 11:29:03 +05:30
} else {
int deviceId = ctx->getDevice()->_deviceId;
2018-03-12 11:29:03 +05:30
ihipDevice_t* currentDevice = ihipGetDevice(deviceId);
hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent;
2018-03-12 11:29:03 +05:30
void* config[5] = {0};
size_t kernArgSize;
2018-03-12 11:29:03 +05:30
if (kernelParams != NULL) {
std::string name = f->_name;
struct ihipKernArgInfo pl = kernelArguments[name];
char* argBuf = (char*)malloc(pl.totalSize);
memset(argBuf, 0, pl.totalSize);
int index = 0;
for (int i = 0; i < pl.Size.size(); i++) {
memcpy(argBuf + index, kernelParams[i], pl.Size[i]);
index += pl.Align[i];
}
config[1] = (void*)argBuf;
kernArgSize = pl.totalSize;
} else if (extra != NULL) {
memcpy(config, extra, sizeof(size_t) * 5);
if (config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER &&
config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END) {
kernArgSize = *(size_t*)(config[3]);
2016-09-02 15:49:22 -05:00
} else {
2017-10-30 20:18:41 +00:00
return hipErrorNotInitialized;
}
2018-03-12 11:29:03 +05:30
} else {
2017-10-30 20:18:41 +00:00
return hipErrorInvalidValue;
}
2016-09-07 12:57:18 -05:00
2016-09-02 15:49:22 -05:00
/*
Kernel argument preparation.
*/
2017-02-09 17:22:55 -06:00
grid_launch_parm lp;
2018-03-12 11:29:03 +05:30
lp.dynamic_group_mem_bytes =
sharedMemBytes; // TODO - this should be part of preLaunchKernel.
hStream = ihipPreLaunchKernel(
hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ),
dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str());
2016-12-05 16:55:26 +05:30
hsa_kernel_dispatch_packet_t aql;
memset(&aql, 0, sizeof(aql));
2018-03-12 11:29:03 +05:30
// aql.completion_signal._handle = 0;
// aql.kernarg_address = 0;
2017-03-31 12:11:34 -05:00
aql.workgroup_size_x = localWorkSizeX;
aql.workgroup_size_y = localWorkSizeY;
aql.workgroup_size_z = localWorkSizeZ;
aql.grid_size_x = globalWorkSizeX;
aql.grid_size_y = globalWorkSizeY;
aql.grid_size_z = globalWorkSizeZ;
aql.group_segment_size =
f->_header->workgroup_group_segment_byte_size + sharedMemBytes;
aql.private_segment_size =
f->_header->workitem_private_segment_byte_size;
2017-02-09 17:22:55 -06:00
aql.kernel_object = f->_object;
2016-12-05 20:21:33 -06:00
aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
2018-03-12 11:29:03 +05:30
aql.header =
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE);
if((flags & 0x1)== 0 ) {
//in_order
aql.header |= (1 << HSA_PACKET_HEADER_BARRIER);
}
if (HCC_OPT_FLUSH) {
aql.header |= (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
} else {
aql.header |= (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
};
hc::completion_future cf;
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize,
2018-03-12 11:29:03 +05:30
(startEvent || stopEvent) ? &cf : nullptr
#if (__hcc_workweek__ > 17312)
2018-03-12 11:29:03 +05:30
,
f->_name.c_str()
#endif
2018-03-12 11:29:03 +05:30
);
if (startEvent) {
startEvent->attachToCompletionFuture(&cf, hStream, hipEventTypeStartCommand);
}
if (stopEvent) {
2018-03-12 11:29:03 +05:30
stopEvent->attachToCompletionFuture(&cf, hStream, hipEventTypeStopCommand);
}
2016-12-05 16:55:26 +05:30
2018-03-12 11:29:03 +05:30
if (kernelParams != NULL) {
free(config[1]);
}
2017-02-10 13:32:13 -06:00
ihipPostLaunchKernel(f->_name.c_str(), hStream, lp);
2016-08-16 14:36:25 -05:00
}
2017-03-31 12:11:34 -05:00
return ret;
2016-08-16 14:36:25 -05:00
}
2018-03-12 11:29:03 +05:30
hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY,
uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream,
void** kernelParams, void** extra) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
2018-03-12 11:29:03 +05:30
hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY,
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0));
2017-03-31 12:11:34 -05:00
}
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
2019-03-14 22:43:52 -05:00
HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
}
2017-03-31 12:11:34 -05:00
2018-03-12 11:29:03 +05:30
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
2018-03-12 11:29:03 +05:30
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0));
2017-03-31 12:11:34 -05:00
}
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
hipModule_t hmod, const char* name) {
HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name);
if (!dptr || !bytes || !hmod) return hipErrorInvalidValue;
if (!name) return hipErrorNotInitialized;
return hip_impl::read_agent_global_from_module(dptr, bytes, hmod, name);
}
2019-03-06 14:01:44 +02:00
namespace hip_impl {
hsa_executable_t executable_for(hipModule_t hmod) {
return hmod->executable;
}
const char* hash_for(hipModule_t hmod) {
return hmod->hash.c_str();
2019-03-06 14:01:44 +02:00
}
hsa_agent_t this_agent() {
auto ctx = ihipGetTlsDefaultCtx();
if (!ctx) throw runtime_error{"No active HIP context."};
auto device = ctx->getDevice();
if (!device) throw runtime_error{"No device available for HIP."};
2019-03-06 14:01:44 +02:00
ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId);
if (!currentDevice) throw runtime_error{"No active device for HIP."};
return currentDevice->_hsaAgent;
}
} // Namespace hip_impl.
namespace {
2018-11-13 07:01:17 +05:30
inline void track(const Agent_global& x, hsa_agent_t agent) {
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name,
2018-03-12 11:29:03 +05:30
x.address, x.byte_cnt);
2018-11-13 07:01:17 +05:30
int deviceIndex =0;
for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) {
if(g_allAgents[deviceIndex] == agent)
break;
}
auto device = ihipGetDevice(deviceIndex - 1);
2018-03-12 11:29:03 +05:30
hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true,
false);
hc::am_memtracker_add(x.address, ptr_info);
2018-10-31 03:22:38 +05:30
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(x.address, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
#else
2018-03-12 11:29:03 +05:30
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
2018-10-31 03:22:38 +05:30
#endif
2018-03-12 11:29:03 +05:30
}
2018-03-12 11:29:03 +05:30
template <typename Container = vector<Agent_global>>
2018-11-13 07:01:17 +05:30
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent,
2018-03-12 11:29:03 +05:30
hsa_executable_symbol_t x, void* out) {
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-03-12 11:29:03 +05:30
assert(out);
hsa_symbol_kind_t t = {};
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &t);
2018-03-12 11:29:03 +05:30
if (t == HSA_SYMBOL_KIND_VARIABLE) {
Agent_global tmp(name(x).c_str(), address(x), size(x));
static_cast<Container*>(out)->push_back(std::move(tmp));
2018-03-12 11:29:03 +05:30
2018-11-13 07:01:17 +05:30
track(static_cast<Container*>(out)->back(),agent);
}
2018-03-12 11:29:03 +05:30
return HSA_STATUS_SUCCESS;
}
hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname,
2019-04-05 22:54:41 -04:00
hsa_agent_t* agent = nullptr) {
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-03-12 11:29:03 +05:30
pair<const char*, hsa_executable_symbol_t> r{kname, {}};
2018-03-12 11:29:03 +05:30
hsa_executable_iterate_agent_symbols(
2019-04-05 22:54:41 -04:00
executable, agent ? *agent : this_agent(),
2018-03-12 11:29:03 +05:30
[](hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* s) {
auto p = static_cast<pair<const char*, hsa_executable_symbol_t>*>(s);
2018-03-12 11:29:03 +05:30
if (type(x) != HSA_SYMBOL_KIND_KERNEL) {
return HSA_STATUS_SUCCESS;
}
if (name(x) != p->first) return HSA_STATUS_SUCCESS;
2018-03-12 11:29:03 +05:30
p->second = x;
2018-03-12 11:29:03 +05:30
return HSA_STATUS_INFO_BREAK;
},
&r);
2018-03-12 11:29:03 +05:30
return r.second;
}
2019-03-06 14:01:44 +02:00
string read_elf_file_as_string(const void* file) {
// Precondition: file points to an ELF image that was BITWISE loaded
2018-03-12 11:29:03 +05:30
// into process accessible memory, and not one loaded by
// the loader. This is because in the latter case
// alignment may differ, which will break the size
// computation.
// the image is Elf64, and matches endianness i.e. it is
// Little Endian.
if (!file) return {};
2019-03-06 14:01:44 +02:00
auto h = static_cast<const ELFIO::Elf64_Ehdr*>(file);
2018-03-12 11:29:03 +05:30
auto s = static_cast<const char*>(file);
// This assumes the common case of SHT being the last part of the ELF.
2019-03-06 14:01:44 +02:00
auto sz =
sizeof(ELFIO::Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum;
2018-03-12 11:29:03 +05:30
return string{s, s + sz};
}
2018-03-12 11:29:03 +05:30
string code_object_blob_for_agent(const void* maybe_bundled_code, hsa_agent_t agent) {
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-03-12 11:29:03 +05:30
if (!maybe_bundled_code) return {};
2018-03-12 11:29:03 +05:30
Bundled_code_header tmp{maybe_bundled_code};
2018-03-12 11:29:03 +05:30
if (!valid(tmp)) return {};
2018-03-12 11:29:03 +05:30
const auto agent_isa = isa(agent);
2018-03-12 11:29:03 +05:30
const auto it = find_if(bundles(tmp).cbegin(), bundles(tmp).cend(), [=](const Bundled_code& x) {
return agent_isa == triple_to_hsa_isa(x.triple);
;
});
2018-03-12 11:29:03 +05:30
if (it == bundles(tmp).cend()) return {};
2018-03-12 11:29:03 +05:30
return string{it->blob.cbegin(), it->blob.cend()};
}
2019-03-06 14:01:44 +02:00
} // Unnamed namespace.
namespace hip_impl {
vector<Agent_global> read_agent_globals(hsa_agent_t agent,
hsa_executable_t executable) {
vector<Agent_global> r;
hsa_executable_iterate_agent_symbols(
executable, agent, copy_agent_global_variables, &r);
return r;
}
} // Namespace hip_impl.
hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const char* name,
hsa_agent_t *agent = nullptr) {
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-04-16 15:35:04 +05:30
if (!func || !name) return hipErrorInvalidValue;
auto ctx = ihipGetTlsDefaultCtx();
2018-04-16 15:35:04 +05:30
if (!ctx) return hipErrorInvalidContext;
*func = new ihipModuleSymbol_t;
2018-04-16 15:35:04 +05:30
if (!*func) return hipErrorInvalidValue;
2019-04-05 22:54:41 -04:00
auto kernel = find_kernel_by_name(hmod->executable, name, agent);
2018-04-16 15:35:04 +05:30
if (kernel.handle == 0u) return hipErrorNotFound;
// TODO: refactor the whole ihipThisThat, which is a mess and yields the
// below, due to hipFunction_t being a pointer to ihipModuleSymbol_t.
func[0][0] = *static_cast<hipFunction_t>(
Kernel_descriptor{kernel_object(kernel), name});
2018-04-16 15:35:04 +05:30
return hipSuccess;
}
// Get kernel for the current hsa agent.
2018-03-12 11:29:03 +05:30
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name));
}
// Get kernel for the given hsa agent. Internal use only.
hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod,
const char* name, hsa_agent_t *agent) {
HIP_INIT_API(hipModuleGetFunctionEx, hfunc, hmod, name);
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name, agent));
}
2019-03-06 14:01:44 +02:00
namespace {
hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) {
hipFuncAttributes r{};
hipDeviceProp_t prop{};
hipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId);
// TODO: at the moment there is no way to query the count of registers
// available per CU, therefore we hardcode it to 64 KiRegisters.
prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024;
r.localSizeBytes = header.workitem_private_segment_byte_size;
r.sharedSizeBytes = header.workgroup_group_segment_byte_size;
r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes;
r.numRegs = header.workitem_vgpr_count;
r.maxThreadsPerBlock = r.numRegs ?
std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) :
prop.maxThreadsPerBlock;
r.binaryVersion =
header.amd_machine_version_major * 10 +
header.amd_machine_version_minor;
r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0.
2018-11-06 09:54:34 +01:00
return r;
}
2019-03-06 14:01:44 +02:00
} // Unnamed namespace.
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
{
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-05-14 17:15:36 +01:00
if (!attr) return hipErrorInvalidValue;
if (!func) return hipErrorInvalidDeviceFunction;
auto agent = this_agent();
2019-03-27 18:19:10 +00:00
const auto it = functions(agent).find(reinterpret_cast<uintptr_t>(func));
2019-03-27 18:19:10 +00:00
if (it == functions(agent).cend()) return hipErrorInvalidDeviceFunction;
2019-03-27 18:19:10 +00:00
const auto header = static_cast<hipFunction_t>(it->second)->_header;
if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};
*attr = make_function_attributes(*header);
return hipSuccess;
}
2018-04-16 15:35:04 +05:30
hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-04-16 15:35:04 +05:30
if (!module) return hipErrorInvalidValue;
2018-04-16 15:35:04 +05:30
*module = new ihipModule_t;
2018-04-16 15:35:04 +05:30
auto ctx = ihipGetTlsDefaultCtx();
if (!ctx) return hipErrorInvalidContext;
2018-11-13 22:28:00 -05:00
// try extracting code object from image as fatbin.
char name[64] = {};
hsa_agent_get_info(this_agent(), HSA_AGENT_INFO_NAME, name);
if (auto *code_obj = __hipExtractCodeObjectFromFatBinary(image, name))
image = code_obj;
2018-04-16 15:35:04 +05:30
hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr,
&(*module)->executable);
auto tmp = code_object_blob_for_agent(image, this_agent());
auto content = tmp.empty() ? read_elf_file_as_string(image) : tmp;
2019-03-06 14:01:44 +02:00
(*module)->executable = load_executable(content, (*module)->executable,
this_agent());
// compute the hash of the code object
(*module)->hash = checksum(content.length(), content.data());
2018-04-16 15:35:04 +05:30
return (*module)->executable.handle ? hipSuccess : hipErrorUnknown;
}
2018-03-12 11:29:03 +05:30
hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleLoadData, module, image);
2018-04-16 15:35:04 +05:30
return ihipLogStatus(ihipModuleLoadData(module,image));
}
2018-04-16 15:35:04 +05:30
hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleLoad, module, fname);
2018-04-16 15:35:04 +05:30
if (!fname) return ihipLogStatus(hipErrorInvalidValue);
2018-04-16 15:35:04 +05:30
ifstream file{fname};
2016-12-17 07:19:22 -06:00
2018-04-16 15:35:04 +05:30
if (!file.is_open()) return ihipLogStatus(hipErrorFileNotFound);
2018-04-16 15:35:04 +05:30
vector<char> tmp{istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
2016-12-17 07:19:22 -06:00
2018-04-16 15:35:04 +05:30
return ihipLogStatus(ihipModuleLoadData(module, tmp.data()));
}
2018-03-12 11:29:03 +05:30
hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
hipJitOption* options, void** optionValues) {
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleLoadDataEx, module, image, numOptions, options, optionValues);
2018-04-16 15:35:04 +05:30
return ihipLogStatus(ihipModuleLoadData(module, image));
}
2017-11-09 22:10:55 +05:30
2018-03-12 11:29:03 +05:30
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) {
2019-03-06 14:01:44 +02:00
using namespace hip_impl;
2018-11-08 08:36:50 -06:00
HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name);
2017-11-21 21:19:06 +05:30
hipError_t ret = hipErrorNotFound;
2018-03-12 11:29:03 +05:30
if (!texRef) return ihipLogStatus(hipErrorInvalidValue);
2018-03-12 11:29:03 +05:30
if (!hmod || !name) return ihipLogStatus(hipErrorNotInitialized);
const auto it = globals().find(name);
if (it == globals().end()) return ihipLogStatus(hipErrorInvalidValue);
*texRef = reinterpret_cast<textureReference*>(it->second);
return ihipLogStatus(hipSuccess);
2017-11-09 22:10:55 +05:30
}