SWDEV-290384 - Add Linker API support in hiprtc
Change-Id: I4621a033a22e4da0201c3804e2b357470a681ab0
Этот коммит содержится в:
коммит произвёл
Karthik Jayaprakash
родитель
176acb9315
Коммит
84f94fd134
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
hipError_t ihipFree(void* ptr);
|
||||
//forward declaration of methods required for managed variables
|
||||
// forward declaration of methods required for managed variables
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
namespace {
|
||||
size_t constexpr strLiteralLength(char const* str) {
|
||||
@@ -67,9 +67,7 @@ struct __ClangOffloadBundleHeader {
|
||||
|
||||
namespace hip {
|
||||
|
||||
uint64_t CodeObject::ElfSize(const void *emi) {
|
||||
return amd::Elf::getElfSize(emi);
|
||||
}
|
||||
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
|
||||
|
||||
static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported,
|
||||
bool& sramEccSupported) {
|
||||
@@ -375,10 +373,10 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
}
|
||||
|
||||
// This will be moved to COMGR eventually
|
||||
hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize,
|
||||
const void ** image, const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
|
||||
hipError_t CodeObject::ExtractCodeObjectFromFile(
|
||||
amd::Os::FileDesc fdesc, size_t fsize, const void** image,
|
||||
const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
hipError_t hip_error = hipSuccess;
|
||||
|
||||
if (fdesc < 0) {
|
||||
@@ -386,8 +384,8 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t
|
||||
}
|
||||
|
||||
// Map the file to memory, with offset 0.
|
||||
//file will be unmapped in ModuleUnload
|
||||
//const void* image = nullptr;
|
||||
// file will be unmapped in ModuleUnload
|
||||
// const void* image = nullptr;
|
||||
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
@@ -399,11 +397,9 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t
|
||||
}
|
||||
|
||||
// This will be moved to COMGR eventually
|
||||
hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data,
|
||||
const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs,
|
||||
std::string& uri) {
|
||||
|
||||
hipError_t CodeObject::ExtractCodeObjectFromMemory(
|
||||
const void* data, const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs, std::string& uri) {
|
||||
// Get the URI from memory
|
||||
if (!amd::Os::GetURIFromMemory(data, 0, uri)) {
|
||||
return hipErrorInvalidValue;
|
||||
@@ -413,9 +409,9 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data,
|
||||
}
|
||||
|
||||
// This will be moved to COMGR eventually
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data,
|
||||
const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
const void* data, const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
std::string magic((const char*)data, bundle_magic_string_size);
|
||||
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
|
||||
return hipErrorInvalidKernelFile;
|
||||
@@ -492,12 +488,11 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data,
|
||||
}
|
||||
|
||||
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
|
||||
|
||||
amd::ScopedLock lock(dclock_);
|
||||
|
||||
// Number of devices = 1 in dynamic code object
|
||||
fb_info_ = new FatBinaryInfo(fname, image);
|
||||
std::vector<hip::Device*> devices = { g_devices[ihipGetDevice()] };
|
||||
std::vector<hip::Device*> devices = {g_devices[ihipGetDevice()]};
|
||||
IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices));
|
||||
|
||||
// No Lazy loading for DynCO
|
||||
@@ -512,12 +507,12 @@ hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
//Dynamic Code Object
|
||||
// Dynamic Code Object
|
||||
DynCO::~DynCO() {
|
||||
amd::ScopedLock lock(dclock_);
|
||||
|
||||
for (auto& elem : vars_) {
|
||||
if(elem.second->getVarKind() == Var::DVK_Managed) {
|
||||
if (elem.second->getVarKind() == Var::DVK_Managed) {
|
||||
hipError_t err = ihipFree(elem.second->getManagedVarPtr());
|
||||
assert(err == hipSuccess);
|
||||
}
|
||||
@@ -553,7 +548,7 @@ hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) {
|
||||
|
||||
CheckDeviceIdMatch();
|
||||
|
||||
if(hfunc == nullptr) {
|
||||
if (hfunc == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -655,9 +650,8 @@ hipError_t DynCO::populateDynGlobalFuncs() {
|
||||
amd::ScopedLock lock(dclock_);
|
||||
|
||||
std::vector<std::string> func_names;
|
||||
device::Program* dev_program
|
||||
= fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram(
|
||||
*hip::getCurrentDevice()->devices()[0]);
|
||||
device::Program* dev_program = fb_info_->GetProgram(ihipGetDevice())
|
||||
->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
|
||||
|
||||
// Get all the global func names from COMGR
|
||||
if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) {
|
||||
@@ -672,9 +666,8 @@ hipError_t DynCO::populateDynGlobalFuncs() {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
//Static Code Object
|
||||
StatCO::StatCO() {
|
||||
}
|
||||
// Static Code Object
|
||||
StatCO::StatCO() {}
|
||||
|
||||
StatCO::~StatCO() {
|
||||
amd::ScopedLock lock(sclock_);
|
||||
@@ -784,7 +777,8 @@ hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, i
|
||||
return it->second->getStatFunc(hfunc, deviceId);
|
||||
}
|
||||
|
||||
hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) {
|
||||
hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction,
|
||||
int deviceId) {
|
||||
amd::ScopedLock lock(sclock_);
|
||||
|
||||
const auto it = functions_.find(hostFunction);
|
||||
@@ -838,10 +832,9 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
|
||||
IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId));
|
||||
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
if(queue != nullptr) {
|
||||
err = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()),
|
||||
var->getManagedVarPtr(),
|
||||
dvar->size(), hipMemcpyHostToDevice, *queue);
|
||||
if (queue != nullptr) {
|
||||
err = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), var->getManagedVarPtr(),
|
||||
dvar->size(), hipMemcpyHostToDevice, *queue);
|
||||
} else {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
|
||||
return hipErrorInvalidResourceHandle;
|
||||
@@ -851,4 +844,4 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
|
||||
}
|
||||
return err;
|
||||
}
|
||||
}; //namespace: hip
|
||||
}; // namespace hip
|
||||
|
||||
@@ -30,7 +30,7 @@ thread_local std::vector<hip::Stream*> l_captureStreams;
|
||||
thread_local hipStreamCaptureMode l_streamCaptureMode{hipStreamCaptureModeGlobal};
|
||||
|
||||
inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph,
|
||||
const hipGraphNode_t* pDependencies, size_t numDependencies) {
|
||||
const hipGraphNode_t* pDependencies, size_t numDependencies) {
|
||||
graph->AddNode(graphNode);
|
||||
for (size_t i = 0; i < numDependencies; i++) {
|
||||
if (!hipGraphNode::isNodeValid(pDependencies[i])) {
|
||||
@@ -43,7 +43,6 @@ inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph,
|
||||
|
||||
|
||||
hipError_t ihipValidateKernelParams(const hipKernelNodeParams* pNodeParams) {
|
||||
|
||||
if (pNodeParams->kernelParams == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
@@ -136,8 +135,9 @@ hipError_t ihipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph,
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
// The element size must be 1, 2, or 4 bytes
|
||||
if (pMemsetParams->elementSize != sizeof(int8_t) && pMemsetParams->elementSize != sizeof(int16_t)
|
||||
&& pMemsetParams->elementSize != sizeof(int32_t)) {
|
||||
if (pMemsetParams->elementSize != sizeof(int8_t) &&
|
||||
pMemsetParams->elementSize != sizeof(int16_t) &&
|
||||
pMemsetParams->elementSize != sizeof(int32_t)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -704,8 +704,9 @@ hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*&
|
||||
hostParams.userData = userData;
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
hipGraphNode_t pGraphNode = new hipGraphHostNode(&hostParams);
|
||||
hipError_t status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(),
|
||||
s->GetLastCapturedNodes().size());
|
||||
hipError_t status =
|
||||
ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(),
|
||||
s->GetLastCapturedNodes().size());
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -729,8 +730,7 @@ hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCap
|
||||
hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode) {
|
||||
HIP_INIT_API(hipThreadExchangeStreamCaptureMode, mode);
|
||||
|
||||
if (mode == nullptr ||
|
||||
*mode < hipStreamCaptureModeGlobal ||
|
||||
if (mode == nullptr || *mode < hipStreamCaptureModeGlobal ||
|
||||
*mode > hipStreamCaptureModeRelaxed) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -751,8 +751,7 @@ hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode)
|
||||
if (stream == nullptr) {
|
||||
HIP_RETURN(hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
if (mode < hipStreamCaptureModeGlobal ||
|
||||
mode > hipStreamCaptureModeRelaxed) {
|
||||
if (mode < hipStreamCaptureModeGlobal || mode > hipStreamCaptureModeRelaxed) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
@@ -899,7 +898,7 @@ hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph,
|
||||
hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src,
|
||||
size_t count, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphMemcpyNodeSetParams1D, node, dst, src, count, kind);
|
||||
if (node == nullptr || dst == nullptr || src == nullptr || count == 0 || src == dst ) {
|
||||
if (node == nullptr || dst == nullptr || src == nullptr || count == 0 || src == dst) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -910,8 +909,8 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraph
|
||||
void* dst, const void* src, size_t count,
|
||||
hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphExecMemcpyNodeSetParams1D, hGraphExec, node, dst, src, count, kind);
|
||||
if (hGraphExec == nullptr || node == nullptr || dst == nullptr ||
|
||||
src == nullptr || count == 0 || src == dst ) {
|
||||
if (hGraphExec == nullptr || node == nullptr || dst == nullptr || src == nullptr || count == 0 ||
|
||||
src == dst) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -996,7 +995,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t g
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
//invalid flag check
|
||||
// invalid flag check
|
||||
if (flags != 0 && flags != hipGraphInstantiateFlagAutoFreeOnLaunch) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1248,7 +1247,8 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGra
|
||||
hipGraphNode_t hipErrorNode_out;
|
||||
hipGraphExecUpdateResult updateResult_out;
|
||||
// Check if this instantiated graph is updatable. All restrictions in hipGraphExecUpdate() apply.
|
||||
hipError_t status = hipGraphExecUpdate(hGraphExec, childGraph, &hipErrorNode_out, &updateResult_out);
|
||||
hipError_t status =
|
||||
hipGraphExecUpdate(hGraphExec, childGraph, &hipErrorNode_out, &updateResult_out);
|
||||
if (status != hipSuccess) {
|
||||
HIP_RETURN(status);
|
||||
}
|
||||
@@ -1354,8 +1354,7 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* fr
|
||||
hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to,
|
||||
size_t* numEdges) {
|
||||
HIP_INIT_API(hipGraphGetEdges, graph, from, to, numEdges);
|
||||
if (graph == nullptr || numEdges == nullptr ||
|
||||
(from == nullptr && to != nullptr) ||
|
||||
if (graph == nullptr || numEdges == nullptr || (from == nullptr && to != nullptr) ||
|
||||
(to == nullptr && from != nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1463,7 +1462,6 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph) {
|
||||
HIP_INIT_API(hipGraphClone, pGraphClone, originalGraph);
|
||||
if (originalGraph == nullptr || pGraphClone == nullptr) {
|
||||
@@ -1501,8 +1499,9 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_
|
||||
size_t count, size_t offset, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphAddMemcpyNodeFromSymbol, pGraphNode, graph, pDependencies, numDependencies,
|
||||
dst, symbol, count, offset, kind);
|
||||
if (graph == nullptr || pGraphNode == nullptr || (numDependencies > 0 && pDependencies == nullptr)
|
||||
|| dst == nullptr || !ihipGraph::isGraphValid(graph)) {
|
||||
if (graph == nullptr || pGraphNode == nullptr ||
|
||||
(numDependencies > 0 && pDependencies == nullptr) || dst == nullptr ||
|
||||
!ihipGraph::isGraphValid(graph)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -1521,7 +1520,7 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_
|
||||
hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol,
|
||||
size_t count, size_t offset, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphMemcpyNodeSetParamsFromSymbol, node, dst, symbol, count, offset, kind);
|
||||
if (symbol == nullptr) {
|
||||
if (symbol == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
if (node == nullptr || dst == nullptr || count == 0 || symbol == dst) {
|
||||
@@ -1537,7 +1536,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec,
|
||||
size_t offset, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsFromSymbol, hGraphExec, node, dst, symbol, count,
|
||||
offset, kind);
|
||||
if (symbol == nullptr) {
|
||||
if (symbol == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
if (hGraphExec == nullptr || node == nullptr || dst == nullptr || count == 0 || symbol == dst) {
|
||||
@@ -1560,7 +1559,7 @@ hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t
|
||||
HIP_INIT_API(hipGraphAddMemcpyNodeToSymbol, pGraphNode, graph, pDependencies, numDependencies,
|
||||
symbol, src, count, offset, kind);
|
||||
if (pGraphNode == nullptr || graph == nullptr || src == nullptr ||
|
||||
!ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) {
|
||||
!ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
size_t sym_size = 0;
|
||||
@@ -1581,7 +1580,7 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void*
|
||||
const void* src, size_t count, size_t offset,
|
||||
hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphMemcpyNodeSetParamsToSymbol, symbol, src, count, offset, kind);
|
||||
if (symbol == nullptr) {
|
||||
if (symbol == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
if (node == nullptr || src == nullptr || count == 0 || symbol == src) {
|
||||
@@ -1599,7 +1598,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hi
|
||||
hipMemcpyKind kind) {
|
||||
HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsToSymbol, hGraphExec, node, symbol, src, count,
|
||||
offset, kind);
|
||||
if (symbol == nullptr) {
|
||||
if (symbol == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidSymbol);
|
||||
}
|
||||
if (hGraphExec == nullptr || src == nullptr || node == nullptr || count == 0 || src == symbol) {
|
||||
@@ -1619,8 +1618,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t gra
|
||||
hipEvent_t event) {
|
||||
HIP_INIT_API(hipGraphAddEventRecordNode, pGraphNode, graph, pDependencies, numDependencies,
|
||||
event);
|
||||
if (pGraphNode == nullptr || graph == nullptr || (numDependencies > 0 && pDependencies == nullptr)
|
||||
|| event == nullptr) {
|
||||
if (pGraphNode == nullptr || graph == nullptr ||
|
||||
(numDependencies > 0 && pDependencies == nullptr) || event == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
*pGraphNode = new hipGraphEventRecordNode(event);
|
||||
@@ -1630,8 +1629,7 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t gra
|
||||
|
||||
hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) {
|
||||
HIP_INIT_API(hipGraphEventRecordNodeGetEvent, node, event_out);
|
||||
if (node == nullptr || event_out == nullptr ||
|
||||
node->GetType() != hipGraphNodeTypeEventRecord) {
|
||||
if (node == nullptr || event_out == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
reinterpret_cast<hipGraphEventRecordNode*>(node)->GetParams(event_out);
|
||||
@@ -1640,8 +1638,7 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* even
|
||||
|
||||
hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) {
|
||||
HIP_INIT_API(hipGraphEventRecordNodeSetEvent, node, event);
|
||||
if (node == nullptr || event == nullptr ||
|
||||
node->GetType() != hipGraphNodeTypeEventRecord ) {
|
||||
if (node == nullptr || event == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN(reinterpret_cast<hipGraphEventRecordNode*>(node)->SetParams(event));
|
||||
@@ -1665,8 +1662,8 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph
|
||||
const hipGraphNode_t* pDependencies, size_t numDependencies,
|
||||
hipEvent_t event) {
|
||||
HIP_INIT_API(hipGraphAddEventWaitNode, pGraphNode, graph, pDependencies, numDependencies, event);
|
||||
if (pGraphNode == nullptr || graph == nullptr || (numDependencies > 0 && pDependencies == nullptr)
|
||||
|| event == nullptr) {
|
||||
if (pGraphNode == nullptr || graph == nullptr ||
|
||||
(numDependencies > 0 && pDependencies == nullptr) || event == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
*pGraphNode = new hipGraphEventWaitNode(event);
|
||||
@@ -1676,8 +1673,7 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph
|
||||
|
||||
hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) {
|
||||
HIP_INIT_API(hipGraphEventWaitNodeGetEvent, node, event_out);
|
||||
if (node == nullptr || event_out == nullptr ||
|
||||
node->GetType() != hipGraphNodeTypeWaitEvent) {
|
||||
if (node == nullptr || event_out == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
reinterpret_cast<hipGraphEventWaitNode*>(node)->GetParams(event_out);
|
||||
@@ -1686,8 +1682,7 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_
|
||||
|
||||
hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) {
|
||||
HIP_INIT_API(hipGraphEventWaitNodeSetEvent, node, event);
|
||||
if (node == nullptr || event == nullptr ||
|
||||
node->GetType() != hipGraphNodeTypeWaitEvent) {
|
||||
if (node == nullptr || event == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN(reinterpret_cast<hipGraphEventWaitNode*>(node)->SetParams(event));
|
||||
@@ -1759,7 +1754,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph,
|
||||
HIP_INIT_API(hipGraphExecUpdate, hGraphExec, hGraph, hErrorNode_out, updateResult_out);
|
||||
// parameter check
|
||||
if (hGraphExec == nullptr || hGraph == nullptr || hErrorNode_out == nullptr ||
|
||||
updateResult_out == nullptr) {
|
||||
updateResult_out == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
|
||||
@@ -267,6 +267,11 @@ hiprtcGetProgramLogSize
|
||||
hiprtcGetCode
|
||||
hiprtcGetCodeSize
|
||||
hiprtcGetErrorString
|
||||
hiprtcLinkCreate
|
||||
hiprtcLinkAddFile
|
||||
hiprtcLinkAddData
|
||||
hiprtcLinkComplete
|
||||
hiprtcLinkDestroy
|
||||
hipMipmappedArrayCreate
|
||||
hipMallocMipmappedArray
|
||||
hipMipmappedArrayDestroy
|
||||
|
||||
@@ -208,6 +208,11 @@ global:
|
||||
hiprtcGetErrorString;
|
||||
hiprtcAddNameExpression;
|
||||
hiprtcVersion;
|
||||
hiprtcLinkCreate;
|
||||
hiprtcLinkAddFile;
|
||||
hiprtcLinkAddData;
|
||||
hiprtcLinkComplete;
|
||||
hiprtcLinkDestroy;
|
||||
hipBindTexture;
|
||||
hipBindTexture2D;
|
||||
hipBindTextureToArray;
|
||||
|
||||
+118
-172
@@ -29,24 +29,15 @@
|
||||
|
||||
hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size);
|
||||
|
||||
extern hipError_t ihipLaunchKernel(const void* hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
int flags);
|
||||
extern hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim,
|
||||
void** args, size_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, int flags);
|
||||
|
||||
const std::string& FunctionName(const hipFunction_t f) {
|
||||
return hip::DeviceFunc::asFunction(f)->kernel()->name();
|
||||
}
|
||||
|
||||
static uint64_t ElfSize(const void *emi)
|
||||
{
|
||||
return amd::Elf::getElfSize(emi);
|
||||
}
|
||||
static uint64_t ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t hmod) {
|
||||
HIP_INIT_API(hipModuleUnload, hmod);
|
||||
@@ -60,31 +51,28 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
|
||||
HIP_RETURN(PlatformState::instance().loadModule(module, fname));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
|
||||
HIP_INIT_API(hipModuleLoadData, module, image);
|
||||
|
||||
HIP_RETURN(PlatformState::instance().loadModule(module, 0, image));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
|
||||
unsigned int numOptions, hipJitOption* options,
|
||||
void** optionsValues)
|
||||
{
|
||||
hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
|
||||
hipJitOption* options, void** optionsValues) {
|
||||
/* TODO: Pass options to Program */
|
||||
HIP_INIT_API(hipModuleLoadDataEx, module, image);
|
||||
|
||||
HIP_RETURN(PlatformState::instance().loadModule(module, 0, image));
|
||||
}
|
||||
|
||||
extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
const std::vector<std::string>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
extern hipError_t __hipExtractCodeObjectFromFatBinary(
|
||||
const void* data, const std::vector<std::string>& devices,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
|
||||
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) {
|
||||
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
|
||||
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
|
||||
|
||||
if(hfunc == nullptr || name == nullptr) {
|
||||
if (hfunc == nullptr || name == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -96,8 +84,8 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name)
|
||||
{
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
|
||||
const char* name) {
|
||||
HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name);
|
||||
|
||||
if (dptr == nullptr || bytes == nullptr) {
|
||||
@@ -134,13 +122,13 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
|
||||
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
||||
}
|
||||
|
||||
const device::Kernel::WorkGroupInfo* wrkGrpInfo
|
||||
= kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo();
|
||||
const device::Kernel::WorkGroupInfo* wrkGrpInfo =
|
||||
kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo();
|
||||
if (wrkGrpInfo == nullptr) {
|
||||
HIP_RETURN(hipErrorMissingConfiguration);
|
||||
}
|
||||
|
||||
switch(attrib) {
|
||||
switch (attrib) {
|
||||
case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES:
|
||||
*value = static_cast<int>(wrkGrpInfo->localMemSize_);
|
||||
break;
|
||||
@@ -157,7 +145,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
|
||||
*value = static_cast<int>(wrkGrpInfo->usedVGPRs_);
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_PTX_VERSION:
|
||||
*value = 30; // Defaults to 3.0 as HCC
|
||||
*value = 30; // Defaults to 3.0 as HCC
|
||||
break;
|
||||
case HIP_FUNC_ATTRIBUTE_BINARY_VERSION:
|
||||
*value = static_cast<int>(kernel->signature().version());
|
||||
@@ -171,15 +159,14 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
|
||||
case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT:
|
||||
*value = 0;
|
||||
break;
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
default:
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
||||
{
|
||||
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
|
||||
HIP_INIT_API(hipFuncGetAttributes, attr, func);
|
||||
|
||||
HIP_RETURN_ONFAIL(PlatformState::instance().getStatFuncAttr(attr, func, ihipGetDevice()));
|
||||
@@ -187,7 +174,7 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipFuncSetAttribute ( const void* func, hipFuncAttribute attr, int value ) {
|
||||
hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
|
||||
HIP_INIT_API(hipFuncSetAttribute, func, attr, value);
|
||||
|
||||
// No way to set function attribute yet.
|
||||
@@ -195,8 +182,7 @@ hipError_t hipFuncSetAttribute ( const void* func, hipFuncAttribute attr, int va
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) {
|
||||
|
||||
hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
|
||||
HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig);
|
||||
|
||||
// No way to set cache config yet.
|
||||
@@ -204,7 +190,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig)
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipFuncSetSharedMemConfig ( const void* func, hipSharedMemConfig config) {
|
||||
hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
|
||||
HIP_INIT_API(hipFuncSetSharedMemConfig, func, config);
|
||||
|
||||
// No way to set Shared Memory config function yet.
|
||||
@@ -213,11 +199,10 @@ hipError_t hipFuncSetSharedMemConfig ( const void* func, hipSharedMemConfig conf
|
||||
}
|
||||
|
||||
hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, uint32_t sharedMemBytes,
|
||||
void** kernelParams, void** extra, int deviceId,
|
||||
uint32_t params = 0) {
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, void** kernelParams, void** extra,
|
||||
int deviceId, uint32_t params = 0) {
|
||||
if (f == nullptr) {
|
||||
LogPrintfError("%s", "Function passed is null");
|
||||
return hipErrorInvalidImage;
|
||||
@@ -265,9 +250,9 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
int max_blocks_per_grid = 0;
|
||||
int best_block_size = 0;
|
||||
int block_size = blockDimX * blockDimY * blockDimZ;
|
||||
hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&num_blocks, &max_blocks_per_grid,
|
||||
&best_block_size, *device, f,
|
||||
block_size, sharedMemBytes, true);
|
||||
hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, *device, f, block_size, sharedMemBytes,
|
||||
true);
|
||||
if (err != hipSuccess) {
|
||||
return err;
|
||||
}
|
||||
@@ -374,9 +359,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
amd::Kernel* kernel = function->kernel();
|
||||
amd::ScopedLock lock(function->dflock_);
|
||||
|
||||
hipError_t status =
|
||||
ihipLaunchKernel_validate(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX,
|
||||
blockDimY, blockDimZ, sharedMemBytes, kernelParams, extra, deviceId, params);
|
||||
hipError_t status = ihipLaunchKernel_validate(
|
||||
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, kernelParams, extra, deviceId, params);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -409,16 +394,12 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY,
|
||||
uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void** kernelParams, void** extra) {
|
||||
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY,
|
||||
blockDimZ, sharedMemBytes, hStream, kernelParams, extra);
|
||||
size_t globalWorkSizeX = static_cast<size_t>(gridDimX) * blockDimX;
|
||||
size_t globalWorkSizeY = static_cast<size_t>(gridDimY) * blockDimY;
|
||||
size_t globalWorkSizeZ = static_cast<size_t>(gridDimZ) * blockDimZ;
|
||||
@@ -427,11 +408,10 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
|
||||
HIP_RETURN(hipErrorInvalidConfiguration);
|
||||
}
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, static_cast<uint32_t>(globalWorkSizeX),
|
||||
static_cast<uint32_t>(globalWorkSizeY),
|
||||
static_cast<uint32_t>(globalWorkSizeZ),
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr));
|
||||
HIP_RETURN(ihipModuleLaunchKernel(
|
||||
f, static_cast<uint32_t>(globalWorkSizeX), static_cast<uint32_t>(globalWorkSizeY),
|
||||
static_cast<uint32_t>(globalWorkSizeZ), blockDimX, blockDimY, blockDimZ, sharedMemBytes,
|
||||
hStream, kernelParams, extra, nullptr, nullptr));
|
||||
}
|
||||
|
||||
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
@@ -439,106 +419,77 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags)
|
||||
{
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
|
||||
HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent, flags);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes,
|
||||
hStream, kernelParams, extra, startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent)
|
||||
{
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream, void** kernelParams,
|
||||
void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) {
|
||||
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent);
|
||||
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra,
|
||||
startEvent, stopEvent);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX,
|
||||
blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams,
|
||||
extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, size_t sharedMemBytes,
|
||||
hipStream_t hStream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent)
|
||||
{
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream, void** kernelParams,
|
||||
void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) {
|
||||
HIP_INIT_API(hipModuleLaunchKernelExt, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra, startEvent, stopEvent);
|
||||
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra,
|
||||
startEvent, stopEvent);
|
||||
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX,
|
||||
blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams,
|
||||
extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchKernel_common(const void *hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
extern "C" hipError_t hipLaunchKernel_common(const void* hostFunction, dim3 gridDim, dim3 blockDim,
|
||||
void** args, size_t sharedMemBytes,
|
||||
hipStream_t stream) {
|
||||
STREAM_CAPTURE(hipLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes);
|
||||
return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream,
|
||||
nullptr, nullptr, 0);
|
||||
return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr,
|
||||
nullptr, 0);
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
extern "C" hipError_t hipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim,
|
||||
void** args, size_t sharedMemBytes, hipStream_t stream) {
|
||||
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
|
||||
HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchKernel_spt(const void *hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
extern "C" hipError_t hipLaunchKernel_spt(const void* hostFunction, dim3 gridDim, dim3 blockDim,
|
||||
void** args, size_t sharedMemBytes, hipStream_t stream) {
|
||||
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
|
||||
PER_THREAD_DEFAULT_STREAM(stream);
|
||||
HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
int flags)
|
||||
{
|
||||
HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
|
||||
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags));
|
||||
extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim,
|
||||
void** args, size_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, int flags) {
|
||||
HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
|
||||
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream,
|
||||
startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernel_common(const void* f,
|
||||
dim3 gridDim, dim3 blockDim,
|
||||
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
|
||||
{
|
||||
hipError_t hipLaunchCooperativeKernel_common(const void* f, dim3 gridDim, dim3 blockDim,
|
||||
void** kernelParams, uint32_t sharedMemBytes,
|
||||
hipStream_t hStream) {
|
||||
if (!hip::isValid(hStream)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -556,34 +507,30 @@ hipError_t hipLaunchCooperativeKernel_common(const void* f,
|
||||
}
|
||||
return ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
|
||||
static_cast<uint32_t>(globalWorkSizeY),
|
||||
static_cast<uint32_t>(globalWorkSizeZ),
|
||||
blockDim.x, blockDim.y, blockDim.z,
|
||||
sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0,
|
||||
amd::NDRangeKernelCommand::CooperativeGroups);
|
||||
static_cast<uint32_t>(globalWorkSizeZ), blockDim.x, blockDim.y,
|
||||
blockDim.z, sharedMemBytes, hStream, kernelParams, nullptr, nullptr,
|
||||
nullptr, 0, amd::NDRangeKernelCommand::CooperativeGroups);
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernel(const void* f,
|
||||
dim3 gridDim, dim3 blockDim,
|
||||
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
|
||||
sharedMemBytes, hStream);
|
||||
HIP_RETURN(hipLaunchCooperativeKernel_common(f,gridDim, blockDim, kernelParams, sharedMemBytes, hStream));
|
||||
hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
|
||||
void** kernelParams, uint32_t sharedMemBytes,
|
||||
hipStream_t hStream) {
|
||||
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream);
|
||||
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes,
|
||||
hStream));
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernel_spt(const void* f,
|
||||
dim3 gridDim, dim3 blockDim,
|
||||
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
|
||||
sharedMemBytes, hStream);
|
||||
hipError_t hipLaunchCooperativeKernel_spt(const void* f, dim3 gridDim, dim3 blockDim,
|
||||
void** kernelParams, uint32_t sharedMemBytes,
|
||||
hipStream_t hStream) {
|
||||
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream);
|
||||
PER_THREAD_DEFAULT_STREAM(hStream);
|
||||
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, hStream));
|
||||
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes,
|
||||
hStream));
|
||||
}
|
||||
|
||||
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags, uint32_t extFlags)
|
||||
{
|
||||
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
|
||||
unsigned int flags, uint32_t extFlags) {
|
||||
int numActiveGPUs = 0;
|
||||
hipError_t result = hipSuccess;
|
||||
result = ihipDeviceGetCount(&numActiveGPUs);
|
||||
@@ -594,7 +541,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
// Validate all streams passed by user
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
if (!hip::isValid(launchParamsList[i].stream)) {
|
||||
return hipErrorInvalidValue;
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -605,8 +552,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
uint32_t blockDims = 0;
|
||||
const hipLaunchParams& launch = launchParamsList[i];
|
||||
blockDims = launch.blockDim.x * launch.blockDim.y * launch.blockDim.z;
|
||||
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z *
|
||||
blockDims;
|
||||
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z * blockDims;
|
||||
|
||||
// Make sure block dimensions are valid
|
||||
if (0 == blockDims) {
|
||||
@@ -633,7 +579,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) {
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
amd::HostQueue* queue =
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
|
||||
queue->finish();
|
||||
}
|
||||
}
|
||||
@@ -666,11 +612,11 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
|
||||
HIP_RETURN(hipErrorInvalidConfiguration);
|
||||
}
|
||||
result = ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
|
||||
static_cast<uint32_t>(globalWorkSizeY), static_cast<uint32_t>(globalWorkSizeZ),
|
||||
launch.blockDim.x, launch.blockDim.y, launch.blockDim.z,
|
||||
launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
|
||||
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
|
||||
result = ihipModuleLaunchKernel(
|
||||
func, static_cast<uint32_t>(globalWorkSizeX), static_cast<uint32_t>(globalWorkSizeY),
|
||||
static_cast<uint32_t>(globalWorkSizeZ), launch.blockDim.x, launch.blockDim.y,
|
||||
launch.blockDim.z, launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
|
||||
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
|
||||
if (result != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
@@ -681,7 +627,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) {
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
amd::HostQueue* queue =
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
|
||||
queue->finish();
|
||||
}
|
||||
}
|
||||
@@ -689,18 +635,18 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
return result;
|
||||
}
|
||||
|
||||
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags)
|
||||
{
|
||||
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
|
||||
unsigned int flags) {
|
||||
HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags);
|
||||
|
||||
HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags,
|
||||
(amd::NDRangeKernelCommand::CooperativeGroups |
|
||||
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)));
|
||||
HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(
|
||||
launchParamsList, numDevices, flags,
|
||||
(amd::NDRangeKernelCommand::CooperativeGroups |
|
||||
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)));
|
||||
}
|
||||
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags) {
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
|
||||
unsigned int flags) {
|
||||
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
|
||||
|
||||
HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0));
|
||||
@@ -720,7 +666,7 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const
|
||||
HIP_RETURN(hipErrorNotSupported);
|
||||
}
|
||||
|
||||
/* Get address and size for the global symbol */
|
||||
/* Get address and size for the global symbol */
|
||||
if (hipSuccess != PlatformState::instance().getDynTexRef(name, hmod, texRef)) {
|
||||
LogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", name, hmod);
|
||||
HIP_RETURN(hipErrorNotFound);
|
||||
|
||||
+205
-238
@@ -27,12 +27,12 @@
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
||||
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
||||
|
||||
thread_local std::stack<ihipExec_t> execStack_;
|
||||
PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
|
||||
PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
|
||||
|
||||
//forward declaration of methods required for __hipRegisrterManagedVar
|
||||
// forward declaration of methods required for __hipRegisrterManagedVar
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
@@ -40,26 +40,23 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
unsigned int version;
|
||||
void* binary;
|
||||
void* dummy1;
|
||||
void* binary;
|
||||
void* dummy1;
|
||||
};
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
||||
hipModule_t hmod, const char* name);
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
|
||||
const char* name);
|
||||
|
||||
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
||||
hipDeviceptr_t* dptr, size_t* bytes);
|
||||
|
||||
extern hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
|
||||
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
|
||||
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0);
|
||||
static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
|
||||
const char* device_name) {
|
||||
extern hipError_t ihipModuleLaunchKernel(
|
||||
hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX,
|
||||
uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent,
|
||||
uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
|
||||
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0);
|
||||
static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) {
|
||||
// Workaround for device name mismatch.
|
||||
// Device name may contain feature strings delimited by '+', e.g.
|
||||
// gfx900+xnack. Currently HIP-Clang does not include feature strings
|
||||
@@ -73,8 +70,7 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
|
||||
return codeobj_target_id == short_name;
|
||||
}
|
||||
|
||||
extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data)
|
||||
{
|
||||
extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) {
|
||||
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
|
||||
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
|
||||
LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic,
|
||||
@@ -84,21 +80,14 @@ extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data)
|
||||
return PlatformState::instance().addFatBinary(fbwrapper->binary);
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterFunction(
|
||||
hip::FatBinaryInfo** modules,
|
||||
const void* hostFunction,
|
||||
char* deviceFunction,
|
||||
const char* deviceName,
|
||||
unsigned int threadLimit,
|
||||
uint3* tid,
|
||||
uint3* bid,
|
||||
dim3* blockDim,
|
||||
dim3* gridDim,
|
||||
int* wSize) {
|
||||
static int enable_deferred_loading { []() {
|
||||
char *var = getenv("HIP_ENABLE_DEFERRED_LOADING");
|
||||
extern "C" void __hipRegisterFunction(hip::FatBinaryInfo** modules, const void* hostFunction,
|
||||
char* deviceFunction, const char* deviceName,
|
||||
unsigned int threadLimit, uint3* tid, uint3* bid,
|
||||
dim3* blockDim, dim3* gridDim, int* wSize) {
|
||||
static int enable_deferred_loading{[]() {
|
||||
char* var = getenv("HIP_ENABLE_DEFERRED_LOADING");
|
||||
return var ? atoi(var) : 1;
|
||||
}() };
|
||||
}()};
|
||||
hipError_t hip_error = hipSuccess;
|
||||
hip::Function* func = new hip::Function(std::string(deviceName), modules);
|
||||
hip_error = PlatformState::instance().registerStatFunction(hostFunction, func);
|
||||
@@ -121,42 +110,45 @@ extern "C" void __hipRegisterFunction(
|
||||
// track of the value of the device side global variable between kernel
|
||||
// executions.
|
||||
extern "C" void __hipRegisterVar(
|
||||
hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int ext, // Whether this variable is external
|
||||
size_t size, // Size of the variable
|
||||
int constant, // Whether this variable is constant
|
||||
int global) // Unknown, always 0
|
||||
hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int ext, // Whether this variable is external
|
||||
size_t size, // Size of the variable
|
||||
int constant, // Whether this variable is constant
|
||||
int global) // Unknown, always 0
|
||||
{
|
||||
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable, size, 0, 0, modules);
|
||||
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable,
|
||||
size, 0, 0, modules);
|
||||
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
||||
guarantee((err == hipSuccess), "Cannot register Static Global Var");
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterSurface(hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int type, int ext) {
|
||||
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface, sizeof(surfaceReference), 0, 0, modules);
|
||||
extern "C" void __hipRegisterSurface(
|
||||
hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int type, int ext) {
|
||||
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface,
|
||||
sizeof(surfaceReference), 0, 0, modules);
|
||||
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
||||
guarantee((err == hipSuccess), "Cannot register Static Glbal Var");
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
|
||||
void **pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p align
|
||||
// HIP runtime allocates such managed memory and assign it to \p pointer
|
||||
void *init_value, // Initial value to be copied into \p pointer
|
||||
const char *name, // Name of the variable in code object
|
||||
size_t size,
|
||||
unsigned align) {
|
||||
extern "C" void __hipRegisterManagedVar(
|
||||
void* hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
|
||||
void** pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p
|
||||
// align HIP runtime allocates such managed memory and assign it to \p pointer
|
||||
void* init_value, // Initial value to be copied into \p pointer
|
||||
const char* name, // Name of the variable in code object
|
||||
size_t size, unsigned align) {
|
||||
HIP_INIT_VOID();
|
||||
hipError_t status = ihipMallocManaged(pointer, size, align);
|
||||
if( status == hipSuccess) {
|
||||
if (status == hipSuccess) {
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
if(queue != nullptr) {
|
||||
if (queue != nullptr) {
|
||||
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue);
|
||||
guarantee((status == hipSuccess), "Error during memcpy to managed memory!");
|
||||
} else {
|
||||
@@ -171,28 +163,25 @@ extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip mod
|
||||
guarantee((status == hipSuccess), "Cannot register Static Managed Var");
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterTexture(hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int type, int norm, int ext) {
|
||||
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture, sizeof(textureReference), 0, 0, modules);
|
||||
extern "C" void __hipRegisterTexture(
|
||||
hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
char* deviceVar, // Variable name in device code
|
||||
int type, int norm, int ext) {
|
||||
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture,
|
||||
sizeof(textureReference), 0, 0, modules);
|
||||
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
||||
guarantee((err == hipSuccess), "Cannot register Static Global Var");
|
||||
}
|
||||
|
||||
extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules)
|
||||
{
|
||||
extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules) {
|
||||
hipError_t err = PlatformState::instance().removeFatBinary(modules);
|
||||
guarantee((err == hipSuccess), "Cannot Unregister Fat Binary");
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipConfigureCall(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
extern "C" hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
||||
hipStream_t stream) {
|
||||
HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
||||
@@ -200,12 +189,8 @@ extern "C" hipError_t hipConfigureCall(
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t __hipPushCallConfiguration(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
||||
hipStream_t stream) {
|
||||
HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
||||
@@ -213,10 +198,8 @@ extern "C" hipError_t __hipPushCallConfiguration(
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
|
||||
dim3 *blockDim,
|
||||
size_t *sharedMem,
|
||||
hipStream_t *stream) {
|
||||
extern "C" hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
|
||||
hipStream_t* stream) {
|
||||
HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
||||
|
||||
ihipExec_t exec;
|
||||
@@ -229,11 +212,7 @@ extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipSetupArgument(
|
||||
const void *arg,
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
extern "C" hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
|
||||
HIP_INIT_API(hipSetupArgument, arg, size, offset);
|
||||
|
||||
PlatformState::instance().setupArgument(arg, size, offset);
|
||||
@@ -241,15 +220,14 @@ extern "C" hipError_t hipSetupArgument(
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
{
|
||||
extern "C" hipError_t hipLaunchByPtr(const void* hostFunction) {
|
||||
HIP_INIT_API(hipLaunchByPtr, hostFunction);
|
||||
|
||||
ihipExec_t exec;
|
||||
PlatformState::instance().popExec(exec);
|
||||
|
||||
hip::Stream* stream = reinterpret_cast<hip::Stream*>(exec.hStream_);
|
||||
int deviceId = (stream != nullptr)? stream->DeviceId() : ihipGetDevice();
|
||||
int deviceId = (stream != nullptr) ? stream->DeviceId() : ihipGetDevice();
|
||||
if (deviceId == -1) {
|
||||
LogPrintfError("Wrong DeviceId: %d \n", deviceId);
|
||||
HIP_RETURN(hipErrorNoDevice);
|
||||
@@ -262,16 +240,12 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
}
|
||||
|
||||
size_t size = exec.arguments_.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
void* extra[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
|
||||
|
||||
HIP_RETURN(hipModuleLaunchKernel(func,
|
||||
exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
|
||||
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
|
||||
exec.sharedMem_, exec.hStream_, nullptr, extra));
|
||||
HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
|
||||
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
|
||||
exec.sharedMem_, exec.hStream_, nullptr, extra));
|
||||
}
|
||||
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) {
|
||||
@@ -283,7 +257,8 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) {
|
||||
}
|
||||
size_t sym_size = 0;
|
||||
|
||||
HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size));
|
||||
HIP_RETURN_ONFAIL(
|
||||
PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size));
|
||||
|
||||
HIP_RETURN(hipSuccess, *devPtr);
|
||||
}
|
||||
@@ -295,14 +270,14 @@ hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr));
|
||||
HIP_RETURN_ONFAIL(
|
||||
PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr));
|
||||
|
||||
HIP_RETURN(hipSuccess, *sizePtr);
|
||||
}
|
||||
|
||||
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
||||
hipDeviceptr_t* dptr, size_t* bytes)
|
||||
{
|
||||
hipDeviceptr_t* dptr, size_t* bytes) {
|
||||
HIP_INIT();
|
||||
|
||||
/* Get Device Program pointer*/
|
||||
@@ -325,10 +300,8 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor
|
||||
|
||||
namespace hip_impl {
|
||||
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize,
|
||||
const amd::Device& device, hipFunction_t func, int inputBlockSize,
|
||||
size_t dynamicSMemSize, bool bCalcPotentialBlkSz)
|
||||
{
|
||||
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
|
||||
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) {
|
||||
hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func);
|
||||
const amd::Kernel& kernel = *function->kernel();
|
||||
|
||||
@@ -340,14 +313,12 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
*bestBlockSize = 0;
|
||||
// Make sure the requested block size is smaller than max supported
|
||||
if (inputBlockSize > int(device.info().maxWorkGroupSize_)) {
|
||||
*maxBlocksPerCU = 0;
|
||||
*numBlocksPerGrid = 0;
|
||||
return hipSuccess;
|
||||
*maxBlocksPerCU = 0;
|
||||
*numBlocksPerGrid = 0;
|
||||
return hipSuccess;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (inputBlockSize > int(device.info().maxWorkGroupSize_) ||
|
||||
inputBlockSize <= 0) {
|
||||
} else {
|
||||
if (inputBlockSize > int(device.info().maxWorkGroupSize_) || inputBlockSize <= 0) {
|
||||
// The user wrote the kernel to work with a workgroup size
|
||||
// bigger than this hardware can support. Or they do not care
|
||||
// about the size So just assume its maximum size is
|
||||
@@ -367,18 +338,15 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
size_t maxVGPRs;
|
||||
uint32_t VgprGranularity;
|
||||
if (device.isa().versionMajor() <= 9) {
|
||||
if (device.isa().versionMajor() == 9 &&
|
||||
device.isa().versionMinor() == 0 &&
|
||||
if (device.isa().versionMajor() == 9 && device.isa().versionMinor() == 0 &&
|
||||
device.isa().versionStepping() == 10) {
|
||||
maxVGPRs = 512;
|
||||
VgprGranularity = 8;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
maxVGPRs = 256;
|
||||
VgprGranularity = 4;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
maxVGPRs = 1024;
|
||||
VgprGranularity = 8;
|
||||
}
|
||||
@@ -391,12 +359,10 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
size_t maxSGPRs;
|
||||
if (device.isa().versionMajor() < 8) {
|
||||
maxSGPRs = 512;
|
||||
}
|
||||
else if (device.isa().versionMajor() < 10) {
|
||||
} else if (device.isa().versionMajor() < 10) {
|
||||
maxSGPRs = 800;
|
||||
}
|
||||
else {
|
||||
maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves
|
||||
} else {
|
||||
maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves
|
||||
}
|
||||
const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
|
||||
GprWaves = std::min(VgprWaves, SgprWaves);
|
||||
@@ -425,7 +391,8 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
// the maximum available block size for this kernel, which could have come from the
|
||||
// user. e.g., if the user indicates the maximum block size is 64 threads, but we
|
||||
// calculate that 128 threads can fit in each CU, we have to give up and return 64.
|
||||
*bestBlockSize = std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_));
|
||||
*bestBlockSize =
|
||||
std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_));
|
||||
// If the best block size is smaller than the block size used to fit the maximum,
|
||||
// then we need to make the grid bigger for full occupancy.
|
||||
const int bestBlocksPerCU = alu_limited_threads / (*bestBlockSize);
|
||||
@@ -434,13 +401,11 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
}
|
||||
} // namespace hip_impl
|
||||
|
||||
extern "C" {
|
||||
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
const void* f, size_t dynSharedMemPerBlk,
|
||||
int blockSizeLimit)
|
||||
{
|
||||
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f,
|
||||
size_t dynSharedMemPerBlk, int blockSizeLimit) {
|
||||
HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
||||
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
@@ -455,7 +420,8 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
int num_blocks = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit, dynSharedMemPerBlk,true);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit,
|
||||
dynSharedMemPerBlk, true);
|
||||
if (ret == hipSuccess) {
|
||||
*blockSize = best_block_size;
|
||||
*gridSize = max_blocks_per_grid;
|
||||
@@ -463,10 +429,8 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
HIP_RETURN(ret);
|
||||
}
|
||||
|
||||
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
|
||||
hipFunction_t f, size_t dynSharedMemPerBlk,
|
||||
int blockSizeLimit)
|
||||
{
|
||||
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f,
|
||||
size_t dynSharedMemPerBlk, int blockSizeLimit) {
|
||||
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
||||
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
@@ -476,7 +440,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize
|
||||
int num_blocks = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
|
||||
dynSharedMemPerBlk, true);
|
||||
if (ret == hipSuccess) {
|
||||
*blockSize = best_block_size;
|
||||
*gridSize = max_blocks_per_grid;
|
||||
@@ -485,10 +450,12 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize
|
||||
}
|
||||
|
||||
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
|
||||
hipFunction_t f, size_t dynSharedMemPerBlk,
|
||||
int blockSizeLimit, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags);
|
||||
hipFunction_t f,
|
||||
size_t dynSharedMemPerBlk,
|
||||
int blockSizeLimit,
|
||||
unsigned int flags) {
|
||||
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk,
|
||||
blockSizeLimit, flags);
|
||||
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -497,7 +464,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int*
|
||||
int num_blocks = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
|
||||
dynSharedMemPerBlk, true);
|
||||
if (ret == hipSuccess) {
|
||||
*blockSize = best_block_size;
|
||||
*gridSize = max_blocks_per_grid;
|
||||
@@ -505,10 +473,11 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int*
|
||||
HIP_RETURN(ret);
|
||||
}
|
||||
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
||||
hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
|
||||
{
|
||||
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk);
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f,
|
||||
int blockSize,
|
||||
size_t dynSharedMemPerBlk) {
|
||||
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize,
|
||||
dynSharedMemPerBlk);
|
||||
if (numBlocks == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -518,16 +487,16 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
||||
int max_blocks_per_grid = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
|
||||
false);
|
||||
*numBlocks = num_blocks;
|
||||
HIP_RETURN(ret);
|
||||
}
|
||||
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
|
||||
hipFunction_t f, int blockSize,
|
||||
size_t dynSharedMemPerBlk, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags);
|
||||
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) {
|
||||
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize,
|
||||
dynSharedMemPerBlk, flags);
|
||||
if (numBlocks == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -537,14 +506,14 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numB
|
||||
int max_blocks_per_grid = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
|
||||
false);
|
||||
*numBlocks = num_blocks;
|
||||
HIP_RETURN(ret);
|
||||
}
|
||||
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
||||
const void* f, int blockSize, size_t dynamicSMemSize)
|
||||
{
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f,
|
||||
int blockSize, size_t dynamicSMemSize) {
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize);
|
||||
if (numBlocks == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
@@ -562,16 +531,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
|
||||
int max_blocks_per_grid = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
|
||||
false);
|
||||
*numBlocks = num_blocks;
|
||||
HIP_RETURN(ret);
|
||||
}
|
||||
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
|
||||
const void* f,
|
||||
int blockSize, size_t dynamicSMemSize, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags);
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* f,
|
||||
int blockSize,
|
||||
size_t dynamicSMemSize,
|
||||
unsigned int flags) {
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize,
|
||||
flags);
|
||||
if (numBlocks == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -588,7 +559,8 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
|
||||
int max_blocks_per_grid = 0;
|
||||
int best_block_size = 0;
|
||||
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false);
|
||||
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
|
||||
false);
|
||||
*numBlocks = num_blocks;
|
||||
HIP_RETURN(ret);
|
||||
}
|
||||
@@ -599,68 +571,48 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
void hipLaunchKernelGGLImpl(
|
||||
uintptr_t function_address,
|
||||
const dim3& numBlocks,
|
||||
const dim3& dimBlocks,
|
||||
uint32_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
void** kernarg)
|
||||
{
|
||||
void hipLaunchKernelGGLImpl(uintptr_t function_address, const dim3& numBlocks,
|
||||
const dim3& dimBlocks, uint32_t sharedMemBytes, hipStream_t stream,
|
||||
void** kernarg) {
|
||||
HIP_INIT_VOID();
|
||||
|
||||
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
|
||||
int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice();
|
||||
int deviceId = (s != nullptr) ? s->DeviceId() : ihipGetDevice();
|
||||
if (deviceId == -1) {
|
||||
LogPrintfError("Wrong Device Id: %d \n", deviceId);
|
||||
}
|
||||
|
||||
hipFunction_t func = nullptr;
|
||||
hipError_t hip_error =
|
||||
PlatformState::instance().getStatFunc(&func,
|
||||
reinterpret_cast<void*>(function_address),
|
||||
deviceId);
|
||||
hipError_t hip_error = PlatformState::instance().getStatFunc(
|
||||
&func, reinterpret_cast<void*>(function_address), deviceId);
|
||||
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
||||
LogPrintfError("Cannot find the static function: 0x%x", function_address);
|
||||
}
|
||||
|
||||
hip_error = hipModuleLaunchKernel(func,
|
||||
numBlocks.x, numBlocks.y, numBlocks.z,
|
||||
dimBlocks.x, dimBlocks.y, dimBlocks.z,
|
||||
sharedMemBytes, stream, nullptr, kernarg);
|
||||
hip_error =
|
||||
hipModuleLaunchKernel(func, numBlocks.x, numBlocks.y, numBlocks.z, dimBlocks.x, dimBlocks.y,
|
||||
dimBlocks.z, sharedMemBytes, stream, nullptr, kernarg);
|
||||
assert(hip_error == hipSuccess);
|
||||
}
|
||||
|
||||
void hipLaunchCooperativeKernelGGLImpl(
|
||||
uintptr_t function_address,
|
||||
const dim3& numBlocks,
|
||||
const dim3& dimBlocks,
|
||||
uint32_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
void** kernarg)
|
||||
{
|
||||
void hipLaunchCooperativeKernelGGLImpl(uintptr_t function_address, const dim3& numBlocks,
|
||||
const dim3& dimBlocks, uint32_t sharedMemBytes,
|
||||
hipStream_t stream, void** kernarg) {
|
||||
HIP_INIT_VOID();
|
||||
|
||||
hipError_t err = hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address),
|
||||
numBlocks, dimBlocks, kernarg, sharedMemBytes, stream);
|
||||
hipError_t err = hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address), numBlocks,
|
||||
dimBlocks, kernarg, sharedMemBytes, stream);
|
||||
assert(err == hipSuccess);
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace hip_impl
|
||||
|
||||
#endif // defined(ATI_OS_LINUX)
|
||||
#endif // defined(ATI_OS_LINUX)
|
||||
|
||||
hipError_t ihipLaunchKernel(const void* hostFunction,
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
void** args,
|
||||
size_t sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent,
|
||||
int flags)
|
||||
{
|
||||
hipFunction_t func = nullptr;
|
||||
hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, void** args,
|
||||
size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent,
|
||||
hipEvent_t stopEvent, int flags) {
|
||||
hipFunction_t func = nullptr;
|
||||
int deviceId = hip::Stream::DeviceId(stream);
|
||||
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId);
|
||||
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
||||
@@ -674,19 +626,31 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
|
||||
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
|
||||
HIP_RETURN(hipErrorInvalidConfiguration);
|
||||
}
|
||||
HIP_RETURN(ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
|
||||
static_cast<uint32_t>(globalWorkSizeY),
|
||||
static_cast<uint32_t>(globalWorkSizeZ),
|
||||
blockDim.x, blockDim.y, blockDim.z,
|
||||
sharedMemBytes, stream, args, nullptr, startEvent, stopEvent,
|
||||
flags));
|
||||
HIP_RETURN(ihipModuleLaunchKernel(
|
||||
func, static_cast<uint32_t>(globalWorkSizeX), static_cast<uint32_t>(globalWorkSizeY),
|
||||
static_cast<uint32_t>(globalWorkSizeZ), blockDim.x, blockDim.y, blockDim.z, sharedMemBytes,
|
||||
stream, args, nullptr, startEvent, stopEvent, flags));
|
||||
}
|
||||
|
||||
// conversion routines between float and half precision
|
||||
|
||||
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
|
||||
static inline std::uint32_t f32_as_u32(float f) {
|
||||
union {
|
||||
float f;
|
||||
std::uint32_t u;
|
||||
} v;
|
||||
v.f = f;
|
||||
return v.u;
|
||||
}
|
||||
|
||||
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
|
||||
static inline float u32_as_f32(std::uint32_t u) {
|
||||
union {
|
||||
float f;
|
||||
std::uint32_t u;
|
||||
} v;
|
||||
v.u = u;
|
||||
return v.f;
|
||||
}
|
||||
|
||||
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
|
||||
|
||||
@@ -694,15 +658,14 @@ static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l
|
||||
// half float, the f16 is in the low 16 bits of the input argument
|
||||
|
||||
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
|
||||
|
||||
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
|
||||
|
||||
std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
|
||||
std::uint32_t v =
|
||||
f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U) /*0x1.0p+112f*/) + 0x38000000U;
|
||||
|
||||
u = (a & 0x7fff) != 0 ? v : u;
|
||||
|
||||
return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
|
||||
|
||||
return u32_as_f32(u) * u32_as_f32(0x07800000U) /*0x1.0p-112f*/;
|
||||
}
|
||||
|
||||
// float half with nearest even rounding
|
||||
@@ -714,7 +677,7 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
||||
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
|
||||
std::uint32_t n = ((std::uint32_t)e << 12) | m;
|
||||
std::uint32_t s = (u >> 16) & 0x8000;
|
||||
int b = clamp_int(1-e, 0, 13);
|
||||
int b = clamp_int(1 - e, 0, 13);
|
||||
std::uint32_t d = (0x1000 | m) >> b;
|
||||
d |= (d << b) != (0x1000 | m);
|
||||
std::uint32_t v = e < 1 ? d : n;
|
||||
@@ -726,24 +689,25 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
||||
|
||||
extern "C"
|
||||
#if !defined(_MSC_VER)
|
||||
__attribute__((weak))
|
||||
__attribute__((weak))
|
||||
#endif
|
||||
float __gnu_h2f_ieee(unsigned short h){
|
||||
return __convert_half_to_float((std::uint32_t) h);
|
||||
float
|
||||
__gnu_h2f_ieee(unsigned short h) {
|
||||
return __convert_half_to_float((std::uint32_t)h);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
#if !defined(_MSC_VER)
|
||||
__attribute__((weak))
|
||||
__attribute__((weak))
|
||||
#endif
|
||||
unsigned short __gnu_f2h_ieee(float f){
|
||||
unsigned short
|
||||
__gnu_f2h_ieee(float f) {
|
||||
return (unsigned short)__convert_float_to_half(f);
|
||||
}
|
||||
|
||||
void PlatformState::init()
|
||||
{
|
||||
void PlatformState::init() {
|
||||
amd::ScopedLock lock(lock_);
|
||||
if(initialized_ || g_devices.empty()) {
|
||||
if (initialized_ || g_devices.empty()) {
|
||||
return;
|
||||
}
|
||||
initialized_ = true;
|
||||
@@ -751,18 +715,18 @@ void PlatformState::init()
|
||||
hipError_t err = digestFatBinary(it.first, it.second);
|
||||
assert(err == hipSuccess);
|
||||
}
|
||||
for (auto &it : statCO_.vars_) {
|
||||
for (auto& it : statCO_.vars_) {
|
||||
it.second->resize_dVar(g_devices.size());
|
||||
}
|
||||
for (auto &it : statCO_.functions_) {
|
||||
for (auto& it : statCO_.functions_) {
|
||||
it.second->resize_dFunc(g_devices.size());
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t PlatformState::loadModule(hipModule_t *module, const char* fname, const void* image) {
|
||||
hipError_t PlatformState::loadModule(hipModule_t* module, const char* fname, const void* image) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if(module == nullptr) {
|
||||
if (module == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -809,7 +773,7 @@ hipError_t PlatformState::unloadModule(hipModule_t hmod) {
|
||||
}
|
||||
|
||||
hipError_t PlatformState::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod,
|
||||
const char* func_name) {
|
||||
const char* func_name) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
auto it = dynCO_map_.find(hmod);
|
||||
@@ -828,7 +792,7 @@ hipError_t PlatformState::getDynGlobalVar(const char* hostVar, hipModule_t hmod,
|
||||
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if(hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) {
|
||||
if (hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -880,7 +844,8 @@ hipError_t PlatformState::getDynTexGlobalVar(textureReference* texRef, hipDevice
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) {
|
||||
hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod,
|
||||
textureReference** texRef) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
auto it = dynCO_map_.find(hmod);
|
||||
@@ -897,12 +862,12 @@ hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, te
|
||||
}
|
||||
|
||||
dvar->shadowVptr = new texture<char>();
|
||||
*texRef = reinterpret_cast<textureReference*>(dvar->shadowVptr);
|
||||
*texRef = reinterpret_cast<textureReference*>(dvar->shadowVptr);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t PlatformState::digestFatBinary(const void* data, hip::FatBinaryInfo*& programs) {
|
||||
return statCO_.digestFatBinary(data, programs);
|
||||
return statCO_.digestFatBinary(data, programs);
|
||||
}
|
||||
|
||||
hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data) {
|
||||
@@ -925,19 +890,21 @@ hipError_t PlatformState::registerStatManagedVar(hip::Var* var) {
|
||||
return statCO_.registerStatManagedVar(var);
|
||||
}
|
||||
|
||||
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) {
|
||||
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction,
|
||||
int deviceId) {
|
||||
return statCO_.getStatFunc(hfunc, hostFunction, deviceId);
|
||||
}
|
||||
|
||||
hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) {
|
||||
if(func_attr == nullptr || hostFunction == nullptr) {
|
||||
hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction,
|
||||
int deviceId) {
|
||||
if (func_attr == nullptr || hostFunction == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
return statCO_.getStatFuncAttr(func_attr, hostFunction, deviceId);
|
||||
}
|
||||
|
||||
hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
|
||||
size_t* size_ptr) {
|
||||
hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId,
|
||||
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
||||
return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr);
|
||||
}
|
||||
|
||||
@@ -945,7 +912,7 @@ hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) {
|
||||
return statCO_.initStatManagedVarDevicePtr(deviceId);
|
||||
}
|
||||
|
||||
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
|
||||
void PlatformState::setupArgument(const void* arg, size_t size, size_t offset) {
|
||||
auto& arguments = execStack_.top().arguments_;
|
||||
|
||||
if (arguments.size() < offset + size) {
|
||||
|
||||
@@ -27,9 +27,8 @@
|
||||
namespace hip_impl {
|
||||
|
||||
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize,
|
||||
const amd::Device& device, hipFunction_t func, int inputBlockSize,
|
||||
size_t dynamicSMemSize, bool bCalcPotentialBlkSz);
|
||||
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
|
||||
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz);
|
||||
} /* namespace hip_impl*/
|
||||
|
||||
class PlatformState {
|
||||
@@ -40,14 +39,14 @@ class PlatformState {
|
||||
PlatformState() {}
|
||||
~PlatformState() {}
|
||||
|
||||
public:
|
||||
public:
|
||||
void init();
|
||||
|
||||
//Dynamic Code Objects functions
|
||||
// Dynamic Code Objects functions
|
||||
hipError_t loadModule(hipModule_t* module, const char* fname, const void* image = nullptr);
|
||||
hipError_t unloadModule(hipModule_t hmod);
|
||||
|
||||
hipError_t getDynFunc(hipFunction_t *hfunc, hipModule_t hmod, const char* func_name);
|
||||
hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* func_name);
|
||||
hipError_t getDynGlobalVar(const char* hostVar, hipModule_t hmod, hipDeviceptr_t* dev_ptr,
|
||||
size_t* size_ptr);
|
||||
hipError_t getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef);
|
||||
@@ -59,14 +58,14 @@ public:
|
||||
/* Singleton instance */
|
||||
static PlatformState& instance() {
|
||||
if (platform_ == nullptr) {
|
||||
// __hipRegisterFatBinary() will call this when app starts, thus
|
||||
// there is no multiple entry issue here.
|
||||
platform_ = new PlatformState();
|
||||
// __hipRegisterFatBinary() will call this when app starts, thus
|
||||
// there is no multiple entry issue here.
|
||||
platform_ = new PlatformState();
|
||||
}
|
||||
return *platform_;
|
||||
}
|
||||
|
||||
//Static Code Objects functions
|
||||
// Static Code Objects functions
|
||||
hip::FatBinaryInfo** addFatBinary(const void* data);
|
||||
hipError_t removeFatBinary(hip::FatBinaryInfo** module);
|
||||
hipError_t digestFatBinary(const void* data, hip::FatBinaryInfo*& programs);
|
||||
@@ -83,15 +82,15 @@ public:
|
||||
|
||||
hipError_t initStatManagedVarDevicePtr(int deviceId);
|
||||
|
||||
//Exec Functions
|
||||
void setupArgument(const void *arg, size_t size, size_t offset);
|
||||
// Exec Functions
|
||||
void setupArgument(const void* arg, size_t size, size_t offset);
|
||||
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
|
||||
void popExec(ihipExec_t& exec);
|
||||
|
||||
private:
|
||||
//Dynamic Code Object map, keyin module to get the corresponding object
|
||||
private:
|
||||
// Dynamic Code Object map, keyin module to get the corresponding object
|
||||
std::unordered_map<hipModule_t, hip::DynCO*> dynCO_map_;
|
||||
hip::StatCO statCO_; //Static Code object var
|
||||
hip::StatCO statCO_; // Static Code object var
|
||||
bool initialized_{false};
|
||||
std::unordered_map<textureReference*, std::pair<hipModule_t, std::string>> texRef_map_;
|
||||
};
|
||||
|
||||
@@ -81,7 +81,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
|
||||
progName = name;
|
||||
}
|
||||
|
||||
auto* rtcProgram = new hiprtc::RTCProgram(progName);
|
||||
auto* rtcProgram = new hiprtc::RTCCompileProgram(progName);
|
||||
if (rtcProgram == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
|
||||
}
|
||||
@@ -98,7 +98,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
|
||||
}
|
||||
}
|
||||
|
||||
*prog = hiprtc::RTCProgram::as_hiprtcProgram(rtcProgram);
|
||||
*prog = hiprtc::RTCCompileProgram::as_hiprtcProgram(rtcProgram);
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
@@ -106,7 +106,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
|
||||
hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) {
|
||||
HIPRTC_INIT_API(prog, numOptions, options);
|
||||
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
|
||||
std::vector<std::string> opt;
|
||||
opt.reserve(numOptions);
|
||||
@@ -127,7 +127,7 @@ hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expres
|
||||
if (name_expression == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
std::string name = name_expression;
|
||||
if (!rtcProgram->trackMangledName(name)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
@@ -144,7 +144,7 @@ hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expressio
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
|
||||
if (!rtcProgram->getDemangledName(name_expression, loweredName)) {
|
||||
return HIPRTC_RETURN(HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID);
|
||||
@@ -158,7 +158,7 @@ hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) {
|
||||
if (prog == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(*prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(*prog);
|
||||
delete rtcProgram;
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
@@ -169,7 +169,7 @@ hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) {
|
||||
if (binarySizeRet == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
|
||||
*binarySizeRet = rtcProgram->getExecSize();
|
||||
|
||||
@@ -182,7 +182,7 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) {
|
||||
if (binaryMem == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
auto binary = rtcProgram->getExec();
|
||||
::memcpy(binaryMem, binary.data(), binary.size());
|
||||
|
||||
@@ -194,7 +194,7 @@ hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) {
|
||||
if (dst == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
auto log = rtcProgram->getLog();
|
||||
::memcpy(dst, log.data(), log.size());
|
||||
|
||||
@@ -206,7 +206,7 @@ hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) {
|
||||
if (logSizeRet == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
|
||||
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
|
||||
|
||||
*logSizeRet = rtcProgram->getLogSize();
|
||||
|
||||
@@ -226,3 +226,95 @@ hiprtcResult hiprtcVersion(int* major, int* minor) {
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcLinkCreate(unsigned int num_options, hiprtcJIT_option* options_ptr,
|
||||
void** options_vals_pptr, hiprtcLinkState* hip_link_state_ptr) {
|
||||
HIPRTC_INIT_API(num_options, options_ptr, options_vals_pptr, hip_link_state_ptr);
|
||||
|
||||
if (options_ptr == nullptr || options_vals_pptr == nullptr || hip_link_state_ptr == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
std::string name("Linker Program");
|
||||
hiprtc::RTCLinkProgram* rtc_link_prog_ptr = new hiprtc::RTCLinkProgram(name);
|
||||
if (!rtc_link_prog_ptr->AddLinkerOptions(num_options, options_ptr, options_vals_pptr)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_OPTION);
|
||||
}
|
||||
|
||||
*hip_link_state_ptr = reinterpret_cast<hiprtcLinkState>(rtc_link_prog_ptr);
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcLinkAddFile(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type,
|
||||
const char* file_path, unsigned int num_options,
|
||||
hiprtcJIT_option* options_ptr, void** option_values) {
|
||||
HIPRTC_INIT_API(hip_link_state, input_type, file_path, num_options, options_ptr, option_values);
|
||||
|
||||
if (hip_link_state == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX
|
||||
|| input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT
|
||||
|| input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
hiprtc::RTCLinkProgram* rtc_link_prog_ptr
|
||||
= reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
|
||||
if (!rtc_link_prog_ptr->AddLinkerFile(std::string(file_path), input_type)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
|
||||
}
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcLinkAddData(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type,
|
||||
void* image, size_t image_size, const char* name,
|
||||
unsigned int num_options, hiprtcJIT_option* options_ptr,
|
||||
void** option_values) {
|
||||
HIPRTC_INIT_API(hip_link_state, image, image_size, name, num_options, options_ptr,
|
||||
option_values);
|
||||
|
||||
if (image == nullptr || image_size <= 0 || name == nullptr) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX
|
||||
|| input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT
|
||||
|| input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
|
||||
}
|
||||
|
||||
hiprtc::RTCLinkProgram* rtc_link_prog_ptr
|
||||
= reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
|
||||
if (!rtc_link_prog_ptr->AddLinkerData(image, image_size, name, input_type)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
|
||||
}
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcLinkComplete(hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out) {
|
||||
HIPRTC_INIT_API(hip_link_state, bin_out, size_out);
|
||||
hiprtc::RTCLinkProgram* rtc_link_prog_ptr
|
||||
= reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
|
||||
if (!rtc_link_prog_ptr->LinkComplete(bin_out, size_out)) {
|
||||
HIPRTC_RETURN(HIPRTC_ERROR_LINKING);
|
||||
}
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
|
||||
}
|
||||
|
||||
hiprtcResult hiprtcLinkDestroy(hiprtcLinkState hip_link_state) {
|
||||
HIPRTC_INIT_API(hip_link_state);
|
||||
|
||||
hiprtc::RTCLinkProgram* rtc_link_prog_ptr
|
||||
= reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
|
||||
delete rtc_link_prog_ptr;
|
||||
|
||||
HIPRTC_RETURN(HIPRTC_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,4 +8,9 @@ hiprtcGetProgramLog
|
||||
hiprtcGetProgramLogSize
|
||||
hiprtcGetCode
|
||||
hiprtcGetCodeSize
|
||||
hiprtcGetErrorString
|
||||
hiprtcGetErrorString
|
||||
hiprtcLinkCreate
|
||||
hiprtcLinkAddFile
|
||||
hiprtcLinkAddData
|
||||
hiprtcLinkComplete
|
||||
hiprtcLinkDestroy
|
||||
|
||||
@@ -11,6 +11,11 @@ global:
|
||||
hiprtcGetErrorString;
|
||||
hiprtcAddNameExpression;
|
||||
hiprtcVersion;
|
||||
hiprtcLinkCreate;
|
||||
hiprtcLinkAddFile;
|
||||
hiprtcLinkAddData;
|
||||
hiprtcLinkComplete;
|
||||
hiprtcLinkDestroy;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
||||
@@ -22,110 +22,21 @@ THE SOFTWARE.
|
||||
|
||||
#include "hiprtcInternal.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "vdi_common.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
|
||||
namespace hiprtc {
|
||||
using namespace helpers;
|
||||
RTCProgram::RTCProgram(std::string name_) : name(name_) {
|
||||
|
||||
//RTC Program Member Functions
|
||||
RTCProgram::RTCProgram(std::string name) : name_(name) {
|
||||
std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib);
|
||||
if (amd::Comgr::create_data_set(&compileInput) != AMD_COMGR_STATUS_SUCCESS ||
|
||||
amd::Comgr::create_data_set(&linkInput) != AMD_COMGR_STATUS_SUCCESS ||
|
||||
amd::Comgr::create_data_set(&execInput) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
if (amd::Comgr::create_data_set(&exec_input_) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
crashWithMessage("Failed to allocate internal hiprtc structure");
|
||||
}
|
||||
|
||||
// Add internal header
|
||||
if (!addBuiltinHeader()) {
|
||||
crashWithMessage("Unable to add internal header");
|
||||
}
|
||||
|
||||
// Add compile options
|
||||
const std::string hipVerOpt{"--hip-version=" + std::to_string(HIP_VERSION_MAJOR) + '.' +
|
||||
std::to_string(HIP_VERSION_MINOR) + '.' +
|
||||
std::to_string(HIP_VERSION_PATCH)};
|
||||
const std::string hipVerMajor{"-DHIP_VERSION_MAJOR=" + std::to_string(HIP_VERSION_MAJOR)};
|
||||
const std::string hipVerMinor{"-DHIP_VERSION_MINOR=" + std::to_string(HIP_VERSION_MINOR)};
|
||||
const std::string hipVerPatch{"-DHIP_VERSION_PATCH=" + std::to_string(HIP_VERSION_PATCH)};
|
||||
|
||||
compileOptions.reserve(20); // count of options below
|
||||
compileOptions.push_back("-O3");
|
||||
|
||||
#ifdef HIPRTC_EARLY_INLINE
|
||||
compileOptions.push_back("-mllvm");
|
||||
compileOptions.push_back("-amdgpu-early-inline-all");
|
||||
#endif
|
||||
compileOptions.push_back("-mllvm");
|
||||
compileOptions.push_back("-amdgpu-prelink");
|
||||
|
||||
if (GPU_ENABLE_WGP_MODE) compileOptions.push_back("-mcumode");
|
||||
|
||||
if (!GPU_ENABLE_WAVE32_MODE) compileOptions.push_back("-mwavefrontsize64");
|
||||
|
||||
compileOptions.push_back(hipVerOpt);
|
||||
compileOptions.push_back(hipVerMajor);
|
||||
compileOptions.push_back(hipVerMinor);
|
||||
compileOptions.push_back(hipVerPatch);
|
||||
compileOptions.push_back("-D__HIPCC_RTC__");
|
||||
compileOptions.push_back("-include");
|
||||
compileOptions.push_back("hiprtc_runtime.h");
|
||||
compileOptions.push_back("-std=c++14");
|
||||
compileOptions.push_back("-nogpuinc");
|
||||
#ifdef _WIN32
|
||||
compileOptions.push_back("-target");
|
||||
compileOptions.push_back("x86_64-pc-windows-msvc");
|
||||
compileOptions.push_back("-fms-extensions");
|
||||
compileOptions.push_back("-fms-compatibility");
|
||||
#endif
|
||||
|
||||
if (!GPU_ENABLE_WAVE32_MODE) linkOptions.push_back("wavefrontsize64");
|
||||
|
||||
exeOptions.push_back("-O3");
|
||||
exeOptions.push_back("-mllvm");
|
||||
exeOptions.push_back("-amdgpu-internalize-symbols");
|
||||
exeOptions.push_back("-mcumode");
|
||||
if (!GPU_ENABLE_WAVE32_MODE) exeOptions.push_back("-mwavefrontsize64");
|
||||
}
|
||||
|
||||
bool RTCProgram::addSource(const std::string& source, const std::string& name) {
|
||||
if (source.size() == 0 || name.size() == 0) {
|
||||
LogError("Error in hiprtc: source or name is of size 0 in addSource");
|
||||
return false;
|
||||
}
|
||||
sourceCode += source;
|
||||
sourceName = name;
|
||||
return true;
|
||||
}
|
||||
|
||||
// addSource_impl is a different function because we need to add source when we track mangled
|
||||
// objects
|
||||
bool RTCProgram::addSource_impl() {
|
||||
std::vector<char> vsource(sourceCode.begin(), sourceCode.end());
|
||||
if (!addCodeObjData(compileInput, vsource, sourceName, AMD_COMGR_DATA_KIND_SOURCE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCProgram::addHeader(const std::string& source, const std::string& name) {
|
||||
if (source.size() == 0 || name.size() == 0) {
|
||||
LogError("Error in hiprtc: source or name is of size 0 in addHeader");
|
||||
return false;
|
||||
}
|
||||
std::vector<char> vsource(source.begin(), source.end());
|
||||
if (!addCodeObjData(compileInput, vsource, name, AMD_COMGR_DATA_KIND_INCLUDE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCProgram::addBuiltinHeader() {
|
||||
std::vector<char> source(__hipRTC_header, __hipRTC_header + __hipRTC_header_size);
|
||||
std::string name{"hiprtc_runtime.h"};
|
||||
if (!addCodeObjData(compileInput, source, name, AMD_COMGR_DATA_KIND_INCLUDE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCProgram::findIsa() {
|
||||
@@ -140,7 +51,7 @@ bool RTCProgram::findIsa() {
|
||||
|
||||
if (!handle) {
|
||||
LogInfo("hip runtime failed to load using dlopen");
|
||||
buildLog +=
|
||||
build_log_ +=
|
||||
"Error: Please provide architecture for which code is to be "
|
||||
"generated.\n";
|
||||
return false;
|
||||
@@ -151,7 +62,7 @@ bool RTCProgram::findIsa() {
|
||||
|
||||
if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) {
|
||||
LogInfo("ISA cannot be found to dlsym failure");
|
||||
buildLog +=
|
||||
build_log_ +=
|
||||
"Error: Please provide architecture for which code is to be "
|
||||
"generated.\n";
|
||||
return false;
|
||||
@@ -173,14 +84,106 @@ bool RTCProgram::findIsa() {
|
||||
if (status != hipSuccess) {
|
||||
return false;
|
||||
}
|
||||
isa = "amdgcn-amd-amdhsa--";
|
||||
isa.append(props.gcnArchName);
|
||||
isa_ = "amdgcn-amd-amdhsa--";
|
||||
isa_.append(props.gcnArchName);
|
||||
|
||||
amd::Os::unloadLibrary(handle);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCProgram::transformOptions() {
|
||||
//RTC Compile Program Member Functions
|
||||
RTCCompileProgram::RTCCompileProgram(std::string name_) : RTCProgram(name_) {
|
||||
|
||||
if ((amd::Comgr::create_data_set(&compile_input_) != AMD_COMGR_STATUS_SUCCESS) ||
|
||||
(amd::Comgr::create_data_set(&link_input_) != AMD_COMGR_STATUS_SUCCESS)) {
|
||||
crashWithMessage("Failed to allocate internal hiprtc structure");
|
||||
}
|
||||
// Add internal header
|
||||
if (!addBuiltinHeader()) {
|
||||
crashWithMessage("Unable to add internal header");
|
||||
}
|
||||
|
||||
// Add compile options
|
||||
const std::string hipVerOpt{"--hip-version=" + std::to_string(HIP_VERSION_MAJOR) + '.' +
|
||||
std::to_string(HIP_VERSION_MINOR) + '.' +
|
||||
std::to_string(HIP_VERSION_PATCH)};
|
||||
const std::string hipVerMajor{"-DHIP_VERSION_MAJOR=" + std::to_string(HIP_VERSION_MAJOR)};
|
||||
const std::string hipVerMinor{"-DHIP_VERSION_MINOR=" + std::to_string(HIP_VERSION_MINOR)};
|
||||
const std::string hipVerPatch{"-DHIP_VERSION_PATCH=" + std::to_string(HIP_VERSION_PATCH)};
|
||||
|
||||
compile_options_.reserve(20); // count of options below
|
||||
compile_options_.push_back("-O3");
|
||||
|
||||
#ifdef HIPRTC_EARLY_INLINE
|
||||
compile_options_.push_back("-mllvm");
|
||||
compile_options_.push_back("-amdgpu-early-inline-all");
|
||||
#endif
|
||||
|
||||
if (GPU_ENABLE_WGP_MODE) compile_options_.push_back("-mcumode");
|
||||
|
||||
if (!GPU_ENABLE_WAVE32_MODE) compile_options_.push_back("-mwavefrontsize64");
|
||||
|
||||
compile_options_.push_back(hipVerOpt);
|
||||
compile_options_.push_back(hipVerMajor);
|
||||
compile_options_.push_back(hipVerMinor);
|
||||
compile_options_.push_back(hipVerPatch);
|
||||
compile_options_.push_back("-D__HIPCC_RTC__");
|
||||
compile_options_.push_back("-include");
|
||||
compile_options_.push_back("hiprtc_runtime.h");
|
||||
compile_options_.push_back("-std=c++14");
|
||||
compile_options_.push_back("-nogpuinc");
|
||||
#ifdef _WIN32
|
||||
compile_options_.push_back("-target");
|
||||
compile_options_.push_back("x86_64-pc-windows-msvc");
|
||||
compile_options_.push_back("-fms-extensions");
|
||||
compile_options_.push_back("-fms-compatibility");
|
||||
#endif
|
||||
|
||||
exe_options_.push_back("-O3");
|
||||
}
|
||||
|
||||
bool RTCCompileProgram::addSource(const std::string& source, const std::string& name) {
|
||||
if (source.size() == 0 || name.size() == 0) {
|
||||
LogError("Error in hiprtc: source or name is of size 0 in addSource");
|
||||
return false;
|
||||
}
|
||||
source_code_ += source;
|
||||
source_name_ = name;
|
||||
return true;
|
||||
}
|
||||
|
||||
// addSource_impl is a different function because we need to add source when we track mangled
|
||||
// objects
|
||||
bool RTCCompileProgram::addSource_impl() {
|
||||
std::vector<char> vsource(source_code_.begin(), source_code_.end());
|
||||
if (!addCodeObjData(compile_input_, vsource, source_name_, AMD_COMGR_DATA_KIND_SOURCE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCCompileProgram::addHeader(const std::string& source, const std::string& name) {
|
||||
if (source.size() == 0 || name.size() == 0) {
|
||||
LogError("Error in hiprtc: source or name is of size 0 in addHeader");
|
||||
return false;
|
||||
}
|
||||
std::vector<char> vsource(source.begin(), source.end());
|
||||
if (!addCodeObjData(compile_input_, vsource, name, AMD_COMGR_DATA_KIND_INCLUDE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCCompileProgram::addBuiltinHeader() {
|
||||
std::vector<char> source(__hipRTC_header, __hipRTC_header + __hipRTC_header_size);
|
||||
std::string name{"hiprtc_runtime.h"};
|
||||
if (!addCodeObjData(compile_input_, source, name, AMD_COMGR_DATA_KIND_INCLUDE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCCompileProgram::transformOptions() {
|
||||
auto getValueOf = [](const std::string& option) {
|
||||
std::string res;
|
||||
auto f = std::find(option.begin(), option.end(), '=');
|
||||
@@ -188,7 +191,7 @@ bool RTCProgram::transformOptions() {
|
||||
return res;
|
||||
};
|
||||
|
||||
for (auto& i : compileOptions) {
|
||||
for (auto& i : compile_options_) {
|
||||
if (i == "-hip-pch") {
|
||||
LogInfo(
|
||||
"-hip-pch is deprecated option, has no impact on execution of new hiprtc programs, it "
|
||||
@@ -204,18 +207,18 @@ bool RTCProgram::transformOptions() {
|
||||
continue;
|
||||
}
|
||||
if (i == "--save-temps") {
|
||||
settings.dumpISA = true;
|
||||
settings_.dumpISA = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto res = std::find_if(
|
||||
compileOptions.begin(), compileOptions.end(),
|
||||
compile_options_.begin(), compile_options_.end(),
|
||||
[](const std::string& str) { return str.find("--offload-arch=") != std::string::npos; });
|
||||
res != compileOptions.end()) {
|
||||
res != compile_options_.end()) {
|
||||
auto isaName = getValueOf(*res);
|
||||
isa = "amdgcn-amd-amdhsa--" + isaName;
|
||||
settings.offloadArchProvided = true;
|
||||
isa_ = "amdgcn-amd-amdhsa--" + isaName;
|
||||
settings_.offloadArchProvided = true;
|
||||
return true;
|
||||
}
|
||||
// App has not provided the gpu archiecture, need to find it
|
||||
@@ -224,7 +227,7 @@ bool RTCProgram::transformOptions() {
|
||||
|
||||
amd::Monitor RTCProgram::lock_("HIPRTC Program", true);
|
||||
|
||||
bool RTCProgram::compile(const std::vector<std::string>& options) {
|
||||
bool RTCCompileProgram::compile(const std::vector<std::string>& options) {
|
||||
amd::ScopedLock lock(lock_); // Lock, because LLVM is not multi threaded
|
||||
|
||||
if (!addSource_impl()) {
|
||||
@@ -233,8 +236,8 @@ bool RTCProgram::compile(const std::vector<std::string>& options) {
|
||||
}
|
||||
|
||||
// Append compile options
|
||||
compileOptions.reserve(compileOptions.size() + options.size());
|
||||
compileOptions.insert(compileOptions.end(), options.begin(), options.end());
|
||||
compile_options_.reserve(compile_options_.size() + options.size());
|
||||
compile_options_.insert(compile_options_.end(), options.begin(), options.end());
|
||||
|
||||
if (!transformOptions()) {
|
||||
LogError("Error in hiprtc: unable to transform options");
|
||||
@@ -242,48 +245,48 @@ bool RTCProgram::compile(const std::vector<std::string>& options) {
|
||||
}
|
||||
|
||||
std::vector<char> LLVMBitcode;
|
||||
if (!compileToBitCode(compileInput, isa, compileOptions, buildLog, LLVMBitcode)) {
|
||||
if (!compileToBitCode(compile_input_, isa_, compile_options_, build_log_, LLVMBitcode)) {
|
||||
LogError("Error in hiprtc: unable to compile source to bitcode");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string linkFileName = "linked";
|
||||
if (!addCodeObjData(linkInput, LLVMBitcode, linkFileName, AMD_COMGR_DATA_KIND_BC)) {
|
||||
if (!addCodeObjData(link_input_, LLVMBitcode, linkFileName, AMD_COMGR_DATA_KIND_BC)) {
|
||||
LogError("Error in hiprtc: unable to add linked code object");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<char> LinkedLLVMBitcode;
|
||||
if (!linkLLVMBitcode(linkInput, isa, linkOptions, buildLog, LinkedLLVMBitcode)) {
|
||||
if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, LinkedLLVMBitcode)) {
|
||||
LogError("Error in hiprtc: unable to add device libs to linked bitcode");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string linkedFileName = "LLVMBitcode.bc";
|
||||
if (!addCodeObjData(execInput, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) {
|
||||
if (!addCodeObjData(exec_input_, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) {
|
||||
LogError("Error in hiprtc: unable to add device libs linked code object");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (settings.dumpISA) {
|
||||
if (!dumpIsaFromBC(execInput, isa, exeOptions, name, buildLog)) {
|
||||
if (settings_.dumpISA) {
|
||||
if (!dumpIsaFromBC(exec_input_, isa_, exe_options_, name_, build_log_)) {
|
||||
LogError("Error in hiprtc: unable to dump isa code");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!createExecutable(execInput, isa, exeOptions, buildLog, executable)) {
|
||||
if (!createExecutable(exec_input_, isa_, exe_options_, build_log_, executable_)) {
|
||||
LogError("Error in hiprtc: unable to create executable");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::string> mangledNames;
|
||||
if (!fillDemangledNames(executable, mangledNames)) {
|
||||
if (!fillDemangledNames(executable_, mangledNames)) {
|
||||
LogError("Error in hiprtc: unable to fill demangled names");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!getMangledNames(mangledNames, strippedNames, demangledNames)) {
|
||||
if (!getMangledNames(mangledNames, stripped_names_, demangled_names_)) {
|
||||
LogError("Error in hiprtc: unable to get mangled names");
|
||||
return false;
|
||||
}
|
||||
@@ -291,7 +294,7 @@ bool RTCProgram::compile(const std::vector<std::string>& options) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCProgram::trackMangledName(std::string& name) {
|
||||
bool RTCCompileProgram::trackMangledName(std::string& name) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if (name.size() == 0) return false;
|
||||
@@ -312,20 +315,20 @@ bool RTCProgram::trackMangledName(std::string& name) {
|
||||
return std::isspace(c);
|
||||
}), strippedNameNoSpace.end());
|
||||
|
||||
strippedNames.insert(std::pair<std::string, std::string>(name, strippedNameNoSpace));
|
||||
demangledNames.insert(std::pair<std::string, std::string>(strippedName, ""));
|
||||
stripped_names_.insert(std::pair<std::string, std::string>(name, strippedNameNoSpace));
|
||||
demangled_names_.insert(std::pair<std::string, std::string>(strippedName, ""));
|
||||
|
||||
const auto var{"__hiprtc_" + std::to_string(strippedNames.size())};
|
||||
const auto var{"__hiprtc_" + std::to_string(stripped_names_.size())};
|
||||
const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name + ";\n"};
|
||||
|
||||
sourceCode += code;
|
||||
source_code_ += code;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCProgram::getDemangledName(const char* name_expression, const char** loweredName) {
|
||||
bool RTCCompileProgram::getDemangledName(const char* name_expression, const char** loweredName) {
|
||||
std::string name = name_expression;
|
||||
if (auto res = strippedNames.find(name); res != strippedNames.end()) {
|
||||
if (auto dres = demangledNames.find(res->second); dres != demangledNames.end()) {
|
||||
if (auto res = stripped_names_.find(name); res != stripped_names_.end()) {
|
||||
if (auto dres = demangled_names_.find(res->second); dres != demangled_names_.end()) {
|
||||
if (dres->second.size() != 0) {
|
||||
*loweredName = dres->second.c_str();
|
||||
return true;
|
||||
@@ -333,7 +336,7 @@ bool RTCProgram::getDemangledName(const char* name_expression, const char** lowe
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (auto dres = demangledNames.find(name); dres != demangledNames.end()) {
|
||||
if (auto dres = demangled_names_.find(name); dres != demangled_names_.end()) {
|
||||
if (dres->second.size() != 0) {
|
||||
*loweredName = dres->second.c_str();
|
||||
return true;
|
||||
@@ -343,4 +346,223 @@ bool RTCProgram::getDemangledName(const char* name_expression, const char** lowe
|
||||
return false;
|
||||
}
|
||||
|
||||
//RTC Link Program Member Functions
|
||||
RTCLinkProgram::RTCLinkProgram(std::string name) : RTCProgram(name) {
|
||||
if (amd::Comgr::create_data_set(&link_input_) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
crashWithMessage("Failed to allocate internal hiprtc structure");
|
||||
}
|
||||
}
|
||||
|
||||
bool RTCLinkProgram::AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr,
|
||||
void** options_vals_ptr) {
|
||||
|
||||
if (options_ptr == nullptr || options_vals_ptr == nullptr) {
|
||||
crashWithMessage("JIT Options ptr cannot be null");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t opt_idx = 0; opt_idx < num_options; ++opt_idx) {
|
||||
|
||||
if (options_vals_ptr[opt_idx] == nullptr) {
|
||||
crashWithMessage("JIT Options value ptr cannot be null");
|
||||
return false;
|
||||
}
|
||||
|
||||
switch(options_ptr[opt_idx]) {
|
||||
case HIPRTC_JIT_MAX_REGISTERS:
|
||||
link_args_.max_registers_ = *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_THREADS_PER_BLOCK:
|
||||
link_args_.threads_per_block_
|
||||
= *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_WALL_TIME:
|
||||
link_args_.wall_time_ = *(reinterpret_cast<long*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_INFO_LOG_BUFFER:
|
||||
link_args_.info_log_ = (reinterpret_cast<char*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES:
|
||||
link_args_.info_log_size_ = *(reinterpret_cast<size_t*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_ERROR_LOG_BUFFER:
|
||||
link_args_.error_log_ = reinterpret_cast<char*>(options_vals_ptr[opt_idx]);
|
||||
break;
|
||||
case HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES:
|
||||
link_args_.error_log_size_ = *(reinterpret_cast<size_t*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_OPTIMIZATION_LEVEL:
|
||||
link_args_.optimization_level_
|
||||
= *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_TARGET_FROM_HIPCONTEXT:
|
||||
link_args_.target_from_hip_context_
|
||||
= *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_TARGET:
|
||||
link_args_.jit_target_ = *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_FALLBACK_STRATEGY:
|
||||
link_args_.fallback_strategy_
|
||||
= *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_GENERATE_DEBUG_INFO:
|
||||
link_args_.generate_debug_info_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_LOG_VERBOSE:
|
||||
link_args_.log_verbose_ = reinterpret_cast<long>(options_vals_ptr[opt_idx]);
|
||||
break;
|
||||
case HIPRTC_JIT_GENERATE_LINE_INFO:
|
||||
link_args_.generate_line_info_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_CACHE_MODE:
|
||||
link_args_.cache_mode_ = *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_NEW_SM3X_OPT:
|
||||
link_args_.sm3x_opt_ = *(reinterpret_cast<bool*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_FAST_COMPILE:
|
||||
link_args_.fast_compile_ = *(reinterpret_cast<bool*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_GLOBAL_SYMBOL_NAMES:
|
||||
link_args_.global_symbol_names_ = reinterpret_cast<const char**>(options_vals_ptr[opt_idx]);
|
||||
break;
|
||||
case HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS:
|
||||
link_args_.global_symbol_addresses_ = reinterpret_cast<void**>(options_vals_ptr[opt_idx]);
|
||||
break;
|
||||
case HIPRTC_JIT_GLOBAL_SYMBOL_COUNT:
|
||||
link_args_.global_symbol_count_
|
||||
= *(reinterpret_cast<unsigned int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_LTO:
|
||||
link_args_.lto_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_FTZ:
|
||||
link_args_.ftz_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_PREC_DIV:
|
||||
link_args_.prec_div_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_PREC_SQRT:
|
||||
link_args_.prec_sqrt_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
case HIPRTC_JIT_FMA:
|
||||
link_args_.fma_ = *(reinterpret_cast<int*>(options_vals_ptr[opt_idx]));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
amd_comgr_data_kind_t RTCLinkProgram::GetCOMGRDataKind(hiprtcJITInputType input_type) {
|
||||
amd_comgr_data_kind_t data_kind = AMD_COMGR_DATA_KIND_UNDEF;
|
||||
|
||||
// Map the hiprtc input type to comgr data kind
|
||||
switch (input_type) {
|
||||
case HIPRTC_JIT_INPUT_LLVM_BITCODE :
|
||||
data_kind = AMD_COMGR_DATA_KIND_BC;
|
||||
break;
|
||||
case HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE :
|
||||
data_kind = AMD_COMGR_DATA_KIND_FATBIN;
|
||||
break;
|
||||
case HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE :
|
||||
data_kind = AMD_COMGR_DATA_KIND_FATBIN;
|
||||
break;
|
||||
default :
|
||||
LogError("Cannot find the corresponding comgr data kind");
|
||||
break;
|
||||
}
|
||||
|
||||
return data_kind;
|
||||
}
|
||||
|
||||
bool RTCLinkProgram::AddLinkerFile(std::string file_path, hiprtcJITInputType input_type) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
struct stat stat_buf;
|
||||
if (stat(file_path.c_str(), &stat_buf)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string link_file_name_("Linker Program");
|
||||
std::vector<char> llvm_bitcode(stat_buf.st_size);
|
||||
std::ifstream bc_file(file_path, std::ios_base::in | std::ios_base::binary);
|
||||
if (!bc_file.good()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bc_file.read(llvm_bitcode.data(), stat_buf.st_size);
|
||||
bc_file.close();
|
||||
|
||||
amd_comgr_data_kind_t data_kind;
|
||||
if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) {
|
||||
LogError("Cannot find the correct COMGR data kind");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!addCodeObjData(link_input_, llvm_bitcode, link_file_name_, data_kind)) {
|
||||
LogError("Error in hiprtc: unable to add linked code object");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCLinkProgram::AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name,
|
||||
hiprtcJITInputType input_type) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
char* image_char_buf = reinterpret_cast<char*>(image_ptr);
|
||||
std::vector<char> llvm_bitcode(image_char_buf, image_char_buf + image_size);
|
||||
|
||||
amd_comgr_data_kind_t data_kind;
|
||||
if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) {
|
||||
LogError("Cannot find the correct COMGR data kind");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!addCodeObjData(link_input_,llvm_bitcode , link_file_name, data_kind)) {
|
||||
LogError("Error in hiprtc: unable to add linked code object");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RTCLinkProgram::LinkComplete(void** bin_out, size_t* size_out) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if (!findIsa()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<char> linked_llvm_bitcode;
|
||||
if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, linked_llvm_bitcode)) {
|
||||
LogError("Error in hiprtc: unable to add device libs to linked bitcode");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string linkedFileName = "LLVMBitcode.bc";
|
||||
if (!addCodeObjData(exec_input_, linked_llvm_bitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) {
|
||||
LogError("Error in hiprtc: unable to add linked bitcode");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::string> exe_options;
|
||||
exe_options.push_back("-O3");
|
||||
if (!createExecutable(exec_input_, isa_, exe_options, build_log_, executable_)) {
|
||||
LogError("Error in hiprtc: unable to create exectuable");
|
||||
return false;
|
||||
}
|
||||
|
||||
*size_out = executable_.size();
|
||||
char* bin_out_c = new char[*size_out];
|
||||
std::copy(executable_.begin(), executable_.end(), bin_out_c);
|
||||
*bin_out = reinterpret_cast<void*>(bin_out_c);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace hiprtc
|
||||
|
||||
@@ -46,7 +46,6 @@ extern unsigned __hipRTC_header_size;
|
||||
|
||||
#include "hiprtcComgrHelper.hpp"
|
||||
|
||||
|
||||
namespace hiprtc {
|
||||
namespace internal {
|
||||
template <typename T> inline std::string ToString(T v) {
|
||||
@@ -90,67 +89,139 @@ struct Settings {
|
||||
};
|
||||
|
||||
class RTCProgram {
|
||||
protected:
|
||||
// Lock and control variables
|
||||
static amd::Monitor lock_;
|
||||
static std::once_flag initialized;
|
||||
static std::once_flag initialized_;
|
||||
|
||||
std::string name;
|
||||
Settings settings;
|
||||
RTCProgram(std::string name);
|
||||
~RTCProgram() {
|
||||
amd::Comgr::destroy_data_set(exec_input_);
|
||||
}
|
||||
|
||||
std::string isa;
|
||||
std::string buildLog;
|
||||
|
||||
std::vector<char> executable;
|
||||
|
||||
std::map<std::string, std::string> strippedNames;
|
||||
std::map<std::string, std::string> demangledNames;
|
||||
std::string sourceCode;
|
||||
std::string sourceName;
|
||||
|
||||
std::vector<std::string> compileOptions;
|
||||
std::vector<std::string> linkOptions;
|
||||
std::vector<std::string> exeOptions;
|
||||
|
||||
amd_comgr_data_set_t compileInput;
|
||||
amd_comgr_data_set_t linkInput;
|
||||
amd_comgr_data_set_t execInput;
|
||||
|
||||
bool dumpIsa();
|
||||
// Member Functions
|
||||
bool findIsa();
|
||||
|
||||
// Data Members
|
||||
std::string name_;
|
||||
std::string isa_;
|
||||
std::string build_log_;
|
||||
std::vector<char> executable_;
|
||||
|
||||
amd_comgr_data_set_t exec_input_;
|
||||
std::vector<std::string> exe_options_;
|
||||
};
|
||||
|
||||
class RTCCompileProgram : public RTCProgram {
|
||||
|
||||
// Private Data Members
|
||||
Settings settings_;
|
||||
|
||||
std::string source_code_;
|
||||
std::string source_name_;
|
||||
std::map<std::string, std::string> stripped_names_;
|
||||
std::map<std::string, std::string> demangled_names_;
|
||||
|
||||
std::vector<std::string> compile_options_;
|
||||
std::vector<std::string> link_options_;
|
||||
|
||||
amd_comgr_data_set_t compile_input_;
|
||||
amd_comgr_data_set_t link_input_;
|
||||
|
||||
// Private Member functions
|
||||
bool addSource_impl();
|
||||
bool addBuiltinHeader();
|
||||
bool transformOptions();
|
||||
|
||||
RTCProgram() = delete;
|
||||
RTCProgram(RTCProgram&) = delete;
|
||||
RTCProgram& operator=(RTCProgram&) = delete;
|
||||
RTCCompileProgram() = delete;
|
||||
RTCCompileProgram(RTCCompileProgram&) = delete;
|
||||
RTCCompileProgram& operator=(RTCCompileProgram&) = delete;
|
||||
|
||||
public:
|
||||
RTCProgram(std::string);
|
||||
RTCCompileProgram(std::string);
|
||||
~RTCCompileProgram() {
|
||||
amd::Comgr::destroy_data_set(compile_input_);
|
||||
amd::Comgr::destroy_data_set(link_input_);
|
||||
}
|
||||
|
||||
// Converters
|
||||
inline static hiprtcProgram as_hiprtcProgram(RTCProgram* p) {
|
||||
inline static hiprtcProgram as_hiprtcProgram(RTCCompileProgram* p) {
|
||||
return reinterpret_cast<hiprtcProgram>(p);
|
||||
}
|
||||
inline static RTCProgram* as_RTCProgram(hiprtcProgram& p) {
|
||||
return reinterpret_cast<RTCProgram*>(p);
|
||||
inline static RTCCompileProgram* as_RTCCompileProgram(hiprtcProgram& p) {
|
||||
return reinterpret_cast<RTCCompileProgram*>(p);
|
||||
}
|
||||
|
||||
// Public Member Functions
|
||||
bool addSource(const std::string& source, const std::string& name);
|
||||
bool addHeader(const std::string& source, const std::string& name);
|
||||
bool compile(const std::vector<std::string>& options);
|
||||
bool getDemangledName(const char* name_expression, const char** loweredName);
|
||||
bool trackMangledName(std::string& name);
|
||||
|
||||
const std::vector<char>& getExec() const { return executable; }
|
||||
size_t getExecSize() const { return executable.size(); }
|
||||
const std::string& getLog() const { return buildLog; }
|
||||
size_t getLogSize() const { return buildLog.size(); }
|
||||
|
||||
~RTCProgram() {
|
||||
amd::Comgr::destroy_data_set(compileInput);
|
||||
amd::Comgr::destroy_data_set(linkInput);
|
||||
amd::Comgr::destroy_data_set(execInput);
|
||||
}
|
||||
// Public Getter/Setters
|
||||
const std::vector<char>& getExec() const { return executable_; }
|
||||
size_t getExecSize() const { return executable_.size(); }
|
||||
const std::string& getLog() const { return build_log_; }
|
||||
size_t getLogSize() const { return build_log_.size(); }
|
||||
};
|
||||
|
||||
// Linker Arguments passed via hipLinkCreate
|
||||
struct LinkArguments {
|
||||
unsigned int max_registers_;
|
||||
unsigned int threads_per_block_;
|
||||
float wall_time_;
|
||||
size_t info_log_size_;
|
||||
char* info_log_;
|
||||
size_t error_log_size_;
|
||||
char* error_log_;
|
||||
unsigned int optimization_level_;
|
||||
unsigned int target_from_hip_context_;
|
||||
unsigned int jit_target_;
|
||||
unsigned int fallback_strategy_;
|
||||
int generate_debug_info_;
|
||||
long log_verbose_;
|
||||
int generate_line_info_;
|
||||
unsigned int cache_mode_;
|
||||
bool sm3x_opt_;
|
||||
bool fast_compile_;
|
||||
const char** global_symbol_names_;
|
||||
void** global_symbol_addresses_;
|
||||
unsigned int global_symbol_count_;
|
||||
int lto_;
|
||||
int ftz_;
|
||||
int prec_div_;
|
||||
int prec_sqrt_;
|
||||
int fma_;
|
||||
};
|
||||
|
||||
class RTCLinkProgram : public RTCProgram {
|
||||
|
||||
// Private Member Functions (forbid these function calls)
|
||||
RTCLinkProgram() = delete;
|
||||
RTCLinkProgram(RTCLinkProgram&) = delete;
|
||||
RTCLinkProgram& operator=(RTCLinkProgram&) = delete;
|
||||
|
||||
amd_comgr_data_kind_t GetCOMGRDataKind(hiprtcJITInputType input_type);
|
||||
|
||||
// Linker Argumenets at hipLinkCreate
|
||||
LinkArguments link_args_;
|
||||
|
||||
// Private Data Members
|
||||
amd_comgr_data_set_t link_input_;
|
||||
std::vector<std::string> link_options_;
|
||||
public:
|
||||
RTCLinkProgram(std::string name);
|
||||
~RTCLinkProgram() {
|
||||
amd::Comgr::destroy_data_set(link_input_);
|
||||
}
|
||||
// Public Member Functions
|
||||
bool AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr,
|
||||
void** options_vals_ptr);
|
||||
bool AddLinkerFile(std::string file_path, hiprtcJITInputType input_type);
|
||||
bool AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name,
|
||||
hiprtcJITInputType input_type);
|
||||
bool LinkComplete(void** bin_out, size_t* size_out);
|
||||
};
|
||||
|
||||
} // namespace hiprtc
|
||||
|
||||
Ссылка в новой задаче
Block a user