diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp index 4595e92ba0..be3fac910e 100644 --- a/hipamd/src/hip_code_object.cpp +++ b/hipamd/src/hip_code_object.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false); hipError_t ihipFree(void* ptr); -//forward declaration of methods required for managed variables +// forward declaration of methods required for managed variables hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); namespace { size_t constexpr strLiteralLength(char const* str) { @@ -67,9 +67,7 @@ struct __ClangOffloadBundleHeader { namespace hip { -uint64_t CodeObject::ElfSize(const void *emi) { - return amd::Elf::getElfSize(emi); -} +uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, bool& sramEccSupported) { @@ -375,10 +373,10 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, } // This will be moved to COMGR eventually -hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, - const void ** image, const std::vector& device_names, - std::vector>& code_objs) { - +hipError_t CodeObject::ExtractCodeObjectFromFile( + amd::Os::FileDesc fdesc, size_t fsize, const void** image, + const std::vector& device_names, + std::vector>& code_objs) { hipError_t hip_error = hipSuccess; if (fdesc < 0) { @@ -386,8 +384,8 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t } // Map the file to memory, with offset 0. - //file will be unmapped in ModuleUnload - //const void* image = nullptr; + // file will be unmapped in ModuleUnload + // const void* image = nullptr; if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) { return hipErrorInvalidValue; } @@ -399,11 +397,9 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t } // This will be moved to COMGR eventually -hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data, - const std::vector& device_names, - std::vector>& code_objs, - std::string& uri) { - +hipError_t CodeObject::ExtractCodeObjectFromMemory( + const void* data, const std::vector& device_names, + std::vector>& code_objs, std::string& uri) { // Get the URI from memory if (!amd::Os::GetURIFromMemory(data, 0, uri)) { return hipErrorInvalidValue; @@ -413,9 +409,9 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data, } // This will be moved to COMGR eventually -hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data, - const std::vector& agent_triple_target_ids, - std::vector>& code_objs) { +hipError_t CodeObject::extractCodeObjectFromFatBinary( + const void* data, const std::vector& agent_triple_target_ids, + std::vector>& code_objs) { std::string magic((const char*)data, bundle_magic_string_size); if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { return hipErrorInvalidKernelFile; @@ -492,12 +488,11 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data, } hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { - amd::ScopedLock lock(dclock_); // Number of devices = 1 in dynamic code object fb_info_ = new FatBinaryInfo(fname, image); - std::vector devices = { g_devices[ihipGetDevice()] }; + std::vector devices = {g_devices[ihipGetDevice()]}; IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices)); // No Lazy loading for DynCO @@ -512,12 +507,12 @@ hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { return hipSuccess; } -//Dynamic Code Object +// Dynamic Code Object DynCO::~DynCO() { amd::ScopedLock lock(dclock_); for (auto& elem : vars_) { - if(elem.second->getVarKind() == Var::DVK_Managed) { + if (elem.second->getVarKind() == Var::DVK_Managed) { hipError_t err = ihipFree(elem.second->getManagedVarPtr()); assert(err == hipSuccess); } @@ -553,7 +548,7 @@ hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) { CheckDeviceIdMatch(); - if(hfunc == nullptr) { + if (hfunc == nullptr) { return hipErrorInvalidValue; } @@ -655,9 +650,8 @@ hipError_t DynCO::populateDynGlobalFuncs() { amd::ScopedLock lock(dclock_); std::vector func_names; - device::Program* dev_program - = fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram( - *hip::getCurrentDevice()->devices()[0]); + device::Program* dev_program = fb_info_->GetProgram(ihipGetDevice()) + ->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); // Get all the global func names from COMGR if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { @@ -672,9 +666,8 @@ hipError_t DynCO::populateDynGlobalFuncs() { return hipSuccess; } -//Static Code Object -StatCO::StatCO() { -} +// Static Code Object +StatCO::StatCO() {} StatCO::~StatCO() { amd::ScopedLock lock(sclock_); @@ -784,7 +777,8 @@ hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, i return it->second->getStatFunc(hfunc, deviceId); } -hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) { +hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, + int deviceId) { amd::ScopedLock lock(sclock_); const auto it = functions_.find(hostFunction); @@ -838,10 +832,9 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) { IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId)); amd::HostQueue* queue = hip::getNullStream(); - if(queue != nullptr) { - err = ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), - var->getManagedVarPtr(), - dvar->size(), hipMemcpyHostToDevice, *queue); + if (queue != nullptr) { + err = ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), var->getManagedVarPtr(), + dvar->size(), hipMemcpyHostToDevice, *queue); } else { ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); return hipErrorInvalidResourceHandle; @@ -851,4 +844,4 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) { } return err; } -}; //namespace: hip +}; // namespace hip diff --git a/hipamd/src/hip_graph.cpp b/hipamd/src/hip_graph.cpp index 9f4b8cade6..c16b76d9d0 100644 --- a/hipamd/src/hip_graph.cpp +++ b/hipamd/src/hip_graph.cpp @@ -30,7 +30,7 @@ thread_local std::vector l_captureStreams; thread_local hipStreamCaptureMode l_streamCaptureMode{hipStreamCaptureModeGlobal}; inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, size_t numDependencies) { + const hipGraphNode_t* pDependencies, size_t numDependencies) { graph->AddNode(graphNode); for (size_t i = 0; i < numDependencies; i++) { if (!hipGraphNode::isNodeValid(pDependencies[i])) { @@ -43,7 +43,6 @@ inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph, hipError_t ihipValidateKernelParams(const hipKernelNodeParams* pNodeParams) { - if (pNodeParams->kernelParams == nullptr) { return hipErrorInvalidValue; } @@ -136,8 +135,9 @@ hipError_t ihipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, return hipErrorInvalidValue; } // The element size must be 1, 2, or 4 bytes - if (pMemsetParams->elementSize != sizeof(int8_t) && pMemsetParams->elementSize != sizeof(int16_t) - && pMemsetParams->elementSize != sizeof(int32_t)) { + if (pMemsetParams->elementSize != sizeof(int8_t) && + pMemsetParams->elementSize != sizeof(int16_t) && + pMemsetParams->elementSize != sizeof(int32_t)) { return hipErrorInvalidValue; } @@ -704,8 +704,9 @@ hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*& hostParams.userData = userData; hip::Stream* s = reinterpret_cast(stream); hipGraphNode_t pGraphNode = new hipGraphHostNode(&hostParams); - hipError_t status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), - s->GetLastCapturedNodes().size()); + hipError_t status = + ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size()); if (status != hipSuccess) { return status; } @@ -729,8 +730,7 @@ hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCap hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode) { HIP_INIT_API(hipThreadExchangeStreamCaptureMode, mode); - if (mode == nullptr || - *mode < hipStreamCaptureModeGlobal || + if (mode == nullptr || *mode < hipStreamCaptureModeGlobal || *mode > hipStreamCaptureModeRelaxed) { HIP_RETURN(hipErrorInvalidValue); } @@ -751,8 +751,7 @@ hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode) if (stream == nullptr) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } - if (mode < hipStreamCaptureModeGlobal || - mode > hipStreamCaptureModeRelaxed) { + if (mode < hipStreamCaptureModeGlobal || mode > hipStreamCaptureModeRelaxed) { HIP_RETURN(hipErrorInvalidValue); } hip::Stream* s = reinterpret_cast(stream); @@ -899,7 +898,7 @@ hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, size_t count, hipMemcpyKind kind) { HIP_INIT_API(hipGraphMemcpyNodeSetParams1D, node, dst, src, count, kind); - if (node == nullptr || dst == nullptr || src == nullptr || count == 0 || src == dst ) { + if (node == nullptr || dst == nullptr || src == nullptr || count == 0 || src == dst) { HIP_RETURN(hipErrorInvalidValue); } @@ -910,8 +909,8 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraph void* dst, const void* src, size_t count, hipMemcpyKind kind) { HIP_INIT_API(hipGraphExecMemcpyNodeSetParams1D, hGraphExec, node, dst, src, count, kind); - if (hGraphExec == nullptr || node == nullptr || dst == nullptr || - src == nullptr || count == 0 || src == dst ) { + if (hGraphExec == nullptr || node == nullptr || dst == nullptr || src == nullptr || count == 0 || + src == dst) { HIP_RETURN(hipErrorInvalidValue); } @@ -996,7 +995,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t g HIP_RETURN(hipErrorInvalidValue); } - //invalid flag check + // invalid flag check if (flags != 0 && flags != hipGraphInstantiateFlagAutoFreeOnLaunch) { HIP_RETURN(hipErrorInvalidValue); } @@ -1248,7 +1247,8 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGra hipGraphNode_t hipErrorNode_out; hipGraphExecUpdateResult updateResult_out; // Check if this instantiated graph is updatable. All restrictions in hipGraphExecUpdate() apply. - hipError_t status = hipGraphExecUpdate(hGraphExec, childGraph, &hipErrorNode_out, &updateResult_out); + hipError_t status = + hipGraphExecUpdate(hGraphExec, childGraph, &hipErrorNode_out, &updateResult_out); if (status != hipSuccess) { HIP_RETURN(status); } @@ -1354,8 +1354,7 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* fr hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, size_t* numEdges) { HIP_INIT_API(hipGraphGetEdges, graph, from, to, numEdges); - if (graph == nullptr || numEdges == nullptr || - (from == nullptr && to != nullptr) || + if (graph == nullptr || numEdges == nullptr || (from == nullptr && to != nullptr) || (to == nullptr && from != nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1463,7 +1462,6 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { } - hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph) { HIP_INIT_API(hipGraphClone, pGraphClone, originalGraph); if (originalGraph == nullptr || pGraphClone == nullptr) { @@ -1501,8 +1499,9 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_ size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(hipGraphAddMemcpyNodeFromSymbol, pGraphNode, graph, pDependencies, numDependencies, dst, symbol, count, offset, kind); - if (graph == nullptr || pGraphNode == nullptr || (numDependencies > 0 && pDependencies == nullptr) - || dst == nullptr || !ihipGraph::isGraphValid(graph)) { + if (graph == nullptr || pGraphNode == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || dst == nullptr || + !ihipGraph::isGraphValid(graph)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1521,7 +1520,7 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_ hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(hipGraphMemcpyNodeSetParamsFromSymbol, node, dst, symbol, count, offset, kind); - if (symbol == nullptr) { + if (symbol == nullptr) { HIP_RETURN(hipErrorInvalidSymbol); } if (node == nullptr || dst == nullptr || count == 0 || symbol == dst) { @@ -1537,7 +1536,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsFromSymbol, hGraphExec, node, dst, symbol, count, offset, kind); - if (symbol == nullptr) { + if (symbol == nullptr) { HIP_RETURN(hipErrorInvalidSymbol); } if (hGraphExec == nullptr || node == nullptr || dst == nullptr || count == 0 || symbol == dst) { @@ -1560,7 +1559,7 @@ hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t HIP_INIT_API(hipGraphAddMemcpyNodeToSymbol, pGraphNode, graph, pDependencies, numDependencies, symbol, src, count, offset, kind); if (pGraphNode == nullptr || graph == nullptr || src == nullptr || - !ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) { + !ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) { HIP_RETURN(hipErrorInvalidValue); } size_t sym_size = 0; @@ -1581,7 +1580,7 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* const void* src, size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(hipGraphMemcpyNodeSetParamsToSymbol, symbol, src, count, offset, kind); - if (symbol == nullptr) { + if (symbol == nullptr) { HIP_RETURN(hipErrorInvalidSymbol); } if (node == nullptr || src == nullptr || count == 0 || symbol == src) { @@ -1599,7 +1598,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hi hipMemcpyKind kind) { HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsToSymbol, hGraphExec, node, symbol, src, count, offset, kind); - if (symbol == nullptr) { + if (symbol == nullptr) { HIP_RETURN(hipErrorInvalidSymbol); } if (hGraphExec == nullptr || src == nullptr || node == nullptr || count == 0 || src == symbol) { @@ -1619,8 +1618,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t gra hipEvent_t event) { HIP_INIT_API(hipGraphAddEventRecordNode, pGraphNode, graph, pDependencies, numDependencies, event); - if (pGraphNode == nullptr || graph == nullptr || (numDependencies > 0 && pDependencies == nullptr) - || event == nullptr) { + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || event == nullptr) { HIP_RETURN(hipErrorInvalidValue); } *pGraphNode = new hipGraphEventRecordNode(event); @@ -1630,8 +1629,7 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t gra hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) { HIP_INIT_API(hipGraphEventRecordNodeGetEvent, node, event_out); - if (node == nullptr || event_out == nullptr || - node->GetType() != hipGraphNodeTypeEventRecord) { + if (node == nullptr || event_out == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) { HIP_RETURN(hipErrorInvalidValue); } reinterpret_cast(node)->GetParams(event_out); @@ -1640,8 +1638,7 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* even hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { HIP_INIT_API(hipGraphEventRecordNodeSetEvent, node, event); - if (node == nullptr || event == nullptr || - node->GetType() != hipGraphNodeTypeEventRecord ) { + if (node == nullptr || event == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) { HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN(reinterpret_cast(node)->SetParams(event)); @@ -1665,8 +1662,8 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph const hipGraphNode_t* pDependencies, size_t numDependencies, hipEvent_t event) { HIP_INIT_API(hipGraphAddEventWaitNode, pGraphNode, graph, pDependencies, numDependencies, event); - if (pGraphNode == nullptr || graph == nullptr || (numDependencies > 0 && pDependencies == nullptr) - || event == nullptr) { + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || event == nullptr) { HIP_RETURN(hipErrorInvalidValue); } *pGraphNode = new hipGraphEventWaitNode(event); @@ -1676,8 +1673,7 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) { HIP_INIT_API(hipGraphEventWaitNodeGetEvent, node, event_out); - if (node == nullptr || event_out == nullptr || - node->GetType() != hipGraphNodeTypeWaitEvent) { + if (node == nullptr || event_out == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) { HIP_RETURN(hipErrorInvalidValue); } reinterpret_cast(node)->GetParams(event_out); @@ -1686,8 +1682,7 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_ hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { HIP_INIT_API(hipGraphEventWaitNodeSetEvent, node, event); - if (node == nullptr || event == nullptr || - node->GetType() != hipGraphNodeTypeWaitEvent) { + if (node == nullptr || event == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) { HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN(reinterpret_cast(node)->SetParams(event)); @@ -1759,7 +1754,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, HIP_INIT_API(hipGraphExecUpdate, hGraphExec, hGraph, hErrorNode_out, updateResult_out); // parameter check if (hGraphExec == nullptr || hGraph == nullptr || hErrorNode_out == nullptr || - updateResult_out == nullptr) { + updateResult_out == nullptr) { HIP_RETURN(hipErrorInvalidValue); } diff --git a/hipamd/src/hip_hcc.def.in b/hipamd/src/hip_hcc.def.in index 4eba25dfd4..fcb4504a9c 100644 --- a/hipamd/src/hip_hcc.def.in +++ b/hipamd/src/hip_hcc.def.in @@ -267,6 +267,11 @@ hiprtcGetProgramLogSize hiprtcGetCode hiprtcGetCodeSize hiprtcGetErrorString +hiprtcLinkCreate +hiprtcLinkAddFile +hiprtcLinkAddData +hiprtcLinkComplete +hiprtcLinkDestroy hipMipmappedArrayCreate hipMallocMipmappedArray hipMipmappedArrayDestroy diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index 07897c75a8..2adb0ddc26 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -208,6 +208,11 @@ global: hiprtcGetErrorString; hiprtcAddNameExpression; hiprtcVersion; + hiprtcLinkCreate; + hiprtcLinkAddFile; + hiprtcLinkAddData; + hiprtcLinkComplete; + hiprtcLinkDestroy; hipBindTexture; hipBindTexture2D; hipBindTextureToArray; diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 1fe0fed6ff..6d9b5885ad 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -29,24 +29,15 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size); -extern hipError_t ihipLaunchKernel(const void* hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent, - int flags); +extern hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); const std::string& FunctionName(const hipFunction_t f) { return hip::DeviceFunc::asFunction(f)->kernel()->name(); } -static uint64_t ElfSize(const void *emi) -{ - return amd::Elf::getElfSize(emi); -} +static uint64_t ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } hipError_t hipModuleUnload(hipModule_t hmod) { HIP_INIT_API(hipModuleUnload, hmod); @@ -60,31 +51,28 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { HIP_RETURN(PlatformState::instance().loadModule(module, fname)); } -hipError_t hipModuleLoadData(hipModule_t *module, const void *image) -{ +hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { HIP_INIT_API(hipModuleLoadData, module, image); HIP_RETURN(PlatformState::instance().loadModule(module, 0, image)); } -hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, - unsigned int numOptions, hipJitOption* options, - void** optionsValues) -{ +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionsValues) { /* TODO: Pass options to Program */ HIP_INIT_API(hipModuleLoadDataEx, module, image); HIP_RETURN(PlatformState::instance().loadModule(module, 0, image)); } -extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data, - const std::vector& devices, - std::vector>& code_objs); +extern hipError_t __hipExtractCodeObjectFromFatBinary( + const void* data, const std::vector& devices, + std::vector>& code_objs); -hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) { +hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); - if(hfunc == nullptr || name == nullptr) { + if (hfunc == nullptr || name == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -96,8 +84,8 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch HIP_RETURN(hipSuccess); } -hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) -{ +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name) { HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); if (dptr == nullptr || bytes == nullptr) { @@ -134,13 +122,13 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc HIP_RETURN(hipErrorInvalidDeviceFunction); } - const device::Kernel::WorkGroupInfo* wrkGrpInfo - = kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); + const device::Kernel::WorkGroupInfo* wrkGrpInfo = + kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); if (wrkGrpInfo == nullptr) { HIP_RETURN(hipErrorMissingConfiguration); } - switch(attrib) { + switch (attrib) { case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: *value = static_cast(wrkGrpInfo->localMemSize_); break; @@ -157,7 +145,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc *value = static_cast(wrkGrpInfo->usedVGPRs_); break; case HIP_FUNC_ATTRIBUTE_PTX_VERSION: - *value = 30; // Defaults to 3.0 as HCC + *value = 30; // Defaults to 3.0 as HCC break; case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: *value = static_cast(kernel->signature().version()); @@ -171,15 +159,14 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: *value = 0; break; - default: - HIP_RETURN(hipErrorInvalidValue); + default: + HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN(hipSuccess); } -hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) -{ +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(hipFuncGetAttributes, attr, func); HIP_RETURN_ONFAIL(PlatformState::instance().getStatFuncAttr(attr, func, ihipGetDevice())); @@ -187,7 +174,7 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) HIP_RETURN(hipSuccess); } -hipError_t hipFuncSetAttribute ( const void* func, hipFuncAttribute attr, int value ) { +hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) { HIP_INIT_API(hipFuncSetAttribute, func, attr, value); // No way to set function attribute yet. @@ -195,8 +182,7 @@ hipError_t hipFuncSetAttribute ( const void* func, hipFuncAttribute attr, int va HIP_RETURN(hipSuccess); } -hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) { - +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig); // No way to set cache config yet. @@ -204,7 +190,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) HIP_RETURN(hipSuccess); } -hipError_t hipFuncSetSharedMemConfig ( const void* func, hipSharedMemConfig config) { +hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) { HIP_INIT_API(hipFuncSetSharedMemConfig, func, config); // No way to set Shared Memory config function yet. @@ -213,11 +199,10 @@ hipError_t hipFuncSetSharedMemConfig ( const void* func, hipSharedMemConfig conf } hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX, - uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, - uint32_t blockDimX, uint32_t blockDimY, - uint32_t blockDimZ, uint32_t sharedMemBytes, - void** kernelParams, void** extra, int deviceId, - uint32_t params = 0) { + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, void** kernelParams, void** extra, + int deviceId, uint32_t params = 0) { if (f == nullptr) { LogPrintfError("%s", "Function passed is null"); return hipErrorInvalidImage; @@ -265,9 +250,9 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX, int max_blocks_per_grid = 0; int best_block_size = 0; int block_size = blockDimX * blockDimY * blockDimZ; - hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&num_blocks, &max_blocks_per_grid, - &best_block_size, *device, f, - block_size, sharedMemBytes, true); + hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, *device, f, block_size, sharedMemBytes, + true); if (err != hipSuccess) { return err; } @@ -374,9 +359,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, amd::Kernel* kernel = function->kernel(); amd::ScopedLock lock(function->dflock_); - hipError_t status = - ihipLaunchKernel_validate(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, - blockDimY, blockDimZ, sharedMemBytes, kernelParams, extra, deviceId, params); + hipError_t status = ihipLaunchKernel_validate( + f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, kernelParams, extra, deviceId, params); if (status != hipSuccess) { return status; } @@ -409,16 +394,12 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, return hipSuccess; } -hipError_t hipModuleLaunchKernel(hipFunction_t f, - uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, - uint32_t sharedMemBytes, hipStream_t hStream, - void **kernelParams, void **extra) -{ - HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, - kernelParams, extra); +hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, + uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, + void** kernelParams, void** extra) { + HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, + blockDimZ, sharedMemBytes, hStream, kernelParams, extra); size_t globalWorkSizeX = static_cast(gridDimX) * blockDimX; size_t globalWorkSizeY = static_cast(gridDimY) * blockDimY; size_t globalWorkSizeZ = static_cast(gridDimZ) * blockDimZ; @@ -427,11 +408,10 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, globalWorkSizeZ > std::numeric_limits::max()) { HIP_RETURN(hipErrorInvalidConfiguration); } - HIP_RETURN(ihipModuleLaunchKernel(f, static_cast(globalWorkSizeX), - static_cast(globalWorkSizeY), - static_cast(globalWorkSizeZ), - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr)); + HIP_RETURN(ihipModuleLaunchKernel( + f, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), blockDimX, blockDimY, blockDimZ, sharedMemBytes, + hStream, kernelParams, extra, nullptr, nullptr)); } hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, @@ -439,106 +419,77 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, void** extra, - hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) -{ + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, - localWorkSizeX, localWorkSizeY, localWorkSizeZ, - sharedMemBytes, hStream, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags); - HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, - localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, + hStream, kernelParams, extra, startEvent, stopEvent, flags)); } - hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, - uint32_t blockDimX, uint32_t blockDimY, - uint32_t blockDimZ, size_t sharedMemBytes, - hipStream_t hStream, void** kernelParams, void** extra, - hipEvent_t startEvent, - hipEvent_t stopEvent) -{ + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) { HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, - kernelParams, extra, startEvent, stopEvent); + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, + startEvent, stopEvent); - HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, + extra, startEvent, stopEvent)); } hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, - uint32_t blockDimX, uint32_t blockDimY, - uint32_t blockDimZ, size_t sharedMemBytes, - hipStream_t hStream, void** kernelParams, void** extra, - hipEvent_t startEvent, - hipEvent_t stopEvent) -{ + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) { HIP_INIT_API(hipModuleLaunchKernelExt, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, - kernelParams, extra, startEvent, stopEvent); + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, + startEvent, stopEvent); - HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ, - sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent)); + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, + extra, startEvent, stopEvent)); } -extern "C" hipError_t hipLaunchKernel_common(const void *hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream) -{ +extern "C" hipError_t hipLaunchKernel_common(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, + hipStream_t stream) { STREAM_CAPTURE(hipLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes); - return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, - nullptr, nullptr, 0); + return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr, + nullptr, 0); } -extern "C" hipError_t hipLaunchKernel(const void *hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream) -{ +extern "C" hipError_t hipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream) { HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream)); } -extern "C" hipError_t hipLaunchKernel_spt(const void *hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream) -{ +extern "C" hipError_t hipLaunchKernel_spt(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream) { HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); PER_THREAD_DEFAULT_STREAM(stream); HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream)); } -extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent, - int flags) -{ - HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); - HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags)); +extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags) { + HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, + startEvent, stopEvent, flags)); } -hipError_t hipLaunchCooperativeKernel_common(const void* f, - dim3 gridDim, dim3 blockDim, - void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) -{ +hipError_t hipLaunchCooperativeKernel_common(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream) { if (!hip::isValid(hStream)) { HIP_RETURN(hipErrorInvalidValue); } @@ -556,34 +507,30 @@ hipError_t hipLaunchCooperativeKernel_common(const void* f, } return ihipModuleLaunchKernel(func, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), - static_cast(globalWorkSizeZ), - blockDim.x, blockDim.y, blockDim.z, - sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0, - amd::NDRangeKernelCommand::CooperativeGroups); + static_cast(globalWorkSizeZ), blockDim.x, blockDim.y, + blockDim.z, sharedMemBytes, hStream, kernelParams, nullptr, nullptr, + nullptr, 0, amd::NDRangeKernelCommand::CooperativeGroups); } -hipError_t hipLaunchCooperativeKernel(const void* f, - dim3 gridDim, dim3 blockDim, - void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) -{ - HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, - sharedMemBytes, hStream); - HIP_RETURN(hipLaunchCooperativeKernel_common(f,gridDim, blockDim, kernelParams, sharedMemBytes, hStream)); +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream) { + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream); + HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, + hStream)); } -hipError_t hipLaunchCooperativeKernel_spt(const void* f, - dim3 gridDim, dim3 blockDim, - void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream) -{ - HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, - sharedMemBytes, hStream); +hipError_t hipLaunchCooperativeKernel_spt(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream) { + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream); PER_THREAD_DEFAULT_STREAM(hStream); - HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, hStream)); + HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, + hStream)); } -hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags, uint32_t extFlags) -{ +hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags, uint32_t extFlags) { int numActiveGPUs = 0; hipError_t result = hipSuccess; result = ihipDeviceGetCount(&numActiveGPUs); @@ -594,7 +541,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // Validate all streams passed by user for (int i = 0; i < numDevices; ++i) { if (!hip::isValid(launchParamsList[i].stream)) { - return hipErrorInvalidValue; + return hipErrorInvalidValue; } } @@ -605,8 +552,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL uint32_t blockDims = 0; const hipLaunchParams& launch = launchParamsList[i]; blockDims = launch.blockDim.x * launch.blockDim.y * launch.blockDim.z; - allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z * - blockDims; + allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z * blockDims; // Make sure block dimensions are valid if (0 == blockDims) { @@ -633,7 +579,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { for (int i = 0; i < numDevices; ++i) { amd::HostQueue* queue = - reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); + reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); queue->finish(); } } @@ -666,11 +612,11 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL globalWorkSizeZ > std::numeric_limits::max()) { HIP_RETURN(hipErrorInvalidConfiguration); } - result = ihipModuleLaunchKernel(func, static_cast(globalWorkSizeX), - static_cast(globalWorkSizeY), static_cast(globalWorkSizeZ), - launch.blockDim.x, launch.blockDim.y, launch.blockDim.z, - launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr, - flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice); + result = ihipModuleLaunchKernel( + func, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), launch.blockDim.x, launch.blockDim.y, + launch.blockDim.z, launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr, + flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice); if (result != hipSuccess) { break; } @@ -681,7 +627,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { for (int i = 0; i < numDevices; ++i) { amd::HostQueue* queue = - reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); + reinterpret_cast(launchParamsList[i].stream)->asHostQueue(); queue->finish(); } } @@ -689,18 +635,18 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL return result; } -hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) -{ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags) { HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); - HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, - (amd::NDRangeKernelCommand::CooperativeGroups | - amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups))); + HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice( + launchParamsList, numDevices, flags, + (amd::NDRangeKernelCommand::CooperativeGroups | + amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups))); } -hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) { +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags) { HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0)); @@ -720,7 +666,7 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const HIP_RETURN(hipErrorNotSupported); } - /* Get address and size for the global symbol */ + /* Get address and size for the global symbol */ if (hipSuccess != PlatformState::instance().getDynTexRef(name, hmod, texRef)) { LogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", name, hmod); HIP_RETURN(hipErrorNotFound); diff --git a/hipamd/src/hip_platform.cpp b/hipamd/src/hip_platform.cpp index 9c791f3a10..db5a98e04a 100644 --- a/hipamd/src/hip_platform.cpp +++ b/hipamd/src/hip_platform.cpp @@ -27,12 +27,12 @@ #include -constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" +constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" thread_local std::stack execStack_; -PlatformState* PlatformState::platform_; // Initiaized as nullptr by default +PlatformState* PlatformState::platform_; // Initiaized as nullptr by default -//forward declaration of methods required for __hipRegisrterManagedVar +// forward declaration of methods required for __hipRegisrterManagedVar hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false); @@ -40,26 +40,23 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin struct __CudaFatBinaryWrapper { unsigned int magic; unsigned int version; - void* binary; - void* dummy1; + void* binary; + void* dummy1; }; -hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, - hipModule_t hmod, const char* name); +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, hipDeviceptr_t* dptr, size_t* bytes); -extern hipError_t ihipModuleLaunchKernel(hipFunction_t f, - uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, - uint32_t sharedMemBytes, hipStream_t hStream, - void **kernelParams, void **extra, - hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0, - uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, - uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0); -static bool isCompatibleCodeObject(const std::string& codeobj_target_id, - const char* device_name) { +extern hipError_t ihipModuleLaunchKernel( + hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, + uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, + void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, + uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0); +static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) { // Workaround for device name mismatch. // Device name may contain feature strings delimited by '+', e.g. // gfx900+xnack. Currently HIP-Clang does not include feature strings @@ -73,8 +70,7 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id, return codeobj_target_id == short_name; } -extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) -{ +extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) { const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic, @@ -84,21 +80,14 @@ extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) return PlatformState::instance().addFatBinary(fbwrapper->binary); } -extern "C" void __hipRegisterFunction( - hip::FatBinaryInfo** modules, - const void* hostFunction, - char* deviceFunction, - const char* deviceName, - unsigned int threadLimit, - uint3* tid, - uint3* bid, - dim3* blockDim, - dim3* gridDim, - int* wSize) { - static int enable_deferred_loading { []() { - char *var = getenv("HIP_ENABLE_DEFERRED_LOADING"); +extern "C" void __hipRegisterFunction(hip::FatBinaryInfo** modules, const void* hostFunction, + char* deviceFunction, const char* deviceName, + unsigned int threadLimit, uint3* tid, uint3* bid, + dim3* blockDim, dim3* gridDim, int* wSize) { + static int enable_deferred_loading{[]() { + char* var = getenv("HIP_ENABLE_DEFERRED_LOADING"); return var ? atoi(var) : 1; - }() }; + }()}; hipError_t hip_error = hipSuccess; hip::Function* func = new hip::Function(std::string(deviceName), modules); hip_error = PlatformState::instance().registerStatFunction(hostFunction, func); @@ -121,42 +110,45 @@ extern "C" void __hipRegisterFunction( // track of the value of the device side global variable between kernel // executions. extern "C" void __hipRegisterVar( - hip::FatBinaryInfo** modules, // The device modules containing code object - void* var, // The shadow variable in host code - char* hostVar, // Variable name in host code - char* deviceVar, // Variable name in device code - int ext, // Whether this variable is external - size_t size, // Size of the variable - int constant, // Whether this variable is constant - int global) // Unknown, always 0 + hip::FatBinaryInfo** modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + size_t size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 { - hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable, size, 0, 0, modules); + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable, + size, 0, 0, modules); hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr); guarantee((err == hipSuccess), "Cannot register Static Global Var"); } -extern "C" void __hipRegisterSurface(hip::FatBinaryInfo** modules, // The device modules containing code object - void* var, // The shadow variable in host code - char* hostVar, // Variable name in host code - char* deviceVar, // Variable name in device code - int type, int ext) { - hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface, sizeof(surfaceReference), 0, 0, modules); +extern "C" void __hipRegisterSurface( + hip::FatBinaryInfo** modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int ext) { + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface, + sizeof(surfaceReference), 0, 0, modules); hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr); guarantee((err == hipSuccess), "Cannot register Static Glbal Var"); } -extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip module returned from __hipRegisterFatbinary - void **pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p align - // HIP runtime allocates such managed memory and assign it to \p pointer - void *init_value, // Initial value to be copied into \p pointer - const char *name, // Name of the variable in code object - size_t size, - unsigned align) { +extern "C" void __hipRegisterManagedVar( + void* hipModule, // Pointer to hip module returned from __hipRegisterFatbinary + void** pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p + // align HIP runtime allocates such managed memory and assign it to \p pointer + void* init_value, // Initial value to be copied into \p pointer + const char* name, // Name of the variable in code object + size_t size, unsigned align) { HIP_INIT_VOID(); hipError_t status = ihipMallocManaged(pointer, size, align); - if( status == hipSuccess) { + if (status == hipSuccess) { amd::HostQueue* queue = hip::getNullStream(); - if(queue != nullptr) { + if (queue != nullptr) { status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue); guarantee((status == hipSuccess), "Error during memcpy to managed memory!"); } else { @@ -171,28 +163,25 @@ extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip mod guarantee((status == hipSuccess), "Cannot register Static Managed Var"); } -extern "C" void __hipRegisterTexture(hip::FatBinaryInfo** modules, // The device modules containing code object - void* var, // The shadow variable in host code - char* hostVar, // Variable name in host code - char* deviceVar, // Variable name in device code - int type, int norm, int ext) { - hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture, sizeof(textureReference), 0, 0, modules); +extern "C" void __hipRegisterTexture( + hip::FatBinaryInfo** modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int norm, int ext) { + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture, + sizeof(textureReference), 0, 0, modules); hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr); guarantee((err == hipSuccess), "Cannot register Static Global Var"); } -extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules) -{ +extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules) { hipError_t err = PlatformState::instance().removeFatBinary(modules); guarantee((err == hipSuccess), "Cannot Unregister Fat Binary"); } -extern "C" hipError_t hipConfigureCall( - dim3 gridDim, - dim3 blockDim, - size_t sharedMem, - hipStream_t stream) -{ +extern "C" hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream); PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); @@ -200,12 +189,8 @@ extern "C" hipError_t hipConfigureCall( HIP_RETURN(hipSuccess); } -extern "C" hipError_t __hipPushCallConfiguration( - dim3 gridDim, - dim3 blockDim, - size_t sharedMem, - hipStream_t stream) -{ +extern "C" hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream); PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); @@ -213,10 +198,8 @@ extern "C" hipError_t __hipPushCallConfiguration( HIP_RETURN(hipSuccess); } -extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, - dim3 *blockDim, - size_t *sharedMem, - hipStream_t *stream) { +extern "C" hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem, + hipStream_t* stream) { HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream); ihipExec_t exec; @@ -229,11 +212,7 @@ extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim, HIP_RETURN(hipSuccess); } -extern "C" hipError_t hipSetupArgument( - const void *arg, - size_t size, - size_t offset) -{ +extern "C" hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) { HIP_INIT_API(hipSetupArgument, arg, size, offset); PlatformState::instance().setupArgument(arg, size, offset); @@ -241,15 +220,14 @@ extern "C" hipError_t hipSetupArgument( HIP_RETURN(hipSuccess); } -extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) -{ +extern "C" hipError_t hipLaunchByPtr(const void* hostFunction) { HIP_INIT_API(hipLaunchByPtr, hostFunction); ihipExec_t exec; PlatformState::instance().popExec(exec); hip::Stream* stream = reinterpret_cast(exec.hStream_); - int deviceId = (stream != nullptr)? stream->DeviceId() : ihipGetDevice(); + int deviceId = (stream != nullptr) ? stream->DeviceId() : ihipGetDevice(); if (deviceId == -1) { LogPrintfError("Wrong DeviceId: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); @@ -262,16 +240,12 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) } size_t size = exec.arguments_.size(); - void *extra[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END - }; + void* extra[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; - HIP_RETURN(hipModuleLaunchKernel(func, - exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, - exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, - exec.sharedMem_, exec.hStream_, nullptr, extra)); + HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, + exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, + exec.sharedMem_, exec.hStream_, nullptr, extra)); } hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { @@ -283,7 +257,8 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { } size_t sym_size = 0; - HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size)); + HIP_RETURN_ONFAIL( + PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size)); HIP_RETURN(hipSuccess, *devPtr); } @@ -295,14 +270,14 @@ hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { HIP_RETURN(hipErrorInvalidValue); } hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr)); + HIP_RETURN_ONFAIL( + PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr)); HIP_RETURN(hipSuccess, *sizePtr); } hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, - hipDeviceptr_t* dptr, size_t* bytes) -{ + hipDeviceptr_t* dptr, size_t* bytes) { HIP_INIT(); /* Get Device Program pointer*/ @@ -325,10 +300,8 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor namespace hip_impl { hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, - const amd::Device& device, hipFunction_t func, int inputBlockSize, - size_t dynamicSMemSize, bool bCalcPotentialBlkSz) -{ + int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device, + hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) { hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func); const amd::Kernel& kernel = *function->kernel(); @@ -340,14 +313,12 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( *bestBlockSize = 0; // Make sure the requested block size is smaller than max supported if (inputBlockSize > int(device.info().maxWorkGroupSize_)) { - *maxBlocksPerCU = 0; - *numBlocksPerGrid = 0; - return hipSuccess; + *maxBlocksPerCU = 0; + *numBlocksPerGrid = 0; + return hipSuccess; } - } - else { - if (inputBlockSize > int(device.info().maxWorkGroupSize_) || - inputBlockSize <= 0) { + } else { + if (inputBlockSize > int(device.info().maxWorkGroupSize_) || inputBlockSize <= 0) { // The user wrote the kernel to work with a workgroup size // bigger than this hardware can support. Or they do not care // about the size So just assume its maximum size is @@ -367,18 +338,15 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( size_t maxVGPRs; uint32_t VgprGranularity; if (device.isa().versionMajor() <= 9) { - if (device.isa().versionMajor() == 9 && - device.isa().versionMinor() == 0 && + if (device.isa().versionMajor() == 9 && device.isa().versionMinor() == 0 && device.isa().versionStepping() == 10) { maxVGPRs = 512; VgprGranularity = 8; - } - else { + } else { maxVGPRs = 256; VgprGranularity = 4; } - } - else { + } else { maxVGPRs = 1024; VgprGranularity = 8; } @@ -391,12 +359,10 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( size_t maxSGPRs; if (device.isa().versionMajor() < 8) { maxSGPRs = 512; - } - else if (device.isa().versionMajor() < 10) { + } else if (device.isa().versionMajor() < 10) { maxSGPRs = 800; - } - else { - maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves + } else { + maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves } const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); GprWaves = std::min(VgprWaves, SgprWaves); @@ -425,7 +391,8 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( // the maximum available block size for this kernel, which could have come from the // user. e.g., if the user indicates the maximum block size is 64 threads, but we // calculate that 128 threads can fit in each CU, we have to give up and return 64. - *bestBlockSize = std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_)); + *bestBlockSize = + std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_)); // If the best block size is smaller than the block size used to fit the maximum, // then we need to make the grid bigger for full occupancy. const int bestBlocksPerCU = alu_limited_threads / (*bestBlockSize); @@ -434,13 +401,11 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( return hipSuccess; } -} +} // namespace hip_impl extern "C" { -hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, - const void* f, size_t dynSharedMemPerBlk, - int blockSizeLimit) -{ +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f, + size_t dynSharedMemPerBlk, int blockSizeLimit) { HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); if ((gridSize == nullptr) || (blockSize == nullptr)) { HIP_RETURN(hipErrorInvalidValue); @@ -455,7 +420,8 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, int num_blocks = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit, dynSharedMemPerBlk,true); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit, + dynSharedMemPerBlk, true); if (ret == hipSuccess) { *blockSize = best_block_size; *gridSize = max_blocks_per_grid; @@ -463,10 +429,8 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, HIP_RETURN(ret); } -hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - int blockSizeLimit) -{ +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, + size_t dynSharedMemPerBlk, int blockSizeLimit) { HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); if ((gridSize == nullptr) || (blockSize == nullptr)) { HIP_RETURN(hipErrorInvalidValue); @@ -476,7 +440,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize int num_blocks = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, + dynSharedMemPerBlk, true); if (ret == hipSuccess) { *blockSize = best_block_size; *gridSize = max_blocks_per_grid; @@ -485,10 +450,12 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize } hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - int blockSizeLimit, unsigned int flags) -{ - HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags); + hipFunction_t f, + size_t dynSharedMemPerBlk, + int blockSizeLimit, + unsigned int flags) { + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, + blockSizeLimit, flags); if ((gridSize == nullptr) || (blockSize == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -497,7 +464,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* int num_blocks = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, + dynSharedMemPerBlk, true); if (ret == hipSuccess) { *blockSize = best_block_size; *gridSize = max_blocks_per_grid; @@ -505,10 +473,11 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* HIP_RETURN(ret); } -hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) -{ - HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk); +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f, + int blockSize, + size_t dynSharedMemPerBlk) { + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, + dynSharedMemPerBlk); if (numBlocks == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -518,16 +487,16 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, int max_blocks_per_grid = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, + false); *numBlocks = num_blocks; HIP_RETURN(ret); } -hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, - hipFunction_t f, int blockSize, - size_t dynSharedMemPerBlk, unsigned int flags) -{ - HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags); +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, + dynSharedMemPerBlk, flags); if (numBlocks == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -537,14 +506,14 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numB int max_blocks_per_grid = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, + false); *numBlocks = num_blocks; HIP_RETURN(ret); } -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - const void* f, int blockSize, size_t dynamicSMemSize) -{ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f, + int blockSize, size_t dynamicSMemSize) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize); if (numBlocks == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -562,16 +531,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, int max_blocks_per_grid = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, + false); *numBlocks = num_blocks; HIP_RETURN(ret); } -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, - const void* f, - int blockSize, size_t dynamicSMemSize, unsigned int flags) -{ - HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags); +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* f, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) { + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, + flags); if (numBlocks == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -588,7 +559,8 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, int max_blocks_per_grid = 0; int best_block_size = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( - &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false); + &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, + false); *numBlocks = num_blocks; HIP_RETURN(ret); } @@ -599,68 +571,48 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, namespace hip_impl { -void hipLaunchKernelGGLImpl( - uintptr_t function_address, - const dim3& numBlocks, - const dim3& dimBlocks, - uint32_t sharedMemBytes, - hipStream_t stream, - void** kernarg) -{ +void hipLaunchKernelGGLImpl(uintptr_t function_address, const dim3& numBlocks, + const dim3& dimBlocks, uint32_t sharedMemBytes, hipStream_t stream, + void** kernarg) { HIP_INIT_VOID(); hip::Stream* s = reinterpret_cast(stream); - int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice(); + int deviceId = (s != nullptr) ? s->DeviceId() : ihipGetDevice(); if (deviceId == -1) { LogPrintfError("Wrong Device Id: %d \n", deviceId); } hipFunction_t func = nullptr; - hipError_t hip_error = - PlatformState::instance().getStatFunc(&func, - reinterpret_cast(function_address), - deviceId); + hipError_t hip_error = PlatformState::instance().getStatFunc( + &func, reinterpret_cast(function_address), deviceId); if ((hip_error != hipSuccess) || (func == nullptr)) { LogPrintfError("Cannot find the static function: 0x%x", function_address); } - hip_error = hipModuleLaunchKernel(func, - numBlocks.x, numBlocks.y, numBlocks.z, - dimBlocks.x, dimBlocks.y, dimBlocks.z, - sharedMemBytes, stream, nullptr, kernarg); + hip_error = + hipModuleLaunchKernel(func, numBlocks.x, numBlocks.y, numBlocks.z, dimBlocks.x, dimBlocks.y, + dimBlocks.z, sharedMemBytes, stream, nullptr, kernarg); assert(hip_error == hipSuccess); } -void hipLaunchCooperativeKernelGGLImpl( - uintptr_t function_address, - const dim3& numBlocks, - const dim3& dimBlocks, - uint32_t sharedMemBytes, - hipStream_t stream, - void** kernarg) -{ +void hipLaunchCooperativeKernelGGLImpl(uintptr_t function_address, const dim3& numBlocks, + const dim3& dimBlocks, uint32_t sharedMemBytes, + hipStream_t stream, void** kernarg) { HIP_INIT_VOID(); - hipError_t err = hipLaunchCooperativeKernel(reinterpret_cast(function_address), - numBlocks, dimBlocks, kernarg, sharedMemBytes, stream); + hipError_t err = hipLaunchCooperativeKernel(reinterpret_cast(function_address), numBlocks, + dimBlocks, kernarg, sharedMemBytes, stream); assert(err == hipSuccess); } -} +} // namespace hip_impl -#endif // defined(ATI_OS_LINUX) +#endif // defined(ATI_OS_LINUX) -hipError_t ihipLaunchKernel(const void* hostFunction, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMemBytes, - hipStream_t stream, - hipEvent_t startEvent, - hipEvent_t stopEvent, - int flags) -{ - hipFunction_t func = nullptr; +hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, void** args, + size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags) { + hipFunction_t func = nullptr; int deviceId = hip::Stream::DeviceId(stream); hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId); if ((hip_error != hipSuccess) || (func == nullptr)) { @@ -674,19 +626,31 @@ hipError_t ihipLaunchKernel(const void* hostFunction, globalWorkSizeZ > std::numeric_limits::max()) { HIP_RETURN(hipErrorInvalidConfiguration); } - HIP_RETURN(ihipModuleLaunchKernel(func, static_cast(globalWorkSizeX), - static_cast(globalWorkSizeY), - static_cast(globalWorkSizeZ), - blockDim.x, blockDim.y, blockDim.z, - sharedMemBytes, stream, args, nullptr, startEvent, stopEvent, - flags)); + HIP_RETURN(ihipModuleLaunchKernel( + func, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), blockDim.x, blockDim.y, blockDim.z, sharedMemBytes, + stream, args, nullptr, startEvent, stopEvent, flags)); } // conversion routines between float and half precision -static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } +static inline std::uint32_t f32_as_u32(float f) { + union { + float f; + std::uint32_t u; + } v; + v.f = f; + return v.u; +} -static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } +static inline float u32_as_f32(std::uint32_t u) { + union { + float f; + std::uint32_t u; + } v; + v.u = u; + return v.f; +} static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } @@ -694,15 +658,14 @@ static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l // half float, the f16 is in the low 16 bits of the input argument static inline float __convert_half_to_float(std::uint32_t a) noexcept { - std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; - std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U; + std::uint32_t v = + f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U) /*0x1.0p+112f*/) + 0x38000000U; u = (a & 0x7fff) != 0 ? v : u; - return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/; - + return u32_as_f32(u) * u32_as_f32(0x07800000U) /*0x1.0p-112f*/; } // float half with nearest even rounding @@ -714,7 +677,7 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept { std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0); std::uint32_t n = ((std::uint32_t)e << 12) | m; std::uint32_t s = (u >> 16) & 0x8000; - int b = clamp_int(1-e, 0, 13); + int b = clamp_int(1 - e, 0, 13); std::uint32_t d = (0x1000 | m) >> b; d |= (d << b) != (0x1000 | m); std::uint32_t v = e < 1 ? d : n; @@ -726,24 +689,25 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept { extern "C" #if !defined(_MSC_VER) -__attribute__((weak)) + __attribute__((weak)) #endif -float __gnu_h2f_ieee(unsigned short h){ - return __convert_half_to_float((std::uint32_t) h); + float + __gnu_h2f_ieee(unsigned short h) { + return __convert_half_to_float((std::uint32_t)h); } extern "C" #if !defined(_MSC_VER) -__attribute__((weak)) + __attribute__((weak)) #endif -unsigned short __gnu_f2h_ieee(float f){ + unsigned short + __gnu_f2h_ieee(float f) { return (unsigned short)__convert_float_to_half(f); } -void PlatformState::init() -{ +void PlatformState::init() { amd::ScopedLock lock(lock_); - if(initialized_ || g_devices.empty()) { + if (initialized_ || g_devices.empty()) { return; } initialized_ = true; @@ -751,18 +715,18 @@ void PlatformState::init() hipError_t err = digestFatBinary(it.first, it.second); assert(err == hipSuccess); } - for (auto &it : statCO_.vars_) { + for (auto& it : statCO_.vars_) { it.second->resize_dVar(g_devices.size()); } - for (auto &it : statCO_.functions_) { + for (auto& it : statCO_.functions_) { it.second->resize_dFunc(g_devices.size()); } } -hipError_t PlatformState::loadModule(hipModule_t *module, const char* fname, const void* image) { +hipError_t PlatformState::loadModule(hipModule_t* module, const char* fname, const void* image) { amd::ScopedLock lock(lock_); - if(module == nullptr) { + if (module == nullptr) { return hipErrorInvalidValue; } @@ -809,7 +773,7 @@ hipError_t PlatformState::unloadModule(hipModule_t hmod) { } hipError_t PlatformState::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod, - const char* func_name) { + const char* func_name) { amd::ScopedLock lock(lock_); auto it = dynCO_map_.find(hmod); @@ -828,7 +792,7 @@ hipError_t PlatformState::getDynGlobalVar(const char* hostVar, hipModule_t hmod, hipDeviceptr_t* dev_ptr, size_t* size_ptr) { amd::ScopedLock lock(lock_); - if(hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) { + if (hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) { return hipErrorInvalidValue; } @@ -880,7 +844,8 @@ hipError_t PlatformState::getDynTexGlobalVar(textureReference* texRef, hipDevice return hipSuccess; } -hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) { +hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, + textureReference** texRef) { amd::ScopedLock lock(lock_); auto it = dynCO_map_.find(hmod); @@ -897,12 +862,12 @@ hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, te } dvar->shadowVptr = new texture(); - *texRef = reinterpret_cast(dvar->shadowVptr); + *texRef = reinterpret_cast(dvar->shadowVptr); return hipSuccess; } hipError_t PlatformState::digestFatBinary(const void* data, hip::FatBinaryInfo*& programs) { - return statCO_.digestFatBinary(data, programs); + return statCO_.digestFatBinary(data, programs); } hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data) { @@ -925,19 +890,21 @@ hipError_t PlatformState::registerStatManagedVar(hip::Var* var) { return statCO_.registerStatManagedVar(var); } -hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { +hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, + int deviceId) { return statCO_.getStatFunc(hfunc, hostFunction, deviceId); } -hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) { - if(func_attr == nullptr || hostFunction == nullptr) { +hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, + int deviceId) { + if (func_attr == nullptr || hostFunction == nullptr) { return hipErrorInvalidValue; } return statCO_.getStatFuncAttr(func_attr, hostFunction, deviceId); } -hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, - size_t* size_ptr) { +hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr); } @@ -945,7 +912,7 @@ hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) { return statCO_.initStatManagedVarDevicePtr(deviceId); } -void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { +void PlatformState::setupArgument(const void* arg, size_t size, size_t offset) { auto& arguments = execStack_.top().arguments_; if (arguments.size() < offset + size) { diff --git a/hipamd/src/hip_platform.hpp b/hipamd/src/hip_platform.hpp index 4bed5f2ef1..b85bbbf5c7 100644 --- a/hipamd/src/hip_platform.hpp +++ b/hipamd/src/hip_platform.hpp @@ -27,9 +27,8 @@ namespace hip_impl { hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, - const amd::Device& device, hipFunction_t func, int inputBlockSize, - size_t dynamicSMemSize, bool bCalcPotentialBlkSz); + int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device, + hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz); } /* namespace hip_impl*/ class PlatformState { @@ -40,14 +39,14 @@ class PlatformState { PlatformState() {} ~PlatformState() {} -public: + public: void init(); - //Dynamic Code Objects functions + // Dynamic Code Objects functions hipError_t loadModule(hipModule_t* module, const char* fname, const void* image = nullptr); hipError_t unloadModule(hipModule_t hmod); - hipError_t getDynFunc(hipFunction_t *hfunc, hipModule_t hmod, const char* func_name); + hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* func_name); hipError_t getDynGlobalVar(const char* hostVar, hipModule_t hmod, hipDeviceptr_t* dev_ptr, size_t* size_ptr); hipError_t getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); @@ -59,14 +58,14 @@ public: /* Singleton instance */ static PlatformState& instance() { if (platform_ == nullptr) { - // __hipRegisterFatBinary() will call this when app starts, thus - // there is no multiple entry issue here. - platform_ = new PlatformState(); + // __hipRegisterFatBinary() will call this when app starts, thus + // there is no multiple entry issue here. + platform_ = new PlatformState(); } return *platform_; } - //Static Code Objects functions + // Static Code Objects functions hip::FatBinaryInfo** addFatBinary(const void* data); hipError_t removeFatBinary(hip::FatBinaryInfo** module); hipError_t digestFatBinary(const void* data, hip::FatBinaryInfo*& programs); @@ -83,15 +82,15 @@ public: hipError_t initStatManagedVarDevicePtr(int deviceId); - //Exec Functions - void setupArgument(const void *arg, size_t size, size_t offset); + // Exec Functions + void setupArgument(const void* arg, size_t size, size_t offset); void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); void popExec(ihipExec_t& exec); -private: - //Dynamic Code Object map, keyin module to get the corresponding object + private: + // Dynamic Code Object map, keyin module to get the corresponding object std::unordered_map dynCO_map_; - hip::StatCO statCO_; //Static Code object var + hip::StatCO statCO_; // Static Code object var bool initialized_{false}; std::unordered_map> texRef_map_; }; diff --git a/hipamd/src/hiprtc/hiprtc.cpp b/hipamd/src/hiprtc/hiprtc.cpp index 71dc59f9d8..f967e95a2c 100644 --- a/hipamd/src/hiprtc/hiprtc.cpp +++ b/hipamd/src/hiprtc/hiprtc.cpp @@ -81,7 +81,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha progName = name; } - auto* rtcProgram = new hiprtc::RTCProgram(progName); + auto* rtcProgram = new hiprtc::RTCCompileProgram(progName); if (rtcProgram == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); } @@ -98,7 +98,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha } } - *prog = hiprtc::RTCProgram::as_hiprtcProgram(rtcProgram); + *prog = hiprtc::RTCCompileProgram::as_hiprtcProgram(rtcProgram); HIPRTC_RETURN(HIPRTC_SUCCESS); } @@ -106,7 +106,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { HIPRTC_INIT_API(prog, numOptions, options); - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); std::vector opt; opt.reserve(numOptions); @@ -127,7 +127,7 @@ hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expres if (name_expression == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); std::string name = name_expression; if (!rtcProgram->trackMangledName(name)) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); @@ -144,7 +144,7 @@ hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expressio HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); if (!rtcProgram->getDemangledName(name_expression, loweredName)) { return HIPRTC_RETURN(HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID); @@ -158,7 +158,7 @@ hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { if (prog == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(*prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(*prog); delete rtcProgram; HIPRTC_RETURN(HIPRTC_SUCCESS); } @@ -169,7 +169,7 @@ hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) { if (binarySizeRet == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); *binarySizeRet = rtcProgram->getExecSize(); @@ -182,7 +182,7 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) { if (binaryMem == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); auto binary = rtcProgram->getExec(); ::memcpy(binaryMem, binary.data(), binary.size()); @@ -194,7 +194,7 @@ hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) { if (dst == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); auto log = rtcProgram->getLog(); ::memcpy(dst, log.data(), log.size()); @@ -206,7 +206,7 @@ hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { if (logSizeRet == nullptr) { HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); } - auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog); + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); *logSizeRet = rtcProgram->getLogSize(); @@ -226,3 +226,95 @@ hiprtcResult hiprtcVersion(int* major, int* minor) { HIPRTC_RETURN(HIPRTC_SUCCESS); } + +hiprtcResult hiprtcLinkCreate(unsigned int num_options, hiprtcJIT_option* options_ptr, + void** options_vals_pptr, hiprtcLinkState* hip_link_state_ptr) { + HIPRTC_INIT_API(num_options, options_ptr, options_vals_pptr, hip_link_state_ptr); + + if (options_ptr == nullptr || options_vals_pptr == nullptr || hip_link_state_ptr == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + std::string name("Linker Program"); + hiprtc::RTCLinkProgram* rtc_link_prog_ptr = new hiprtc::RTCLinkProgram(name); + if (!rtc_link_prog_ptr->AddLinkerOptions(num_options, options_ptr, options_vals_pptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_OPTION); + } + + *hip_link_state_ptr = reinterpret_cast(rtc_link_prog_ptr); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcLinkAddFile(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, + const char* file_path, unsigned int num_options, + hiprtcJIT_option* options_ptr, void** option_values) { + HIPRTC_INIT_API(hip_link_state, input_type, file_path, num_options, options_ptr, option_values); + + if (hip_link_state == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX + || input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT + || input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + hiprtc::RTCLinkProgram* rtc_link_prog_ptr + = reinterpret_cast(hip_link_state); + if (!rtc_link_prog_ptr->AddLinkerFile(std::string(file_path), input_type)) { + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcLinkAddData(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type, + void* image, size_t image_size, const char* name, + unsigned int num_options, hiprtcJIT_option* options_ptr, + void** option_values) { + HIPRTC_INIT_API(hip_link_state, image, image_size, name, num_options, options_ptr, + option_values); + + if (image == nullptr || image_size <= 0 || name == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX + || input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT + || input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + hiprtc::RTCLinkProgram* rtc_link_prog_ptr + = reinterpret_cast(hip_link_state); + if (!rtc_link_prog_ptr->AddLinkerData(image, image_size, name, input_type)) { + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); + +} + +hiprtcResult hiprtcLinkComplete(hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out) { + HIPRTC_INIT_API(hip_link_state, bin_out, size_out); + hiprtc::RTCLinkProgram* rtc_link_prog_ptr + = reinterpret_cast(hip_link_state); + if (!rtc_link_prog_ptr->LinkComplete(bin_out, size_out)) { + HIPRTC_RETURN(HIPRTC_ERROR_LINKING); + } + HIPRTC_RETURN(HIPRTC_SUCCESS); + +} + +hiprtcResult hiprtcLinkDestroy(hiprtcLinkState hip_link_state) { + HIPRTC_INIT_API(hip_link_state); + + hiprtc::RTCLinkProgram* rtc_link_prog_ptr + = reinterpret_cast(hip_link_state); + delete rtc_link_prog_ptr; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + diff --git a/hipamd/src/hiprtc/hiprtc.def b/hipamd/src/hiprtc/hiprtc.def index f69854720b..63856ac80e 100644 --- a/hipamd/src/hiprtc/hiprtc.def +++ b/hipamd/src/hiprtc/hiprtc.def @@ -8,4 +8,9 @@ hiprtcGetProgramLog hiprtcGetProgramLogSize hiprtcGetCode hiprtcGetCodeSize -hiprtcGetErrorString \ No newline at end of file +hiprtcGetErrorString +hiprtcLinkCreate +hiprtcLinkAddFile +hiprtcLinkAddData +hiprtcLinkComplete +hiprtcLinkDestroy diff --git a/hipamd/src/hiprtc/hiprtc.map.in b/hipamd/src/hiprtc/hiprtc.map.in index abb3fe8d1f..1b10bcd7a5 100644 --- a/hipamd/src/hiprtc/hiprtc.map.in +++ b/hipamd/src/hiprtc/hiprtc.map.in @@ -11,6 +11,11 @@ global: hiprtcGetErrorString; hiprtcAddNameExpression; hiprtcVersion; + hiprtcLinkCreate; + hiprtcLinkAddFile; + hiprtcLinkAddData; + hiprtcLinkComplete; + hiprtcLinkDestroy; local: *; }; diff --git a/hipamd/src/hiprtc/hiprtcInternal.cpp b/hipamd/src/hiprtc/hiprtcInternal.cpp index 07f903d8b6..233fc358aa 100644 --- a/hipamd/src/hiprtc/hiprtcInternal.cpp +++ b/hipamd/src/hiprtc/hiprtcInternal.cpp @@ -22,110 +22,21 @@ THE SOFTWARE. #include "hiprtcInternal.hpp" +#include +#include + #include "vdi_common.hpp" #include "utils/flags.hpp" namespace hiprtc { using namespace helpers; -RTCProgram::RTCProgram(std::string name_) : name(name_) { + +//RTC Program Member Functions +RTCProgram::RTCProgram(std::string name) : name_(name) { std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib); - if (amd::Comgr::create_data_set(&compileInput) != AMD_COMGR_STATUS_SUCCESS || - amd::Comgr::create_data_set(&linkInput) != AMD_COMGR_STATUS_SUCCESS || - amd::Comgr::create_data_set(&execInput) != AMD_COMGR_STATUS_SUCCESS) { + if (amd::Comgr::create_data_set(&exec_input_) != AMD_COMGR_STATUS_SUCCESS) { crashWithMessage("Failed to allocate internal hiprtc structure"); } - - // Add internal header - if (!addBuiltinHeader()) { - crashWithMessage("Unable to add internal header"); - } - - // Add compile options - const std::string hipVerOpt{"--hip-version=" + std::to_string(HIP_VERSION_MAJOR) + '.' + - std::to_string(HIP_VERSION_MINOR) + '.' + - std::to_string(HIP_VERSION_PATCH)}; - const std::string hipVerMajor{"-DHIP_VERSION_MAJOR=" + std::to_string(HIP_VERSION_MAJOR)}; - const std::string hipVerMinor{"-DHIP_VERSION_MINOR=" + std::to_string(HIP_VERSION_MINOR)}; - const std::string hipVerPatch{"-DHIP_VERSION_PATCH=" + std::to_string(HIP_VERSION_PATCH)}; - - compileOptions.reserve(20); // count of options below - compileOptions.push_back("-O3"); - -#ifdef HIPRTC_EARLY_INLINE - compileOptions.push_back("-mllvm"); - compileOptions.push_back("-amdgpu-early-inline-all"); -#endif - compileOptions.push_back("-mllvm"); - compileOptions.push_back("-amdgpu-prelink"); - - if (GPU_ENABLE_WGP_MODE) compileOptions.push_back("-mcumode"); - - if (!GPU_ENABLE_WAVE32_MODE) compileOptions.push_back("-mwavefrontsize64"); - - compileOptions.push_back(hipVerOpt); - compileOptions.push_back(hipVerMajor); - compileOptions.push_back(hipVerMinor); - compileOptions.push_back(hipVerPatch); - compileOptions.push_back("-D__HIPCC_RTC__"); - compileOptions.push_back("-include"); - compileOptions.push_back("hiprtc_runtime.h"); - compileOptions.push_back("-std=c++14"); - compileOptions.push_back("-nogpuinc"); -#ifdef _WIN32 - compileOptions.push_back("-target"); - compileOptions.push_back("x86_64-pc-windows-msvc"); - compileOptions.push_back("-fms-extensions"); - compileOptions.push_back("-fms-compatibility"); -#endif - - if (!GPU_ENABLE_WAVE32_MODE) linkOptions.push_back("wavefrontsize64"); - - exeOptions.push_back("-O3"); - exeOptions.push_back("-mllvm"); - exeOptions.push_back("-amdgpu-internalize-symbols"); - exeOptions.push_back("-mcumode"); - if (!GPU_ENABLE_WAVE32_MODE) exeOptions.push_back("-mwavefrontsize64"); -} - -bool RTCProgram::addSource(const std::string& source, const std::string& name) { - if (source.size() == 0 || name.size() == 0) { - LogError("Error in hiprtc: source or name is of size 0 in addSource"); - return false; - } - sourceCode += source; - sourceName = name; - return true; -} - -// addSource_impl is a different function because we need to add source when we track mangled -// objects -bool RTCProgram::addSource_impl() { - std::vector vsource(sourceCode.begin(), sourceCode.end()); - if (!addCodeObjData(compileInput, vsource, sourceName, AMD_COMGR_DATA_KIND_SOURCE)) { - return false; - } - return true; -} - -bool RTCProgram::addHeader(const std::string& source, const std::string& name) { - if (source.size() == 0 || name.size() == 0) { - LogError("Error in hiprtc: source or name is of size 0 in addHeader"); - return false; - } - std::vector vsource(source.begin(), source.end()); - if (!addCodeObjData(compileInput, vsource, name, AMD_COMGR_DATA_KIND_INCLUDE)) { - return false; - } - return true; -} - -bool RTCProgram::addBuiltinHeader() { - std::vector source(__hipRTC_header, __hipRTC_header + __hipRTC_header_size); - std::string name{"hiprtc_runtime.h"}; - if (!addCodeObjData(compileInput, source, name, AMD_COMGR_DATA_KIND_INCLUDE)) { - return false; - } - return true; } bool RTCProgram::findIsa() { @@ -140,7 +51,7 @@ bool RTCProgram::findIsa() { if (!handle) { LogInfo("hip runtime failed to load using dlopen"); - buildLog += + build_log_ += "Error: Please provide architecture for which code is to be " "generated.\n"; return false; @@ -151,7 +62,7 @@ bool RTCProgram::findIsa() { if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) { LogInfo("ISA cannot be found to dlsym failure"); - buildLog += + build_log_ += "Error: Please provide architecture for which code is to be " "generated.\n"; return false; @@ -173,14 +84,106 @@ bool RTCProgram::findIsa() { if (status != hipSuccess) { return false; } - isa = "amdgcn-amd-amdhsa--"; - isa.append(props.gcnArchName); + isa_ = "amdgcn-amd-amdhsa--"; + isa_.append(props.gcnArchName); amd::Os::unloadLibrary(handle); return true; } -bool RTCProgram::transformOptions() { +//RTC Compile Program Member Functions +RTCCompileProgram::RTCCompileProgram(std::string name_) : RTCProgram(name_) { + + if ((amd::Comgr::create_data_set(&compile_input_) != AMD_COMGR_STATUS_SUCCESS) || + (amd::Comgr::create_data_set(&link_input_) != AMD_COMGR_STATUS_SUCCESS)) { + crashWithMessage("Failed to allocate internal hiprtc structure"); + } + // Add internal header + if (!addBuiltinHeader()) { + crashWithMessage("Unable to add internal header"); + } + + // Add compile options + const std::string hipVerOpt{"--hip-version=" + std::to_string(HIP_VERSION_MAJOR) + '.' + + std::to_string(HIP_VERSION_MINOR) + '.' + + std::to_string(HIP_VERSION_PATCH)}; + const std::string hipVerMajor{"-DHIP_VERSION_MAJOR=" + std::to_string(HIP_VERSION_MAJOR)}; + const std::string hipVerMinor{"-DHIP_VERSION_MINOR=" + std::to_string(HIP_VERSION_MINOR)}; + const std::string hipVerPatch{"-DHIP_VERSION_PATCH=" + std::to_string(HIP_VERSION_PATCH)}; + + compile_options_.reserve(20); // count of options below + compile_options_.push_back("-O3"); + +#ifdef HIPRTC_EARLY_INLINE + compile_options_.push_back("-mllvm"); + compile_options_.push_back("-amdgpu-early-inline-all"); +#endif + + if (GPU_ENABLE_WGP_MODE) compile_options_.push_back("-mcumode"); + + if (!GPU_ENABLE_WAVE32_MODE) compile_options_.push_back("-mwavefrontsize64"); + + compile_options_.push_back(hipVerOpt); + compile_options_.push_back(hipVerMajor); + compile_options_.push_back(hipVerMinor); + compile_options_.push_back(hipVerPatch); + compile_options_.push_back("-D__HIPCC_RTC__"); + compile_options_.push_back("-include"); + compile_options_.push_back("hiprtc_runtime.h"); + compile_options_.push_back("-std=c++14"); + compile_options_.push_back("-nogpuinc"); +#ifdef _WIN32 + compile_options_.push_back("-target"); + compile_options_.push_back("x86_64-pc-windows-msvc"); + compile_options_.push_back("-fms-extensions"); + compile_options_.push_back("-fms-compatibility"); +#endif + + exe_options_.push_back("-O3"); +} + +bool RTCCompileProgram::addSource(const std::string& source, const std::string& name) { + if (source.size() == 0 || name.size() == 0) { + LogError("Error in hiprtc: source or name is of size 0 in addSource"); + return false; + } + source_code_ += source; + source_name_ = name; + return true; +} + +// addSource_impl is a different function because we need to add source when we track mangled +// objects +bool RTCCompileProgram::addSource_impl() { + std::vector vsource(source_code_.begin(), source_code_.end()); + if (!addCodeObjData(compile_input_, vsource, source_name_, AMD_COMGR_DATA_KIND_SOURCE)) { + return false; + } + return true; +} + +bool RTCCompileProgram::addHeader(const std::string& source, const std::string& name) { + if (source.size() == 0 || name.size() == 0) { + LogError("Error in hiprtc: source or name is of size 0 in addHeader"); + return false; + } + std::vector vsource(source.begin(), source.end()); + if (!addCodeObjData(compile_input_, vsource, name, AMD_COMGR_DATA_KIND_INCLUDE)) { + return false; + } + return true; +} + +bool RTCCompileProgram::addBuiltinHeader() { + std::vector source(__hipRTC_header, __hipRTC_header + __hipRTC_header_size); + std::string name{"hiprtc_runtime.h"}; + if (!addCodeObjData(compile_input_, source, name, AMD_COMGR_DATA_KIND_INCLUDE)) { + return false; + } + return true; +} + +bool RTCCompileProgram::transformOptions() { auto getValueOf = [](const std::string& option) { std::string res; auto f = std::find(option.begin(), option.end(), '='); @@ -188,7 +191,7 @@ bool RTCProgram::transformOptions() { return res; }; - for (auto& i : compileOptions) { + for (auto& i : compile_options_) { if (i == "-hip-pch") { LogInfo( "-hip-pch is deprecated option, has no impact on execution of new hiprtc programs, it " @@ -204,18 +207,18 @@ bool RTCProgram::transformOptions() { continue; } if (i == "--save-temps") { - settings.dumpISA = true; + settings_.dumpISA = true; continue; } } if (auto res = std::find_if( - compileOptions.begin(), compileOptions.end(), + compile_options_.begin(), compile_options_.end(), [](const std::string& str) { return str.find("--offload-arch=") != std::string::npos; }); - res != compileOptions.end()) { + res != compile_options_.end()) { auto isaName = getValueOf(*res); - isa = "amdgcn-amd-amdhsa--" + isaName; - settings.offloadArchProvided = true; + isa_ = "amdgcn-amd-amdhsa--" + isaName; + settings_.offloadArchProvided = true; return true; } // App has not provided the gpu archiecture, need to find it @@ -224,7 +227,7 @@ bool RTCProgram::transformOptions() { amd::Monitor RTCProgram::lock_("HIPRTC Program", true); -bool RTCProgram::compile(const std::vector& options) { +bool RTCCompileProgram::compile(const std::vector& options) { amd::ScopedLock lock(lock_); // Lock, because LLVM is not multi threaded if (!addSource_impl()) { @@ -233,8 +236,8 @@ bool RTCProgram::compile(const std::vector& options) { } // Append compile options - compileOptions.reserve(compileOptions.size() + options.size()); - compileOptions.insert(compileOptions.end(), options.begin(), options.end()); + compile_options_.reserve(compile_options_.size() + options.size()); + compile_options_.insert(compile_options_.end(), options.begin(), options.end()); if (!transformOptions()) { LogError("Error in hiprtc: unable to transform options"); @@ -242,48 +245,48 @@ bool RTCProgram::compile(const std::vector& options) { } std::vector LLVMBitcode; - if (!compileToBitCode(compileInput, isa, compileOptions, buildLog, LLVMBitcode)) { + if (!compileToBitCode(compile_input_, isa_, compile_options_, build_log_, LLVMBitcode)) { LogError("Error in hiprtc: unable to compile source to bitcode"); return false; } std::string linkFileName = "linked"; - if (!addCodeObjData(linkInput, LLVMBitcode, linkFileName, AMD_COMGR_DATA_KIND_BC)) { + if (!addCodeObjData(link_input_, LLVMBitcode, linkFileName, AMD_COMGR_DATA_KIND_BC)) { LogError("Error in hiprtc: unable to add linked code object"); return false; } std::vector LinkedLLVMBitcode; - if (!linkLLVMBitcode(linkInput, isa, linkOptions, buildLog, LinkedLLVMBitcode)) { + if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, LinkedLLVMBitcode)) { LogError("Error in hiprtc: unable to add device libs to linked bitcode"); return false; } std::string linkedFileName = "LLVMBitcode.bc"; - if (!addCodeObjData(execInput, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) { + if (!addCodeObjData(exec_input_, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) { LogError("Error in hiprtc: unable to add device libs linked code object"); return false; } - if (settings.dumpISA) { - if (!dumpIsaFromBC(execInput, isa, exeOptions, name, buildLog)) { + if (settings_.dumpISA) { + if (!dumpIsaFromBC(exec_input_, isa_, exe_options_, name_, build_log_)) { LogError("Error in hiprtc: unable to dump isa code"); return false; } } - if (!createExecutable(execInput, isa, exeOptions, buildLog, executable)) { + if (!createExecutable(exec_input_, isa_, exe_options_, build_log_, executable_)) { LogError("Error in hiprtc: unable to create executable"); return false; } std::vector mangledNames; - if (!fillDemangledNames(executable, mangledNames)) { + if (!fillDemangledNames(executable_, mangledNames)) { LogError("Error in hiprtc: unable to fill demangled names"); return false; } - if (!getMangledNames(mangledNames, strippedNames, demangledNames)) { + if (!getMangledNames(mangledNames, stripped_names_, demangled_names_)) { LogError("Error in hiprtc: unable to get mangled names"); return false; } @@ -291,7 +294,7 @@ bool RTCProgram::compile(const std::vector& options) { return true; } -bool RTCProgram::trackMangledName(std::string& name) { +bool RTCCompileProgram::trackMangledName(std::string& name) { amd::ScopedLock lock(lock_); if (name.size() == 0) return false; @@ -312,20 +315,20 @@ bool RTCProgram::trackMangledName(std::string& name) { return std::isspace(c); }), strippedNameNoSpace.end()); - strippedNames.insert(std::pair(name, strippedNameNoSpace)); - demangledNames.insert(std::pair(strippedName, "")); + stripped_names_.insert(std::pair(name, strippedNameNoSpace)); + demangled_names_.insert(std::pair(strippedName, "")); - const auto var{"__hiprtc_" + std::to_string(strippedNames.size())}; + const auto var{"__hiprtc_" + std::to_string(stripped_names_.size())}; const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name + ";\n"}; - sourceCode += code; + source_code_ += code; return true; } -bool RTCProgram::getDemangledName(const char* name_expression, const char** loweredName) { +bool RTCCompileProgram::getDemangledName(const char* name_expression, const char** loweredName) { std::string name = name_expression; - if (auto res = strippedNames.find(name); res != strippedNames.end()) { - if (auto dres = demangledNames.find(res->second); dres != demangledNames.end()) { + if (auto res = stripped_names_.find(name); res != stripped_names_.end()) { + if (auto dres = demangled_names_.find(res->second); dres != demangled_names_.end()) { if (dres->second.size() != 0) { *loweredName = dres->second.c_str(); return true; @@ -333,7 +336,7 @@ bool RTCProgram::getDemangledName(const char* name_expression, const char** lowe return false; } } - if (auto dres = demangledNames.find(name); dres != demangledNames.end()) { + if (auto dres = demangled_names_.find(name); dres != demangled_names_.end()) { if (dres->second.size() != 0) { *loweredName = dres->second.c_str(); return true; @@ -343,4 +346,223 @@ bool RTCProgram::getDemangledName(const char* name_expression, const char** lowe return false; } +//RTC Link Program Member Functions +RTCLinkProgram::RTCLinkProgram(std::string name) : RTCProgram(name) { + if (amd::Comgr::create_data_set(&link_input_) != AMD_COMGR_STATUS_SUCCESS) { + crashWithMessage("Failed to allocate internal hiprtc structure"); + } +} + +bool RTCLinkProgram::AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr, + void** options_vals_ptr) { + + if (options_ptr == nullptr || options_vals_ptr == nullptr) { + crashWithMessage("JIT Options ptr cannot be null"); + return false; + } + + for (size_t opt_idx = 0; opt_idx < num_options; ++opt_idx) { + + if (options_vals_ptr[opt_idx] == nullptr) { + crashWithMessage("JIT Options value ptr cannot be null"); + return false; + } + + switch(options_ptr[opt_idx]) { + case HIPRTC_JIT_MAX_REGISTERS: + link_args_.max_registers_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_THREADS_PER_BLOCK: + link_args_.threads_per_block_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_WALL_TIME: + link_args_.wall_time_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_INFO_LOG_BUFFER: + link_args_.info_log_ = (reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES: + link_args_.info_log_size_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_ERROR_LOG_BUFFER: + link_args_.error_log_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: + link_args_.error_log_size_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_OPTIMIZATION_LEVEL: + link_args_.optimization_level_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_TARGET_FROM_HIPCONTEXT: + link_args_.target_from_hip_context_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_TARGET: + link_args_.jit_target_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_FALLBACK_STRATEGY: + link_args_.fallback_strategy_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_GENERATE_DEBUG_INFO: + link_args_.generate_debug_info_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_LOG_VERBOSE: + link_args_.log_verbose_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_GENERATE_LINE_INFO: + link_args_.generate_line_info_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_CACHE_MODE: + link_args_.cache_mode_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_NEW_SM3X_OPT: + link_args_.sm3x_opt_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_FAST_COMPILE: + link_args_.fast_compile_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_NAMES: + link_args_.global_symbol_names_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS: + link_args_.global_symbol_addresses_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_COUNT: + link_args_.global_symbol_count_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_LTO: + link_args_.lto_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_FTZ: + link_args_.ftz_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_PREC_DIV: + link_args_.prec_div_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_PREC_SQRT: + link_args_.prec_sqrt_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_FMA: + link_args_.fma_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + default: + break; + } + } + + return true; +} + +amd_comgr_data_kind_t RTCLinkProgram::GetCOMGRDataKind(hiprtcJITInputType input_type) { + amd_comgr_data_kind_t data_kind = AMD_COMGR_DATA_KIND_UNDEF; + + // Map the hiprtc input type to comgr data kind + switch (input_type) { + case HIPRTC_JIT_INPUT_LLVM_BITCODE : + data_kind = AMD_COMGR_DATA_KIND_BC; + break; + case HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE : + data_kind = AMD_COMGR_DATA_KIND_FATBIN; + break; + case HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE : + data_kind = AMD_COMGR_DATA_KIND_FATBIN; + break; + default : + LogError("Cannot find the corresponding comgr data kind"); + break; + } + + return data_kind; +} + +bool RTCLinkProgram::AddLinkerFile(std::string file_path, hiprtcJITInputType input_type) { + amd::ScopedLock lock(lock_); + + struct stat stat_buf; + if (stat(file_path.c_str(), &stat_buf)) { + return false; + } + + std::string link_file_name_("Linker Program"); + std::vector llvm_bitcode(stat_buf.st_size); + std::ifstream bc_file(file_path, std::ios_base::in | std::ios_base::binary); + if (!bc_file.good()) { + return true; + } + + bc_file.read(llvm_bitcode.data(), stat_buf.st_size); + bc_file.close(); + + amd_comgr_data_kind_t data_kind; + if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) { + LogError("Cannot find the correct COMGR data kind"); + return false; + } + + if (!addCodeObjData(link_input_, llvm_bitcode, link_file_name_, data_kind)) { + LogError("Error in hiprtc: unable to add linked code object"); + return false; + } + + return true; +} + +bool RTCLinkProgram::AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name, + hiprtcJITInputType input_type) { + amd::ScopedLock lock(lock_); + + char* image_char_buf = reinterpret_cast(image_ptr); + std::vector llvm_bitcode(image_char_buf, image_char_buf + image_size); + + amd_comgr_data_kind_t data_kind; + if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) { + LogError("Cannot find the correct COMGR data kind"); + return false; + } + + if(!addCodeObjData(link_input_,llvm_bitcode , link_file_name, data_kind)) { + LogError("Error in hiprtc: unable to add linked code object"); + return false; + } + return true; +} + +bool RTCLinkProgram::LinkComplete(void** bin_out, size_t* size_out) { + amd::ScopedLock lock(lock_); + + if (!findIsa()) { + return false; + } + + std::vector linked_llvm_bitcode; + if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, linked_llvm_bitcode)) { + LogError("Error in hiprtc: unable to add device libs to linked bitcode"); + return false; + } + + std::string linkedFileName = "LLVMBitcode.bc"; + if (!addCodeObjData(exec_input_, linked_llvm_bitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) { + LogError("Error in hiprtc: unable to add linked bitcode"); + return false; + } + + std::vector exe_options; + exe_options.push_back("-O3"); + if (!createExecutable(exec_input_, isa_, exe_options, build_log_, executable_)) { + LogError("Error in hiprtc: unable to create exectuable"); + return false; + } + + *size_out = executable_.size(); + char* bin_out_c = new char[*size_out]; + std::copy(executable_.begin(), executable_.end(), bin_out_c); + *bin_out = reinterpret_cast(bin_out_c); + + return true; +} + } // namespace hiprtc diff --git a/hipamd/src/hiprtc/hiprtcInternal.hpp b/hipamd/src/hiprtc/hiprtcInternal.hpp index c52dd879a6..763cb12695 100644 --- a/hipamd/src/hiprtc/hiprtcInternal.hpp +++ b/hipamd/src/hiprtc/hiprtcInternal.hpp @@ -46,7 +46,6 @@ extern unsigned __hipRTC_header_size; #include "hiprtcComgrHelper.hpp" - namespace hiprtc { namespace internal { template inline std::string ToString(T v) { @@ -90,67 +89,139 @@ struct Settings { }; class RTCProgram { +protected: + // Lock and control variables static amd::Monitor lock_; - static std::once_flag initialized; + static std::once_flag initialized_; - std::string name; - Settings settings; + RTCProgram(std::string name); + ~RTCProgram() { + amd::Comgr::destroy_data_set(exec_input_); + } - std::string isa; - std::string buildLog; - - std::vector executable; - - std::map strippedNames; - std::map demangledNames; - std::string sourceCode; - std::string sourceName; - - std::vector compileOptions; - std::vector linkOptions; - std::vector exeOptions; - - amd_comgr_data_set_t compileInput; - amd_comgr_data_set_t linkInput; - amd_comgr_data_set_t execInput; - - bool dumpIsa(); + // Member Functions bool findIsa(); + + // Data Members + std::string name_; + std::string isa_; + std::string build_log_; + std::vector executable_; + amd_comgr_data_set_t exec_input_; + std::vector exe_options_; +}; + +class RTCCompileProgram : public RTCProgram { + + // Private Data Members + Settings settings_; + + std::string source_code_; + std::string source_name_; + std::map stripped_names_; + std::map demangled_names_; + + std::vector compile_options_; + std::vector link_options_; + + amd_comgr_data_set_t compile_input_; + amd_comgr_data_set_t link_input_; + + // Private Member functions bool addSource_impl(); bool addBuiltinHeader(); bool transformOptions(); - RTCProgram() = delete; - RTCProgram(RTCProgram&) = delete; - RTCProgram& operator=(RTCProgram&) = delete; + RTCCompileProgram() = delete; + RTCCompileProgram(RTCCompileProgram&) = delete; + RTCCompileProgram& operator=(RTCCompileProgram&) = delete; public: - RTCProgram(std::string); + RTCCompileProgram(std::string); + ~RTCCompileProgram() { + amd::Comgr::destroy_data_set(compile_input_); + amd::Comgr::destroy_data_set(link_input_); + } // Converters - inline static hiprtcProgram as_hiprtcProgram(RTCProgram* p) { + inline static hiprtcProgram as_hiprtcProgram(RTCCompileProgram* p) { return reinterpret_cast(p); } - inline static RTCProgram* as_RTCProgram(hiprtcProgram& p) { - return reinterpret_cast(p); + inline static RTCCompileProgram* as_RTCCompileProgram(hiprtcProgram& p) { + return reinterpret_cast(p); } + // Public Member Functions bool addSource(const std::string& source, const std::string& name); bool addHeader(const std::string& source, const std::string& name); bool compile(const std::vector& options); bool getDemangledName(const char* name_expression, const char** loweredName); bool trackMangledName(std::string& name); - const std::vector& getExec() const { return executable; } - size_t getExecSize() const { return executable.size(); } - const std::string& getLog() const { return buildLog; } - size_t getLogSize() const { return buildLog.size(); } - - ~RTCProgram() { - amd::Comgr::destroy_data_set(compileInput); - amd::Comgr::destroy_data_set(linkInput); - amd::Comgr::destroy_data_set(execInput); - } + // Public Getter/Setters + const std::vector& getExec() const { return executable_; } + size_t getExecSize() const { return executable_.size(); } + const std::string& getLog() const { return build_log_; } + size_t getLogSize() const { return build_log_.size(); } }; + +// Linker Arguments passed via hipLinkCreate +struct LinkArguments { + unsigned int max_registers_; + unsigned int threads_per_block_; + float wall_time_; + size_t info_log_size_; + char* info_log_; + size_t error_log_size_; + char* error_log_; + unsigned int optimization_level_; + unsigned int target_from_hip_context_; + unsigned int jit_target_; + unsigned int fallback_strategy_; + int generate_debug_info_; + long log_verbose_; + int generate_line_info_; + unsigned int cache_mode_; + bool sm3x_opt_; + bool fast_compile_; + const char** global_symbol_names_; + void** global_symbol_addresses_; + unsigned int global_symbol_count_; + int lto_; + int ftz_; + int prec_div_; + int prec_sqrt_; + int fma_; +}; + +class RTCLinkProgram : public RTCProgram { + + // Private Member Functions (forbid these function calls) + RTCLinkProgram() = delete; + RTCLinkProgram(RTCLinkProgram&) = delete; + RTCLinkProgram& operator=(RTCLinkProgram&) = delete; + + amd_comgr_data_kind_t GetCOMGRDataKind(hiprtcJITInputType input_type); + + // Linker Argumenets at hipLinkCreate + LinkArguments link_args_; + + // Private Data Members + amd_comgr_data_set_t link_input_; + std::vector link_options_; +public: + RTCLinkProgram(std::string name); + ~RTCLinkProgram() { + amd::Comgr::destroy_data_set(link_input_); + } + // Public Member Functions + bool AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr, + void** options_vals_ptr); + bool AddLinkerFile(std::string file_path, hiprtcJITInputType input_type); + bool AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name, + hiprtcJITInputType input_type); + bool LinkComplete(void** bin_out, size_t* size_out); +}; + } // namespace hiprtc