SWDEV-290384 - Add Linker API support in hiprtc

Change-Id: I4621a033a22e4da0201c3804e2b357470a681ab0
Этот коммит содержится в:
kjayapra-amd
2022-03-14 12:36:16 -04:00
коммит произвёл Karthik Jayaprakash
родитель 176acb9315
Коммит 84f94fd134
12 изменённых файлов: 982 добавлений и 677 удалений
+28 -35
Просмотреть файл
@@ -34,7 +34,7 @@ THE SOFTWARE.
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false);
hipError_t ihipFree(void* ptr);
//forward declaration of methods required for managed variables
// forward declaration of methods required for managed variables
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
namespace {
size_t constexpr strLiteralLength(char const* str) {
@@ -67,9 +67,7 @@ struct __ClangOffloadBundleHeader {
namespace hip {
// Returns the size of the ELF image pointed to by emi, as reported by
// amd::Elf::getElfSize. The pointer is passed through unchanged and is not
// validated here.
uint64_t CodeObject::ElfSize(const void *emi) {
return amd::Elf::getElfSize(emi);
}
uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported,
bool& sramEccSupported) {
@@ -375,10 +373,10 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
}
// This will be moved to COMGR eventually
hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize,
const void ** image, const std::vector<std::string>& device_names,
std::vector<std::pair<const void*, size_t>>& code_objs) {
hipError_t CodeObject::ExtractCodeObjectFromFile(
amd::Os::FileDesc fdesc, size_t fsize, const void** image,
const std::vector<std::string>& device_names,
std::vector<std::pair<const void*, size_t>>& code_objs) {
hipError_t hip_error = hipSuccess;
if (fdesc < 0) {
@@ -386,8 +384,8 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t
}
// Map the file to memory, with offset 0.
//file will be unmapped in ModuleUnload
//const void* image = nullptr;
// file will be unmapped in ModuleUnload
// const void* image = nullptr;
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) {
return hipErrorInvalidValue;
}
@@ -399,11 +397,9 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t
}
// This will be moved to COMGR eventually
hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data,
const std::vector<std::string>& device_names,
std::vector<std::pair<const void*, size_t>>& code_objs,
std::string& uri) {
hipError_t CodeObject::ExtractCodeObjectFromMemory(
const void* data, const std::vector<std::string>& device_names,
std::vector<std::pair<const void*, size_t>>& code_objs, std::string& uri) {
// Get the URI from memory
if (!amd::Os::GetURIFromMemory(data, 0, uri)) {
return hipErrorInvalidValue;
@@ -413,9 +409,9 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(const void* data,
}
// This will be moved to COMGR eventually
hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data,
const std::vector<std::string>& agent_triple_target_ids,
std::vector<std::pair<const void*, size_t>>& code_objs) {
hipError_t CodeObject::extractCodeObjectFromFatBinary(
const void* data, const std::vector<std::string>& agent_triple_target_ids,
std::vector<std::pair<const void*, size_t>>& code_objs) {
std::string magic((const char*)data, bundle_magic_string_size);
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) {
return hipErrorInvalidKernelFile;
@@ -492,12 +488,11 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(const void* data,
}
hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
amd::ScopedLock lock(dclock_);
// Number of devices = 1 in dynamic code object
fb_info_ = new FatBinaryInfo(fname, image);
std::vector<hip::Device*> devices = { g_devices[ihipGetDevice()] };
std::vector<hip::Device*> devices = {g_devices[ihipGetDevice()]};
IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices));
// No Lazy loading for DynCO
@@ -512,12 +507,12 @@ hipError_t DynCO::loadCodeObject(const char* fname, const void* image) {
return hipSuccess;
}
//Dynamic Code Object
// Dynamic Code Object
DynCO::~DynCO() {
amd::ScopedLock lock(dclock_);
for (auto& elem : vars_) {
if(elem.second->getVarKind() == Var::DVK_Managed) {
if (elem.second->getVarKind() == Var::DVK_Managed) {
hipError_t err = ihipFree(elem.second->getManagedVarPtr());
assert(err == hipSuccess);
}
@@ -553,7 +548,7 @@ hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) {
CheckDeviceIdMatch();
if(hfunc == nullptr) {
if (hfunc == nullptr) {
return hipErrorInvalidValue;
}
@@ -655,9 +650,8 @@ hipError_t DynCO::populateDynGlobalFuncs() {
amd::ScopedLock lock(dclock_);
std::vector<std::string> func_names;
device::Program* dev_program
= fb_info_->GetProgram(ihipGetDevice())->getDeviceProgram(
*hip::getCurrentDevice()->devices()[0]);
device::Program* dev_program = fb_info_->GetProgram(ihipGetDevice())
->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
// Get all the global func names from COMGR
if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) {
@@ -672,9 +666,8 @@ hipError_t DynCO::populateDynGlobalFuncs() {
return hipSuccess;
}
//Static Code Object
StatCO::StatCO() {
}
// Static Code Object
StatCO::StatCO() {}
StatCO::~StatCO() {
amd::ScopedLock lock(sclock_);
@@ -784,7 +777,8 @@ hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, i
return it->second->getStatFunc(hfunc, deviceId);
}
hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) {
hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction,
int deviceId) {
amd::ScopedLock lock(sclock_);
const auto it = functions_.find(hostFunction);
@@ -838,10 +832,9 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId));
amd::HostQueue* queue = hip::getNullStream();
if(queue != nullptr) {
err = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()),
var->getManagedVarPtr(),
dvar->size(), hipMemcpyHostToDevice, *queue);
if (queue != nullptr) {
err = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), var->getManagedVarPtr(),
dvar->size(), hipMemcpyHostToDevice, *queue);
} else {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
return hipErrorInvalidResourceHandle;
@@ -851,4 +844,4 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
}
return err;
}
}; //namespace: hip
}; // namespace hip
+33 -38
Просмотреть файл
@@ -30,7 +30,7 @@ thread_local std::vector<hip::Stream*> l_captureStreams;
thread_local hipStreamCaptureMode l_streamCaptureMode{hipStreamCaptureModeGlobal};
inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph,
const hipGraphNode_t* pDependencies, size_t numDependencies) {
const hipGraphNode_t* pDependencies, size_t numDependencies) {
graph->AddNode(graphNode);
for (size_t i = 0; i < numDependencies; i++) {
if (!hipGraphNode::isNodeValid(pDependencies[i])) {
@@ -43,7 +43,6 @@ inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph,
hipError_t ihipValidateKernelParams(const hipKernelNodeParams* pNodeParams) {
if (pNodeParams->kernelParams == nullptr) {
return hipErrorInvalidValue;
}
@@ -136,8 +135,9 @@ hipError_t ihipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph,
return hipErrorInvalidValue;
}
// The element size must be 1, 2, or 4 bytes
if (pMemsetParams->elementSize != sizeof(int8_t) && pMemsetParams->elementSize != sizeof(int16_t)
&& pMemsetParams->elementSize != sizeof(int32_t)) {
if (pMemsetParams->elementSize != sizeof(int8_t) &&
pMemsetParams->elementSize != sizeof(int16_t) &&
pMemsetParams->elementSize != sizeof(int32_t)) {
return hipErrorInvalidValue;
}
@@ -704,8 +704,9 @@ hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*&
hostParams.userData = userData;
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
hipGraphNode_t pGraphNode = new hipGraphHostNode(&hostParams);
hipError_t status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(),
s->GetLastCapturedNodes().size());
hipError_t status =
ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(),
s->GetLastCapturedNodes().size());
if (status != hipSuccess) {
return status;
}
@@ -729,8 +730,7 @@ hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCap
hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode) {
HIP_INIT_API(hipThreadExchangeStreamCaptureMode, mode);
if (mode == nullptr ||
*mode < hipStreamCaptureModeGlobal ||
if (mode == nullptr || *mode < hipStreamCaptureModeGlobal ||
*mode > hipStreamCaptureModeRelaxed) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -751,8 +751,7 @@ hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode)
if (stream == nullptr) {
HIP_RETURN(hipErrorStreamCaptureUnsupported);
}
if (mode < hipStreamCaptureModeGlobal ||
mode > hipStreamCaptureModeRelaxed) {
if (mode < hipStreamCaptureModeGlobal || mode > hipStreamCaptureModeRelaxed) {
HIP_RETURN(hipErrorInvalidValue);
}
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
@@ -899,7 +898,7 @@ hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph,
hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src,
size_t count, hipMemcpyKind kind) {
HIP_INIT_API(hipGraphMemcpyNodeSetParams1D, node, dst, src, count, kind);
if (node == nullptr || dst == nullptr || src == nullptr || count == 0 || src == dst ) {
if (node == nullptr || dst == nullptr || src == nullptr || count == 0 || src == dst) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -910,8 +909,8 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraph
void* dst, const void* src, size_t count,
hipMemcpyKind kind) {
HIP_INIT_API(hipGraphExecMemcpyNodeSetParams1D, hGraphExec, node, dst, src, count, kind);
if (hGraphExec == nullptr || node == nullptr || dst == nullptr ||
src == nullptr || count == 0 || src == dst ) {
if (hGraphExec == nullptr || node == nullptr || dst == nullptr || src == nullptr || count == 0 ||
src == dst) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -996,7 +995,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t g
HIP_RETURN(hipErrorInvalidValue);
}
//invalid flag check
// invalid flag check
if (flags != 0 && flags != hipGraphInstantiateFlagAutoFreeOnLaunch) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -1248,7 +1247,8 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGra
hipGraphNode_t hipErrorNode_out;
hipGraphExecUpdateResult updateResult_out;
// Check if this instantiated graph is updatable. All restrictions in hipGraphExecUpdate() apply.
hipError_t status = hipGraphExecUpdate(hGraphExec, childGraph, &hipErrorNode_out, &updateResult_out);
hipError_t status =
hipGraphExecUpdate(hGraphExec, childGraph, &hipErrorNode_out, &updateResult_out);
if (status != hipSuccess) {
HIP_RETURN(status);
}
@@ -1354,8 +1354,7 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* fr
hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to,
size_t* numEdges) {
HIP_INIT_API(hipGraphGetEdges, graph, from, to, numEdges);
if (graph == nullptr || numEdges == nullptr ||
(from == nullptr && to != nullptr) ||
if (graph == nullptr || numEdges == nullptr || (from == nullptr && to != nullptr) ||
(to == nullptr && from != nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -1463,7 +1462,6 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) {
}
hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph) {
HIP_INIT_API(hipGraphClone, pGraphClone, originalGraph);
if (originalGraph == nullptr || pGraphClone == nullptr) {
@@ -1501,8 +1499,9 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_
size_t count, size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipGraphAddMemcpyNodeFromSymbol, pGraphNode, graph, pDependencies, numDependencies,
dst, symbol, count, offset, kind);
if (graph == nullptr || pGraphNode == nullptr || (numDependencies > 0 && pDependencies == nullptr)
|| dst == nullptr || !ihipGraph::isGraphValid(graph)) {
if (graph == nullptr || pGraphNode == nullptr ||
(numDependencies > 0 && pDependencies == nullptr) || dst == nullptr ||
!ihipGraph::isGraphValid(graph)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -1521,7 +1520,7 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_
hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol,
size_t count, size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipGraphMemcpyNodeSetParamsFromSymbol, node, dst, symbol, count, offset, kind);
if (symbol == nullptr) {
if (symbol == nullptr) {
HIP_RETURN(hipErrorInvalidSymbol);
}
if (node == nullptr || dst == nullptr || count == 0 || symbol == dst) {
@@ -1537,7 +1536,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsFromSymbol, hGraphExec, node, dst, symbol, count,
offset, kind);
if (symbol == nullptr) {
if (symbol == nullptr) {
HIP_RETURN(hipErrorInvalidSymbol);
}
if (hGraphExec == nullptr || node == nullptr || dst == nullptr || count == 0 || symbol == dst) {
@@ -1560,7 +1559,7 @@ hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t
HIP_INIT_API(hipGraphAddMemcpyNodeToSymbol, pGraphNode, graph, pDependencies, numDependencies,
symbol, src, count, offset, kind);
if (pGraphNode == nullptr || graph == nullptr || src == nullptr ||
!ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) {
!ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) {
HIP_RETURN(hipErrorInvalidValue);
}
size_t sym_size = 0;
@@ -1581,7 +1580,7 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void*
const void* src, size_t count, size_t offset,
hipMemcpyKind kind) {
HIP_INIT_API(hipGraphMemcpyNodeSetParamsToSymbol, symbol, src, count, offset, kind);
if (symbol == nullptr) {
if (symbol == nullptr) {
HIP_RETURN(hipErrorInvalidSymbol);
}
if (node == nullptr || src == nullptr || count == 0 || symbol == src) {
@@ -1599,7 +1598,7 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hi
hipMemcpyKind kind) {
HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsToSymbol, hGraphExec, node, symbol, src, count,
offset, kind);
if (symbol == nullptr) {
if (symbol == nullptr) {
HIP_RETURN(hipErrorInvalidSymbol);
}
if (hGraphExec == nullptr || src == nullptr || node == nullptr || count == 0 || src == symbol) {
@@ -1619,8 +1618,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t gra
hipEvent_t event) {
HIP_INIT_API(hipGraphAddEventRecordNode, pGraphNode, graph, pDependencies, numDependencies,
event);
if (pGraphNode == nullptr || graph == nullptr || (numDependencies > 0 && pDependencies == nullptr)
|| event == nullptr) {
if (pGraphNode == nullptr || graph == nullptr ||
(numDependencies > 0 && pDependencies == nullptr) || event == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
*pGraphNode = new hipGraphEventRecordNode(event);
@@ -1630,8 +1629,7 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t gra
hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) {
HIP_INIT_API(hipGraphEventRecordNodeGetEvent, node, event_out);
if (node == nullptr || event_out == nullptr ||
node->GetType() != hipGraphNodeTypeEventRecord) {
if (node == nullptr || event_out == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) {
HIP_RETURN(hipErrorInvalidValue);
}
reinterpret_cast<hipGraphEventRecordNode*>(node)->GetParams(event_out);
@@ -1640,8 +1638,7 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* even
hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) {
HIP_INIT_API(hipGraphEventRecordNodeSetEvent, node, event);
if (node == nullptr || event == nullptr ||
node->GetType() != hipGraphNodeTypeEventRecord ) {
if (node == nullptr || event == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(reinterpret_cast<hipGraphEventRecordNode*>(node)->SetParams(event));
@@ -1665,8 +1662,8 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph
const hipGraphNode_t* pDependencies, size_t numDependencies,
hipEvent_t event) {
HIP_INIT_API(hipGraphAddEventWaitNode, pGraphNode, graph, pDependencies, numDependencies, event);
if (pGraphNode == nullptr || graph == nullptr || (numDependencies > 0 && pDependencies == nullptr)
|| event == nullptr) {
if (pGraphNode == nullptr || graph == nullptr ||
(numDependencies > 0 && pDependencies == nullptr) || event == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
*pGraphNode = new hipGraphEventWaitNode(event);
@@ -1676,8 +1673,7 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph
hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) {
HIP_INIT_API(hipGraphEventWaitNodeGetEvent, node, event_out);
if (node == nullptr || event_out == nullptr ||
node->GetType() != hipGraphNodeTypeWaitEvent) {
if (node == nullptr || event_out == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) {
HIP_RETURN(hipErrorInvalidValue);
}
reinterpret_cast<hipGraphEventWaitNode*>(node)->GetParams(event_out);
@@ -1686,8 +1682,7 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_
hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) {
HIP_INIT_API(hipGraphEventWaitNodeSetEvent, node, event);
if (node == nullptr || event == nullptr ||
node->GetType() != hipGraphNodeTypeWaitEvent) {
if (node == nullptr || event == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(reinterpret_cast<hipGraphEventWaitNode*>(node)->SetParams(event));
@@ -1759,7 +1754,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph,
HIP_INIT_API(hipGraphExecUpdate, hGraphExec, hGraph, hErrorNode_out, updateResult_out);
// parameter check
if (hGraphExec == nullptr || hGraph == nullptr || hErrorNode_out == nullptr ||
updateResult_out == nullptr) {
updateResult_out == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
+5
Просмотреть файл
@@ -267,6 +267,11 @@ hiprtcGetProgramLogSize
hiprtcGetCode
hiprtcGetCodeSize
hiprtcGetErrorString
hiprtcLinkCreate
hiprtcLinkAddFile
hiprtcLinkAddData
hiprtcLinkComplete
hiprtcLinkDestroy
hipMipmappedArrayCreate
hipMallocMipmappedArray
hipMipmappedArrayDestroy
+5
Просмотреть файл
@@ -208,6 +208,11 @@ global:
hiprtcGetErrorString;
hiprtcAddNameExpression;
hiprtcVersion;
hiprtcLinkCreate;
hiprtcLinkAddFile;
hiprtcLinkAddData;
hiprtcLinkComplete;
hiprtcLinkDestroy;
hipBindTexture;
hipBindTexture2D;
hipBindTextureToArray;
+118 -172
Просмотреть файл
@@ -29,24 +29,15 @@
hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size);
extern hipError_t ihipLaunchKernel(const void* hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream,
hipEvent_t startEvent,
hipEvent_t stopEvent,
int flags);
extern hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim,
void** args, size_t sharedMemBytes, hipStream_t stream,
hipEvent_t startEvent, hipEvent_t stopEvent, int flags);
// Returns a reference to the name of the kernel behind the function handle f.
// The handle is converted with hip::DeviceFunc::asFunction and dereferenced
// without a null check, so f presumably must be a valid handle -- TODO confirm.
const std::string& FunctionName(const hipFunction_t f) {
return hip::DeviceFunc::asFunction(f)->kernel()->name();
}
// File-local helper: returns the size of the ELF image at emi by delegating to
// amd::Elf::getElfSize.
static uint64_t ElfSize(const void *emi)
{
return amd::Elf::getElfSize(emi);
}
static uint64_t ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); }
hipError_t hipModuleUnload(hipModule_t hmod) {
HIP_INIT_API(hipModuleUnload, hmod);
@@ -60,31 +51,28 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
HIP_RETURN(PlatformState::instance().loadModule(module, fname));
}
// Loads a module from an in-memory code object image.
//   module - receives the loaded module handle.
//   image  - pointer to the code object image in memory.
// Delegates to PlatformState::loadModule, passing 0 in the position that
// hipModuleLoad uses for the file name, and returns its result via HIP_RETURN.
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
{
hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
HIP_INIT_API(hipModuleLoadData, module, image);
HIP_RETURN(PlatformState::instance().loadModule(module, 0, image));
}
// Loads a module from an in-memory code object image, accepting JIT options.
//   module        - receives the loaded module handle.
//   image         - pointer to the code object image in memory.
//   numOptions, options, optionsValues - accepted but not used by this body
//                   (see the TODO below); the call is otherwise the same
//                   loadModule(module, 0, image) call that hipModuleLoadData makes.
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
unsigned int numOptions, hipJitOption* options,
void** optionsValues)
{
hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
hipJitOption* options, void** optionsValues) {
/* TODO: Pass options to Program */
HIP_INIT_API(hipModuleLoadDataEx, module, image);
HIP_RETURN(PlatformState::instance().loadModule(module, 0, image));
}
extern hipError_t __hipExtractCodeObjectFromFatBinary(const void* data,
const std::vector<std::string>& devices,
std::vector<std::pair<const void*, size_t>>& code_objs);
extern hipError_t __hipExtractCodeObjectFromFatBinary(
const void* data, const std::vector<std::string>& devices,
std::vector<std::pair<const void*, size_t>>& code_objs);
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name) {
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
if(hfunc == nullptr || name == nullptr) {
if (hfunc == nullptr || name == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -96,8 +84,8 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch
HIP_RETURN(hipSuccess);
}
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name)
{
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
const char* name) {
HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name);
if (dptr == nullptr || bytes == nullptr) {
@@ -134,13 +122,13 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
HIP_RETURN(hipErrorInvalidDeviceFunction);
}
const device::Kernel::WorkGroupInfo* wrkGrpInfo
= kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo();
const device::Kernel::WorkGroupInfo* wrkGrpInfo =
kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo();
if (wrkGrpInfo == nullptr) {
HIP_RETURN(hipErrorMissingConfiguration);
}
switch(attrib) {
switch (attrib) {
case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES:
*value = static_cast<int>(wrkGrpInfo->localMemSize_);
break;
@@ -157,7 +145,7 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
*value = static_cast<int>(wrkGrpInfo->usedVGPRs_);
break;
case HIP_FUNC_ATTRIBUTE_PTX_VERSION:
*value = 30; // Defaults to 3.0 as HCC
*value = 30; // Defaults to 3.0 as HCC
break;
case HIP_FUNC_ATTRIBUTE_BINARY_VERSION:
*value = static_cast<int>(kernel->signature().version());
@@ -171,15 +159,14 @@ hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunc
case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT:
*value = 0;
break;
default:
HIP_RETURN(hipErrorInvalidValue);
default:
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
{
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
HIP_INIT_API(hipFuncGetAttributes, attr, func);
HIP_RETURN_ONFAIL(PlatformState::instance().getStatFuncAttr(attr, func, ihipGetDevice()));
@@ -187,7 +174,7 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
HIP_RETURN(hipSuccess);
}
hipError_t hipFuncSetAttribute ( const void* func, hipFuncAttribute attr, int value ) {
hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) {
HIP_INIT_API(hipFuncSetAttribute, func, attr, value);
// No way to set function attribute yet.
@@ -195,8 +182,7 @@ hipError_t hipFuncSetAttribute ( const void* func, hipFuncAttribute attr, int va
HIP_RETURN(hipSuccess);
}
hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig) {
hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig);
// No way to set cache config yet.
@@ -204,7 +190,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t cacheConfig)
HIP_RETURN(hipSuccess);
}
hipError_t hipFuncSetSharedMemConfig ( const void* func, hipSharedMemConfig config) {
hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) {
HIP_INIT_API(hipFuncSetSharedMemConfig, func, config);
// No way to set Shared Memory config function yet.
@@ -213,11 +199,10 @@ hipError_t hipFuncSetSharedMemConfig ( const void* func, hipSharedMemConfig conf
}
hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, uint32_t sharedMemBytes,
void** kernelParams, void** extra, int deviceId,
uint32_t params = 0) {
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, void** kernelParams, void** extra,
int deviceId, uint32_t params = 0) {
if (f == nullptr) {
LogPrintfError("%s", "Function passed is null");
return hipErrorInvalidImage;
@@ -265,9 +250,9 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
int max_blocks_per_grid = 0;
int best_block_size = 0;
int block_size = blockDimX * blockDimY * blockDimZ;
hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(&num_blocks, &max_blocks_per_grid,
&best_block_size, *device, f,
block_size, sharedMemBytes, true);
hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, *device, f, block_size, sharedMemBytes,
true);
if (err != hipSuccess) {
return err;
}
@@ -374,9 +359,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
amd::Kernel* kernel = function->kernel();
amd::ScopedLock lock(function->dflock_);
hipError_t status =
ihipLaunchKernel_validate(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX,
blockDimY, blockDimZ, sharedMemBytes, kernelParams, extra, deviceId, params);
hipError_t status = ihipLaunchKernel_validate(
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, kernelParams, extra, deviceId, params);
if (status != hipSuccess) {
return status;
}
@@ -409,16 +394,12 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
return hipSuccess;
}
hipError_t hipModuleLaunchKernel(hipFunction_t f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra)
{
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra);
hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY,
uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream,
void** kernelParams, void** extra) {
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY,
blockDimZ, sharedMemBytes, hStream, kernelParams, extra);
size_t globalWorkSizeX = static_cast<size_t>(gridDimX) * blockDimX;
size_t globalWorkSizeY = static_cast<size_t>(gridDimY) * blockDimY;
size_t globalWorkSizeZ = static_cast<size_t>(gridDimZ) * blockDimZ;
@@ -427,11 +408,10 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
HIP_RETURN(hipErrorInvalidConfiguration);
}
HIP_RETURN(ihipModuleLaunchKernel(f, static_cast<uint32_t>(globalWorkSizeX),
static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ),
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr));
HIP_RETURN(ihipModuleLaunchKernel(
f, static_cast<uint32_t>(globalWorkSizeX), static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ), blockDimX, blockDimY, blockDimZ, sharedMemBytes,
hStream, kernelParams, extra, nullptr, nullptr));
}
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
@@ -439,106 +419,77 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags)
{
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
sharedMemBytes, hStream,
localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream,
kernelParams, extra, startEvent, stopEvent, flags);
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes,
hStream, kernelParams, extra, startEvent, stopEvent, flags));
}
// Launches kernel function f on hStream, optionally with start/stop events.
// The global work sizes and block dimensions are forwarded to
// ihipModuleLaunchKernel exactly as given (no grid-by-block multiplication is
// done here), together with sharedMemBytes, kernelParams, extra and the two
// events. No flags argument is passed. The result is returned via HIP_RETURN.
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent,
hipEvent_t stopEvent)
{
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
size_t sharedMemBytes, hipStream_t hStream, void** kernelParams,
void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) {
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra, startEvent, stopEvent);
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra,
startEvent, stopEvent);
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX,
blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams,
extra, startEvent, stopEvent));
}
// Launches kernel function f on hStream, optionally with start/stop events.
// Makes the same ihipModuleLaunchKernel call as hipHccModuleLaunchKernel: all
// work sizes, block dimensions, sharedMemBytes, kernelParams, extra and the two
// events are forwarded unchanged, and no flags argument is passed.
hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent,
hipEvent_t stopEvent)
{
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
size_t sharedMemBytes, hipStream_t hStream, void** kernelParams,
void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) {
HIP_INIT_API(hipModuleLaunchKernelExt, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra, startEvent, stopEvent);
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra,
startEvent, stopEvent);
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX,
blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams,
extra, startEvent, stopEvent));
}
// Shared implementation behind hipLaunchKernel and hipLaunchKernel_spt.
// Invokes STREAM_CAPTURE for the launch first (presumably this records the
// launch and returns early when the stream is being captured -- confirm against
// the macro definition), then calls ihipLaunchKernel with null start/stop
// events and flags set to 0.
extern "C" hipError_t hipLaunchKernel_common(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream)
{
extern "C" hipError_t hipLaunchKernel_common(const void* hostFunction, dim3 gridDim, dim3 blockDim,
void** args, size_t sharedMemBytes,
hipStream_t stream) {
STREAM_CAPTURE(hipLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes);
return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream,
nullptr, nullptr, 0);
return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr,
nullptr, 0);
}
// Launches the kernel identified by hostFunction with the given grid and block
// dimensions, argument array, dynamic shared memory size and stream.
// All arguments are forwarded unchanged to hipLaunchKernel_common, whose result
// is returned via HIP_RETURN.
extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream)
{
extern "C" hipError_t hipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim,
void** args, size_t sharedMemBytes, hipStream_t stream) {
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
}
// Variant of hipLaunchKernel that applies PER_THREAD_DEFAULT_STREAM to stream
// before forwarding to hipLaunchKernel_common (presumably this substitutes the
// per-thread default stream for the null stream -- confirm against the macro
// definition).
// NOTE(review): HIP_INIT_API is invoked with hipLaunchKernel rather than
// hipLaunchKernel_spt -- confirm this is intentional.
extern "C" hipError_t hipLaunchKernel_spt(const void *hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream)
{
extern "C" hipError_t hipLaunchKernel_spt(const void* hostFunction, dim3 gridDim, dim3 blockDim,
void** args, size_t sharedMemBytes, hipStream_t stream) {
HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
PER_THREAD_DEFAULT_STREAM(stream);
HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream));
}
// Extended kernel launch that also takes start/stop events and launch flags.
// Calls ihipLaunchKernel directly with every argument forwarded unchanged;
// unlike hipLaunchKernel_common, this body does not invoke STREAM_CAPTURE.
// NOTE(review): HIP_INIT_API is not given startEvent, stopEvent or flags --
// confirm whether they should be included.
extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream,
hipEvent_t startEvent,
hipEvent_t stopEvent,
int flags)
{
HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, startEvent, stopEvent, flags));
extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim,
void** args, size_t sharedMemBytes, hipStream_t stream,
hipEvent_t startEvent, hipEvent_t stopEvent, int flags) {
HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream);
HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream,
startEvent, stopEvent, flags));
}
hipError_t hipLaunchCooperativeKernel_common(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
hipError_t hipLaunchCooperativeKernel_common(const void* f, dim3 gridDim, dim3 blockDim,
void** kernelParams, uint32_t sharedMemBytes,
hipStream_t hStream) {
if (!hip::isValid(hStream)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -556,34 +507,30 @@ hipError_t hipLaunchCooperativeKernel_common(const void* f,
}
return ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ),
blockDim.x, blockDim.y, blockDim.z,
sharedMemBytes, hStream, kernelParams, nullptr, nullptr, nullptr, 0,
amd::NDRangeKernelCommand::CooperativeGroups);
static_cast<uint32_t>(globalWorkSizeZ), blockDim.x, blockDim.y,
blockDim.z, sharedMemBytes, hStream, kernelParams, nullptr, nullptr,
nullptr, 0, amd::NDRangeKernelCommand::CooperativeGroups);
}
// Public entry point for a cooperative kernel launch.
// Invokes HIP_INIT_API and returns (through HIP_RETURN) the result of
// hipLaunchCooperativeKernel_common, which receives all arguments unchanged.
// Note that `kernelParams` is not part of the HIP_INIT_API argument list below.
hipError_t hipLaunchCooperativeKernel(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
sharedMemBytes, hStream);
HIP_RETURN(hipLaunchCooperativeKernel_common(f,gridDim, blockDim, kernelParams, sharedMemBytes, hStream));
hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim,
void** kernelParams, uint32_t sharedMemBytes,
hipStream_t hStream) {
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream);
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes,
hStream));
}
// Variant of hipLaunchCooperativeKernel that applies PER_THREAD_DEFAULT_STREAM to
// `hStream` before delegating to hipLaunchCooperativeKernel_common.
// NOTE(review): PER_THREAD_DEFAULT_STREAM is defined elsewhere; presumably it substitutes
// the per-thread default stream when `hStream` designates the default stream -- confirm.
// NOTE(review): HIP_INIT_API is passed hipLaunchCooperativeKernel rather than the _spt
// name, and `kernelParams` is not in its argument list; confirm both are intentional.
hipError_t hipLaunchCooperativeKernel_spt(const void* f,
dim3 gridDim, dim3 blockDim,
void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream)
{
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim,
sharedMemBytes, hStream);
hipError_t hipLaunchCooperativeKernel_spt(const void* f, dim3 gridDim, dim3 blockDim,
void** kernelParams, uint32_t sharedMemBytes,
hipStream_t hStream) {
HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream);
PER_THREAD_DEFAULT_STREAM(hStream);
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, hStream));
HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes,
hStream));
}
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags, uint32_t extFlags)
{
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
unsigned int flags, uint32_t extFlags) {
int numActiveGPUs = 0;
hipError_t result = hipSuccess;
result = ihipDeviceGetCount(&numActiveGPUs);
@@ -594,7 +541,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
// Validate all streams passed by user
for (int i = 0; i < numDevices; ++i) {
if (!hip::isValid(launchParamsList[i].stream)) {
return hipErrorInvalidValue;
return hipErrorInvalidValue;
}
}
@@ -605,8 +552,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
uint32_t blockDims = 0;
const hipLaunchParams& launch = launchParamsList[i];
blockDims = launch.blockDim.x * launch.blockDim.y * launch.blockDim.z;
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z *
blockDims;
allGridSize += launch.gridDim.x * launch.gridDim.y * launch.gridDim.z * blockDims;
// Make sure block dimensions are valid
if (0 == blockDims) {
@@ -633,7 +579,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) {
for (int i = 0; i < numDevices; ++i) {
amd::HostQueue* queue =
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
queue->finish();
}
}
@@ -666,11 +612,11 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
HIP_RETURN(hipErrorInvalidConfiguration);
}
result = ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
static_cast<uint32_t>(globalWorkSizeY), static_cast<uint32_t>(globalWorkSizeZ),
launch.blockDim.x, launch.blockDim.y, launch.blockDim.z,
launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
result = ihipModuleLaunchKernel(
func, static_cast<uint32_t>(globalWorkSizeX), static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ), launch.blockDim.x, launch.blockDim.y,
launch.blockDim.z, launch.sharedMem, launch.stream, launch.args, nullptr, nullptr, nullptr,
flags, extFlags, i, numDevices, prevGridSize, allGridSize, firstDevice);
if (result != hipSuccess) {
break;
}
@@ -681,7 +627,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) {
for (int i = 0; i < numDevices; ++i) {
amd::HostQueue* queue =
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
reinterpret_cast<hip::Stream*>(launchParamsList[i].stream)->asHostQueue();
queue->finish();
}
}
@@ -689,18 +635,18 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
return result;
}
// Public entry point for a cooperative launch across several devices.
// Invokes HIP_INIT_API, then delegates to ihipLaunchCooperativeKernelMultiDevice, passing
// `flags` through and setting the extFlags argument to
// CooperativeGroups | CooperativeMultiDeviceGroups.
// launchParamsList: array indexed per device by the callee; numDevices: its element count.
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags)
{
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
unsigned int flags) {
HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags);
HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags,
(amd::NDRangeKernelCommand::CooperativeGroups |
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)));
HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(
launchParamsList, numDevices, flags,
(amd::NDRangeKernelCommand::CooperativeGroups |
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups)));
}
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags) {
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
unsigned int flags) {
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0));
@@ -720,7 +666,7 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const
HIP_RETURN(hipErrorNotSupported);
}
/* Get address and size for the global symbol */
/* Get address and size for the global symbol */
if (hipSuccess != PlatformState::instance().getDynTexRef(name, hmod, texRef)) {
LogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", name, hmod);
HIP_RETURN(hipErrorNotFound);
+205 -238
Просмотреть файл
@@ -27,12 +27,12 @@
#include <unordered_map>
// Magic value that __hipRegisterFatBinary compares against the fat binary wrapper's
// `magic` field before accepting it.
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
// Per-thread stack of ihipExec_t entries.
// NOTE(review): presumably holds the pending launch configurations that configureCall
// pushes and popExec pops -- confirm against PlatformState.
thread_local std::stack<ihipExec_t> execStack_;
PlatformState* PlatformState::platform_; // Initialized as nullptr by default
PlatformState* PlatformState::platform_; // Initialized as nullptr by default
//forward declaration of methods required for __hipRegisterManagedVar
// forward declaration of methods required for __hipRegisterManagedVar
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false);
@@ -40,26 +40,23 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
// Layout of the wrapper object whose address is handed to __hipRegisterFatBinary.
// __hipRegisterFatBinary checks `magic` against __hipFatMAGIC2 and `version` against 1,
// then passes `binary` to PlatformState::addFatBinary.
// NOTE(review): `dummy1` is not read by any code visible here; presumably kept for
// layout compatibility with the compiler-emitted wrapper -- confirm.
struct __CudaFatBinaryWrapper {
unsigned int magic;
unsigned int version;
void* binary;
void* dummy1;
void* binary;
void* dummy1;
};
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
hipModule_t hmod, const char* name);
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
const char* name);
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
hipDeviceptr_t* dptr, size_t* bytes);
extern hipError_t ihipModuleLaunchKernel(hipFunction_t f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags = 0,
uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0);
static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
const char* device_name) {
extern hipError_t ihipModuleLaunchKernel(
hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX,
uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream,
void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent,
uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0,
uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0);
static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) {
// Workaround for device name mismatch.
// Device name may contain feature strings delimited by '+', e.g.
// gfx900+xnack. Currently HIP-Clang does not include feature strings
@@ -73,8 +70,7 @@ static bool isCompatibleCodeObject(const std::string& codeobj_target_id,
return codeobj_target_id == short_name;
}
extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data)
{
extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) {
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic,
@@ -84,21 +80,14 @@ extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data)
return PlatformState::instance().addFatBinary(fbwrapper->binary);
}
extern "C" void __hipRegisterFunction(
hip::FatBinaryInfo** modules,
const void* hostFunction,
char* deviceFunction,
const char* deviceName,
unsigned int threadLimit,
uint3* tid,
uint3* bid,
dim3* blockDim,
dim3* gridDim,
int* wSize) {
static int enable_deferred_loading { []() {
char *var = getenv("HIP_ENABLE_DEFERRED_LOADING");
extern "C" void __hipRegisterFunction(hip::FatBinaryInfo** modules, const void* hostFunction,
char* deviceFunction, const char* deviceName,
unsigned int threadLimit, uint3* tid, uint3* bid,
dim3* blockDim, dim3* gridDim, int* wSize) {
static int enable_deferred_loading{[]() {
char* var = getenv("HIP_ENABLE_DEFERRED_LOADING");
return var ? atoi(var) : 1;
}() };
}()};
hipError_t hip_error = hipSuccess;
hip::Function* func = new hip::Function(std::string(deviceName), modules);
hip_error = PlatformState::instance().registerStatFunction(hostFunction, func);
@@ -121,42 +110,45 @@ extern "C" void __hipRegisterFunction(
// track of the value of the device side global variable between kernel
// executions.
// Registers a device global variable with the platform state.
// Allocates a hip::Var of kind DVK_Variable named after `hostVar` with the given `size`
// and `modules`, and registers it under the host shadow address `var`.
// `deviceVar`, `ext`, `constant` and `global` are not used by the body.
// NOTE(review): the hip::Var is created with `new` and handed to registerStatGlobalVar;
// presumably PlatformState takes ownership -- confirm.
// NOTE(review): `guarantee` is defined elsewhere; presumably it terminates the process
// when the condition is false -- confirm.
extern "C" void __hipRegisterVar(
hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int ext, // Whether this variable is external
size_t size, // Size of the variable
int constant, // Whether this variable is constant
int global) // Unknown, always 0
hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int ext, // Whether this variable is external
size_t size, // Size of the variable
int constant, // Whether this variable is constant
int global) // Unknown, always 0
{
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable, size, 0, 0, modules);
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable,
size, 0, 0, modules);
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
guarantee((err == hipSuccess), "Cannot register Static Global Var");
}
// Registers a surface reference with the platform state.
// Allocates a hip::Var of kind DVK_Surface named after `hostVar`, sized as
// sizeof(surfaceReference), and registers it under the host shadow address `var`.
// `deviceVar`, `type` and `ext` are not used by the body.
// NOTE(review): the hip::Var is created with `new`; presumably PlatformState takes
// ownership via registerStatGlobalVar -- confirm.
// NOTE(review): the guarantee message below reads "Glbal" (sic); it is a runtime string
// and is left unchanged here.
extern "C" void __hipRegisterSurface(hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int type, int ext) {
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface, sizeof(surfaceReference), 0, 0, modules);
extern "C" void __hipRegisterSurface(
hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int type, int ext) {
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface,
sizeof(surfaceReference), 0, 0, modules);
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
guarantee((err == hipSuccess), "Cannot register Static Glbal Var");
}
extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
void **pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p align
// HIP runtime allocates such managed memory and assign it to \p pointer
void *init_value, // Initial value to be copied into \p pointer
const char *name, // Name of the variable in code object
size_t size,
unsigned align) {
extern "C" void __hipRegisterManagedVar(
void* hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
void** pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p
// align HIP runtime allocates such managed memory and assign it to \p pointer
void* init_value, // Initial value to be copied into \p pointer
const char* name, // Name of the variable in code object
size_t size, unsigned align) {
HIP_INIT_VOID();
hipError_t status = ihipMallocManaged(pointer, size, align);
if( status == hipSuccess) {
if (status == hipSuccess) {
amd::HostQueue* queue = hip::getNullStream();
if(queue != nullptr) {
if (queue != nullptr) {
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue);
guarantee((status == hipSuccess), "Error during memcpy to managed memory!");
} else {
@@ -171,28 +163,25 @@ extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip mod
guarantee((status == hipSuccess), "Cannot register Static Managed Var");
}
// Registers a texture reference with the platform state.
// Allocates a hip::Var of kind DVK_Texture named after `hostVar`, sized as
// sizeof(textureReference), and registers it under the host shadow address `var`.
// `deviceVar`, `type`, `norm` and `ext` are not used by the body.
// NOTE(review): the hip::Var is created with `new`; presumably PlatformState takes
// ownership via registerStatGlobalVar -- confirm.
extern "C" void __hipRegisterTexture(hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int type, int norm, int ext) {
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture, sizeof(textureReference), 0, 0, modules);
extern "C" void __hipRegisterTexture(
hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
char* deviceVar, // Variable name in device code
int type, int norm, int ext) {
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture,
sizeof(textureReference), 0, 0, modules);
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
guarantee((err == hipSuccess), "Cannot register Static Global Var");
}
// Removes a previously registered fat binary from the platform state.
// `modules` is the handle type returned by __hipRegisterFatBinary; it is passed to
// PlatformState::removeFatBinary and the result is checked with `guarantee`.
// NOTE(review): `guarantee` is defined elsewhere; presumably it terminates the process
// when the condition is false -- confirm.
extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules)
{
extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules) {
hipError_t err = PlatformState::instance().removeFatBinary(modules);
guarantee((err == hipSuccess), "Cannot Unregister Fat Binary");
}
extern "C" hipError_t hipConfigureCall(
dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream)
{
extern "C" hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
hipStream_t stream) {
HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream);
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
@@ -200,12 +189,8 @@ extern "C" hipError_t hipConfigureCall(
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t __hipPushCallConfiguration(
dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream)
{
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem,
hipStream_t stream) {
HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream);
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
@@ -213,10 +198,8 @@ extern "C" hipError_t __hipPushCallConfiguration(
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
dim3 *blockDim,
size_t *sharedMem,
hipStream_t *stream) {
extern "C" hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
hipStream_t* stream) {
HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream);
ihipExec_t exec;
@@ -229,11 +212,7 @@ extern "C" hipError_t __hipPopCallConfiguration(dim3 *gridDim,
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t hipSetupArgument(
const void *arg,
size_t size,
size_t offset)
{
extern "C" hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
HIP_INIT_API(hipSetupArgument, arg, size, offset);
PlatformState::instance().setupArgument(arg, size, offset);
@@ -241,15 +220,14 @@ extern "C" hipError_t hipSetupArgument(
HIP_RETURN(hipSuccess);
}
extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
{
extern "C" hipError_t hipLaunchByPtr(const void* hostFunction) {
HIP_INIT_API(hipLaunchByPtr, hostFunction);
ihipExec_t exec;
PlatformState::instance().popExec(exec);
hip::Stream* stream = reinterpret_cast<hip::Stream*>(exec.hStream_);
int deviceId = (stream != nullptr)? stream->DeviceId() : ihipGetDevice();
int deviceId = (stream != nullptr) ? stream->DeviceId() : ihipGetDevice();
if (deviceId == -1) {
LogPrintfError("Wrong DeviceId: %d \n", deviceId);
HIP_RETURN(hipErrorNoDevice);
@@ -262,16 +240,12 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
}
size_t size = exec.arguments_.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
void* extra[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
HIP_RETURN(hipModuleLaunchKernel(func,
exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
exec.sharedMem_, exec.hStream_, nullptr, extra));
HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
exec.sharedMem_, exec.hStream_, nullptr, extra));
}
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) {
@@ -283,7 +257,8 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) {
}
size_t sym_size = 0;
HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size));
HIP_RETURN_ONFAIL(
PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size));
HIP_RETURN(hipSuccess, *devPtr);
}
@@ -295,14 +270,14 @@ hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) {
HIP_RETURN(hipErrorInvalidValue);
}
hipDeviceptr_t device_ptr = nullptr;
HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr));
HIP_RETURN_ONFAIL(
PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr));
HIP_RETURN(hipSuccess, *sizePtr);
}
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
hipDeviceptr_t* dptr, size_t* bytes)
{
hipDeviceptr_t* dptr, size_t* bytes) {
HIP_INIT();
/* Get Device Program pointer*/
@@ -325,10 +300,8 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor
namespace hip_impl {
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize,
const amd::Device& device, hipFunction_t func, int inputBlockSize,
size_t dynamicSMemSize, bool bCalcPotentialBlkSz)
{
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) {
hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func);
const amd::Kernel& kernel = *function->kernel();
@@ -340,14 +313,12 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
*bestBlockSize = 0;
// Make sure the requested block size is smaller than max supported
if (inputBlockSize > int(device.info().maxWorkGroupSize_)) {
*maxBlocksPerCU = 0;
*numBlocksPerGrid = 0;
return hipSuccess;
*maxBlocksPerCU = 0;
*numBlocksPerGrid = 0;
return hipSuccess;
}
}
else {
if (inputBlockSize > int(device.info().maxWorkGroupSize_) ||
inputBlockSize <= 0) {
} else {
if (inputBlockSize > int(device.info().maxWorkGroupSize_) || inputBlockSize <= 0) {
// The user wrote the kernel to work with a workgroup size
// bigger than this hardware can support. Or they do not care
// about the size So just assume its maximum size is
@@ -367,18 +338,15 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
size_t maxVGPRs;
uint32_t VgprGranularity;
if (device.isa().versionMajor() <= 9) {
if (device.isa().versionMajor() == 9 &&
device.isa().versionMinor() == 0 &&
if (device.isa().versionMajor() == 9 && device.isa().versionMinor() == 0 &&
device.isa().versionStepping() == 10) {
maxVGPRs = 512;
VgprGranularity = 8;
}
else {
} else {
maxVGPRs = 256;
VgprGranularity = 4;
}
}
else {
} else {
maxVGPRs = 1024;
VgprGranularity = 8;
}
@@ -391,12 +359,10 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
size_t maxSGPRs;
if (device.isa().versionMajor() < 8) {
maxSGPRs = 512;
}
else if (device.isa().versionMajor() < 10) {
} else if (device.isa().versionMajor() < 10) {
maxSGPRs = 800;
}
else {
maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves
} else {
maxSGPRs = SIZE_MAX; // gfx10+ does not share SGPRs between waves
}
const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
GprWaves = std::min(VgprWaves, SgprWaves);
@@ -425,7 +391,8 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
// the maximum available block size for this kernel, which could have come from the
// user. e.g., if the user indicates the maximum block size is 64 threads, but we
// calculate that 128 threads can fit in each CU, we have to give up and return 64.
*bestBlockSize = std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_));
*bestBlockSize =
std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_));
// If the best block size is smaller than the block size used to fit the maximum,
// then we need to make the grid bigger for full occupancy.
const int bestBlocksPerCU = alu_limited_threads / (*bestBlockSize);
@@ -434,13 +401,11 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
return hipSuccess;
}
}
} // namespace hip_impl
extern "C" {
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
const void* f, size_t dynSharedMemPerBlk,
int blockSizeLimit)
{
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f,
size_t dynSharedMemPerBlk, int blockSizeLimit) {
HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
if ((gridSize == nullptr) || (blockSize == nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
@@ -455,7 +420,8 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
int num_blocks = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit, dynSharedMemPerBlk,true);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit,
dynSharedMemPerBlk, true);
if (ret == hipSuccess) {
*blockSize = best_block_size;
*gridSize = max_blocks_per_grid;
@@ -463,10 +429,8 @@ hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
HIP_RETURN(ret);
}
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
hipFunction_t f, size_t dynSharedMemPerBlk,
int blockSizeLimit)
{
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f,
size_t dynSharedMemPerBlk, int blockSizeLimit) {
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
if ((gridSize == nullptr) || (blockSize == nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
@@ -476,7 +440,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize
int num_blocks = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
dynSharedMemPerBlk, true);
if (ret == hipSuccess) {
*blockSize = best_block_size;
*gridSize = max_blocks_per_grid;
@@ -485,10 +450,12 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize
}
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
hipFunction_t f, size_t dynSharedMemPerBlk,
int blockSizeLimit, unsigned int flags)
{
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags);
hipFunction_t f,
size_t dynSharedMemPerBlk,
int blockSizeLimit,
unsigned int flags) {
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk,
blockSizeLimit, flags);
if ((gridSize == nullptr) || (blockSize == nullptr)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -497,7 +464,8 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int*
int num_blocks = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, dynSharedMemPerBlk,true);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
dynSharedMemPerBlk, true);
if (ret == hipSuccess) {
*blockSize = best_block_size;
*gridSize = max_blocks_per_grid;
@@ -505,10 +473,11 @@ hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int*
HIP_RETURN(ret);
}
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
{
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk);
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f,
int blockSize,
size_t dynSharedMemPerBlk) {
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize,
dynSharedMemPerBlk);
if (numBlocks == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -518,16 +487,16 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
int max_blocks_per_grid = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
false);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
hipFunction_t f, int blockSize,
size_t dynSharedMemPerBlk, unsigned int flags)
{
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags);
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) {
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize,
dynSharedMemPerBlk, flags);
if (numBlocks == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -537,14 +506,14 @@ hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numB
int max_blocks_per_grid = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, false);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
false);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
const void* f, int blockSize, size_t dynamicSMemSize)
{
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f,
int blockSize, size_t dynamicSMemSize) {
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize);
if (numBlocks == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
@@ -562,16 +531,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
int max_blocks_per_grid = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
false);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
const void* f,
int blockSize, size_t dynamicSMemSize, unsigned int flags)
{
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags);
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* f,
int blockSize,
size_t dynamicSMemSize,
unsigned int flags) {
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize,
flags);
if (numBlocks == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -588,7 +559,8 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
int max_blocks_per_grid = 0;
int best_block_size = 0;
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, false);
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
false);
*numBlocks = num_blocks;
HIP_RETURN(ret);
}
@@ -599,68 +571,48 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks,
namespace hip_impl {
void hipLaunchKernelGGLImpl(
uintptr_t function_address,
const dim3& numBlocks,
const dim3& dimBlocks,
uint32_t sharedMemBytes,
hipStream_t stream,
void** kernarg)
{
void hipLaunchKernelGGLImpl(uintptr_t function_address, const dim3& numBlocks,
const dim3& dimBlocks, uint32_t sharedMemBytes, hipStream_t stream,
void** kernarg) {
HIP_INIT_VOID();
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
int deviceId = (s != nullptr)? s->DeviceId() : ihipGetDevice();
int deviceId = (s != nullptr) ? s->DeviceId() : ihipGetDevice();
if (deviceId == -1) {
LogPrintfError("Wrong Device Id: %d \n", deviceId);
}
hipFunction_t func = nullptr;
hipError_t hip_error =
PlatformState::instance().getStatFunc(&func,
reinterpret_cast<void*>(function_address),
deviceId);
hipError_t hip_error = PlatformState::instance().getStatFunc(
&func, reinterpret_cast<void*>(function_address), deviceId);
if ((hip_error != hipSuccess) || (func == nullptr)) {
LogPrintfError("Cannot find the static function: 0x%x", function_address);
}
hip_error = hipModuleLaunchKernel(func,
numBlocks.x, numBlocks.y, numBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z,
sharedMemBytes, stream, nullptr, kernarg);
hip_error =
hipModuleLaunchKernel(func, numBlocks.x, numBlocks.y, numBlocks.z, dimBlocks.x, dimBlocks.y,
dimBlocks.z, sharedMemBytes, stream, nullptr, kernarg);
assert(hip_error == hipSuccess);
}
void hipLaunchCooperativeKernelGGLImpl(
uintptr_t function_address,
const dim3& numBlocks,
const dim3& dimBlocks,
uint32_t sharedMemBytes,
hipStream_t stream,
void** kernarg)
{
void hipLaunchCooperativeKernelGGLImpl(uintptr_t function_address, const dim3& numBlocks,
const dim3& dimBlocks, uint32_t sharedMemBytes,
hipStream_t stream, void** kernarg) {
HIP_INIT_VOID();
hipError_t err = hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address),
numBlocks, dimBlocks, kernarg, sharedMemBytes, stream);
hipError_t err = hipLaunchCooperativeKernel(reinterpret_cast<void*>(function_address), numBlocks,
dimBlocks, kernarg, sharedMemBytes, stream);
assert(err == hipSuccess);
}
}
} // namespace hip_impl
#endif // defined(ATI_OS_LINUX)
#endif // defined(ATI_OS_LINUX)
hipError_t ihipLaunchKernel(const void* hostFunction,
dim3 gridDim,
dim3 blockDim,
void** args,
size_t sharedMemBytes,
hipStream_t stream,
hipEvent_t startEvent,
hipEvent_t stopEvent,
int flags)
{
hipFunction_t func = nullptr;
hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, void** args,
size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent,
hipEvent_t stopEvent, int flags) {
hipFunction_t func = nullptr;
int deviceId = hip::Stream::DeviceId(stream);
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId);
if ((hip_error != hipSuccess) || (func == nullptr)) {
@@ -674,19 +626,31 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
globalWorkSizeZ > std::numeric_limits<uint32_t>::max()) {
HIP_RETURN(hipErrorInvalidConfiguration);
}
HIP_RETURN(ihipModuleLaunchKernel(func, static_cast<uint32_t>(globalWorkSizeX),
static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ),
blockDim.x, blockDim.y, blockDim.z,
sharedMemBytes, stream, args, nullptr, startEvent, stopEvent,
flags));
HIP_RETURN(ihipModuleLaunchKernel(
func, static_cast<uint32_t>(globalWorkSizeX), static_cast<uint32_t>(globalWorkSizeY),
static_cast<uint32_t>(globalWorkSizeZ), blockDim.x, blockDim.y, blockDim.z, sharedMemBytes,
stream, args, nullptr, startEvent, stopEvent, flags));
}
// conversion routines between float and half precision
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
// Reinterprets the bit pattern of a 32-bit float as an unsigned 32-bit integer.
// The value is written through the float member of a union and read back through
// the integer member; no numeric conversion takes place.
static inline std::uint32_t f32_as_u32(float f) {
  union FloatBits {
    float as_float;
    std::uint32_t as_u32;
  };
  FloatBits bits;
  bits.as_float = f;
  return bits.as_u32;
}
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
// Reinterprets an unsigned 32-bit integer bit pattern as a 32-bit float.
// The value is written through the integer member of a union and read back through
// the float member; no numeric conversion takes place.
static inline float u32_as_f32(std::uint32_t u) {
  union FloatBits {
    float as_float;
    std::uint32_t as_u32;
  };
  FloatBits bits;
  bits.as_u32 = u;
  return bits.as_float;
}
// Limits i to the range [l, h]: the lower bound is applied first, then the upper bound,
// so h wins when the bounds are inverted (l > h).
static inline int clamp_int(int i, int l, int h) {
  const int lower_bounded = (i < l) ? l : i;
  return (h < lower_bounded) ? h : lower_bounded;
}
@@ -694,15 +658,14 @@ static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l
// half float, the f16 is in the low 16 bits of the input argument
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
std::uint32_t v =
f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U) /*0x1.0p+112f*/) + 0x38000000U;
u = (a & 0x7fff) != 0 ? v : u;
return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
return u32_as_f32(u) * u32_as_f32(0x07800000U) /*0x1.0p-112f*/;
}
// float half with nearest even rounding
@@ -714,7 +677,7 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept {
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
std::uint32_t n = ((std::uint32_t)e << 12) | m;
std::uint32_t s = (u >> 16) & 0x8000;
int b = clamp_int(1-e, 0, 13);
int b = clamp_int(1 - e, 0, 13);
std::uint32_t d = (0x1000 | m) >> b;
d |= (d << b) != (0x1000 | m);
std::uint32_t v = e < 1 ? d : n;
@@ -726,24 +689,25 @@ static inline std::uint32_t __convert_float_to_half(float a) noexcept {
extern "C"
#if !defined(_MSC_VER)
__attribute__((weak))
__attribute__((weak))
#endif
float __gnu_h2f_ieee(unsigned short h){
return __convert_half_to_float((std::uint32_t) h);
float
__gnu_h2f_ieee(unsigned short h) {
return __convert_half_to_float((std::uint32_t)h);
}
extern "C"
#if !defined(_MSC_VER)
__attribute__((weak))
__attribute__((weak))
#endif
unsigned short __gnu_f2h_ieee(float f){
unsigned short
__gnu_f2h_ieee(float f) {
return (unsigned short)__convert_float_to_half(f);
}
void PlatformState::init()
{
void PlatformState::init() {
amd::ScopedLock lock(lock_);
if(initialized_ || g_devices.empty()) {
if (initialized_ || g_devices.empty()) {
return;
}
initialized_ = true;
@@ -751,18 +715,18 @@ void PlatformState::init()
hipError_t err = digestFatBinary(it.first, it.second);
assert(err == hipSuccess);
}
for (auto &it : statCO_.vars_) {
for (auto& it : statCO_.vars_) {
it.second->resize_dVar(g_devices.size());
}
for (auto &it : statCO_.functions_) {
for (auto& it : statCO_.functions_) {
it.second->resize_dFunc(g_devices.size());
}
}
hipError_t PlatformState::loadModule(hipModule_t *module, const char* fname, const void* image) {
hipError_t PlatformState::loadModule(hipModule_t* module, const char* fname, const void* image) {
amd::ScopedLock lock(lock_);
if(module == nullptr) {
if (module == nullptr) {
return hipErrorInvalidValue;
}
@@ -809,7 +773,7 @@ hipError_t PlatformState::unloadModule(hipModule_t hmod) {
}
hipError_t PlatformState::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod,
const char* func_name) {
const char* func_name) {
amd::ScopedLock lock(lock_);
auto it = dynCO_map_.find(hmod);
@@ -828,7 +792,7 @@ hipError_t PlatformState::getDynGlobalVar(const char* hostVar, hipModule_t hmod,
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
amd::ScopedLock lock(lock_);
if(hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) {
if (hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) {
return hipErrorInvalidValue;
}
@@ -880,7 +844,8 @@ hipError_t PlatformState::getDynTexGlobalVar(textureReference* texRef, hipDevice
return hipSuccess;
}
hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef) {
hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod,
textureReference** texRef) {
amd::ScopedLock lock(lock_);
auto it = dynCO_map_.find(hmod);
@@ -897,12 +862,12 @@ hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, te
}
dvar->shadowVptr = new texture<char>();
*texRef = reinterpret_cast<textureReference*>(dvar->shadowVptr);
*texRef = reinterpret_cast<textureReference*>(dvar->shadowVptr);
return hipSuccess;
}
hipError_t PlatformState::digestFatBinary(const void* data, hip::FatBinaryInfo*& programs) {
return statCO_.digestFatBinary(data, programs);
return statCO_.digestFatBinary(data, programs);
}
hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data) {
@@ -925,19 +890,21 @@ hipError_t PlatformState::registerStatManagedVar(hip::Var* var) {
return statCO_.registerStatManagedVar(var);
}
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) {
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction,
int deviceId) {
return statCO_.getStatFunc(hfunc, hostFunction, deviceId);
}
hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId) {
if(func_attr == nullptr || hostFunction == nullptr) {
hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction,
int deviceId) {
if (func_attr == nullptr || hostFunction == nullptr) {
return hipErrorInvalidValue;
}
return statCO_.getStatFuncAttr(func_attr, hostFunction, deviceId);
}
hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
size_t* size_ptr) {
hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId,
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr);
}
@@ -945,7 +912,7 @@ hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) {
return statCO_.initStatManagedVarDevicePtr(deviceId);
}
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
void PlatformState::setupArgument(const void* arg, size_t size, size_t offset) {
auto& arguments = execStack_.top().arguments_;
if (arguments.size() < offset + size) {
+14 -15
Просмотреть файл
@@ -27,9 +27,8 @@
namespace hip_impl {
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize,
const amd::Device& device, hipFunction_t func, int inputBlockSize,
size_t dynamicSMemSize, bool bCalcPotentialBlkSz);
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz);
} /* namespace hip_impl*/
class PlatformState {
@@ -40,14 +39,14 @@ class PlatformState {
PlatformState() {}
~PlatformState() {}
public:
public:
void init();
//Dynamic Code Objects functions
// Dynamic Code Objects functions
hipError_t loadModule(hipModule_t* module, const char* fname, const void* image = nullptr);
hipError_t unloadModule(hipModule_t hmod);
hipError_t getDynFunc(hipFunction_t *hfunc, hipModule_t hmod, const char* func_name);
hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* func_name);
hipError_t getDynGlobalVar(const char* hostVar, hipModule_t hmod, hipDeviceptr_t* dev_ptr,
size_t* size_ptr);
hipError_t getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef);
@@ -59,14 +58,14 @@ public:
/* Singleton instance */
static PlatformState& instance() {
if (platform_ == nullptr) {
// __hipRegisterFatBinary() will call this when app starts, thus
// there is no multiple entry issue here.
platform_ = new PlatformState();
// __hipRegisterFatBinary() will call this when app starts, thus
// there is no multiple entry issue here.
platform_ = new PlatformState();
}
return *platform_;
}
//Static Code Objects functions
// Static Code Objects functions
hip::FatBinaryInfo** addFatBinary(const void* data);
hipError_t removeFatBinary(hip::FatBinaryInfo** module);
hipError_t digestFatBinary(const void* data, hip::FatBinaryInfo*& programs);
@@ -83,15 +82,15 @@ public:
hipError_t initStatManagedVarDevicePtr(int deviceId);
//Exec Functions
void setupArgument(const void *arg, size_t size, size_t offset);
// Exec Functions
void setupArgument(const void* arg, size_t size, size_t offset);
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
void popExec(ihipExec_t& exec);
private:
//Dynamic Code Object map, keyin module to get the corresponding object
private:
// Dynamic Code Object map, keyin module to get the corresponding object
std::unordered_map<hipModule_t, hip::DynCO*> dynCO_map_;
hip::StatCO statCO_; //Static Code object var
hip::StatCO statCO_; // Static Code object var
bool initialized_{false};
std::unordered_map<textureReference*, std::pair<hipModule_t, std::string>> texRef_map_;
};
+102 -10
Просмотреть файл
@@ -81,7 +81,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
progName = name;
}
auto* rtcProgram = new hiprtc::RTCProgram(progName);
auto* rtcProgram = new hiprtc::RTCCompileProgram(progName);
if (rtcProgram == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
}
@@ -98,7 +98,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
}
}
*prog = hiprtc::RTCProgram::as_hiprtcProgram(rtcProgram);
*prog = hiprtc::RTCCompileProgram::as_hiprtcProgram(rtcProgram);
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
@@ -106,7 +106,7 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) {
HIPRTC_INIT_API(prog, numOptions, options);
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
std::vector<std::string> opt;
opt.reserve(numOptions);
@@ -127,7 +127,7 @@ hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expres
if (name_expression == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
std::string name = name_expression;
if (!rtcProgram->trackMangledName(name)) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
@@ -144,7 +144,7 @@ hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expressio
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
if (!rtcProgram->getDemangledName(name_expression, loweredName)) {
return HIPRTC_RETURN(HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID);
@@ -158,7 +158,7 @@ hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) {
if (prog == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(*prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(*prog);
delete rtcProgram;
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
@@ -169,7 +169,7 @@ hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) {
if (binarySizeRet == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
*binarySizeRet = rtcProgram->getExecSize();
@@ -182,7 +182,7 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) {
if (binaryMem == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
auto binary = rtcProgram->getExec();
::memcpy(binaryMem, binary.data(), binary.size());
@@ -194,7 +194,7 @@ hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) {
if (dst == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
auto log = rtcProgram->getLog();
::memcpy(dst, log.data(), log.size());
@@ -206,7 +206,7 @@ hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) {
if (logSizeRet == nullptr) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
auto* rtcProgram = hiprtc::RTCProgram::as_RTCProgram(prog);
auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog);
*logSizeRet = rtcProgram->getLogSize();
@@ -226,3 +226,95 @@ hiprtcResult hiprtcVersion(int* major, int* minor) {
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
// Creates a linker program and returns it to the caller as an opaque hiprtcLinkState.
//
// num_options / options_ptr / options_vals_pptr describe the JIT options that are forwarded
// to RTCLinkProgram::AddLinkerOptions(). hip_link_state_ptr receives the new state on success.
//
// Returns HIPRTC_ERROR_INVALID_INPUT when any pointer argument is null,
// HIPRTC_ERROR_INVALID_OPTION when the options are rejected, HIPRTC_SUCCESS otherwise.
hiprtcResult hiprtcLinkCreate(unsigned int num_options, hiprtcJIT_option* options_ptr,
                              void** options_vals_pptr, hiprtcLinkState* hip_link_state_ptr) {
  HIPRTC_INIT_API(num_options, options_ptr, options_vals_pptr, hip_link_state_ptr);

  if (options_ptr == nullptr || options_vals_pptr == nullptr || hip_link_state_ptr == nullptr) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  std::string name("Linker Program");
  hiprtc::RTCLinkProgram* rtc_link_prog_ptr = new hiprtc::RTCLinkProgram(name);
  if (!rtc_link_prog_ptr->AddLinkerOptions(num_options, options_ptr, options_vals_pptr)) {
    // The program was never handed to the caller, so nobody else can free it.
    delete rtc_link_prog_ptr;
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_OPTION);
  }

  *hip_link_state_ptr = reinterpret_cast<hiprtcLinkState>(rtc_link_prog_ptr);
  HIPRTC_RETURN(HIPRTC_SUCCESS);
}
// Adds the file at file_path as an input of the given linker state.
//
// Returns HIPRTC_ERROR_INVALID_INPUT when hip_link_state or file_path is null or when
// input_type is one of the input kinds rejected below, HIPRTC_ERROR_PROGRAM_CREATION_FAILURE
// when RTCLinkProgram::AddLinkerFile() fails, HIPRTC_SUCCESS otherwise.
// num_options, options_ptr and option_values are only passed to HIPRTC_INIT_API here.
hiprtcResult hiprtcLinkAddFile(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type,
                               const char* file_path, unsigned int num_options,
                               hiprtcJIT_option* options_ptr, void** option_values) {
  HIPRTC_INIT_API(hip_link_state, input_type, file_path, num_options, options_ptr, option_values);

  // file_path is turned into a std::string below, which is undefined for a null pointer.
  if (hip_link_state == nullptr || file_path == nullptr) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX ||
      input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT ||
      input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  hiprtc::RTCLinkProgram* rtc_link_prog_ptr =
      reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
  if (!rtc_link_prog_ptr->AddLinkerFile(std::string(file_path), input_type)) {
    HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
  }

  HIPRTC_RETURN(HIPRTC_SUCCESS);
}
// Adds an in-memory image of image_size bytes, labelled with name, as an input of the given
// linker state.
//
// Returns HIPRTC_ERROR_INVALID_INPUT when hip_link_state, image or name is null, when
// image_size is zero, or when input_type is one of the input kinds rejected below;
// HIPRTC_ERROR_PROGRAM_CREATION_FAILURE when RTCLinkProgram::AddLinkerData() fails;
// HIPRTC_SUCCESS otherwise.
// num_options, options_ptr and option_values are only passed to HIPRTC_INIT_API here.
hiprtcResult hiprtcLinkAddData(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type,
                               void* image, size_t image_size, const char* name,
                               unsigned int num_options, hiprtcJIT_option* options_ptr,
                               void** option_values) {
  HIPRTC_INIT_API(hip_link_state, input_type, image, image_size, name, num_options, options_ptr,
                  option_values);

  // The state is dereferenced below, so a null handle must be rejected up front.
  if (hip_link_state == nullptr) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  // image_size is unsigned, so "empty" is the only invalid size.
  if (image == nullptr || image_size == 0 || name == nullptr) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX ||
      input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT ||
      input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  hiprtc::RTCLinkProgram* rtc_link_prog_ptr =
      reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
  if (!rtc_link_prog_ptr->AddLinkerData(image, image_size, name, input_type)) {
    HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
  }

  HIPRTC_RETURN(HIPRTC_SUCCESS);
}
// Finishes linking for the given linker state by calling RTCLinkProgram::LinkComplete(),
// which receives bin_out and size_out as its output arguments.
//
// Returns HIPRTC_ERROR_INVALID_INPUT when hip_link_state, bin_out or size_out is null,
// HIPRTC_ERROR_LINKING when LinkComplete() fails, HIPRTC_SUCCESS otherwise.
hiprtcResult hiprtcLinkComplete(hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out) {
  HIPRTC_INIT_API(hip_link_state, bin_out, size_out);

  // The state is dereferenced and both outputs are forwarded below; reject null for all three.
  if (hip_link_state == nullptr || bin_out == nullptr || size_out == nullptr) {
    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
  }

  hiprtc::RTCLinkProgram* rtc_link_prog_ptr =
      reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
  if (!rtc_link_prog_ptr->LinkComplete(bin_out, size_out)) {
    HIPRTC_RETURN(HIPRTC_ERROR_LINKING);
  }

  HIPRTC_RETURN(HIPRTC_SUCCESS);
}
// Destroys the linker program behind the opaque hip_link_state handle.
// Always reports HIPRTC_SUCCESS; a null handle is harmless because deleting a null
// pointer is a no-op.
hiprtcResult hiprtcLinkDestroy(hiprtcLinkState hip_link_state) {
  HIPRTC_INIT_API(hip_link_state);

  // Recover the concrete program type from the handle and free it.
  delete reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);

  HIPRTC_RETURN(HIPRTC_SUCCESS);
}
+6 -1
Просмотреть файл
@@ -8,4 +8,9 @@ hiprtcGetProgramLog
hiprtcGetProgramLogSize
hiprtcGetCode
hiprtcGetCodeSize
hiprtcGetErrorString
hiprtcGetErrorString
hiprtcLinkCreate
hiprtcLinkAddFile
hiprtcLinkAddData
hiprtcLinkComplete
hiprtcLinkDestroy
+5
Просмотреть файл
@@ -11,6 +11,11 @@ global:
hiprtcGetErrorString;
hiprtcAddNameExpression;
hiprtcVersion;
hiprtcLinkCreate;
hiprtcLinkAddFile;
hiprtcLinkAddData;
hiprtcLinkComplete;
hiprtcLinkDestroy;
local:
*;
};
+350 -128
Просмотреть файл
@@ -22,110 +22,21 @@ THE SOFTWARE.
#include "hiprtcInternal.hpp"
#include <fstream>
#include <sys/stat.h>
#include "vdi_common.hpp"
#include "utils/flags.hpp"
namespace hiprtc {
using namespace helpers;
RTCProgram::RTCProgram(std::string name_) : name(name_) {
//RTC Program Member Functions
RTCProgram::RTCProgram(std::string name) : name_(name) {
std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib);
if (amd::Comgr::create_data_set(&compileInput) != AMD_COMGR_STATUS_SUCCESS ||
amd::Comgr::create_data_set(&linkInput) != AMD_COMGR_STATUS_SUCCESS ||
amd::Comgr::create_data_set(&execInput) != AMD_COMGR_STATUS_SUCCESS) {
if (amd::Comgr::create_data_set(&exec_input_) != AMD_COMGR_STATUS_SUCCESS) {
crashWithMessage("Failed to allocate internal hiprtc structure");
}
// Add internal header
if (!addBuiltinHeader()) {
crashWithMessage("Unable to add internal header");
}
// Add compile options
const std::string hipVerOpt{"--hip-version=" + std::to_string(HIP_VERSION_MAJOR) + '.' +
std::to_string(HIP_VERSION_MINOR) + '.' +
std::to_string(HIP_VERSION_PATCH)};
const std::string hipVerMajor{"-DHIP_VERSION_MAJOR=" + std::to_string(HIP_VERSION_MAJOR)};
const std::string hipVerMinor{"-DHIP_VERSION_MINOR=" + std::to_string(HIP_VERSION_MINOR)};
const std::string hipVerPatch{"-DHIP_VERSION_PATCH=" + std::to_string(HIP_VERSION_PATCH)};
compileOptions.reserve(20); // count of options below
compileOptions.push_back("-O3");
#ifdef HIPRTC_EARLY_INLINE
compileOptions.push_back("-mllvm");
compileOptions.push_back("-amdgpu-early-inline-all");
#endif
compileOptions.push_back("-mllvm");
compileOptions.push_back("-amdgpu-prelink");
if (GPU_ENABLE_WGP_MODE) compileOptions.push_back("-mcumode");
if (!GPU_ENABLE_WAVE32_MODE) compileOptions.push_back("-mwavefrontsize64");
compileOptions.push_back(hipVerOpt);
compileOptions.push_back(hipVerMajor);
compileOptions.push_back(hipVerMinor);
compileOptions.push_back(hipVerPatch);
compileOptions.push_back("-D__HIPCC_RTC__");
compileOptions.push_back("-include");
compileOptions.push_back("hiprtc_runtime.h");
compileOptions.push_back("-std=c++14");
compileOptions.push_back("-nogpuinc");
#ifdef _WIN32
compileOptions.push_back("-target");
compileOptions.push_back("x86_64-pc-windows-msvc");
compileOptions.push_back("-fms-extensions");
compileOptions.push_back("-fms-compatibility");
#endif
if (!GPU_ENABLE_WAVE32_MODE) linkOptions.push_back("wavefrontsize64");
exeOptions.push_back("-O3");
exeOptions.push_back("-mllvm");
exeOptions.push_back("-amdgpu-internalize-symbols");
exeOptions.push_back("-mcumode");
if (!GPU_ENABLE_WAVE32_MODE) exeOptions.push_back("-mwavefrontsize64");
}
bool RTCProgram::addSource(const std::string& source, const std::string& name) {
if (source.size() == 0 || name.size() == 0) {
LogError("Error in hiprtc: source or name is of size 0 in addSource");
return false;
}
sourceCode += source;
sourceName = name;
return true;
}
// addSource_impl is a different function because we need to add source when we track mangled
// objects
bool RTCProgram::addSource_impl() {
std::vector<char> vsource(sourceCode.begin(), sourceCode.end());
if (!addCodeObjData(compileInput, vsource, sourceName, AMD_COMGR_DATA_KIND_SOURCE)) {
return false;
}
return true;
}
bool RTCProgram::addHeader(const std::string& source, const std::string& name) {
if (source.size() == 0 || name.size() == 0) {
LogError("Error in hiprtc: source or name is of size 0 in addHeader");
return false;
}
std::vector<char> vsource(source.begin(), source.end());
if (!addCodeObjData(compileInput, vsource, name, AMD_COMGR_DATA_KIND_INCLUDE)) {
return false;
}
return true;
}
bool RTCProgram::addBuiltinHeader() {
std::vector<char> source(__hipRTC_header, __hipRTC_header + __hipRTC_header_size);
std::string name{"hiprtc_runtime.h"};
if (!addCodeObjData(compileInput, source, name, AMD_COMGR_DATA_KIND_INCLUDE)) {
return false;
}
return true;
}
bool RTCProgram::findIsa() {
@@ -140,7 +51,7 @@ bool RTCProgram::findIsa() {
if (!handle) {
LogInfo("hip runtime failed to load using dlopen");
buildLog +=
build_log_ +=
"Error: Please provide architecture for which code is to be "
"generated.\n";
return false;
@@ -151,7 +62,7 @@ bool RTCProgram::findIsa() {
if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) {
LogInfo("ISA cannot be found to dlsym failure");
buildLog +=
build_log_ +=
"Error: Please provide architecture for which code is to be "
"generated.\n";
return false;
@@ -173,14 +84,106 @@ bool RTCProgram::findIsa() {
if (status != hipSuccess) {
return false;
}
isa = "amdgcn-amd-amdhsa--";
isa.append(props.gcnArchName);
isa_ = "amdgcn-amd-amdhsa--";
isa_.append(props.gcnArchName);
amd::Os::unloadLibrary(handle);
return true;
}
bool RTCProgram::transformOptions() {
//RTC Compile Program Member Functions
// Builds a compile-capable program on top of RTCProgram: creates the compile and link Comgr
// data sets, registers the built-in runtime header, and seeds the default compile and
// executable options. Any setup failure is reported through crashWithMessage().
RTCCompileProgram::RTCCompileProgram(std::string name_) : RTCProgram(name_) {
  // Short-circuits exactly like the failure test it replaces: the link data set is only
  // created when the compile data set was created successfully.
  const bool data_sets_ready =
      (amd::Comgr::create_data_set(&compile_input_) == AMD_COMGR_STATUS_SUCCESS) &&
      (amd::Comgr::create_data_set(&link_input_) == AMD_COMGR_STATUS_SUCCESS);
  if (!data_sets_ready) {
    crashWithMessage("Failed to allocate internal hiprtc structure");
  }

  // Add internal header
  const bool builtin_header_added = addBuiltinHeader();
  if (!builtin_header_added) {
    crashWithMessage("Unable to add internal header");
  }

  // HIP version components, shared by the version-related options below.
  const std::string version_major = std::to_string(HIP_VERSION_MAJOR);
  const std::string version_minor = std::to_string(HIP_VERSION_MINOR);
  const std::string version_patch = std::to_string(HIP_VERSION_PATCH);

  // Add compile options
  compile_options_.reserve(20);  // count of options below
  compile_options_.push_back("-O3");

#ifdef HIPRTC_EARLY_INLINE
  compile_options_.push_back("-mllvm");
  compile_options_.push_back("-amdgpu-early-inline-all");
#endif

  if (GPU_ENABLE_WGP_MODE) {
    compile_options_.push_back("-mcumode");
  }
  if (!GPU_ENABLE_WAVE32_MODE) {
    compile_options_.push_back("-mwavefrontsize64");
  }

  compile_options_.push_back("--hip-version=" + version_major + '.' + version_minor + '.' +
                             version_patch);
  compile_options_.push_back("-DHIP_VERSION_MAJOR=" + version_major);
  compile_options_.push_back("-DHIP_VERSION_MINOR=" + version_minor);
  compile_options_.push_back("-DHIP_VERSION_PATCH=" + version_patch);
  compile_options_.push_back("-D__HIPCC_RTC__");
  compile_options_.push_back("-include");
  compile_options_.push_back("hiprtc_runtime.h");
  compile_options_.push_back("-std=c++14");
  compile_options_.push_back("-nogpuinc");

#ifdef _WIN32
  compile_options_.push_back("-target");
  compile_options_.push_back("x86_64-pc-windows-msvc");
  compile_options_.push_back("-fms-extensions");
  compile_options_.push_back("-fms-compatibility");
#endif

  exe_options_.push_back("-O3");
}
// Appends source to the accumulated program text and records name as the source name.
// Returns false (after logging an error) when either argument is empty.
bool RTCCompileProgram::addSource(const std::string& source, const std::string& name) {
  const bool has_empty_input = source.empty() || name.empty();
  if (has_empty_input) {
    LogError("Error in hiprtc: source or name is of size 0 in addSource");
    return false;
  }

  source_code_.append(source);
  source_name_ = name;
  return true;
}
// addSource_impl is a different function because we need to add source when we track mangled
// objects
bool RTCCompileProgram::addSource_impl() {
std::vector<char> vsource(source_code_.begin(), source_code_.end());
if (!addCodeObjData(compile_input_, vsource, source_name_, AMD_COMGR_DATA_KIND_SOURCE)) {
return false;
}
return true;
}
// Registers a header with the compile input under the given include name.
// Returns false when either argument is empty (an error is logged) or when the data
// cannot be added to the compile input.
bool RTCCompileProgram::addHeader(const std::string& source, const std::string& name) {
  const bool has_empty_input = source.empty() || name.empty();
  if (has_empty_input) {
    LogError("Error in hiprtc: source or name is of size 0 in addHeader");
    return false;
  }

  std::vector<char> header_bytes(source.begin(), source.end());
  return addCodeObjData(compile_input_, header_bytes, name, AMD_COMGR_DATA_KIND_INCLUDE);
}
// Adds the embedded hiprtc runtime header (__hipRTC_header, __hipRTC_header_size bytes)
// to the compile input as "hiprtc_runtime.h". Returns whether the data could be added.
bool RTCCompileProgram::addBuiltinHeader() {
  std::string header_name{"hiprtc_runtime.h"};
  std::vector<char> header_bytes(__hipRTC_header, __hipRTC_header + __hipRTC_header_size);
  return addCodeObjData(compile_input_, header_bytes, header_name, AMD_COMGR_DATA_KIND_INCLUDE);
}
bool RTCCompileProgram::transformOptions() {
auto getValueOf = [](const std::string& option) {
std::string res;
auto f = std::find(option.begin(), option.end(), '=');
@@ -188,7 +191,7 @@ bool RTCProgram::transformOptions() {
return res;
};
for (auto& i : compileOptions) {
for (auto& i : compile_options_) {
if (i == "-hip-pch") {
LogInfo(
"-hip-pch is deprecated option, has no impact on execution of new hiprtc programs, it "
@@ -204,18 +207,18 @@ bool RTCProgram::transformOptions() {
continue;
}
if (i == "--save-temps") {
settings.dumpISA = true;
settings_.dumpISA = true;
continue;
}
}
if (auto res = std::find_if(
compileOptions.begin(), compileOptions.end(),
compile_options_.begin(), compile_options_.end(),
[](const std::string& str) { return str.find("--offload-arch=") != std::string::npos; });
res != compileOptions.end()) {
res != compile_options_.end()) {
auto isaName = getValueOf(*res);
isa = "amdgcn-amd-amdhsa--" + isaName;
settings.offloadArchProvided = true;
isa_ = "amdgcn-amd-amdhsa--" + isaName;
settings_.offloadArchProvided = true;
return true;
}
// App has not provided the gpu archiecture, need to find it
@@ -224,7 +227,7 @@ bool RTCProgram::transformOptions() {
amd::Monitor RTCProgram::lock_("HIPRTC Program", true);
bool RTCProgram::compile(const std::vector<std::string>& options) {
bool RTCCompileProgram::compile(const std::vector<std::string>& options) {
amd::ScopedLock lock(lock_); // Lock, because LLVM is not multi threaded
if (!addSource_impl()) {
@@ -233,8 +236,8 @@ bool RTCProgram::compile(const std::vector<std::string>& options) {
}
// Append compile options
compileOptions.reserve(compileOptions.size() + options.size());
compileOptions.insert(compileOptions.end(), options.begin(), options.end());
compile_options_.reserve(compile_options_.size() + options.size());
compile_options_.insert(compile_options_.end(), options.begin(), options.end());
if (!transformOptions()) {
LogError("Error in hiprtc: unable to transform options");
@@ -242,48 +245,48 @@ bool RTCProgram::compile(const std::vector<std::string>& options) {
}
std::vector<char> LLVMBitcode;
if (!compileToBitCode(compileInput, isa, compileOptions, buildLog, LLVMBitcode)) {
if (!compileToBitCode(compile_input_, isa_, compile_options_, build_log_, LLVMBitcode)) {
LogError("Error in hiprtc: unable to compile source to bitcode");
return false;
}
std::string linkFileName = "linked";
if (!addCodeObjData(linkInput, LLVMBitcode, linkFileName, AMD_COMGR_DATA_KIND_BC)) {
if (!addCodeObjData(link_input_, LLVMBitcode, linkFileName, AMD_COMGR_DATA_KIND_BC)) {
LogError("Error in hiprtc: unable to add linked code object");
return false;
}
std::vector<char> LinkedLLVMBitcode;
if (!linkLLVMBitcode(linkInput, isa, linkOptions, buildLog, LinkedLLVMBitcode)) {
if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, LinkedLLVMBitcode)) {
LogError("Error in hiprtc: unable to add device libs to linked bitcode");
return false;
}
std::string linkedFileName = "LLVMBitcode.bc";
if (!addCodeObjData(execInput, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) {
if (!addCodeObjData(exec_input_, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) {
LogError("Error in hiprtc: unable to add device libs linked code object");
return false;
}
if (settings.dumpISA) {
if (!dumpIsaFromBC(execInput, isa, exeOptions, name, buildLog)) {
if (settings_.dumpISA) {
if (!dumpIsaFromBC(exec_input_, isa_, exe_options_, name_, build_log_)) {
LogError("Error in hiprtc: unable to dump isa code");
return false;
}
}
if (!createExecutable(execInput, isa, exeOptions, buildLog, executable)) {
if (!createExecutable(exec_input_, isa_, exe_options_, build_log_, executable_)) {
LogError("Error in hiprtc: unable to create executable");
return false;
}
std::vector<std::string> mangledNames;
if (!fillDemangledNames(executable, mangledNames)) {
if (!fillDemangledNames(executable_, mangledNames)) {
LogError("Error in hiprtc: unable to fill demangled names");
return false;
}
if (!getMangledNames(mangledNames, strippedNames, demangledNames)) {
if (!getMangledNames(mangledNames, stripped_names_, demangled_names_)) {
LogError("Error in hiprtc: unable to get mangled names");
return false;
}
@@ -291,7 +294,7 @@ bool RTCProgram::compile(const std::vector<std::string>& options) {
return true;
}
bool RTCProgram::trackMangledName(std::string& name) {
bool RTCCompileProgram::trackMangledName(std::string& name) {
amd::ScopedLock lock(lock_);
if (name.size() == 0) return false;
@@ -312,20 +315,20 @@ bool RTCProgram::trackMangledName(std::string& name) {
return std::isspace(c);
}), strippedNameNoSpace.end());
strippedNames.insert(std::pair<std::string, std::string>(name, strippedNameNoSpace));
demangledNames.insert(std::pair<std::string, std::string>(strippedName, ""));
stripped_names_.insert(std::pair<std::string, std::string>(name, strippedNameNoSpace));
demangled_names_.insert(std::pair<std::string, std::string>(strippedName, ""));
const auto var{"__hiprtc_" + std::to_string(strippedNames.size())};
const auto var{"__hiprtc_" + std::to_string(stripped_names_.size())};
const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name + ";\n"};
sourceCode += code;
source_code_ += code;
return true;
}
bool RTCProgram::getDemangledName(const char* name_expression, const char** loweredName) {
bool RTCCompileProgram::getDemangledName(const char* name_expression, const char** loweredName) {
std::string name = name_expression;
if (auto res = strippedNames.find(name); res != strippedNames.end()) {
if (auto dres = demangledNames.find(res->second); dres != demangledNames.end()) {
if (auto res = stripped_names_.find(name); res != stripped_names_.end()) {
if (auto dres = demangled_names_.find(res->second); dres != demangled_names_.end()) {
if (dres->second.size() != 0) {
*loweredName = dres->second.c_str();
return true;
@@ -333,7 +336,7 @@ bool RTCProgram::getDemangledName(const char* name_expression, const char** lowe
return false;
}
}
if (auto dres = demangledNames.find(name); dres != demangledNames.end()) {
if (auto dres = demangled_names_.find(name); dres != demangled_names_.end()) {
if (dres->second.size() != 0) {
*loweredName = dres->second.c_str();
return true;
@@ -343,4 +346,223 @@ bool RTCProgram::getDemangledName(const char* name_expression, const char** lowe
return false;
}
// RTCLinkProgram member functions
// Constructs a link-only program: the base class is constructed with `name`,
// then the COMGR data set that receives the linker inputs (link_input_) is
// created. crashWithMessage() is called if COMGR cannot create the data set.
RTCLinkProgram::RTCLinkProgram(std::string name) : RTCProgram(name) {
  const auto status = amd::Comgr::create_data_set(&link_input_);
  const bool created = (status == AMD_COMGR_STATUS_SUCCESS);
  if (!created) {
    crashWithMessage("Failed to allocate internal hiprtc structure");
  }
}
// Records the JIT/linker options supplied by the caller in link_args_.
//
//   num_options      - number of entries in both arrays.
//   options_ptr      - option identifiers, one per entry.
//   options_vals_ptr - option values, one per entry, index-matched with
//                      options_ptr.
//
// How an entry of options_vals_ptr is interpreted depends on the option:
//   * most options dereference the entry as a pointer to the option's value;
//   * the log buffers and the global symbol tables store the entry itself as
//     the buffer/table pointer;
//   * HIPRTC_JIT_LOG_VERBOSE takes its value from the pointer bits directly.
//
// Returns true once all entries were processed (unrecognised option ids are
// silently ignored). A call with num_options == 0 succeeds without looking at
// either array. If the arrays or one of the value entries are null while
// options are present, crashWithMessage() is invoked and false is returned.
bool RTCLinkProgram::AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr,
                                      void** options_vals_ptr) {
  // "No options" is a legitimate request and is normally expressed with null
  // arrays, so there is nothing to validate or record in that case.
  if (num_options == 0) {
    return true;
  }

  if (options_ptr == nullptr || options_vals_ptr == nullptr) {
    crashWithMessage("JIT Options ptr cannot be null");
    return false;
  }

  for (size_t opt_idx = 0; opt_idx < num_options; ++opt_idx) {
    void* const value = options_vals_ptr[opt_idx];

    // NOTE(review): this also rejects a by-value HIPRTC_JIT_LOG_VERBOSE of 0,
    // because that value is carried in the pointer itself - confirm intended.
    if (value == nullptr) {
      crashWithMessage("JIT Options value ptr cannot be null");
      return false;
    }

    switch (options_ptr[opt_idx]) {
      case HIPRTC_JIT_MAX_REGISTERS:
        link_args_.max_registers_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_THREADS_PER_BLOCK:
        link_args_.threads_per_block_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_WALL_TIME:
        // NOTE(review): the value is read through a long* but stored in
        // LinkArguments::wall_time_, which is declared float - confirm which
        // type callers actually pass.
        link_args_.wall_time_ = *static_cast<long*>(value);
        break;
      case HIPRTC_JIT_INFO_LOG_BUFFER:
        // The entry itself is the caller's buffer.
        link_args_.info_log_ = static_cast<char*>(value);
        break;
      case HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES:
        link_args_.info_log_size_ = *static_cast<size_t*>(value);
        break;
      case HIPRTC_JIT_ERROR_LOG_BUFFER:
        // The entry itself is the caller's buffer.
        link_args_.error_log_ = static_cast<char*>(value);
        break;
      case HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES:
        link_args_.error_log_size_ = *static_cast<size_t*>(value);
        break;
      case HIPRTC_JIT_OPTIMIZATION_LEVEL:
        link_args_.optimization_level_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_TARGET_FROM_HIPCONTEXT:
        link_args_.target_from_hip_context_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_TARGET:
        link_args_.jit_target_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_FALLBACK_STRATEGY:
        link_args_.fallback_strategy_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_GENERATE_DEBUG_INFO:
        link_args_.generate_debug_info_ = *static_cast<int*>(value);
        break;
      case HIPRTC_JIT_LOG_VERBOSE:
        // Passed by value: the pointer bits are the setting.
        link_args_.log_verbose_ = reinterpret_cast<long>(value);
        break;
      case HIPRTC_JIT_GENERATE_LINE_INFO:
        link_args_.generate_line_info_ = *static_cast<int*>(value);
        break;
      case HIPRTC_JIT_CACHE_MODE:
        link_args_.cache_mode_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_NEW_SM3X_OPT:
        link_args_.sm3x_opt_ = *static_cast<bool*>(value);
        break;
      case HIPRTC_JIT_FAST_COMPILE:
        link_args_.fast_compile_ = *static_cast<bool*>(value);
        break;
      case HIPRTC_JIT_GLOBAL_SYMBOL_NAMES:
        // The entry itself is the table of symbol names.
        link_args_.global_symbol_names_ = static_cast<const char**>(value);
        break;
      case HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS:
        // The entry itself is the table of symbol addresses.
        link_args_.global_symbol_addresses_ = static_cast<void**>(value);
        break;
      case HIPRTC_JIT_GLOBAL_SYMBOL_COUNT:
        link_args_.global_symbol_count_ = *static_cast<unsigned int*>(value);
        break;
      case HIPRTC_JIT_LTO:
        link_args_.lto_ = *static_cast<int*>(value);
        break;
      case HIPRTC_JIT_FTZ:
        link_args_.ftz_ = *static_cast<int*>(value);
        break;
      case HIPRTC_JIT_PREC_DIV:
        link_args_.prec_div_ = *static_cast<int*>(value);
        break;
      case HIPRTC_JIT_PREC_SQRT:
        link_args_.prec_sqrt_ = *static_cast<int*>(value);
        break;
      case HIPRTC_JIT_FMA:
        link_args_.fma_ = *static_cast<int*>(value);
        break;
      default:
        // Unrecognised options are ignored rather than treated as errors.
        break;
    }
  }
  return true;
}
// Translates a hiprtc JIT input type into the COMGR data kind used when the
// input is added to a data set.
//   HIPRTC_JIT_INPUT_LLVM_BITCODE                      -> AMD_COMGR_DATA_KIND_BC
//   HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE              -> AMD_COMGR_DATA_KIND_FATBIN
//   HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE  -> AMD_COMGR_DATA_KIND_FATBIN
// Any other input type is logged and reported as AMD_COMGR_DATA_KIND_UNDEF.
amd_comgr_data_kind_t RTCLinkProgram::GetCOMGRDataKind(hiprtcJITInputType input_type) {
  if (input_type == HIPRTC_JIT_INPUT_LLVM_BITCODE) {
    return AMD_COMGR_DATA_KIND_BC;
  }

  // Plain bundles and archives of bundles share the same COMGR kind here.
  if (input_type == HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE ||
      input_type == HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE) {
    return AMD_COMGR_DATA_KIND_FATBIN;
  }

  LogError("Cannot find the corresponding comgr data kind");
  return AMD_COMGR_DATA_KIND_UNDEF;
}
bool RTCLinkProgram::AddLinkerFile(std::string file_path, hiprtcJITInputType input_type) {
amd::ScopedLock lock(lock_);
struct stat stat_buf;
if (stat(file_path.c_str(), &stat_buf)) {
return false;
}
std::string link_file_name_("Linker Program");
std::vector<char> llvm_bitcode(stat_buf.st_size);
std::ifstream bc_file(file_path, std::ios_base::in | std::ios_base::binary);
if (!bc_file.good()) {
return true;
}
bc_file.read(llvm_bitcode.data(), stat_buf.st_size);
bc_file.close();
amd_comgr_data_kind_t data_kind;
if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) {
LogError("Cannot find the correct COMGR data kind");
return false;
}
if (!addCodeObjData(link_input_, llvm_bitcode, link_file_name_, data_kind)) {
LogError("Error in hiprtc: unable to add linked code object");
return false;
}
return true;
}
bool RTCLinkProgram::AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name,
hiprtcJITInputType input_type) {
amd::ScopedLock lock(lock_);
char* image_char_buf = reinterpret_cast<char*>(image_ptr);
std::vector<char> llvm_bitcode(image_char_buf, image_char_buf + image_size);
amd_comgr_data_kind_t data_kind;
if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) {
LogError("Cannot find the correct COMGR data kind");
return false;
}
if(!addCodeObjData(link_input_,llvm_bitcode , link_file_name, data_kind)) {
LogError("Error in hiprtc: unable to add linked code object");
return false;
}
return true;
}
bool RTCLinkProgram::LinkComplete(void** bin_out, size_t* size_out) {
amd::ScopedLock lock(lock_);
if (!findIsa()) {
return false;
}
std::vector<char> linked_llvm_bitcode;
if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, linked_llvm_bitcode)) {
LogError("Error in hiprtc: unable to add device libs to linked bitcode");
return false;
}
std::string linkedFileName = "LLVMBitcode.bc";
if (!addCodeObjData(exec_input_, linked_llvm_bitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) {
LogError("Error in hiprtc: unable to add linked bitcode");
return false;
}
std::vector<std::string> exe_options;
exe_options.push_back("-O3");
if (!createExecutable(exec_input_, isa_, exe_options, build_log_, executable_)) {
LogError("Error in hiprtc: unable to create exectuable");
return false;
}
*size_out = executable_.size();
char* bin_out_c = new char[*size_out];
std::copy(executable_.begin(), executable_.end(), bin_out_c);
*bin_out = reinterpret_cast<void*>(bin_out_c);
return true;
}
} // namespace hiprtc
+111 -40
Просмотреть файл
@@ -46,7 +46,6 @@ extern unsigned __hipRTC_header_size;
#include "hiprtcComgrHelper.hpp"
namespace hiprtc {
namespace internal {
template <typename T> inline std::string ToString(T v) {
@@ -90,67 +89,139 @@ struct Settings {
};
class RTCProgram {
protected:
// Lock and control variables
static amd::Monitor lock_;
static std::once_flag initialized;
static std::once_flag initialized_;
std::string name;
Settings settings;
RTCProgram(std::string name);
~RTCProgram() {
amd::Comgr::destroy_data_set(exec_input_);
}
std::string isa;
std::string buildLog;
std::vector<char> executable;
std::map<std::string, std::string> strippedNames;
std::map<std::string, std::string> demangledNames;
std::string sourceCode;
std::string sourceName;
std::vector<std::string> compileOptions;
std::vector<std::string> linkOptions;
std::vector<std::string> exeOptions;
amd_comgr_data_set_t compileInput;
amd_comgr_data_set_t linkInput;
amd_comgr_data_set_t execInput;
bool dumpIsa();
// Member Functions
bool findIsa();
// Data Members
std::string name_;
std::string isa_;
std::string build_log_;
std::vector<char> executable_;
amd_comgr_data_set_t exec_input_;
std::vector<std::string> exe_options_;
};
class RTCCompileProgram : public RTCProgram {
// Private Data Members
Settings settings_;
std::string source_code_;
std::string source_name_;
std::map<std::string, std::string> stripped_names_;
std::map<std::string, std::string> demangled_names_;
std::vector<std::string> compile_options_;
std::vector<std::string> link_options_;
amd_comgr_data_set_t compile_input_;
amd_comgr_data_set_t link_input_;
// Private Member functions
bool addSource_impl();
bool addBuiltinHeader();
bool transformOptions();
RTCProgram() = delete;
RTCProgram(RTCProgram&) = delete;
RTCProgram& operator=(RTCProgram&) = delete;
RTCCompileProgram() = delete;
RTCCompileProgram(RTCCompileProgram&) = delete;
RTCCompileProgram& operator=(RTCCompileProgram&) = delete;
public:
RTCProgram(std::string);
RTCCompileProgram(std::string);
~RTCCompileProgram() {
amd::Comgr::destroy_data_set(compile_input_);
amd::Comgr::destroy_data_set(link_input_);
}
// Converters
inline static hiprtcProgram as_hiprtcProgram(RTCProgram* p) {
inline static hiprtcProgram as_hiprtcProgram(RTCCompileProgram* p) {
return reinterpret_cast<hiprtcProgram>(p);
}
inline static RTCProgram* as_RTCProgram(hiprtcProgram& p) {
return reinterpret_cast<RTCProgram*>(p);
inline static RTCCompileProgram* as_RTCCompileProgram(hiprtcProgram& p) {
return reinterpret_cast<RTCCompileProgram*>(p);
}
// Public Member Functions
bool addSource(const std::string& source, const std::string& name);
bool addHeader(const std::string& source, const std::string& name);
bool compile(const std::vector<std::string>& options);
bool getDemangledName(const char* name_expression, const char** loweredName);
bool trackMangledName(std::string& name);
const std::vector<char>& getExec() const { return executable; }
size_t getExecSize() const { return executable.size(); }
const std::string& getLog() const { return buildLog; }
size_t getLogSize() const { return buildLog.size(); }
~RTCProgram() {
amd::Comgr::destroy_data_set(compileInput);
amd::Comgr::destroy_data_set(linkInput);
amd::Comgr::destroy_data_set(execInput);
}
// Public Getter/Setters
const std::vector<char>& getExec() const { return executable_; }
size_t getExecSize() const { return executable_.size(); }
const std::string& getLog() const { return build_log_; }
size_t getLogSize() const { return build_log_.size(); }
};
// Linker arguments passed via hipLinkCreate.
//
// One field per JIT option. Every field has a zero/null default so that an
// option the caller did not supply reads as 0 / false / nullptr rather than an
// indeterminate value.
struct LinkArguments {
  unsigned int max_registers_ = 0;
  unsigned int threads_per_block_ = 0;
  float wall_time_ = 0.0f;
  size_t info_log_size_ = 0;
  char* info_log_ = nullptr;   // caller-provided buffer
  size_t error_log_size_ = 0;
  char* error_log_ = nullptr;  // caller-provided buffer
  unsigned int optimization_level_ = 0;
  unsigned int target_from_hip_context_ = 0;
  unsigned int jit_target_ = 0;
  unsigned int fallback_strategy_ = 0;
  int generate_debug_info_ = 0;
  long log_verbose_ = 0;
  int generate_line_info_ = 0;
  unsigned int cache_mode_ = 0;
  bool sm3x_opt_ = false;
  bool fast_compile_ = false;
  const char** global_symbol_names_ = nullptr;  // caller-provided table
  void** global_symbol_addresses_ = nullptr;    // caller-provided table
  unsigned int global_symbol_count_ = 0;
  int lto_ = 0;
  int ftz_ = 0;
  int prec_div_ = 0;
  int prec_sqrt_ = 0;
  int fma_ = 0;
};
// A program that is built by linking caller-supplied inputs (files or
// in-memory images) rather than by compiling source. Inputs are collected in
// a COMGR data set and turned into an executable by LinkComplete().
class RTCLinkProgram : public RTCProgram {
  // Private Member Functions (forbid these function calls)
  RTCLinkProgram() = delete;
  RTCLinkProgram(const RTCLinkProgram&) = delete;
  RTCLinkProgram& operator=(const RTCLinkProgram&) = delete;

  // Maps a hiprtc input type to the COMGR data kind used for that input.
  amd_comgr_data_kind_t GetCOMGRDataKind(hiprtcJITInputType input_type);

  // Linker arguments recorded by AddLinkerOptions. Value-initialized so that
  // options the caller never supplied read as zero/null.
  LinkArguments link_args_{};

  // Private Data Members
  amd_comgr_data_set_t link_input_;        // data set holding the inputs to link
  std::vector<std::string> link_options_;  // options passed to the bitcode link step

 public:
  // Creates the link input data set.
  RTCLinkProgram(std::string name);

  // Releases the link input data set.
  ~RTCLinkProgram() {
    amd::Comgr::destroy_data_set(link_input_);
  }

  // Public Member Functions

  // Stores the given JIT options; the arrays are index-matched.
  bool AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr,
                        void** options_vals_ptr);

  // Loads a file from disk and adds it as a linker input.
  bool AddLinkerFile(std::string file_path, hiprtcJITInputType input_type);

  // Adds an in-memory image as a linker input under the given name.
  bool AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name,
                     hiprtcJITInputType input_type);

  // Links all inputs and returns the resulting executable and its size.
  bool LinkComplete(void** bin_out, size_t* size_out);
};
} // namespace hiprtc