Merge pull request #730 from yxsamliu/dbg-fatbin
Add more checks and debug outputs for fat binary
[ROCm/clr commit: e510be9395]
Este cometimento está contido em:
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
@@ -86,6 +87,7 @@ __hipRegisterFatBinary(const void* data)
|
||||
|
||||
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
|
||||
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
|
||||
tprintf(DB_FB, "Found bundle for %s\n", target.c_str());
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
@@ -110,10 +112,35 @@ __hipRegisterFatBinary(const void* data)
|
||||
|
||||
if (module->executable.handle) {
|
||||
modules->at(deviceId) = module;
|
||||
tprintf(DB_FB, "Loaded code object for %s\n", name);
|
||||
if (HIP_DUMP_CODE_OBJECT) {
|
||||
char fname[30];
|
||||
static std::atomic<int> index;
|
||||
sprintf(fname, "__hip_dump_code_object%04d.o", index++);
|
||||
tprintf(DB_FB, "Dump code object %s\n", fname);
|
||||
std::ofstream ofs;
|
||||
ofs.open(fname, std::ios::binary);
|
||||
ofs << image;
|
||||
ofs.close();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Failed to load code object for %s\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
|
||||
char name[64] = {};
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
|
||||
if (!(*modules)[deviceId]) {
|
||||
fprintf(stderr, "No device code bundle for %s\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules);
|
||||
return modules;
|
||||
}
|
||||
@@ -132,13 +159,18 @@ extern "C" void __hipRegisterFunction(
|
||||
dim3* gridDim,
|
||||
int* wSize)
|
||||
{
|
||||
HIP_INIT_API(modules, hostFunction, deviceFunction, deviceName);
|
||||
std::vector<hipFunction_t> functions{g_deviceCnt};
|
||||
|
||||
assert(modules && modules->size() >= g_deviceCnt);
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hipFunction_t function;
|
||||
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) {
|
||||
functions[deviceId] = function;
|
||||
}
|
||||
else {
|
||||
tprintf(DB_FB, "missing kernel %s for device %d\n", deviceName, deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
|
||||
@@ -180,6 +212,7 @@ hipError_t hipSetupArgument(
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
HIP_INIT_API(arg, size, offset);
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
auto& arguments = crit->_execStack.top()._arguments;
|
||||
@@ -194,6 +227,7 @@ hipError_t hipSetupArgument(
|
||||
|
||||
hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
{
|
||||
HIP_INIT_API(hostFunction);
|
||||
ihipExec_t exec;
|
||||
{
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
@@ -213,20 +247,26 @@ hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
deviceId = 0;
|
||||
}
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
decltype(g_functions)::iterator it;
|
||||
if ((it = g_functions.find(hostFunction)) == g_functions.end())
|
||||
return hipErrorUnknown;
|
||||
if ((it = g_functions.find(hostFunction)) == g_functions.end()) {
|
||||
e = hipErrorUnknown;
|
||||
fprintf(stderr, "kernel %p not found!\n", hostFunction);
|
||||
abort();
|
||||
} else {
|
||||
size_t size = exec._arguments.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
size_t size = exec._arguments.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
e = hipModuleLaunchKernel(it->second[deviceId],
|
||||
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
|
||||
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
|
||||
exec._sharedMem, exec._hStream, nullptr, extra);
|
||||
}
|
||||
|
||||
return hipModuleLaunchKernel(it->second[deviceId],
|
||||
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
|
||||
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
|
||||
exec._sharedMem, exec._hStream, nullptr, extra);
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
@@ -97,6 +97,8 @@ int HIP_INIT_ALLOC = -1;
|
||||
int HIP_SYNC_STREAM_WAIT = 0;
|
||||
int HIP_FORCE_NULL_STREAM = 0;
|
||||
|
||||
int HIP_DUMP_CODE_OBJECT = 0;
|
||||
|
||||
|
||||
#if (__hcc_workweek__ >= 17300)
|
||||
// Make sure we have required bug fix in HCC
|
||||
@@ -1294,6 +1296,10 @@ void HipReadEnv() {
|
||||
"overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag "
|
||||
"when creating the event.");
|
||||
|
||||
READ_ENV_I(release, HIP_DUMP_CODE_OBJECT, 0,
|
||||
"If set, dump code object as __hip_dump_code_object[nnnn].o in the current directory,"
|
||||
"where nnnn is the index number.");
|
||||
|
||||
// Some flags have both compile-time and runtime flags - generate a warning if user enables the
|
||||
// runtime flag but the compile-time flag is disabled.
|
||||
if (HIP_DB && !COMPILE_HIP_DB) {
|
||||
|
||||
@@ -83,11 +83,11 @@ extern int HIP_SYNC_NULL_STREAM;
|
||||
extern int HIP_INIT_ALLOC;
|
||||
extern int HIP_FORCE_NULL_STREAM;
|
||||
|
||||
extern int HIP_DUMP_CODE_OBJECT;
|
||||
|
||||
// TODO - remove when this is standard behavior.
|
||||
extern int HCC_OPT_FLUSH;
|
||||
|
||||
|
||||
// Class to assign a short TID to each new thread, for HIP debugging purposes.
|
||||
class TidInfo {
|
||||
public:
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador