2
0

Merge pull request #730 from yxsamliu/dbg-fatbin

Add more checks and debug outputs for fat binary

[ROCm/clr commit: e510be9395]
Este cometimento está contido em:
Maneesh Gupta
2018-11-08 11:44:27 +05:30
cometido por GitHub
ascendente 387e6de9e6 cc101b22ed
cometimento a3758b506d
3 ficheiros modificados com 59 adições e 13 eliminações
+52 -12
Ver ficheiro
@@ -22,6 +22,7 @@ THE SOFTWARE.
#include <unordered_map>
#include <string>
#include <fstream>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
@@ -86,6 +87,7 @@ __hipRegisterFatBinary(const void* data)
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
tprintf(DB_FB, "Found bundle for %s\n", target.c_str());
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hsa_agent_t agent = g_allAgents[deviceId + 1];
@@ -110,10 +112,35 @@ __hipRegisterFatBinary(const void* data)
if (module->executable.handle) {
modules->at(deviceId) = module;
tprintf(DB_FB, "Loaded code object for %s\n", name);
if (HIP_DUMP_CODE_OBJECT) {
char fname[30];
static std::atomic<int> index;
sprintf(fname, "__hip_dump_code_object%04d.o", index++);
tprintf(DB_FB, "Dump code object %s\n", fname);
std::ofstream ofs;
ofs.open(fname, std::ios::binary);
ofs << image;
ofs.close();
}
} else {
fprintf(stderr, "Failed to load code object for %s\n", name);
abort();
}
}
}
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hsa_agent_t agent = g_allAgents[deviceId + 1];
char name[64] = {};
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
if (!(*modules)[deviceId]) {
fprintf(stderr, "No device code bundle for %s\n", name);
abort();
}
}
tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules);
return modules;
}
@@ -132,13 +159,18 @@ extern "C" void __hipRegisterFunction(
dim3* gridDim,
int* wSize)
{
HIP_INIT_API(modules, hostFunction, deviceFunction, deviceName);
std::vector<hipFunction_t> functions{g_deviceCnt};
assert(modules && modules->size() >= g_deviceCnt);
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hipFunction_t function;
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) {
functions[deviceId] = function;
}
else {
tprintf(DB_FB, "missing kernel %s for device %d\n", deviceName, deviceId);
}
}
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
@@ -180,6 +212,7 @@ hipError_t hipSetupArgument(
size_t size,
size_t offset)
{
HIP_INIT_API(arg, size, offset);
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
auto& arguments = crit->_execStack.top()._arguments;
@@ -194,6 +227,7 @@ hipError_t hipSetupArgument(
hipError_t hipLaunchByPtr(const void *hostFunction)
{
HIP_INIT_API(hostFunction);
ihipExec_t exec;
{
auto ctx = ihipGetTlsDefaultCtx();
@@ -213,20 +247,26 @@ hipError_t hipLaunchByPtr(const void *hostFunction)
deviceId = 0;
}
hipError_t e = hipSuccess;
decltype(g_functions)::iterator it;
if ((it = g_functions.find(hostFunction)) == g_functions.end())
return hipErrorUnknown;
if ((it = g_functions.find(hostFunction)) == g_functions.end()) {
e = hipErrorUnknown;
fprintf(stderr, "kernel %p not found!\n", hostFunction);
abort();
} else {
size_t size = exec._arguments.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
size_t size = exec._arguments.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
e = hipModuleLaunchKernel(it->second[deviceId],
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
exec._sharedMem, exec._hStream, nullptr, extra);
}
return hipModuleLaunchKernel(it->second[deviceId],
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
exec._sharedMem, exec._hStream, nullptr, extra);
return ihipLogStatus(e);
}
+6
Ver ficheiro
@@ -97,6 +97,8 @@ int HIP_INIT_ALLOC = -1;
int HIP_SYNC_STREAM_WAIT = 0;
int HIP_FORCE_NULL_STREAM = 0;
int HIP_DUMP_CODE_OBJECT = 0;
#if (__hcc_workweek__ >= 17300)
// Make sure we have required bug fix in HCC
@@ -1294,6 +1296,10 @@ void HipReadEnv() {
"overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag "
"when creating the event.");
READ_ENV_I(release, HIP_DUMP_CODE_OBJECT, 0,
"If set, dump code object as __hip_dump_code_object[nnnn].o in the current directory,"
"where nnnn is the index number.");
// Some flags have both compile-time and runtime flags - generate a warning if user enables the
// runtime flag but the compile-time flag is disabled.
if (HIP_DB && !COMPILE_HIP_DB) {
+1 -1
Ver ficheiro
@@ -83,11 +83,11 @@ extern int HIP_SYNC_NULL_STREAM;
extern int HIP_INIT_ALLOC;
extern int HIP_FORCE_NULL_STREAM;
extern int HIP_DUMP_CODE_OBJECT;
// TODO - remove when this is standard behavior.
extern int HCC_OPT_FLUSH;
// Class to assign a short TID to each new thread, for HIP debugging purposes.
class TidInfo {
public: