P4 to Git Change 2024454 by axie@axie-hip-rocm on 2019/11/04 14:38:31

SWDEV-198863 - Options for hip-clang-vdi path to provide the chicken bits, or functional equivalents to HCC_DB (phase 1)

	1. The log macros is turned off for release build.  So log functions has zero impact to release build.
	2. The log macros have level, mask, condition control. So we can have more control to avoid log flooding.

	I also adjusted some existing log to use new log functions.
	1. To excercise and test the new log functions.
	2. To improve performance slightly.
	3. The change is mainly for HIP-ROCM, we can move more in next phases for PAL or ORCA.
	4. I make these log feature unavailable for release build. We can revert to old log functions for release build in a case by case method.

	Tests:
	1. http://ocltc.amd.com:8111/viewModification.html?modId=128289&personal=true&tab=vcsModificationBuilds
	http://ocltc.amd.com:8111/viewModification.html?modId=128358&personal=true&tab=vcsModificationBuilds

	2. release build, run hip program, there is no log
	3. fastdebug build, run hip program,
	export LOG_LEVEL=3
	export GPU_LOG_MASK=4294967295
	There was a lot of logs.

	4. fastdebug build, run hip program,
	export LOG_LEVEL=2
	export GPU_LOG_MASK=4294967295
	There was no logs.

	5. fastdebug build, run hip program,
	export LOG_LEVEL=3
	export GPU_LOG_MASK=4294967294
	There was much less logs.

	6. fastdebug build, run hip program,
	export LOG_LEVEL=3
	export GPU_LOG_MASK=47102
	There was even much less logs. The logs was expected according to the mask.

	7. Tested step 2 to 6 similarily in Windows and Linux

	ReviewBoard: http://ocltc.amd.com/reviews/r/18215

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#46 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#82 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hiprtc_internal.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/comgrctx.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#68 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#137 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#91 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#323 edit


[ROCm/clr commit: 3f6e18bf6b]
Этот коммит содержится в:
foreman
2019-11-04 14:44:59 -05:00
родитель cf6b79ccd0
Коммит 46a6201be0
10 изменённых файлов: 82 добавлений и 16 удалений
+1 -1
Просмотреть файл
@@ -13,7 +13,7 @@ bool Comgr::is_ready_ = false;
bool Comgr::LoadLib() {
#if defined(COMGR_DYN_DLL)
LogInfo("Loading COMGR library.");
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loading COMGR library.");
static const char* ComgrLibName =
LP64_SWITCH(WINDOWS_SWITCH("amd_comgr32.dll", "libamd_comgr32.so"),
WINDOWS_SWITCH("amd_comgr.dll", "libamd_comgr.so"));
+2 -2
Просмотреть файл
@@ -1731,7 +1731,7 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
} while (end != std::string::npos);
if (tokens.size() < 2) {
LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str());
ClPrint(amd::LOG_WARNING, amd::LOG_KERN, "Invalid PrintInfo string: \"%s\"", str.c_str());
continue;
}
@@ -1747,7 +1747,7 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
// ensure that we have the correct number of tokens
if (tokens.size() < end + 1 /*last token is the fmtString*/) {
LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str());
ClPrint(amd::LOG_WARNING, amd::LOG_KERN, "Invalid PrintInfo string: \"%s\"", str.c_str());
continue;
}
+3 -3
Просмотреть файл
@@ -1398,7 +1398,7 @@ static void dumpCodeObject(const std::string& image) {
char fname[30];
static std::atomic<int> index;
sprintf(fname, "_code_object%04d.o", index++);
LogPrintfInfo("Code object saved in %s\n", fname);
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Code object saved in %s\n", fname);
std::ofstream ofs;
ofs.open(fname, std::ios::binary);
ofs << image;
@@ -2905,7 +2905,7 @@ bool Program::createKernelMetadataMap() {
status = amd::Comgr::metadata_lookup(metadata_, "Kernels", &kernelsMD);
if (status == AMD_COMGR_STATUS_SUCCESS) {
LogInfo("Using Code Object V2.");
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V2.");
hasKernelMD = true;
codeObjectVer_ = 2;
}
@@ -2913,7 +2913,7 @@ bool Program::createKernelMetadataMap() {
status = amd::Comgr::metadata_lookup(metadata_, "amdhsa.kernels", &kernelsMD);
if (status == AMD_COMGR_STATUS_SUCCESS) {
LogInfo("Using Code Object V3.");
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V3.");
hasKernelMD = true;
codeObjectVer_ = 3;
}
+7 -7
Просмотреть файл
@@ -396,7 +396,7 @@ void Device::XferBuffers::release(VirtualGPU& gpu, Memory& buffer) {
}
bool Device::init() {
LogInfo("Initializing HSA stack.");
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Initializing HSA stack.");
// Initialize the compiler
if (!initCompiler(offlineDevice_)) {
@@ -469,7 +469,7 @@ bool Device::init() {
}
// If the AmdDeviceInfo for the HsaDevice Id could not be found return false
if (id == HSA_INVALID_DEVICE_ID) {
LogPrintfWarning("Could not find a DeviceInfo entry for %d", deviceId);
ClPrint(amd::LOG_WARNING, amd::LOG_INIT, "Could not find a DeviceInfo entry for %d", deviceId);
continue;
}
roc_device->deviceInfo_ = DeviceInfo[id];
@@ -1835,8 +1835,8 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
hsa_queue_t *Device::acquireQueue(uint32_t queue_size_hint) {
assert(queuePool_.size() <= GPU_MAX_HW_QUEUES);
LogPrintfInfo("number of allocated hardware queues: %d, maximum: %d",
queuePool_.size(), GPU_MAX_HW_QUEUES);
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "number of allocated hardware queues: %d, maximum: %d",
queuePool_.size(), GPU_MAX_HW_QUEUES);
// If we have reached the max number of queues, reuse an existing queue,
// choosing the one with the least number of users.
@@ -1846,7 +1846,7 @@ hsa_queue_t *Device::acquireQueue(uint32_t queue_size_hint) {
[] (PoolRef A, PoolRef B) {
return A.second.refCount < B.second.refCount;
});
LogPrintfInfo("selected queue with least refCount: %p (%d)",
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "selected queue with least refCount: %p (%d)",
lowest->first, lowest->second.refCount);
lowest->second.refCount++;
return lowest->first;
@@ -1870,7 +1870,7 @@ hsa_queue_t *Device::acquireQueue(uint32_t queue_size_hint) {
return nullptr;
}
}
LogPrintfInfo("created hardware queue %p with size %d",
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "created hardware queue %p with size %d",
queue, queue_size);
hsa_amd_profiling_set_profiler_enabled(queue, 1);
auto result = queuePool_.emplace(std::make_pair(queue, QueueInfo()));
@@ -1890,7 +1890,7 @@ void Device::releaseQueue(hsa_queue_t* queue) {
if (qInfo.refCount != 0) {
return;
}
LogPrintfInfo("deleting hardware queue %p with refCount 0", queue);
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "deleting hardware queue %p with refCount 0", queue);
hsa_queue_destroy(queue);
queuePool_.erase(qIter);
+2 -2
Просмотреть файл
@@ -300,7 +300,7 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
gpuMem->syncCacheFromHost(*this);
}
const void* globalAddress = *reinterpret_cast<const void* const*>(params + desc.offset_);
LogPrintfInfo("!\targ%d: %s %s = ptr:%p obj:[%p-%p] threadId : %zx\n", index,
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "!\targ%d: %s %s = ptr:%p obj:[%p-%p] threadId : %zx\n", index,
desc.typeName_.c_str(), desc.name_.c_str(),
globalAddress, gpuMem->getDeviceMemory(),
reinterpret_cast<address>(gpuMem->getDeviceMemory()) + mem->getSize(),
@@ -2034,7 +2034,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
return false;
}
LogPrintfInfo("[%zx]!\tShaderName : %s\n", std::this_thread::get_id(), gpuKernel.name().c_str());
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "[%zx]!\tShaderName : %s\n", std::this_thread::get_id(), gpuKernel.name().c_str());
// Check if runtime has to setup hidden arguments
for (uint32_t i = signature.numParameters(); i < signature.numParametersAll(); ++i) {
+8
Просмотреть файл
@@ -96,6 +96,8 @@ bool Event::setStatus(cl_int status, uint64_t timeStamp) {
}
if (status <= CL_COMPLETE) {
ClPrint(LOG_DEBUG, LOG_CMD, "command %p complete", &command());
// Before we notify the waiters that this event reached the CL_COMPLETE
// status, we release all the resources associated with this instance.
releaseResources();
@@ -160,12 +162,16 @@ bool Event::awaitCompletion() {
return false;
}
ClPrint(LOG_DEBUG, LOG_WAIT, "waiting for event %p to complete, current status %d", this, status_);
ScopedLock lock(lock_);
// Wait until the status becomes CL_COMPLETE or negative.
while (status_ > CL_COMPLETE) {
lock_.wait();
}
ClPrint(LOG_DEBUG, LOG_WAIT, "event %p wait completed", this);
}
return status_ == CL_COMPLETE;
@@ -180,6 +186,7 @@ bool Event::notifyCmdQueue() {
notified_.clear();
return false;
}
ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue);
command->enqueue();
command->release();
}
@@ -220,6 +227,7 @@ void Command::enqueue() {
if (IS_HIP) {
queue_->setLastQueuedCommand(this);
}
ClPrint(LOG_DEBUG, LOG_CMD, "command is enqueued: %p", this);
queue_->append(*this);
queue_->flush();
if ((queue_->device().settings().waitCommand_ && (type_ != 0)) ||
+3 -1
Просмотреть файл
@@ -75,10 +75,11 @@ void HostQueue::finish() {
if (command == NULL) {
return;
}
ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued");
command->enqueue();
command->awaitCompletion();
command->release();
ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished");
}
void HostQueue::loop(device::VirtualDevice* virtualDevice) {
@@ -132,6 +133,7 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
continue;
}
ClPrint(LOG_DEBUG, LOG_CMD, "command is submitted: %p", command);
command->setStatus(CL_SUBMITTED);
// Submit to the device queue.
command->submit(*virtualDevice);
+3
Просмотреть файл
@@ -58,10 +58,12 @@ bool Runtime::init() {
if (!Flag::init() || !option::init() || !Device::init()
// Agent initializes last
|| !Agent::init()) {
ClPrint(LOG_ERROR, LOG_INIT, "Runtime initilization failed");
return false;
}
initialized_ = true;
ClTrace(LOG_DEBUG, LOG_INIT);
return true;
}
@@ -69,6 +71,7 @@ void Runtime::tearDown() {
if (!initialized_) {
return;
}
ClTrace(LOG_DEBUG, LOG_INIT);
Agent::tearDown();
Device::tearDown();
+51
Просмотреть файл
@@ -14,6 +14,26 @@ namespace amd { /*@{*/
enum LogLevel { LOG_NONE = 0, LOG_ERROR = 1, LOG_WARNING = 2, LOG_INFO = 3, LOG_DEBUG = 4 };
enum LogMask {
LOG_API = 0x00000001, //!< API call
LOG_CMD = 0x00000002, //!< Kernel and Copy Commands and Barriers
LOG_WAIT = 0x00000004, //!< Synchronization and waiting for commands to finish
LOG_AQL = 0x00000008, //!< Decode and display AQL packets
LOG_QUEUE = 0x00000010, //!< Queue commands and queue contents
LOG_SIG = 0x00000020, //!< Signal creation, allocation, pool
LOG_LOCK = 0x00000040, //!< Locks and thread-safety code.
LOG_KERN = 0x00000080, //!< kernel creations and arguments, etc.
LOG_COPY = 0x00000100, //!< Copy debug
LOG_COPY2 = 0x00000200, //!< Detailed copy debug
LOG_RESOURCE = 0x00000400, //!< Resource allocation, performance-impacting events.
LOG_INIT = 0x00000800, //!< Initialization and shutdown
LOG_MISC = 0x00001000, //!< misc debug, not yet classified
LOG_AQL2 = 0x00002000, //!< Show raw bytes of AQL packet
LOG_CODE = 0x00004000, //!< Show code creation debug
LOG_CMD2 = 0x00008000, //!< More detailed command info, including barrier commands
LOG_ALWAYS = 0xFFFFFFFF, //!< Log always even mask flag is zero
};
//! \cond ignore
extern "C" void breakpoint();
//! \endcond
@@ -137,4 +157,35 @@ inline void warning(const char* msg) { amd::report_warning(msg); }
#define DebugInfoGuarantee(cond) LogGuarantee(cond, amd::LOG_INFO, "Warning")
#ifndef NDEBUG
#define CL_LOG
#endif
// You may define CL_LOG to enable following log functions even for release build
#ifdef CL_LOG
#define ClPrint(level, mask, format, ...) \
do { \
if (LOG_LEVEL >= level) { \
if (GPU_LOG_MASK & mask || mask == amd::LOG_ALWAYS) { \
amd::log_printf(level, __FILE__, __LINE__, format, ##__VA_ARGS__); \
} \
} \
} while (false)
#define ClCondPrint(level, mask, condition, format, ...) \
do { \
if (LOG_LEVEL >= level && (condition)) { \
if (GPU_LOG_MASK & mask || mask == amd::LOG_ALWAYS) { \
amd::log_printf(level, __FILE__, __LINE__, format, ##__VA_ARGS__); \
} \
} \
} while (false)
#define ClTrace(level, mask) ClPrint(level, mask, "%s", __func__)
#else /*CL_LOG*/
#define ClPrint(level, mask, format, ...) (void)(0)
#define ClCondPrint(level, mask, condition, format, ...) (void)(0)
#define ClTrace(level, mask) (void)(0)
#endif /*CL_LOG*/
#endif /*DEBUG_HPP_*/
+2
Просмотреть файл
@@ -10,6 +10,8 @@
\
release(int, LOG_LEVEL, 0, \
"The default log level") \
release(uint, GPU_LOG_MASK, 0, \
"The mask to enable specific kinds of logs") \
debug(uint, DEBUG_GPU_FLAGS, 0, \
"The debug options for GPU device") \
release(uint, GPU_MAX_COMMAND_QUEUES, 300, \