SWDEV-553126 - Improve logging (#835)

* Ability to mask COPY api usage in logs
* Show total graph nodes in logs
* Add another log level for detailed debug
This commit is contained in:
SaleelK
2025-09-04 10:08:41 -07:00
zatwierdzone przez GitHub
rodzic 79bda80049
commit c4537e8050
17 zmienionych plików z 142 dodań i 121 usunięć
+4 -3
Wyświetl plik
@@ -32,7 +32,7 @@ namespace hip {
// ================================================================================================
hip::Stream* Device::NullStream(bool wait) {
ClPrint(amd::LOG_DEBUG, amd::LOG_WAIT, "NullStream %p, wait %d", null_stream_, wait);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_WAIT, "NullStream %p, wait %d", null_stream_, wait);
if (null_stream_ == nullptr) {
amd::ScopedLock lock(lock_);
if (null_stream_ == nullptr) {
@@ -188,7 +188,7 @@ void Device::WaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stre
if (wait_null_stream) {
if (null_stream_) {
ClPrint(amd::LOG_DEBUG, amd::LOG_WAIT, "Waiting on nullstream %p", null_stream_);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_WAIT, "Waiting on nullstream %p", null_stream_);
waitForStream(null_stream_);
}
} else {
@@ -199,7 +199,8 @@ void Device::WaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stre
((active_stream->Flags() & hipStreamNonBlocking) == 0) &&
// and it's not the current stream
(active_stream != blocking_stream)) {
ClPrint(amd::LOG_DEBUG, amd::LOG_WAIT, "Waiting on active stream %p", active_stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_WAIT, "Waiting on active stream %p",
active_stream);
// Get the last valid command
waitForStream(active_stream);
}
+2 -2
Wyświetl plik
@@ -399,7 +399,7 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream, unsigned
e->SetCaptureStream(stream);
if ((stream != nullptr && stream != hipStreamLegacy) &&
(s->GetCaptureStatus() == hipStreamCaptureStatusActive)) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_INFO, amd::LOG_CODE,
"[hipGraph] Current capture node EventRecord on stream : %p, Event %p", stream, event);
s->SetCaptureEvent(event);
std::vector<hip::GraphNode*> lastCapturedNodes = s->GetLastCapturedNodes();
@@ -411,7 +411,7 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream, unsigned
reinterpret_cast<hip::GraphNode* const*>(s->GetLastCapturedNodes().data()),
s->GetLastCapturedNodes().size(), false);
if (status != hipSuccess) {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "hipEventRecord add external event node failed");
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "hipEventRecord add external event node failed");
return status;
}
s->SetLastCapturedNode(node);
+48 -39
Wyświetl plik
@@ -228,7 +228,7 @@ hipError_t ihipGraphAddMemsetNode(hip::GraphNode** pGraphNode, hip::Graph* graph
hipError_t capturehipLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim,
dim3& blockDim, void**& args, size_t& sharedMemBytes) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node LaunchKernel on stream : %p", stream);
if (!hip::isValid(stream)) {
@@ -316,7 +316,7 @@ hipError_t capturehipExtModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f
size_t& sharedMemBytes, void**& kernelParams,
void**& extra, hipEvent_t& startEvent,
hipEvent_t& stopEvent, uint32_t& flags) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node ExtModuleLaunchKernel on stream : %p", stream);
return ihipExtLaunchKernel(stream, f, globalWorkSizeX / localWorkSizeX,
globalWorkSizeY / localWorkSizeY, globalWorkSizeZ / localWorkSizeZ,
@@ -329,7 +329,7 @@ hipError_t capturehipExtModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f
hipError_t capturehipExtLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim,
dim3& blockDim, void**& args, size_t& sharedMemBytes,
hipEvent_t& startEvent, hipEvent_t& stopEvent, int& flags) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node ExtLaunchKernel on stream : %p", stream);
return ihipExtLaunchKernel(
stream, reinterpret_cast<hipFunction_t>(const_cast<void*>(hostFunction)), gridDim.x,
@@ -342,7 +342,7 @@ hipError_t capturehipModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f, u
uint32_t& blockDimY, uint32_t& blockDimZ,
uint32_t& sharedMemBytes, void**& kernelParams,
void**& extra) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node ModuleLaunchKernel on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
@@ -372,7 +372,7 @@ hipError_t capturehipModuleLaunchCooperativeKernel(hipStream_t& stream, hipFunct
uint32_t& gridDimZ, uint32_t& blockDimX,
uint32_t& blockDimY, uint32_t& blockDimZ,
uint32_t& sharedMemBytes, void**& kernelParams) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node ModuleLaunchCooperativeKernel on stream : %p", stream);
if (!hip::isValid(stream)) {
@@ -403,8 +403,8 @@ hipError_t capturehipModuleLaunchCooperativeKernel(hipStream_t& stream, hipFunct
hipError_t capturehipLaunchByPtr(hipStream_t& stream, hipFunction_t func, dim3 blockDim,
dim3 gridDim, unsigned int sharedMemBytes, void** extra) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node LaunchByPtr on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node LaunchByPtr on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -433,7 +433,7 @@ hipError_t capturehipLaunchByPtr(hipStream_t& stream, hipFunction_t func, dim3 b
hipError_t capturehipLaunchCooperativeKernel(hipStream_t& stream, const void*& f, dim3& gridDim,
dim3& blockDim, void**& kernelParams,
uint32_t& sharedMemBytes) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node LaunchCooperativeKernel on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
@@ -462,8 +462,8 @@ hipError_t capturehipLaunchCooperativeKernel(hipStream_t& stream, const void*& f
}
hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*& p) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memcpy3D on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memcpy3D on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -488,8 +488,8 @@ hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*&
hipError_t capturehipMemcpy2DAsync(hipStream_t& stream, void*& dst, size_t& dpitch,
const void*& src, size_t& spitch, size_t& width, size_t& height,
hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memcpy2D on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memcpy2D on stream : %p", stream);
if (dst == nullptr || src == nullptr) {
return hipErrorInvalidValue;
}
@@ -536,7 +536,7 @@ hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, siz
hipArray_const_t& src, size_t& wOffsetSrc,
size_t& hOffsetSrc, size_t& width, size_t& height,
hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memcpy2DFromArray on stream : %p", stream);
// Skip zero-sized copies
@@ -577,7 +577,7 @@ hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, siz
hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray_t& dst, size_t& wOffset,
size_t& hOffset, const void*& src, size_t& spitch,
size_t& width, size_t& height, hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memcpy2DFromArray on stream : %p", stream);
// Skip zero-sized copies
@@ -616,7 +616,7 @@ hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray_t& dst,
}
hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const hip_Memcpy2D*& pCopy) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
@@ -691,7 +691,7 @@ hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const hip_Memcpy2D*
hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray_t& srcArray,
size_t& srcOffset, size_t& ByteCount) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream);
if (srcArray == nullptr || dstHost == nullptr) {
return hipErrorInvalidValue;
@@ -720,7 +720,7 @@ hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArr
hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray_t& dstArray, size_t& dstOffset,
const void*& srcHost, size_t& ByteCount) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream);
if (dstArray == nullptr || srcHost == nullptr) {
return hipErrorInvalidValue;
@@ -748,6 +748,8 @@ hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray_t& dstArray,
hipError_t capturehipMemcpy(hipStream_t stream, void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind) {
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memcpy on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -770,8 +772,8 @@ hipError_t capturehipMemcpy(hipStream_t stream, void* dst, const void* src, size
hipError_t capturehipMemcpyAsync(hipStream_t& stream, void*& dst, const void*& src,
size_t& sizeBytes, hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memcpy1D on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memcpy1D on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -779,9 +781,10 @@ hipError_t capturehipMemcpyAsync(hipStream_t& stream, void*& dst, const void*& s
}
hipError_t capturehipMemcpyHtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice,
const void*& srcHost, size_t& ByteCount, hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node MemcpyHtoD on stream : %p",
stream);
const void*& srcHost, size_t& ByteCount,
hipMemcpyKind& kind) {
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyHtoD on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -791,18 +794,19 @@ hipError_t capturehipMemcpyHtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDev
hipError_t capturehipMemcpyDtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice,
hipDeviceptr_t& srcDevice, size_t& ByteCount,
hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
"[hipGraph] Current capture node hipMemcpyDtoD on stream : %p", stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyDtoD on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
return capturehipMemcpy(stream, dstDevice, srcDevice, ByteCount, kind);
}
hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost, hipDeviceptr_t& srcDevice,
size_t& ByteCount, hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
"[hipGraph] Current capture node hipMemcpyDtoH on stream : %p", stream);
hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost,
hipDeviceptr_t& srcDevice, size_t& ByteCount,
hipMemcpyKind& kind) {
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyDtoH on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -810,8 +814,9 @@ hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost, hipDev
}
hipError_t capturehipMemcpyFromSymbolAsync(hipStream_t& stream, void*& dst, const void*& symbol,
size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
size_t& sizeBytes, size_t& offset,
hipMemcpyKind& kind) {
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyFromSymbolNode on stream : %p", stream);
if (kind != hipMemcpyDeviceToHost && kind != hipMemcpyDeviceToDevice &&
@@ -847,7 +852,7 @@ hipError_t capturehipMemcpyFromSymbolAsync(hipStream_t& stream, void*& dst, cons
hipError_t capturehipMemcpyToSymbolAsync(hipStream_t& stream, const void*& symbol, const void*& src,
size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MemcpyToSymbolNode on stream : %p", stream);
if (kind != hipMemcpyHostToDevice && kind != hipMemcpyDeviceToDevice &&
@@ -882,8 +887,8 @@ hipError_t capturehipMemcpyToSymbolAsync(hipStream_t& stream, const void*& symbo
hipError_t capturehipMemsetAsync(hipStream_t& stream, void*& dst, int& value, size_t& valueSize,
size_t& sizeBytes) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memset1D on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memset1D on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -908,8 +913,8 @@ hipError_t capturehipMemsetAsync(hipStream_t& stream, void*& dst, int& value, si
hipError_t capturehipMemset2DAsync(hipStream_t& stream, void*& dst, size_t& pitch, int& value,
size_t& width, size_t& height) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memset2D on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memset2D on stream : %p", stream);
hipMemsetParams memsetParams = {0};
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
@@ -934,8 +939,8 @@ hipError_t capturehipMemset2DAsync(hipStream_t& stream, void*& dst, size_t& pitc
hipError_t capturehipMemset3DAsync(hipStream_t& stream, hipPitchedPtr& pitchedDevPtr, int& value,
hipExtent& extent) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memset3D on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node Memset3D on stream : %p", stream);
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
@@ -966,8 +971,8 @@ hipError_t capturehipMemset3DAsync(hipStream_t& stream, hipPitchedPtr& pitchedDe
}
hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*& userData) {
ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node host on stream : %p",
stream);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node HostFunction launch on stream : %p", stream);
if (fn == nullptr) {
return hipErrorInvalidValue;
}
@@ -992,6 +997,8 @@ hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*&
// ================================================================================================
hipError_t capturehipMallocAsync(hipStream_t stream, hipMemPool_t mem_pool, size_t size,
void** dev_ptr) {
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node MallocAsync on stream : %p", stream);
auto s = reinterpret_cast<hip::Stream*>(stream);
auto mpool = reinterpret_cast<hip::MemoryPool*>(mem_pool);
@@ -1029,6 +1036,8 @@ hipError_t capturehipMallocAsync(hipStream_t stream, hipMemPool_t mem_pool, size
// ================================================================================================
hipError_t capturehipFreeAsync(hipStream_t stream, void* dev_ptr) {
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node FreeAsync on stream : %p", stream);
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
auto mem_free_node = new hip::GraphMemFreeNode(dev_ptr);
auto status =
@@ -122,7 +122,7 @@ bool Graph::isGraphValid(Graph* pGraph) {
// ================================================================================================
void Graph::AddNode(const Node& node) {
vertices_.emplace_back(node);
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Add %s(%p)",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "[hipGraph] Add %s(%p)",
GetGraphNodeTypeString(node->GetType()), node);
node->SetParentGraph(this);
}
@@ -140,7 +140,7 @@ std::vector<Node> Graph::GetRootNodes() const {
for (auto entry : vertices_) {
if (entry->GetInDegree() == 0) {
roots.push_back(entry);
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Root node: %s(%p)",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "[hipGraph] Root node: %s(%p)",
GetGraphNodeTypeString(entry->GetType()), entry);
}
}
@@ -703,6 +703,11 @@ hipError_t GraphExec::Run(hip::Stream* launch_stream) {
repeatLaunch_ = true;
}
ClPrint(amd::LOG_DEBUG, amd::LOG_CODE,
"GraphExec::Run max_streams: %d, "
"on device: %d, total number of nodes: %d",
max_streams_, launch_stream->DeviceId(), topoOrder_.size());
if (max_streams_ == 1 && instantiateDeviceId_ == launch_stream->DeviceId()) {
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
// If the graph has kernels that does device side allocation, during packet capture, heap is
@@ -2327,7 +2327,7 @@ class GraphMemAllocNode final : public GraphNode {
amd::Device::VmmAccess::kReadWrite);
va_->retain();
graph_->IncrementMemAllocNodeCount(); // Increment count of unreleased mem alloc nodes
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc execute [%p-%p], %p",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemAlloc execute [%p-%p], %p",
vaddr_sub_obj->getSvmPtr(),
reinterpret_cast<char*>(vaddr_sub_obj->getSvmPtr()) + aligned_size, memory());
}
@@ -2390,7 +2390,7 @@ class GraphMemAllocNode final : public GraphNode {
// be executed again
amd::MemObjMap::AddMemObj(node_params_.dptr, va_);
}
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc create: %p", node_params_.dptr);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemAlloc create: %p", node_params_.dptr);
}
}
return error;
@@ -2405,7 +2405,7 @@ class GraphMemAllocNode final : public GraphNode {
va_ = amd::MemObjMap::FindVirtualMemObj(node_params_.dptr);
amd::MemObjMap::AddMemObj(node_params_.dptr, va_);
}
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc reserve VA: %p", node_params_.dptr);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemAlloc reserve VA: %p", node_params_.dptr);
}
return node_params_.dptr;
}
@@ -2470,7 +2470,7 @@ class GraphMemFreeNode : public GraphNode {
}
amd::MemObjMap::AddMemObj(ptr(), vaddr_mem_obj);
graph_->DecrementMemAllocNodeCount(); // Decrement count of unreleased memalloc nodes
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemFree execute: %p, %p", ptr(),
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemFree execute: %p, %p", ptr(),
vaddr_sub_obj);
}
@@ -2500,7 +2500,7 @@ class GraphMemFreeNode : public GraphNode {
graph, stream->DeviceId(), *stream, amd::Command::EventWaitList{}, device_ptr_,
amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularity_), nullptr);
commands_.push_back(cmd);
ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_);
}
}
return error;
+1 -1
Wyświetl plik
@@ -476,7 +476,7 @@ hipError_t hipStreamWaitEvent_common(hipStream_t stream, hipEvent_t event, unsig
hip::Stream* eventStream = reinterpret_cast<hip::Stream*>(eventStreamHandle);
if (eventStream != nullptr && eventStream->IsEventCaptured(event) == true) {
ClPrint(amd::LOG_INFO, amd::LOG_API,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
"[hipGraph] Current capture node StreamWaitEvent on stream : %p, Event %p", stream,
event);
if (waitStream == nullptr) {
+6 -5
Wyświetl plik
@@ -39,7 +39,7 @@ bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
LogError("Couldn't map device memory for host read");
return false;
}
ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy D2H, src=%p, dst=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "Using host memcpy D2H, src=%p, dst=%p, size=%zu",
(reinterpret_cast<const_address>(src) + origin[0]), dstHost, size[0]);
// Copy memory
std::memcpy(dstHost, reinterpret_cast<const_address>(src) + origin[0], size[0]);
@@ -163,7 +163,7 @@ bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory
return false;
}
ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy H2D, src=%p, dst=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "Using host memcpy H2D, src=%p, dst=%p, size=%zu",
srcHost, (reinterpret_cast<address>(dst) + origin[0]), size[0]);
// Copy memory
std::memcpy(reinterpret_cast<address>(dst) + origin[0], srcHost, size[0]);
@@ -294,7 +294,8 @@ bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstM
LogError("Couldn't map destination memory");
return false;
}
ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy for copyBuffer, src=%p, dst=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY,
"Using host memcpy for copyBuffer, src=%p, dst=%p, size=%zu",
(reinterpret_cast<const_address>(src) + srcOrigin[0]),
(reinterpret_cast<address>(dst) + dstOrigin[0]), size[0]);
// Straight forward buffer copy
@@ -328,8 +329,8 @@ bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory&
return false;
}
ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy for copyBufferRect, src=%p, "
"dst=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY,
"Using host memcpy for copyBufferRect, src=%p, dst=%p, size=%zu",
(reinterpret_cast<const_address>(src) + srcRect.offset(0, 0, 0)),
(reinterpret_cast<address>(dst) + dstRect.offset(0, 0, 0)), size[0]);
@@ -1049,7 +1049,7 @@ static void dumpCodeObject(const std::string& image) {
char fname[30];
static std::atomic<int> index;
sprintf(fname, "_code_object%04d.o", index++);
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Code object saved in %s\n", fname);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Code object saved in %s\n", fname);
std::ofstream ofs;
ofs.open(fname, std::ios::binary);
ofs << image;
@@ -2539,7 +2539,7 @@ bool Program::createKernelMetadataMap(void* binary, size_t binSize) {
status = amd::Comgr::metadata_lookup(metadata_, "Kernels", &kernelsMD);
if (status == AMD_COMGR_STATUS_SUCCESS) {
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V2.");
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V2.");
hasKernelMD = true;
codeObjectVer_ = 2;
} else {
@@ -2591,13 +2591,13 @@ bool Program::createKernelMetadataMap(void* binary, size_t binSize) {
if (major_version == '1') {
if (minor_version == '0') {
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V3.");
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V3.");
codeObjectVer_ = 3;
} else if (minor_version == '1') {
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V4.");
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V4.");
codeObjectVer_ = 4;
} else if (minor_version == '2') {
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V5.");
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V5.");
codeObjectVer_ = 5;
} else {
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
@@ -310,7 +310,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
// Copy memory line by line
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
"HSA Async Copy Rect dst=0x%zx, src=0x%zx, wait_event=0x%zx, "
"completion_signal=0x%zx",
dstMem.base, srcMem.base, (wait_events.size() != 0) ? wait_events[0].handle : 0,
@@ -335,7 +335,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
size_t dstOffset = dstRect.offset(0, y, z);
// Copy memory line by line
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
"HSA Async Copy wait_event=0x%zx, completion_signal=0x%zx",
(wait_events.size() != 0) ? wait_events[0].handle : 0, active.handle);
hsa_status_t status = hsa_amd_memory_async_copy(
@@ -509,7 +509,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c
if (!kUseRegularCopyApi && engine != HwQueueEngine::Unknown) {
copyMask = gpu().getLastUsedSdmaEngine();
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
copyMask &= (engine == HwQueueEngine::SdmaRead ? sdmaEngineReadMask_ : sdmaEngineWriteMask_);
if (copyMask == 0) {
// Check SDMA engine status
@@ -539,7 +539,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c
// Copy on the first available free engine if ROCr returns a valid mask
hsa_amd_sdma_engine_id_t copyEngine = static_cast<hsa_amd_sdma_engine_id_t>(copyMask);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
"HSA Copy copy_engine=0x%x, dst=0x%zx, src=0x%zx, "
"size=%ld, forceSDMA=%d, engineType=%d, wait_event=0x%zx, completion_signal=0x%zx",
copyEngine, dst, src, size, forceSDMA, engine,
@@ -554,7 +554,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c
}
if (engine == HwQueueEngine::Unknown || kUseRegularCopyApi) {
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
"HSA Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
"completion_signal=0x%zx, engineType=%d",
dst, src, size, (wait_events.size() != 0) ? wait_events[0].handle : 0, active.handle,
@@ -656,7 +656,8 @@ void DmaBlitManager::getBuffer(const_address hostMem, size_t size, bool enablePi
if (pinnedMem != nullptr) {
Memory* pinnedMemory = dev().getRocMemory(pinnedMem);
address pinBuffer = pinnedMemory->getDeviceMemory();
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "HSA Copy Using Pinned resource size %d", xferSize);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "HSA Copy Using Pinned resource size %d",
xferSize);
buffState.copySize_ = xferSize;
buffState.buffer_ = pinBuffer + partial1 + partial2;
buffState.pinnedMem_ = pinnedMem;
@@ -666,7 +667,8 @@ void DmaBlitManager::getBuffer(const_address hostMem, size_t size, bool enablePi
}
// If Memory Pinning fails, failback to staging buffer
xferSize = std::min(xferSize, StagingXferSize);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "HSA Copy Using Staging resource size %d", xferSize);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "HSA Copy Using Staging resource size %d",
xferSize);
buffState.copySize_ = xferSize;
buffState.buffer_ = gpu().Staging().Acquire(std::min(xferSize, StagingXferSize));
}
@@ -711,7 +713,7 @@ bool DmaBlitManager::hsaCopyStagedOrPinned(const_address hostSrc, address hostDs
}
if (hostToDev) { // H2D Path
if (outBuffer.pinnedMem_ == nullptr) { // Copy to Staging Buffer
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
stagingBuffer, hostSrc + copyOffset, copysize);
memcpy(stagingBuffer, hostSrc + copyOffset, copysize);
}
@@ -731,7 +733,7 @@ bool DmaBlitManager::hsaCopyStagedOrPinned(const_address hostSrc, address hostDs
if (outBuffer.pinnedMem_ == nullptr) {
// Wait for current signal of previous rocr copy if its not pinned mem
gpu().Barriers().WaitCurrent();
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
hostDst + copyOffset, stagingBuffer, copysize);
memcpy(hostDst + copyOffset, stagingBuffer, copysize);
}
@@ -1754,7 +1756,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
// Wait for current signal of previous blit copy if its not pinned mem
if (outBuffer.pinnedMem_ == nullptr) {
gpu().Barriers().WaitCurrent();
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
(void*)(dstAddr + stagedCopyOffset), stagingBuffer, copySize);
memcpy(dstAddr + stagedCopyOffset, stagingBuffer, copySize);
}
@@ -1875,7 +1877,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
copySize = outBuffer.copySize_;
address currentDstAddr = dstAddr + stagedCopyOffset;
if (outBuffer.pinnedMem_ == nullptr) {
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
stagingBuffer, (void*)(srcAddr + stagedCopyOffset), copySize);
memcpy(stagingBuffer, srcAddr + stagedCopyOffset, copySize);
}
@@ -234,12 +234,13 @@ Device::~Device() {
hsa_queue_t* queue = qIter->first;
auto& qInfo = qIter->second;
if (qInfo.hostcallBuffer_) {
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Deleting hostcall buffer %p for hardware queue %p",
qInfo.hostcallBuffer_, qIter->first->base_address);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_QUEUE,
"Deleting hostcall buffer %p for hardware queue %p", qInfo.hostcallBuffer_,
qIter->first->base_address);
amd::disableHostcalls(qInfo.hostcallBuffer_);
context().svmFree(qInfo.hostcallBuffer_);
}
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Deleting hardware queue %p with refCount 0",
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_QUEUE, "Deleting hardware queue %p with refCount 0",
queue->base_address);
qIter = it.erase(qIter);
hsa_queue_destroy(queue);
@@ -1987,11 +1988,11 @@ hsa_amd_memory_pool_t Device::getHostMemoryPool(MemorySegment mem_seg,
break;
case kUncachedAtomics:
if (agentInfo->ext_fine_grain_pool.handle != 0) {
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM,
"Using extended fine grained access system memory pool");
segment = agentInfo->ext_fine_grain_pool;
} else {
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM,
"Falling through on fine grained access system memory pool");
segment = agentInfo->fine_grain_pool;
}
@@ -2065,7 +2066,7 @@ void* Device::hostNumaAlloc(size_t size, size_t alignment, MemorySegment mem_seg
LogPrintfError("get_mempolicy failed with error %ld", res);
return ptr;
}
ClPrint(amd::LOG_INFO, amd::LOG_RESOURCE,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_RESOURCE,
"get_mempolicy() succeed with mode %d, nodeMask 0x%lx, cpuCount %zu", mode,
*nodeMask->maskp, cpuCount);
@@ -2805,7 +2806,7 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, amd::SyncPolicy
void* hw_event =
(event.NotifyEvent() != nullptr) ? event.NotifyEvent()->HwEvent() : event.HwEvent();
if (hw_event == nullptr) {
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event");
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_SIG, "No HW event");
return false;
} else if (wait) {
// hipEventBlockingSync
@@ -122,7 +122,7 @@ void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& reg
if (indirectMapCount_ == 1) {
if (!allocateMapMemory(owner()->getSize())) {
decIndMapCount();
DevLogPrintfError("Cannot allocate Map memory for size: %u \n", owner()->getSize());
DevLogPrintfError("Cannot allocate Map memory for size: %u", owner()->getSize());
return nullptr;
}
} else {
@@ -180,7 +180,7 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
if (!isHostMemDirectAccess() && !IsPersistentDirectMap()) {
if (!vDev.blitMgr().readBuffer(*this, mapTarget, amd::Coord3D(0), amd::Coord3D(size()), true)) {
decIndMapCount();
DevLogError("Cannot read buffer \n");
DevLogError("Cannot read buffer");
return nullptr;
}
}
@@ -192,7 +192,7 @@ void Memory::cpuUnmap(device::VirtualDevice& vDev) {
if (!isHostMemDirectAccess() && !IsPersistentDirectMap()) {
if (!vDev.blitMgr().writeBuffer(mapMemory_->getHostMem(), *this, amd::Coord3D(0),
amd::Coord3D(size()), true)) {
LogError("[OCL] Fail sync the device memory on cpuUnmap");
LogError("Fail sync the device memory on cpuUnmap");
}
// Wait on CPU for the transfer
static_cast<roc::VirtualGPU&>(vDev).releaseGpuMemoryFence();
@@ -624,7 +624,7 @@ Buffer::~Buffer() {
// Detach the memory from HSA
auto hsa_status = hsa_amd_ipc_memory_detach(owner()->getSvmPtr());
if (hsa_status != HSA_STATUS_SUCCESS) {
LogPrintfError("HSA failed to detach memory with status: %d \n", hsa_status);
LogPrintfError("HSA failed to detach memory with status: %d", hsa_status);
}
}
}
@@ -666,8 +666,8 @@ void Buffer::destroy() {
}
} else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
if (HSA_STATUS_SUCCESS != hsa_signal_destroy(signal_)) {
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
"[ROCClr] ROCCLR_MEM_HSA_SIGNAL_MEMORY signal destroy failed \n");
ClPrint(amd::LOG_ERROR, amd::LOG_MEM,
"hsa_signal_destroy failed");
}
deviceMemory_ = nullptr;
} else {
@@ -684,7 +684,7 @@ void Buffer::destroy() {
}
// destroy system memory
if (!(amd::Os::releaseMemory(deviceMemory_, size()))) {
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "[ROCClr] munmap failed \n");
ClPrint(amd::LOG_ERROR, amd::LOG_MEM, "munmap failed");
}
}
}
@@ -764,7 +764,7 @@ bool Buffer::create(bool alloc_local) {
reinterpret_cast<const amd::IpcBuffer*>(owner())->Handle()),
owner()->getSize(), ipc_agents_num, dev().IpcAgents(), &orig_dev_ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
LogPrintfError("HSA failed to attach IPC memory with status: %d \n", hsa_status);
LogPrintfError("HSA failed to attach IPC memory with status: %d", hsa_status);
return false;
}
owner()->setSvmPtr(orig_dev_ptr);
@@ -779,7 +779,7 @@ bool Buffer::create(bool alloc_local) {
// if interprocess flag is set, then the memory is importable.
if (!dev().ImportShareableHSAHandle(owner()->getSvmPtr(),
&owner()->getUserData().hsa_handle)) {
LogPrintfError("Importing Shareable Memory failed with os_handle: 0x%x \n",
LogPrintfError("Importing Shareable Memory failed with os_handle: 0x%x",
owner()->getSvmPtr());
return false;
}
@@ -835,13 +835,13 @@ bool Buffer::create(bool alloc_local) {
if (HSA_STATUS_SUCCESS != hsa_amd_signal_create(kInitSignalValueOne, 0, nullptr,
HSA_AMD_SIGNAL_AMD_GPU_ONLY, &signal_)) {
ClPrint(amd::LOG_ERROR, amd::LOG_MEM,
"[ROCclr] ROCCLR_MEM_HSA_SIGNAL_MEMORY signal creation failed");
"hsa_amd_signal_create signal failed");
return false;
}
volatile hsa_signal_value_t* signalValuePtr = nullptr;
if (HSA_STATUS_SUCCESS != hsa_amd_signal_value_pointer(signal_, &signalValuePtr)) {
ClPrint(amd::LOG_ERROR, amd::LOG_MEM,
"[ROCclr] ROCCLR_MEM_HSA_SIGNAL_MEMORY pointer query failed");
"hsa_amd_signal_value_pointer failed");
return false;
}
@@ -1043,7 +1043,7 @@ bool Buffer::ExportHandle(void* handle) const {
auto hsa_status = hsa_amd_ipc_memory_create(orig_dev_ptr, owner()->getSize(),
reinterpret_cast<hsa_amd_ipc_memory_t*>(handle));
if (hsa_status != HSA_STATUS_SUCCESS) {
LogPrintfError("Failed to create memory for IPC, failed with hsa_status: %d \n", hsa_status);
LogPrintfError("Failed to create memory for IPC, failed with hsa_status: %d", hsa_status);
return false;
}
return true;
@@ -1304,7 +1304,7 @@ bool Image::create(bool alloc_local) {
permission_, &deviceImageInfo_);
if (status != HSA_STATUS_SUCCESS) {
LogPrintfError("[OCL] Fail to allocate image memory, failed with hsa_status: %d \n", status);
LogPrintfError("Fail to allocate image memory, failed with hsa_status: %d", status);
return false;
}
@@ -773,7 +773,7 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
mem = memories[index];
const void* globalAddress = *reinterpret_cast<const void* const*>(params + desc.offset_);
if (mem == nullptr) {
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p ", i, desc.typeName_.c_str(),
ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p ", i, desc.typeName_.c_str(),
desc.name_.c_str(), globalAddress);
//! This condition is for SVM fine-grain
if (dev().isFineGrainedSystem(true)) {
@@ -787,7 +787,7 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
gpuMem = static_cast<Memory*>(mem->getDeviceMemory(dev()));
const void* globalAddress = *reinterpret_cast<const void* const*>(params + desc.offset_);
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p obj:[%p-%p]", i,
ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p obj:[%p-%p]", i,
desc.typeName_.c_str(), desc.name_.c_str(), globalAddress,
gpuMem->getDeviceMemory(),
reinterpret_cast<address>(gpuMem->getDeviceMemory()) + mem->getSize());
@@ -874,10 +874,10 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
if (desc.size_ > kMaxBytes) {
bytes += "...";
}
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = %s (size:0x%x)", i,
ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = %s (size:0x%x)", i,
desc.typeName_.c_str(), desc.name_.c_str(), bytes.c_str(), desc.size_);
} else {
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = val:0x%lx (size:0x%x)", i,
ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = val:0x%lx (size:0x%x)", i,
desc.typeName_.c_str(), desc.name_.c_str(),
(desc.size_ == 1) ? *reinterpret_cast<const uint8_t*>(srcArgPtr)
: (desc.size_ == 2) ? *reinterpret_cast<const uint16_t*>(srcArgPtr)
@@ -1621,7 +1621,8 @@ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size, uint32_t alignment) {
} else {
// Reset the signal for the barrier packet
hsa_signal_silent_store_relaxed(pool_signal_[active_chunk_], kInitSignalValueOne);
ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Issue barrier to flush chunk %d", active_chunk_);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_KERN, "Issue barrier to flush chunk %d",
active_chunk_);
// Currently don't skip wait signal check, because SDMA engine cna be used in staging copy
constexpr bool kSkipSignal = false;
// Dispatch a barrier packet into the queue
@@ -3489,7 +3490,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
gpuKernel.KernargSegmentAlignment());
command_->SetKernelName(gpuKernel.getDemangledName().c_str());
} else {
ClPrint(amd::LOG_INFO, amd::LOG_KERN,
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_KERN,
"KernargSegmentByteSize = %lu "
"KernargSegmentAlignment = %lu",
gpuKernel.KernargSegmentByteSize(), gpuKernel.KernargSegmentAlignment());
@@ -3544,11 +3545,10 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
// Validate privateMemSize is more than max allowed.
size_t maxStackSize = dev().MaxStackSize();
if (dispatchPacket.private_segment_size > maxStackSize) {
ClPrint(amd::LOG_INFO, amd::LOG_KERN,
ClPrint(amd::LOG_ERROR, amd::LOG_KERN,
"Scratch size (%u) exceeds max allowed (%zu) for kernel : %s",
dispatchPacket.private_segment_size, maxStackSize,
gpuKernel.getDemangledName().c_str());
LogError("Scratch size exceeds max allowed.");
return false;
}
}
@@ -65,7 +65,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yi
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
kTimeout100us, HSA_WAIT_STATE_ACTIVE) != 0) {
if (HIP_SKIP_ABORT_ON_GPU_ERROR && amd::Device::IsGPUInError()) {
ClPrint(amd::LOG_INFO, amd::LOG_SIG,
ClPrint(amd::LOG_ERROR, amd::LOG_SIG,
"Device not Stable, while waiting for Signal ="
"(0x%lx) for %d ns",
signal.handle, kTimeout100us);
@@ -79,7 +79,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yi
while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
kTimeout4Secs, wait_state) != 0) {
if (HIP_SKIP_ABORT_ON_GPU_ERROR && amd::Device::IsGPUInError()) {
ClPrint(amd::LOG_INFO, amd::LOG_SIG,
ClPrint(amd::LOG_ERROR, amd::LOG_SIG,
"Device not Stable, while waiting for Signal ="
"(0x%lx) for %d ns",
signal.handle, kTimeout4Secs);
@@ -160,12 +160,12 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
}
if (profilingInfo().enabled_) {
ClPrint(LOG_DEBUG, LOG_CMD, "Command %p complete (Wall: %ld, CPU: %ld, GPU: %ld us)",
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command %p complete (Wall: %ld, CPU: %ld, GPU: %ld us)",
&command(), ((profilingInfo().end_ - epoch) / 1000),
((profilingInfo().submitted_ - profilingInfo().queued_) / 1000),
((profilingInfo().end_ - profilingInfo().start_) / 1000));
} else {
ClPrint(LOG_DEBUG, LOG_CMD, "Command %p complete", &command());
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command %p complete", &command());
}
release();
}
@@ -177,7 +177,7 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
bool Event::resetStatus(int32_t status) {
int32_t currentStatus = this->status();
if (currentStatus != CL_COMPLETE) {
ClPrint(LOG_ERROR, LOG_CMD, "command is reset before complete current status :%d",
ClPrint(LOG_ERROR, LOG_CMD, "Command is reset before complete current status :%d",
currentStatus);
}
if (!status_.compare_exchange_strong(currentStatus, status, std::memory_order_relaxed)) {
@@ -191,7 +191,7 @@ bool Event::resetStatus(int32_t status) {
// ================================================================================================
bool Event::setCallback(int32_t status, Event::CallBackFunction callback, void* data,
bool blocking) {
assert(status >= CL_COMPLETE && status <= CL_QUEUED && "invalid status");
assert(status >= CL_COMPLETE && status <= CL_QUEUED && "Invalid status");
CallBackEntry* entry = new CallBackEntry(status, callback, data, blocking);
if (entry == NULL) {
@@ -240,8 +240,8 @@ bool Event::awaitCompletion() {
return false;
}
ClPrint(LOG_DEBUG, LOG_WAIT, "Waiting for event %p to complete, current status %d", this,
status());
ClPrint(LOG_DETAIL_DEBUG, LOG_WAIT, "Waiting for event %p to complete, current status %d",
this, status());
auto* queue = command().queue();
if ((queue != nullptr) && queue->vdev()->ActiveWait()) {
while (status() > CL_COMPLETE) {
@@ -255,7 +255,7 @@ bool Event::awaitCompletion() {
lock_.wait();
}
}
ClPrint(LOG_DEBUG, LOG_WAIT, "Event %p wait completed", this);
ClPrint(LOG_DETAIL_DEBUG, LOG_WAIT, "Event %p wait completed", this);
}
return status() == CL_COMPLETE;
@@ -353,7 +353,7 @@ void Command::enqueue() {
Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
}
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
amd::activity_prof::getOclCommandKindString(this->type()), this, queue_);
// Direct dispatch logic below will submit the command immediately, but the command status
@@ -75,7 +75,7 @@ bool HostQueue::terminate() {
if (GetSubmissionBatch() != nullptr) {
auto command = new Marker(*this, false);
if (command != nullptr) {
ClPrint(LOG_DEBUG, LOG_CMD, "Marker queued to ensure finish");
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Marker queued to ensure finish");
command->enqueue();
lastCommand->release();
lastCommand = command;
@@ -145,7 +145,7 @@ void HostQueue::finishCommand(Command* command) {
if (command == nullptr) {
command = getLastQueuedCommand(true);
if (command != nullptr) {
ClPrint(LOG_DEBUG, LOG_CMD, "No command, awaiting complete status on host");
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "No command, awaiting complete status on host");
command->awaitCompletion();
command->release();
}
@@ -154,7 +154,7 @@ void HostQueue::finishCommand(Command* command) {
// Check hardware event status for the specific command
static constexpr bool kWaitCompletion = true;
if (!device().IsHwEventReady(command->event(), kWaitCompletion)) {
ClPrint(LOG_DEBUG, LOG_CMD, "No HW event, awaiting complete status on host");
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "No HW event, awaiting complete status on host");
command->awaitCompletion();
}
}
@@ -181,7 +181,7 @@ void HostQueue::finish(bool cpu_wait) {
}
size_t batchSize = GetSubmissionBatchSize();
ClPrint(LOG_DEBUG, LOG_CMD,
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD,
"finish() called with batch size: %zu, cpu_wait: %d, "
"fence dirty: %d",
batchSize, cpu_wait, vdev()->isFenceDirty());
@@ -203,7 +203,7 @@ void HostQueue::finish(bool cpu_wait) {
// Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
static constexpr bool kWaitCompletion = true;
if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion, GetSyncPolicy())) {
ClPrint(LOG_DEBUG, LOG_CMD,
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD,
"No HW event or batch size is less than %zu, "
"await command completion",
minBatchSize);
@@ -258,14 +258,14 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
// Process the command's event wait list.
const Command::EventWaitList& events = command->eventWaitList();
bool dependencyFailed = false;
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) processing: %p ,events.size(): %d",
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) processing: %p ,events.size(): %d",
amd::activity_prof::getOclCommandKindString(command->type()), command, events.size());
for (const auto& it : events) {
// Only wait if the command is enqueued into another queue.
if (it->command().queue() != this) {
// Runtime has to flush the current batch only if the dependent wait is blocking
if (it->command().status() != CL_COMPLETE) {
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) %p awaiting event: %p",
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) %p awaiting event: %p",
amd::activity_prof::getOclCommandKindString(command->type()), command, it);
virtualDevice->flush(head, true);
tail = head = NULL;
@@ -287,7 +287,7 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
continue;
}
ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) submitted: %p",
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) submitted: %p",
amd::activity_prof::getOclCommandKindString(command->type()), command);
command->setStatus(CL_SUBMITTED);
@@ -80,7 +80,8 @@ bool Runtime::init() {
ClPrint(LOG_ERROR, LOG_INIT, "Runtime initialization failed");
return false;
}
ClPrint(LOG_INFO, LOG_MISC, "ROCclr version: %s", ROCCLR_VERSION_GITHASH);
ClPrint(LOG_INFO, LOG_MISC && !amd::IS_HIP, "ROCclr version: %s", ROCCLR_VERSION_GITHASH);
initialized_ = true;
pid_ = amd::Os::getProcessId();
+2 -1
Wyświetl plik
@@ -41,7 +41,8 @@ enum LogLevel {
LOG_WARNING = 2,
LOG_INFO = 3,
LOG_DEBUG = 4,
LOG_EXTRA_DEBUG = 5
LOG_DETAIL_DEBUG = 5,
LOG_EXTRA_DEBUG = 6
};
enum LogMask {