From 0c407e3cf41840ee4290492d7bb3c1dba45c7a6a Mon Sep 17 00:00:00 2001 From: Rachida Kebichi Date: Fri, 27 Sep 2019 18:32:54 -0400 Subject: [PATCH] Add KFD support. [ROCm/roctracer commit: 315a547cc43ab234e5a6ab854aae2924c809068b] --- projects/roctracer/inc/ext/prof_protocol.h | 5 +- projects/roctracer/inc/roctracer_kfd.h | 498 +++++++++++++++++ projects/roctracer/script/kfdap.py | 551 ++++++++++++++++++- projects/roctracer/src/CMakeLists.txt | 4 +- projects/roctracer/src/core/roctracer.cpp | 21 +- projects/roctracer/test/CMakeLists.txt | 2 +- projects/roctracer/test/run.sh | 2 +- projects/roctracer/test/tool/tracer_tool.cpp | 57 +- 8 files changed, 1113 insertions(+), 27 deletions(-) create mode 100644 projects/roctracer/inc/roctracer_kfd.h diff --git a/projects/roctracer/inc/ext/prof_protocol.h b/projects/roctracer/inc/ext/prof_protocol.h index ff195882ea..d6e08ca0f3 100644 --- a/projects/roctracer/inc/ext/prof_protocol.h +++ b/projects/roctracer/inc/ext/prof_protocol.h @@ -31,8 +31,9 @@ typedef enum { ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_EXT_API = 4, // External ID domain - ACTIVITY_DOMAIN_ROCTX = 5, // ROCTX domain + ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain + ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain + ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain ACTIVITY_DOMAIN_NUMBER } activity_domain_t; diff --git a/projects/roctracer/inc/roctracer_kfd.h b/projects/roctracer/inc/roctracer_kfd.h new file mode 100644 index 0000000000..34229ce96d --- /dev/null +++ b/projects/roctracer/inc/roctracer_kfd.h @@ -0,0 +1,498 @@ +/* +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef INC_ROCTRACER_KFD_H_ +#define INC_ROCTRACER_KFD_H_ +#include +#include + +#include + +#include "roctracer.h" +#include "hsakmt.h" + +namespace roctracer { +namespace kfd_support { +template +struct output_streamer { + inline static std::ostream& put(std::ostream& out, const T& v) { return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, bool v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint8_t v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint16_t v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint32_t v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint64_t v) { out << std::hex << ""; return out; } +}; + +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, bool* v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint8_t* v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint16_t* v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint32_t* v) { out << std::hex << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, uint64_t* v) { out << std::hex << ""; return out; } +}; + +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, hsa_queue_t* v) { out << ""; return out; } +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) { out << ""; return out; } +}; +// begin ostream ops for KFD +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaVersionInfo& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.KernelInterfaceMajorVersion); + roctracer::kfd_support::output_streamer::put(out,v.KernelInterfaceMinorVersion); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaSystemProperties& v) { + roctracer::kfd_support::output_streamer::put(out,v.NumNodes); + roctracer::kfd_support::output_streamer::put(out,v.PlatformOem); + roctracer::kfd_support::output_streamer::put(out,v.PlatformId); + roctracer::kfd_support::output_streamer::put(out,v.PlatformRev); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HSA_CAPABILITY& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.Value); + roctracer::kfd_support::output_streamer::put(out,v.ui32.HotPluggable); + roctracer::kfd_support::output_streamer::put(out,v.ui32.HSAMMUPresent); + roctracer::kfd_support::output_streamer::put(out,v.ui32.SharedWithGraphics); + roctracer::kfd_support::output_streamer::put(out,v.ui32.QueueSizePowerOfTwo); + roctracer::kfd_support::output_streamer::put(out,v.ui32.QueueSize32bit); + roctracer::kfd_support::output_streamer::put(out,v.ui32.QueueIdleEvent); + roctracer::kfd_support::output_streamer::put(out,v.ui32.VALimit); + roctracer::kfd_support::output_streamer::put(out,v.ui32.WatchPointsSupported); + roctracer::kfd_support::output_streamer::put(out,v.ui32.WatchPointsTotalBits); + roctracer::kfd_support::output_streamer::put(out,v.ui32.DoorbellType); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Reserved); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaNodeProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.NumCPUCores); + roctracer::kfd_support::output_streamer::put(out,v.NumFComputeCores); + roctracer::kfd_support::output_streamer::put(out,v.NumMemoryBanks); + roctracer::kfd_support::output_streamer::put(out,v.NumCaches); + roctracer::kfd_support::output_streamer::put(out,v.NumIOLinks); + roctracer::kfd_support::output_streamer::put(out,v.CComputeIdLo); + roctracer::kfd_support::output_streamer::put(out,v.FComputeIdLo); + roctracer::kfd_support::output_streamer::put(out,v.Capability); + roctracer::kfd_support::output_streamer::put(out,v.MaxWavesPerSIMD); + roctracer::kfd_support::output_streamer::put(out,v.LDSSizeInKB); + roctracer::kfd_support::output_streamer::put(out,v.GDSSizeInKB); + roctracer::kfd_support::output_streamer::put(out,v.WaveFrontSize); + roctracer::kfd_support::output_streamer::put(out,v.NumShaderBanks); + roctracer::kfd_support::output_streamer::put(out,v.NumArrays); + roctracer::kfd_support::output_streamer::put(out,v.NumCUPerArray); + roctracer::kfd_support::output_streamer::put(out,v.NumSIMDPerCU); + roctracer::kfd_support::output_streamer::put(out,v.MaxSlotsScratchCU); + roctracer::kfd_support::output_streamer::put(out,v.EngineId); + roctracer::kfd_support::output_streamer::put(out,v.VendorId); + roctracer::kfd_support::output_streamer::put(out,v.DeviceId); + roctracer::kfd_support::output_streamer::put(out,v.LocationId); + roctracer::kfd_support::output_streamer::put(out,v.LocalMemSize); + roctracer::kfd_support::output_streamer::put(out,v.MaxEngineClockMhzFCompute); + roctracer::kfd_support::output_streamer::put(out,v.MaxEngineClockMhzCCompute); + roctracer::kfd_support::output_streamer::put(out,v.MarketingName[HSA_PUBLIC_NAME_SIZE]); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HSA_MEMORYPROPERTY& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.MemoryProperty); + roctracer::kfd_support::output_streamer::put(out,v.ui32.HotPluggable); + roctracer::kfd_support::output_streamer::put(out,v.ui32.NonVolatile); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Reserved); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaMemoryProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.HeapType); + roctracer::kfd_support::output_streamer::put(out,v.SizeInBytes); + roctracer::kfd_support::output_streamer::put(out,v.ui32.SizeInBytesLow); + roctracer::kfd_support::output_streamer::put(out,v.ui32.SizeInBytesHigh); + roctracer::kfd_support::output_streamer::put(out,v.Flags); + roctracer::kfd_support::output_streamer::put(out,v.Width); + roctracer::kfd_support::output_streamer::put(out,v.MemoryClockMax); + roctracer::kfd_support::output_streamer::put(out,v.VirtualBaseAddress); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCacheType& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.Value); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Data); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Instruction); + roctracer::kfd_support::output_streamer::put(out,v.ui32.CPU); + roctracer::kfd_support::output_streamer::put(out,v.ui32.HSACU); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Reserved); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCacheProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.ProcessorIdLow); + roctracer::kfd_support::output_streamer::put(out,v.CacheLevel); + roctracer::kfd_support::output_streamer::put(out,v.CacheSize); + roctracer::kfd_support::output_streamer::put(out,v.CacheLineSize); + roctracer::kfd_support::output_streamer::put(out,v.CacheLinesPerTag); + roctracer::kfd_support::output_streamer::put(out,v.CacheAssociativity); + roctracer::kfd_support::output_streamer::put(out,v.CacheLatency); + roctracer::kfd_support::output_streamer::put(out,v.CacheType); + roctracer::kfd_support::output_streamer::put(out,v.SiblingMap[HSA_CPU_SIBLINGS]); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCComputeProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.SiblingMap[HSA_CPU_SIBLINGS]); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HSA_LINKPROPERTY& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.LinkProperty); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Override); + roctracer::kfd_support::output_streamer::put(out,v.ui32.NonCoherent); + roctracer::kfd_support::output_streamer::put(out,v.ui32.NoAtomics32bit); + roctracer::kfd_support::output_streamer::put(out,v.ui32.NoAtomics64bit); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Reserved); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaIoLinkProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.IoLinkType); + roctracer::kfd_support::output_streamer::put(out,v.VersionMajor); + roctracer::kfd_support::output_streamer::put(out,v.VersionMinor); + roctracer::kfd_support::output_streamer::put(out,v.NodeFrom); + roctracer::kfd_support::output_streamer::put(out,v.NodeTo); + roctracer::kfd_support::output_streamer::put(out,v.Weight); + roctracer::kfd_support::output_streamer::put(out,v.MinimumLatency); + roctracer::kfd_support::output_streamer::put(out,v.MaximumLatency); + roctracer::kfd_support::output_streamer::put(out,v.MinimumBandwidth); + roctracer::kfd_support::output_streamer::put(out,v.MaximumBandwidth); + roctracer::kfd_support::output_streamer::put(out,v.RecTransferSize); + roctracer::kfd_support::output_streamer::put(out,v.Flags); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaMemFlags& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.ui32.NonPaged); + roctracer::kfd_support::output_streamer::put(out,v.ui32.CachePolicy); + roctracer::kfd_support::output_streamer::put(out,v.ui32.ReadOnly); + roctracer::kfd_support::output_streamer::put(out,v.ui32.PageSize); + roctracer::kfd_support::output_streamer::put(out,v.ui32.HostAccess); + roctracer::kfd_support::output_streamer::put(out,v.ui32.NoSubstitute); + roctracer::kfd_support::output_streamer::put(out,v.ui32.GDSMemory); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Scratch); + roctracer::kfd_support::output_streamer::put(out,v.ui32.AtomicAccessFull); + roctracer::kfd_support::output_streamer::put(out,v.ui32.AtomicAccessPartial); + roctracer::kfd_support::output_streamer::put(out,v.ui32.ExecuteAccess); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Reserved); + roctracer::kfd_support::output_streamer::put(out,v.Value); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaQueueResource& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.QueueId); + roctracer::kfd_support::output_streamer::put(out,*(v.Queue_DoorBell)); + roctracer::kfd_support::output_streamer::put(out,*(v.Queue_DoorBell_aql)); + roctracer::kfd_support::output_streamer::put(out,v.QueueDoorBell); + roctracer::kfd_support::output_streamer::put(out,*(v.Queue_write_ptr)); + roctracer::kfd_support::output_streamer::put(out,*(v.Queue_write_ptr_aql)); + roctracer::kfd_support::output_streamer::put(out,v.QueueWptrValue); + roctracer::kfd_support::output_streamer::put(out,*(v.Queue_read_ptr)); + roctracer::kfd_support::output_streamer::put(out,*(v.Queue_read_ptr_aql)); + roctracer::kfd_support::output_streamer::put(out,v.QueueRptrValue); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaQueueReport& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.VMID); + out << ""; + roctracer::kfd_support::output_streamer::put(out,v.QueueSize); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaDbgWaveMsgAMDGen2& v) +{ + roctracer::kfd_support::output_streamer::put(out, v.Value); + roctracer::kfd_support::output_streamer::put(out, v.Reserved2); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaDbgWaveMessageAMD& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.WaveMsgInfoGen2); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaDbgWaveMessage& v) +{ + out << ""; + roctracer::kfd_support::output_streamer::put(out,v.DbgWaveMsg); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaSyncVar& v) +{ + out << ""; + roctracer::kfd_support::output_streamer::put(out,v.SyncVar.UserDataPtrValue); + roctracer::kfd_support::output_streamer::put(out,v.SyncVarSize); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaNodeChange& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.Flags); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaDeviceStateChange& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.NodeId); + roctracer::kfd_support::output_streamer::put(out,v.Device); + roctracer::kfd_support::output_streamer::put(out,v.Flags); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaAccessAttributeFailure& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.NotPresent); + roctracer::kfd_support::output_streamer::put(out,v.ReadOnly); + roctracer::kfd_support::output_streamer::put(out,v.NoExecute); + roctracer::kfd_support::output_streamer::put(out,v.GpuAccess); + roctracer::kfd_support::output_streamer::put(out,v.ECC); + roctracer::kfd_support::output_streamer::put(out,v.Reserved); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaMemoryAccessFault& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.NodeId); + roctracer::kfd_support::output_streamer::put(out,v.VirtualAddress); + roctracer::kfd_support::output_streamer::put(out,v. Failure); + roctracer::kfd_support::output_streamer::put(out,v.Flags); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaEventData& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.EventType); + roctracer::kfd_support::output_streamer::put(out,v.EventData.SyncVar); + roctracer::kfd_support::output_streamer::put(out,v.EventData.NodeChangeState); + roctracer::kfd_support::output_streamer::put(out,v.EventData.DeviceState); + roctracer::kfd_support::output_streamer::put(out,v.EventData.MemoryAccessFault); + roctracer::kfd_support::output_streamer::put(out,v.HWData1); + roctracer::kfd_support::output_streamer::put(out,v.HWData2); + roctracer::kfd_support::output_streamer::put(out,v.HWData3); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaEventDescriptor& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.EventType); + roctracer::kfd_support::output_streamer::put(out,v.NodeId); + roctracer::kfd_support::output_streamer::put(out,v.SyncVar); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaEvent& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.EventId); + roctracer::kfd_support::output_streamer::put(out,v.EventData); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaClockCounters& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.GPUClockCounter); + roctracer::kfd_support::output_streamer::put(out,v.CPUClockCounter); + roctracer::kfd_support::output_streamer::put(out,v.SystemClockCounter); + roctracer::kfd_support::output_streamer::put(out,v.SystemClockFrequencyHz); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HSA_UUID& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.Data1); + roctracer::kfd_support::output_streamer::put(out,v.Data2); + roctracer::kfd_support::output_streamer::put(out,v.Data3); + roctracer::kfd_support::output_streamer::put(out,v.Data4[8]); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCounterFlags& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.ui32.Global); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Resettable); + roctracer::kfd_support::output_streamer::put(out,v.ui32.ReadOnly); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Stream); + roctracer::kfd_support::output_streamer::put(out,v.ui32.Reserved); + roctracer::kfd_support::output_streamer::put(out, v.Value); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCounter& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.Type); + roctracer::kfd_support::output_streamer::put(out,v.CounterId); + roctracer::kfd_support::output_streamer::put(out,v.CounterSizeInBits); + roctracer::kfd_support::output_streamer::put(out,v.CounterMask); + roctracer::kfd_support::output_streamer::put(out,v.Flags); + roctracer::kfd_support::output_streamer::put(out,v.BlockIndex); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCounterBlockProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.BlockId); + roctracer::kfd_support::output_streamer::put(out,v.NumCounters); + roctracer::kfd_support::output_streamer::put(out,v.NumConcurrent); + roctracer::kfd_support::output_streamer::put(out,v.Counters[1]); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaCounterProperties& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.NumBlocks); + roctracer::kfd_support::output_streamer::put(out,v.NumConcurrent); + roctracer::kfd_support::output_streamer::put(out,v.Blocks[1]); + return out; +} +}; +template<> +struct output_streamer { + inline static std::ostream& put(std::ostream& out, HsaPmcTraceRoot& v) +{ + roctracer::kfd_support::output_streamer::put(out,v.TraceBufferMinSizeBytes); + roctracer::kfd_support::output_streamer::put(out,v.NumberOfPasses); + roctracer::kfd_support::output_streamer::put(out,v.TraceId); + return out; +} +}; +// end ostream ops for KFD +};}; + +#include + +#endif // INC_ROCTRACER_KFD_H_ diff --git a/projects/roctracer/script/kfdap.py b/projects/roctracer/script/kfdap.py index 93f7e18072..4401de548f 100755 --- a/projects/roctracer/script/kfdap.py +++ b/projects/roctracer/script/kfdap.py @@ -1,16 +1,533 @@ #!/usr/bin/python import os, sys, re -OUT_H = 'inc/kfd_prof_str.h' -OUT_C = "src/kfd/kfd_wrapper.cpp" -API_HEADER = "hsakmt.h" +OUT_H = 'inc/kfd_prof_str.h' +OUT_CPP = 'src/kfd/kfd_wrapper.cpp' +API_HEADERS_H = ( + ('HSAKMTAPI', 'hsakmt.h'), +) -content_h = \ - '#ifndef KFD_PROF_STR_H_\n' + \ - '#define KFD_PROF_STR_H_\n' + \ - '#endif \\\\ KFD_PROF_STR_H_\n' +LICENSE = \ +'/*\n' + \ +'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n' + \ +'\n' + \ +'Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \ +'of this software and associated documentation files (the "Software"), to deal\n' + \ +'in the Software without restriction, including without limitation the rights\n' + \ +'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \ +'copies of the Software, and to permit persons to whom the Software is\n' + \ +'furnished to do so, subject to the following conditions:\n' + \ +'\n' + \ +'The above copyright notice and this permission notice shall be included in\n' + \ +'all copies or substantial portions of the Software.\n' + \ +'\n' + \ +'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \ +'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \ +'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \ +'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \ +'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \ +'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \ +'THE SOFTWARE.\n' + \ +'*/\n' -content_c = 'namespace kfd { void fun() {}; } // namespace kfd\n' +############################################################# +# Error handler +def fatal(module, msg): + print >>sys.stderr, module + ' Error: "' + msg + '"' + sys.exit(1) + +# Get next text block +def NextBlock(pos, record): + if len(record) == 0: return pos + + space_pattern = re.compile(r'(\s+)') + word_pattern = re.compile(r'([\w\*]+\[*\]*)') + if record[pos] != '(': + m = space_pattern.match(record, pos) + if not m: + m = word_pattern.match(record, pos) + if m: + return pos + len(m.group(1)) + else: + fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")") + else: + count = 0 + for index in range(pos, len(record)): + if record[index] == '(': + count = count + 1 + elif record[index] == ')': + count = count-1 + if count == 0: + index = index + 1 + break + if count != 0: + fatal('NextBlock', "count is not zero (" + str(count) + ")") + if record[index-1] != ')': + fatal('NextBlock', "last char is not ')' '" + record[index-1] + "'") + return index + +############################################################# +# API table parser class +class API_TableParser: + def fatal(self, msg): + fatal('API_TableParser', msg) + + def __init__(self, header, name, full_fct): + self.name = name + self.full_fct = full_fct + + if not os.path.isfile(header): + self.fatal("file '" + header + "' not found") + + self.inp = open(header, 'r') + + self.beg_pattern = re.compile(name) + self.end_pattern = re.compile('.*\)\s*;\s*$'); + self.array = [] + self.parse() + + # normalizing a line + def norm_line(self, line): + return re.sub(r'^\s+', r' ', line) + + def fix_comment_line(self, line): + return re.sub(r'\/\/.*', r'', line) + + def remove_ret_line(self, line): + return re.sub(r'\n', r'', line) + + # check for start record + def is_start(self, record): + return self.beg_pattern.match(record) + + # check for end record + def is_end(self, record): + return self.end_pattern.match(record) + + # check for declaration entry record + def is_entry(self, record): + return re.match(r'^\s*HSAKMTAPI\s*(.*)\s*\((.*)\)', record) + + # parse method + def parse(self): + active = 0 + record = ""; + cumulate = 0; + self.full_fct = {} + for line in self.inp.readlines(): + line = self.norm_line(line) + line = self.fix_comment_line(line) + + if cumulate == 1: record += " " + line; + else: record = line; + if self.is_start(line): cumulate = 1; continue; + if self.is_end(line): record = self.remove_ret_line(record); cumulate = 0; active = 1; + else: continue; + if active != 0: + m = self.is_entry(record) + if m: + mycall_full = "void " + m.group(1) + ' (' + m.group(2) + ')' + mycall = m.group(1) + self.full_fct[mycall] = mycall_full + self.array.append(mycall) + +############################################################# +# API declaration parser clas +class API_DeclParser: + def fatal(self, msg): + fatal('API_DeclParser', msg) + + def __init__(self, header, array, data, full_fct): + if not os.path.isfile(header): + self.fatal("file '" + header + "' not found") + + self.inp = open(header, 'r') + + self.end_pattern = re.compile('\)\s*;\s*$') + self.data = data + for call in array: + if call in data: + self.fatal(call + ' is already found') + self.parse(call,full_fct) + + # check for start record + def is_start(self, call, record): + return re.search('\s*' + call + '\s*\(', record) + + # check for API method record + def is_api(self, call, record): + return re.match('\s*' + call + '\s*\(', record) + + + # check for end record + def is_end(self, record): + return self.end_pattern.search(record) + + # parse method args + def get_args(self, record): + struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []} + record = re.sub(r'^\s+', r'', record) + record = re.sub(r'\s*(\*+)\s*', r'\1 ', record) + rind = NextBlock(0, record) + struct['ret'] = record[0:rind] + pos = record.find('(') + end = NextBlock(pos, record); + args = record[pos:end] + args = re.sub(r'^\(\s*', r'', args) + args = re.sub(r'\s*\)$', r'', args) + args = re.sub(r'\s*,\s*', r',', args) + struct['args'] = re.sub(r',', r', ', args) + if args == "void": + return struct + + if len(args) == 0: return struct + + pos = 0 + args = args + ',' + while pos < len(args): + ind1 = NextBlock(pos, args) # type + ind2 = NextBlock(ind1, args) # space + if args[ind2] != '(': + while ind2 < len(args): + end = NextBlock(ind2, args) + if args[end] == ',': break + else: ind2 = end + name = args[ind2:end] + else: + ind3 = NextBlock(ind2, args) # field + m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3]) + if not m: + self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'") + name = m.group(1) + end = NextBlock(ind3, args) # the rest + item = args[pos:end] + struct['astr'][name] = item + struct['alst'].append(name) + struct['tlst'].append(item) + if args[end] != ',': + self.fatal("no comma '" + args + "'") + pos = end + 1 + + return struct + + # parse given api + def parse(self, call, full_fct): + if call in full_fct: + self.data[call] = self.get_args(full_fct[call]) + else: + self.data[call] = self.get_args(call) + +############################################################# +# API description parser class +class API_DescrParser: + def fatal(self, msg): + fatal('API_DescrParser', msg) + + def __init__(self, out_file, kfd_dir, api_headers, license): + out_macro = re.sub(r'[\/\.]', r'_', out_file.upper()) + '_' + + self.content_h = '' + self.content_cpp = '' + + self.api_names = [] + self.api_calls = {} + self.api_rettypes = set() + self.api_id = {} + + api_data = {} + full_fct = {} + api_list = [] + ns_calls = [] + + (name, header) = api_headers[0] + api = API_TableParser(kfd_dir + header, name, full_fct) + full_fct = api.full_fct + api_list = api.array + self.api_names.append(name) + self.api_calls[name] = api_list + + for call in api_list: + if call in api_data: + self.fatal("call '" + call + "' is already found") + + API_DeclParser(kfd_dir + header, api_list, api_data, full_fct) + + for call in api_list: + if not call in api_data: + # Not-supported functions + ns_calls.append(call) + else: + # API ID map + self.api_id[call] = 'KFD_API_ID_' + call + # Return types + self.api_rettypes.add(api_data[call]['ret']) + + self.api_rettypes.discard('void') + self.api_data = api_data + self.ns_calls = ns_calls + + self.content_h += "// automatically generated\n\n" + license + '\n' + + self.content_h += "/////////////////////////////////////////////////////////////////////////////\n" + for call in self.ns_calls: + self.content_h += '// ' + call + ' was not parsed\n' + self.content_h += '\n' + self.content_h += '#ifndef ' + out_macro + '\n' + self.content_h += '#define ' + out_macro + '\n' + + self.content_h += '\n' + + self.content_h += '#include \n' + self.content_h += '#include \n' + self.content_h += '#include \"roctracer_kfd.h\"\n' + self.content_h += '#include \"hsakmt.h\"\n' + self.content_h += '#include \"cb_table.h\"\n' + + self.content_h += '#define PUBLIC_API __attribute__((visibility(\"default\")))\n' + + self.add_section('API ID enumeration', ' ', self.gen_id_enum) + self.add_section('API arg structure', ' ', self.gen_arg_struct) + + self.content_h += '\n' + self.content_h += '#if PROF_API_IMPL\n' + self.content_h += 'namespace roctracer {\n' + self.content_h += 'namespace kfd_support {\n' + + self.add_section('API get_name function', ' ', self.gen_get_name) + self.add_section('API get_code function', ' ', self.gen_get_code) + + self.add_section('API intercepting code', '', self.gen_intercept_decl) + self.add_section('API intercepting code', '', self.gen_intercept) + self.add_section('API callback functions', '', self.gen_callbacks) + + self.content_h += '\n};};\n' + self.content_h += '#endif // PROF_API_IMPL\n' + + self.content_cpp += "// automatically generated\n\n" + license + '\n' + self.content_cpp += "/////////////////////////////////////////////////////////////////////////////\n\n" + self.content_cpp += '#define PROF_API_IMPL 1\n' + self.content_cpp += '#include \"kfd_prof_str.h\"\n' + + self.add_section('API output stream', ' ', self.gen_out_stream) + self.add_section_cpp('API callback fcts', ' ', self.gen_public_api) + self.content_h += '#endif // ' + out_macro + '_' + self.content_cpp += '}\n' + self.content_cpp += '\n' + + # add code section + def add_section_cpp(self, title, gap, fun): + n = 0 + self.content_cpp += '\n// section: ' + title + '\n\n' + fun(-1, '-', '-', {}) + for index in range(len(self.api_names)): + last = (index == len(self.api_names)-1) + name = self.api_names[index] + + if n != 0: + if gap == '': fun(n, name, '-', {}) + self.content_cpp += '\n' + self.content_cpp += gap + '// block: ' + name + ' API\n' + for call in self.api_calls[name]: + fun(n, name, call, self.api_data[call]) + n += 1 + fun(n, '-', '-', {}) + + def add_section(self, title, gap, fun): + n = 0 + self.content_h += '\n// section: ' + title + '\n\n' + fun(-1, '-', '-', {}) + for index in range(len(self.api_names)): + last = (index == len(self.api_names)-1) + name = self.api_names[index] + + if n != 0: + if gap == '': fun(n, name, '-', {}) + self.content_h += '\n' + self.content_h += gap + '// block: ' + name + ' API\n' + for call in self.api_calls[name]: + fun(n, name, call, self.api_data[call]) + n += 1 + fun(n, '-', '-', {}) + + # check if it's an array decl + def is_arr(self, record): + return re.match(r'\s*(.*)\s+(.*)\[\]\s*', record) + + # generate API ID enumeration + def gen_id_enum(self, n, name, call, data): + if n == -1: + self.content_h += 'enum kfd_api_id_t {\n' + return + if call != '-': + self.content_h += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n' + else: + self.content_h += '\n' + self.content_h += ' KFD_API_ID_NUMBER = ' + str(n) + ',\n' + self.content_h += ' KFD_API_ID_ANY = ' + str(n + 1) + ',\n' + self.content_h += '};\n' + + # generate API args structure + def gen_arg_struct(self, n, name, call, struct): + if n == -1: + self.content_h += 'struct kfd_api_data_t {\n' + self.content_h += ' uint64_t correlation_id;\n' + self.content_h += ' uint32_t phase;\n' + self.content_h += ' union {\n' + for ret_type in self.api_rettypes: + self.content_h += ' ' + ret_type + ' ' + ret_type + '_retval;\n' + self.content_h += ' };\n' + self.content_h += ' union {\n' + return + if call != '-': + self.content_h += ' struct {\n' + for (var, item) in struct['astr'].items(): + m = self.is_arr(item) + if m: + self.content_h += ' ' + m.group(1) + '* ' + m.group(2) + ';\n' + else: + self.content_h += ' ' + item + ';\n' + self.content_h += ' } ' + call + ';\n' + else: + self.content_h += ' } args;\n' + self.content_h += '};\n' + + # generate API callbacks + def gen_callbacks(self, n, name, call, struct): + if n == -1: + self.content_h += 'typedef CbTable cb_table_t;\n' + self.content_h += 'cb_table_t cb_table;\n' + self.content_h += '\n' + if call != '-': + call_id = self.api_id[call]; + ret_type = struct['ret'] + self.content_h += ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' # 'static ' + + if call == 'hsaKmtOpenKFD': + self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n' + self.content_h += ' kfd_api_data_t api_data{};\n' + for var in struct['alst']: + self.content_h += ' api_data.args.' + call + '.' + var.replace("[]","") + ' = ' + var.replace("[]","") + ';\n' + self.content_h += ' activity_rtapi_callback_t api_callback_fun = NULL;\n' + self.content_h += ' void* api_callback_arg = NULL;\n' + self.content_h += ' cb_table.get(' + call_id + ', &api_callback_fun, &api_callback_arg);\n' + self.content_h += ' api_data.phase = 0;\n' + self.content_h += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_KFD_API, ' + call_id + ', &api_data, api_callback_arg);\n' + if ret_type != 'void': + self.content_h += ' ' + ret_type + ' ret = ' + tmp_str = ' ' + name + '_table->' + call + '_fn(' + ', '.join(struct['alst']) + ');\n' + self.content_h += tmp_str.replace("[]","") + if ret_type != 'void': + self.content_h += ' api_data.' + ret_type + '_retval = ret;\n' + self.content_h += ' api_data.phase = 1;\n' + self.content_h += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_KFD_API, ' + call_id + ', &api_data, api_callback_arg);\n' + if ret_type != 'void': + self.content_h += ' return ret;\n' + self.content_h += '}\n' + + # Generates API intercepting table struct definition + def gen_intercept_decl(self, n, name, call, struct): + if n > 0 and call == '-': + self.content_h += '} HSAKMTAPI_table_t;\n' #was HSAKMTAPI_table_t + if n == 0 or (call == '-' and name != '-'): + self.content_h += 'typedef struct {\n' + if call != '-': + self.content_h += ' decltype(' + call + ')* ' + call + '_fn;\n' + + # generate API intercepting code + def gen_intercept(self, n, name, call, struct): + if n > 0 and call == '-': + self.content_h += '};\n' + if n == 0 or (call == '-' and name != '-'): + self.content_h += name + '_table_t* ' + name + '_table = NULL;\n' + self.content_h += 'void intercept_' + 'KFDApiTable' + '(void) {\n' + self.content_h += ' ' + name + '_table = new ' + name + '_table_t{}' + ';\n' + + if call != '-': + self.content_h += ' typedef decltype(' + name + '_table_t::' + call + '_fn) ' + call + '_t;\n' + self.content_h += ' ' + name + '_table->' + call + '_fn = (' + call + '_t)' + 'dlsym(RTLD_NEXT,\"' + call + '\");\n' + + # generate API name function + def gen_get_name(self, n, name, call, struct): + if n == -1: + self.content_h += 'const char* GetApiName(const uint32_t& id) {\n' #static + self.content_h += ' switch (id) {\n' + return + if call != '-': + self.content_h += ' case ' + self.api_id[call] + ': return "' + call + '";\n' + else: + self.content_h += ' }\n' + self.content_h += ' return "unknown";\n' + self.content_h += '}\n' + + # generate API code function + def gen_get_code(self, n, name, call, struct): + if n == -1: + self.content_h += 'uint32_t GetApiCode(const char* str) {\n' # static + return + if call != '-': + self.content_h += ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n' + else: + self.content_h += ' return KFD_API_ID_NUMBER;\n' + self.content_h += '}\n' + + # generate stream operator + def gen_out_stream(self, n, name, call, struct): + if n == -1: + self.content_h += 'typedef std::pair kfd_api_data_pair_t;\n' + self.content_h += 'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n' + self.content_h += ' const uint32_t cid = data_pair.first;\n' + self.content_h += ' const kfd_api_data_t& api_data = data_pair.second;\n' + self.content_h += ' switch(cid) {\n' + return + if call != '-': + self.content_h += ' case ' + self.api_id[call] + ': {\n' + self.content_h += ' out << "' + call + '(";\n' + arg_list = struct['alst'] + if len(arg_list) != 0: + for ind in range(len(arg_list)): + arg_var = arg_list[ind] + arg_val = 'api_data.args.' + call + '.' + arg_var + if re.search(r'MemFlags',arg_var): + continue + self.content_h += ' typedef decltype(' + arg_val.replace("[]","") + ') arg_val_type_t' + str(ind) + ';\n' + self.content_h += ' roctracer::kfd_support::output_streamer::put(out, ' + arg_val.replace("[]","") + ')' + if ind < len(arg_list)-1: self.content_h += ' << ", ";\n' + else: self.content_h += ';\n' + if struct['ret'] != 'void': + self.content_h += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n' + else: + self.content_h += ' out << ") = void";\n' + self.content_h += ' break;\n' + self.content_h += ' }\n' + else: + self.content_h += ' default:\n' + self.content_h += ' out << "ERROR: unknown API";\n' + self.content_h += ' abort();\n' + self.content_h += ' }\n' + self.content_h += ' return out;\n' + self.content_h += '}\n' + self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n' + + # generate PUBLIC_API for all API fcts + def gen_public_api(self, n, name, call, struct): + if n == -1: + self.content_cpp += 'extern "C" {\n' + self.content_cpp += 'PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* user_data) {\n'; + self.content_cpp += ' roctracer::kfd_support::cb_table.set(op, reinterpret_cast(callback), user_data);\n'; + self.content_cpp += ' return true;\n'; + self.content_cpp += '}\n'; + self.content_cpp += 'PUBLIC_API bool RemoveApiCallback(uint32_t op) {\n' + self.content_cpp += ' roctracer::kfd_support::cb_table.set(op, NULL, NULL);\n'; + self.content_cpp += ' return true;\n'; + self.content_cpp += '}\n\n'; + + if call != '-': + self.content_cpp += 'PUBLIC_API HSAKMT_STATUS ' + call + '(' + struct['args'] + ') { roctracer::kfd_support::' + call + '_callback(' + for i in range(0,len(struct['alst'])): + if i == (len(struct['alst'])-1): + self.content_cpp += struct['alst'][i].replace("[]","") + else: + self.content_cpp += struct['alst'][i].replace("[]","") + ', ' + self.content_cpp += '); return HSAKMT_STATUS_SUCCESS;} \n' ############################################################# # main @@ -22,14 +539,18 @@ else: ROOT = sys.argv[1] + '/' KFD_DIR = sys.argv[2] + '/' +descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE) -out_h_file = ROOT + OUT_H -out_c_file = ROOT + OUT_C -print 'Generating: "' + out_h_file + '", ' + out_c_file + '"' -f = open(out_h_file, 'w') -f.write(content_h) +out_file = ROOT + OUT_H +print 'Generating "' + out_file + '"' +f = open(out_file, 'w') +f.write(descr.content_h[:-1]) f.close() -f = open(out_c_file, 'w') -f.write(content_c) + +out_file = ROOT + OUT_CPP +print 'Generating "' + out_file + '"' +f = open(out_file, 'w') +f.write(descr.content_cpp[:-1]) f.close() + ############################################################# diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index 29571891f8..bf0fe6c06a 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -10,7 +10,7 @@ set ( LIB_SRC ${LIB_DIR}/util/hsa_rsrc_factory.cpp ) add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) -target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ) +target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ) execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" ) @@ -19,7 +19,7 @@ set ( KFD_LIB_SRC ${LIB_DIR}/kfd/kfd_wrapper.cpp ) add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) -target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ) +target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 5250dd8622..f9855e663e 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -26,6 +26,7 @@ THE SOFTWARE. #include "inc/roctracer_roctx.h" #define PROF_API_IMPL 1 #include "inc/roctracer_hsa.h" +#include "inc/roctracer_kfd.h" #include #include @@ -633,6 +634,10 @@ PUBLIC_API const char* roctracer_op_string( return roctracer::HipLoader::Instance().ApiName(op); break; } + case ACTIVITY_DOMAIN_KFD_API: { + return roctracer::kfd_support::GetApiName(op); + break; + } default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -653,6 +658,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code( if (kind != NULL) *kind = 0; break; } + case ACTIVITY_DOMAIN_KFD_API: { + *op = roctracer::kfd_support::GetApiCode(str); + if (kind != NULL) *kind = 0; + break; + } default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")"); } @@ -665,6 +675,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) { case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; + case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: @@ -681,13 +692,11 @@ static void roctracer_enable_callback_impl( void* user_data) { switch (domain) { -#if 0 case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error"); break; } -#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { roctracer::hsa_support::cb_table.set(op, callback, user_data); @@ -751,13 +760,11 @@ static void roctracer_disable_callback_impl( uint32_t op) { switch (domain) { -#if 0 case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error"); break; } -#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_HCC_OPS: break; @@ -856,6 +863,7 @@ static void roctracer_enable_activity_impl( break; } case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { if (roctracer::HccLoader::GetRef() == NULL) { roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback, @@ -920,6 +928,7 @@ static void roctracer_disable_activity_impl( break; } case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false); if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")"); @@ -1039,6 +1048,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( break; } + case ACTIVITY_DOMAIN_KFD_API: { + roctracer::kfd_support::intercept_KFDApiTable(); + break; + } case ACTIVITY_DOMAIN_HSA_API: { // HSA API properties HsaApiTable* table = reinterpret_cast(properties); diff --git a/projects/roctracer/test/CMakeLists.txt b/projects/roctracer/test/CMakeLists.txt index 1092b48c52..d794c44c39 100644 --- a/projects/roctracer/test/CMakeLists.txt +++ b/projects/roctracer/test/CMakeLists.txt @@ -44,7 +44,7 @@ file( GLOB UTIL_SRC "${HSA_TEST_DIR}/util/*.cpp" ) set ( TEST_LIB "tracer_tool" ) set ( TEST_LIB_SRC ${TEST_DIR}/tool/tracer_tool.cpp ${UTIL_SRC} ) add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} ) -target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ) +target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} ) target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt ) ## Build HSA test diff --git a/projects/roctracer/test/run.sh b/projects/roctracer/test/run.sh index 807a0202bc..6840aacc63 100755 --- a/projects/roctracer/test/run.sh +++ b/projects/roctracer/test/run.sh @@ -59,7 +59,7 @@ eval_test() { # Standalone test # rocTrecer is used explicitely by test -eval_test "standalone HIP test" ./test/MatrixTranspose_test +eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test" # Tool test # rocTracer/tool is loaded by HSA runtime diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 095397aa77..8aef4dea29 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -31,6 +31,7 @@ THE SOFTWARE. #include #include #include +#include #include #include #include @@ -58,9 +59,11 @@ typedef hsa_rt_utils::Timer::timestamp_t timestamp_t; hsa_rt_utils::Timer* timer = NULL; thread_local timestamp_t hsa_begin_timestamp = 0; thread_local timestamp_t hip_begin_timestamp = 0; +thread_local timestamp_t kfd_begin_timestamp = 0; bool trace_hsa_api = false; bool trace_hsa_activity = false; bool trace_hip = false; +bool trace_kfd = false; LOADER_INSTANTIATE(); @@ -69,6 +72,7 @@ FILE* hsa_api_file_handle = NULL; FILE* hsa_async_copy_file_handle = NULL; FILE* hip_api_file_handle = NULL; FILE* hcc_activity_file_handle = NULL; +FILE* kfd_api_file_handle = NULL; static inline uint32_t GetPid() { return syscall(__NR_getpid); } static inline uint32_t GetTid() { return syscall(__NR_gettid); } @@ -79,12 +83,31 @@ void fatal(const std::string msg) { fflush(hsa_async_copy_file_handle); fflush(hip_api_file_handle); fflush(hcc_activity_file_handle); + fflush(kfd_api_file_handle); fflush(stdout); fprintf(stderr, "%s\n\n", msg.c_str()); fflush(stderr); abort(); } +// KFD API callback function +void kfd_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const kfd_api_data_t* data = reinterpret_cast(callback_data); + if (data->phase == ACTIVITY_API_PHASE_ENTER) { + kfd_begin_timestamp = timer->timestamp_fn_ns(); + } else { + const timestamp_t end_timestamp = timer->timestamp_fn_ns(); + std::ostringstream os; + os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); + fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); + } +} // C++ symbol demangle static inline const char* cxx_demangle(const char* symbol) { size_t funcnamesize; @@ -296,10 +319,8 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id); fflush(hcc_activity_file_handle); } else { -#if 0 fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n", record->begin_ns, record->end_ns, record->process_id, record->thread_id, name); -#endif } ROCTRACER_CALL(roctracer_next_record(record, &record)); } @@ -394,6 +415,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } } + trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0); + // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); if (output_prefix != NULL) { @@ -408,6 +431,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // API trace vector std::vector hsa_api_vec; + std::vector kfd_api_vec; printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); // XML input @@ -437,6 +461,11 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, trace_hsa_api = true; hsa_api_vec = api_vec; } + if (name == "KFD") { + found = true; + trace_kfd = true; + kfd_api_vec = api_vec; + } if (name == "GPU") { found = true; trace_hsa_activity = true; @@ -473,6 +502,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, printf(")\n"); } + if (trace_kfd) { + kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); + // initialize KFD tracing + roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL); + + printf(" KFD-trace("); + if (kfd_api_vec.size() != 0) { + for (unsigned i = 0; i < kfd_api_vec.size(); ++i) { + uint32_t cid = KFD_API_ID_NUMBER; + const char* api = kfd_api_vec[i].c_str(); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid)); + ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL)); + printf(" %s", api); + } + } else { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL)); + } + printf(")\n"); + } if (trace_hsa_activity) { hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); @@ -542,6 +590,11 @@ void tool_unload(bool destruct) { close_output_file(hip_api_file_handle); close_output_file(hcc_activity_file_handle); } + + if (trace_kfd) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); + fclose(kfd_api_file_handle); + } if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); } }