Add KFD support.

[ROCm/roctracer commit: 315a547cc4]
This commit is contained in:
Rachida Kebichi
2019-09-27 18:32:54 -04:00
parent 7eb12e97de
commit 0c407e3cf4
8 changed files with 1113 additions and 27 deletions
+3 -2
View File
@@ -31,8 +31,9 @@ typedef enum {
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
ACTIVITY_DOMAIN_EXT_API = 4, // External ID domain
ACTIVITY_DOMAIN_ROCTX = 5, // ROCTX domain
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
ACTIVITY_DOMAIN_NUMBER
} activity_domain_t;
+498
View File
@@ -0,0 +1,498 @@
/*
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef INC_ROCTRACER_KFD_H_
#define INC_ROCTRACER_KFD_H_
#include <iostream>
#include <mutex>
#include <hsa.h>
#include "roctracer.h"
#include "hsakmt.h"
namespace roctracer {
namespace kfd_support {
// Fallback streamer: a type without a dedicated specialization is accepted
// but contributes nothing to the output.
template <typename T>
struct output_streamer {
  inline static std::ostream& put(std::ostream& out, const T& /*v*/) { return out; }
};

// Scalar streamers. Each one writes "<TYPE 0xHEX>" and switches the stream
// back to decimal before returning `out`.
template<>
struct output_streamer<bool> {
  inline static std::ostream& put(std::ostream& out, bool v) {
    out << std::hex << "<bool " << "0x" << v << std::dec << ">";
    return out;
  }
};
template<>
struct output_streamer<uint8_t> {
  inline static std::ostream& put(std::ostream& out, uint8_t v) {
    // uint8_t is a character type for operator<<; widen it so the numeric
    // value is printed instead of a raw (possibly unprintable) character.
    out << std::hex << "<uint8_t " << "0x" << static_cast<unsigned>(v) << std::dec << ">";
    return out;
  }
};
template<>
struct output_streamer<uint16_t> {
  inline static std::ostream& put(std::ostream& out, uint16_t v) {
    out << std::hex << "<uint16_t " << "0x" << v << std::dec << ">";
    return out;
  }
};
template<>
struct output_streamer<uint32_t> {
  inline static std::ostream& put(std::ostream& out, uint32_t v) {
    out << std::hex << "<uint32_t " << "0x" << v << std::dec << ">";
    return out;
  }
};
template<>
struct output_streamer<uint64_t> {
  inline static std::ostream& put(std::ostream& out, uint64_t v) {
    out << std::hex << "<uint64_t " << "0x" << v << std::dec << ">";
    return out;
  }
};

// Pointer-to-scalar streamers. A non-null pointer is dereferenced and printed
// exactly like the pointed-to scalar; a null pointer is reported as
// "<TYPE nullptr>" instead of being dereferenced.
template<>
struct output_streamer<bool*> {
  inline static std::ostream& put(std::ostream& out, bool* v) {
    if (v == nullptr) { out << "<bool nullptr>"; return out; }
    return output_streamer<bool>::put(out, *v);
  }
};
template<>
struct output_streamer<uint8_t*> {
  inline static std::ostream& put(std::ostream& out, uint8_t* v) {
    if (v == nullptr) { out << "<uint8_t nullptr>"; return out; }
    return output_streamer<uint8_t>::put(out, *v);
  }
};
template<>
struct output_streamer<uint16_t*> {
  inline static std::ostream& put(std::ostream& out, uint16_t* v) {
    if (v == nullptr) { out << "<uint16_t nullptr>"; return out; }
    return output_streamer<uint16_t>::put(out, *v);
  }
};
template<>
struct output_streamer<uint32_t*> {
  inline static std::ostream& put(std::ostream& out, uint32_t* v) {
    if (v == nullptr) { out << "<uint32_t nullptr>"; return out; }
    return output_streamer<uint32_t>::put(out, *v);
  }
};
template<>
struct output_streamer<uint64_t*> {
  inline static std::ostream& put(std::ostream& out, uint64_t* v) {
    if (v == nullptr) { out << "<uint64_t nullptr>"; return out; }
    return output_streamer<uint64_t>::put(out, *v);
  }
};
// Streams a queue handle as "<queue PTR>", where PTR is the pointer value.
template<>
struct output_streamer<hsa_queue_t*> {
  inline static std::ostream& put(std::ostream& out, hsa_queue_t* v) {
    return out << "<queue " << v << ">";
  }
};
// Streams the queue handle stored at `v` as "<queue PTR>".
// NOTE(review): `v` is dereferenced unconditionally.
template<>
struct output_streamer<hsa_queue_t**> {
  inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) {
    return out << "<queue " << *v << ">";
  }
};
// begin ostream ops for KFD
template<>
struct output_streamer<HsaVersionInfo&> {
inline static std::ostream& put(std::ostream& out, HsaVersionInfo& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.KernelInterfaceMajorVersion);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.KernelInterfaceMinorVersion);
return out;
}
};
template<>
struct output_streamer<HsaSystemProperties&> {
inline static std::ostream& put(std::ostream& out, HsaSystemProperties& v) {
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumNodes);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.PlatformOem);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.PlatformId);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.PlatformRev);
return out;
}
};
template<>
struct output_streamer<HSA_CAPABILITY&> {
inline static std::ostream& put(std::ostream& out, HSA_CAPABILITY& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Value);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HotPluggable);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HSAMMUPresent);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.SharedWithGraphics);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.QueueSizePowerOfTwo);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.QueueSize32bit);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.QueueIdleEvent);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.VALimit);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.WatchPointsSupported);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.WatchPointsTotalBits);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.DoorbellType);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
return out;
}
};
template<>
struct output_streamer<HsaNodeProperties&> {
inline static std::ostream& put(std::ostream& out, HsaNodeProperties& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCPUCores);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumFComputeCores);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumMemoryBanks);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCaches);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumIOLinks);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CComputeIdLo);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.FComputeIdLo);
roctracer::kfd_support::output_streamer<HSA_CAPABILITY&>::put(out,v.Capability);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxWavesPerSIMD);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.LDSSizeInKB);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.GDSSizeInKB);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.WaveFrontSize);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumShaderBanks);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumArrays);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCUPerArray);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumSIMDPerCU);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxSlotsScratchCU);
roctracer::kfd_support::output_streamer<HSA_ENGINE_ID>::put(out,v.EngineId);
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.VendorId);
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.DeviceId);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.LocationId);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.LocalMemSize);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxEngineClockMhzFCompute);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxEngineClockMhzCCompute);
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.MarketingName[HSA_PUBLIC_NAME_SIZE]);
return out;
}
};
template<>
struct output_streamer<HSA_MEMORYPROPERTY&> {
inline static std::ostream& put(std::ostream& out, HSA_MEMORYPROPERTY& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MemoryProperty);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HotPluggable);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NonVolatile);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
return out;
}
};
template<>
struct output_streamer<HsaMemoryProperties&> {
inline static std::ostream& put(std::ostream& out, HsaMemoryProperties& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.HeapType);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SizeInBytes);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.SizeInBytesLow);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.SizeInBytesHigh);
roctracer::kfd_support::output_streamer<HSA_MEMORYPROPERTY>::put(out,v.Flags);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Width);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MemoryClockMax);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.VirtualBaseAddress);
return out;
}
};
template<>
struct output_streamer<HsaCacheType&> {
inline static std::ostream& put(std::ostream& out, HsaCacheType& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Value);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Data);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Instruction);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.CPU);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HSACU);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
return out;
}
};
template<>
struct output_streamer<HsaCacheProperties&> {
inline static std::ostream& put(std::ostream& out, HsaCacheProperties& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ProcessorIdLow);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLevel);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheSize);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLineSize);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLinesPerTag);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheAssociativity);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLatency);
roctracer::kfd_support::output_streamer<HsaCacheType>::put(out,v.CacheType);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.SiblingMap[HSA_CPU_SIBLINGS]);
return out;
}
};
template<>
struct output_streamer<HsaCComputeProperties&> {
inline static std::ostream& put(std::ostream& out, HsaCComputeProperties& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.SiblingMap[HSA_CPU_SIBLINGS]);
return out;
}
};
template<>
struct output_streamer<HSA_LINKPROPERTY&> {
inline static std::ostream& put(std::ostream& out, HSA_LINKPROPERTY& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.LinkProperty);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Override);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NonCoherent);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NoAtomics32bit);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NoAtomics64bit);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
return out;
}
};
template<>
struct output_streamer<HsaIoLinkProperties&> {
inline static std::ostream& put(std::ostream& out, HsaIoLinkProperties& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.IoLinkType);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.VersionMajor);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.VersionMinor);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeFrom);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeTo);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Weight);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MinimumLatency);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaximumLatency);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MinimumBandwidth);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaximumBandwidth);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.RecTransferSize);
roctracer::kfd_support::output_streamer<HSA_LINKPROPERTY&>::put(out,v.Flags);
return out;
}
};
template<>
struct output_streamer<HsaMemFlags&> {
inline static std::ostream& put(std::ostream& out, HsaMemFlags& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NonPaged);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.CachePolicy);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.ReadOnly);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.PageSize);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HostAccess);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NoSubstitute);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.GDSMemory);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Scratch);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.AtomicAccessFull);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.AtomicAccessPartial);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.ExecuteAccess);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Value);
return out;
}
};
template<>
struct output_streamer<HsaQueueResource&> {
inline static std::ostream& put(std::ostream& out, HsaQueueResource& v)
{
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueId);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,*(v.Queue_DoorBell));
roctracer::kfd_support::output_streamer<uint64_t>::put(out,*(v.Queue_DoorBell_aql));
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueDoorBell);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,*(v.Queue_write_ptr));
roctracer::kfd_support::output_streamer<uint64_t>::put(out,*(v.Queue_write_ptr_aql));
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueWptrValue);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,*(v.Queue_read_ptr));
roctracer::kfd_support::output_streamer<uint64_t>::put(out,*(v.Queue_read_ptr_aql));
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueRptrValue);
return out;
}
};
template<>
struct output_streamer<HsaQueueReport&> {
inline static std::ostream& put(std::ostream& out, HsaQueueReport& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.VMID);
out << "<void *" << v.QueueAddress << ">";
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueSize);
return out;
}
};
template<>
struct output_streamer<HsaDbgWaveMsgAMDGen2&> {
inline static std::ostream& put(std::ostream& out, HsaDbgWaveMsgAMDGen2& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.Value);
roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.Reserved2);
return out;
}
};
template<>
struct output_streamer<HsaDbgWaveMessageAMD&> {
inline static std::ostream& put(std::ostream& out, HsaDbgWaveMessageAMD& v)
{
roctracer::kfd_support::output_streamer<HsaDbgWaveMsgAMDGen2>::put(out,v.WaveMsgInfoGen2);
return out;
}
};
template<>
struct output_streamer<HsaDbgWaveMessage&> {
inline static std::ostream& put(std::ostream& out, HsaDbgWaveMessage& v)
{
out << "<void* " << v.MemoryVA << ">";
roctracer::kfd_support::output_streamer<HsaDbgWaveMessageAMD>::put(out,v.DbgWaveMsg);
return out;
}
};
template<>
struct output_streamer<HsaSyncVar&> {
inline static std::ostream& put(std::ostream& out, HsaSyncVar& v)
{
out << "<void * " << v.SyncVar.UserData << ">";
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SyncVar.UserDataPtrValue);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SyncVarSize);
return out;
}
};
template<>
struct output_streamer<HsaNodeChange&> {
inline static std::ostream& put(std::ostream& out, HsaNodeChange& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Flags);
return out;
}
};
template<>
struct output_streamer<HsaDeviceStateChange&> {
inline static std::ostream& put(std::ostream& out, HsaDeviceStateChange& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeId);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Device);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Flags);
return out;
}
};
template<>
struct output_streamer<HsaAccessAttributeFailure&> {
inline static std::ostream& put(std::ostream& out, HsaAccessAttributeFailure& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NotPresent);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ReadOnly);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NoExecute);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.GpuAccess);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ECC);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Reserved);
return out;
}
};
template<>
struct output_streamer<HsaMemoryAccessFault&> {
inline static std::ostream& put(std::ostream& out, HsaMemoryAccessFault& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeId);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.VirtualAddress);
roctracer::kfd_support::output_streamer<HsaAccessAttributeFailure>::put(out,v. Failure);
roctracer::kfd_support::output_streamer<int>::put(out,v.Flags);
return out;
}
};
template<>
struct output_streamer<HsaEventData&> {
inline static std::ostream& put(std::ostream& out, HsaEventData& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.EventType);
roctracer::kfd_support::output_streamer<HsaSyncVar>::put(out,v.EventData.SyncVar);
roctracer::kfd_support::output_streamer<HsaNodeChange>::put(out,v.EventData.NodeChangeState);
roctracer::kfd_support::output_streamer<HsaDeviceStateChange>::put(out,v.EventData.DeviceState);
roctracer::kfd_support::output_streamer<HsaMemoryAccessFault>::put(out,v.EventData.MemoryAccessFault);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.HWData1);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.HWData2);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.HWData3);
return out;
}
};
template<>
struct output_streamer<HsaEventDescriptor&> {
inline static std::ostream& put(std::ostream& out, HsaEventDescriptor& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.EventType);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeId);
roctracer::kfd_support::output_streamer<HsaSyncVar>::put(out,v.SyncVar);
return out;
}
};
// Streams an HsaEvent: EventId, then the EventData member.
//
// EventData is routed through the HsaEventData& specialization. Naming the
// streamer without the reference selected the generic fallback, which prints
// nothing.
template<>
struct output_streamer<HsaEvent&> {
  inline static std::ostream& put(std::ostream& out, HsaEvent& v) {
    roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.EventId);
    roctracer::kfd_support::output_streamer<HsaEventData&>::put(out, v.EventData);
    return out;
  }
};
template<>
struct output_streamer<HsaClockCounters&> {
inline static std::ostream& put(std::ostream& out, HsaClockCounters& v)
{
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.GPUClockCounter);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.CPUClockCounter);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SystemClockCounter);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SystemClockFrequencyHz);
return out;
}
};
template<>
struct output_streamer<HSA_UUID&> {
inline static std::ostream& put(std::ostream& out, HSA_UUID& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Data1);
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.Data2);
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.Data3);
roctracer::kfd_support::output_streamer<uint8_t>::put(out,v.Data4[8]);
return out;
}
};
template<>
struct output_streamer<HsaCounterFlags&> {
inline static std::ostream& put(std::ostream& out, HsaCounterFlags& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Global);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Resettable);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.ReadOnly);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Stream);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.Value);
return out;
}
};
template<>
struct output_streamer<HsaCounter&> {
inline static std::ostream& put(std::ostream& out, HsaCounter& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Type);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.CounterId);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CounterSizeInBits);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.CounterMask);
roctracer::kfd_support::output_streamer<HsaCounterFlags>::put(out,v.Flags);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.BlockIndex);
return out;
}
};
template<>
struct output_streamer<HsaCounterBlockProperties&> {
inline static std::ostream& put(std::ostream& out, HsaCounterBlockProperties& v)
{
roctracer::kfd_support::output_streamer<HSA_UUID>::put(out,v.BlockId);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCounters);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumConcurrent);
roctracer::kfd_support::output_streamer<HsaCounter>::put(out,v.Counters[1]);
return out;
}
};
template<>
struct output_streamer<HsaCounterProperties&> {
inline static std::ostream& put(std::ostream& out, HsaCounterProperties& v)
{
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumBlocks);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumConcurrent);
roctracer::kfd_support::output_streamer<HsaCounterBlockProperties>::put(out,v.Blocks[1]);
return out;
}
};
template<>
struct output_streamer<HsaPmcTraceRoot&> {
inline static std::ostream& put(std::ostream& out, HsaPmcTraceRoot& v)
{
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.TraceBufferMinSizeBytes);
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumberOfPasses);
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.TraceId);
return out;
}
};
// end ostream ops for KFD
};};
#include <inc/kfd_prof_str.h>
#endif // INC_ROCTRACER_KFD_H_
+536 -15
View File
@@ -1,16 +1,533 @@
#!/usr/bin/python
import os, sys, re

# Paths of the generated header and wrapper source.
OUT_H = 'inc/kfd_prof_str.h'
OUT_C = "src/kfd/kfd_wrapper.cpp"
OUT_CPP = 'src/kfd/kfd_wrapper.cpp'

# Header to parse, and the (declaration marker, header file) pairs handed to
# the description parser.
API_HEADER = "hsakmt.h"
API_HEADERS_H = (
    ('HSAKMTAPI', 'hsakmt.h'),
)

# Minimal placeholder header body. The closing guard uses a C++ '//' comment;
# it previously emitted two backslashes after '#endif', which leaves stray
# tokens on the directive.
content_h = \
    '#ifndef KFD_PROF_STR_H_\n' + \
    '#define KFD_PROF_STR_H_\n' + \
    '#endif // KFD_PROF_STR_H_\n'
# License banner placed at the top of every generated file.
LICENSE = (
    '/*\n'
    'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n'
    '\n'
    'Permission is hereby granted, free of charge, to any person obtaining a copy\n'
    'of this software and associated documentation files (the "Software"), to deal\n'
    'in the Software without restriction, including without limitation the rights\n'
    'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n'
    'copies of the Software, and to permit persons to whom the Software is\n'
    'furnished to do so, subject to the following conditions:\n'
    '\n'
    'The above copyright notice and this permission notice shall be included in\n'
    'all copies or substantial portions of the Software.\n'
    '\n'
    'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n'
    'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n'
    'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n'
    'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n'
    'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n'
    'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n'
    'THE SOFTWARE.\n'
    '*/\n'
)

# Placeholder body for the generated wrapper source.
content_c = 'namespace kfd { void fun() {}; } // namespace kfd\n'
#############################################################
# Error handler
def fatal(module, msg):
    """Report a fatal error on stderr and terminate with exit status 1.

    module -- name of the component reporting the error (message prefix)
    msg    -- error text; it is wrapped in double quotes in the output
    """
    # sys.stderr.write works on both Python 2 and Python 3, unlike the
    # Python-2-only "print >>stream" statement.
    sys.stderr.write(module + ' Error: "' + msg + '"\n')
    sys.exit(1)
# Get next text block
def NextBlock(pos, record):
    """Return the index just past the text block that starts at record[pos].

    A block is one of:
      * a run of whitespace,
      * a word made of word characters and '*', optionally followed by
        '[' / ']' characters,
      * a parenthesised group, from '(' to its matching ')'.

    An empty record returns `pos` unchanged. A position that starts none of
    the above, or an unbalanced parenthesis, is reported through fatal().
    """
    if len(record) == 0:
        return pos

    space_pattern = re.compile(r'(\s+)')
    word_pattern = re.compile(r'([\w\*]+\[*\]*)')

    if record[pos] != '(':
        # Whitespace takes precedence; otherwise try a word.
        m = space_pattern.match(record, pos)
        if not m:
            m = word_pattern.match(record, pos)
        if m:
            return pos + len(m.group(1))
        fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
    else:
        # Walk forward until the parenthesis opened at `pos` is closed.
        count = 0
        for index in range(pos, len(record)):
            if record[index] == '(':
                count = count + 1
            elif record[index] == ')':
                count = count - 1
                if count == 0:
                    index = index + 1
                    break
        if count != 0:
            fatal('NextBlock', "count is not zero (" + str(count) + ")")
        if record[index - 1] != ')':
            fatal('NextBlock', "last char is not ')' '" + record[index - 1] + "'")
        return index
#############################################################
# API table parser class
class API_TableParser:
    """Collects the API function declarations found in a header file.

    After construction:
      array    -- names of the parsed functions, in file order
      full_fct -- maps each function name to a normalised declaration of the
                  form "void NAME (ARGS)"

    A declaration starts at a line matching `name` at its first column and
    ends at the first following line that finishes with ');'.
    """

    def fatal(self, msg):
        fatal('API_TableParser', msg)

    def __init__(self, header, name, full_fct):
        """Parse `header`.

        header   -- path of the header file to read
        name     -- regular expression marking the start of a declaration
        full_fct -- stored, but replaced by a fresh dict when parsing starts
        """
        self.name = name
        self.full_fct = full_fct

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        self.beg_pattern = re.compile(name)
        self.end_pattern = re.compile(r'.*\)\s*;\s*$')
        self.array = []

        # The handle is only needed while parsing; close it afterwards so it
        # is not leaked.
        self.inp = open(header, 'r')
        try:
            self.parse()
        finally:
            self.inp.close()

    # normalizing a line: collapse leading whitespace into a single space
    def norm_line(self, line):
        return re.sub(r'^\s+', r' ', line)

    # strip a trailing '//' comment
    def fix_comment_line(self, line):
        return re.sub(r'\/\/.*', r'', line)

    # drop newline characters
    def remove_ret_line(self, line):
        return re.sub(r'\n', r'', line)

    # check for start record
    def is_start(self, record):
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        return re.match(r'^\s*HSAKMTAPI\s*(.*)\s*\((.*)\)', record)

    # parse method
    def parse(self):
        record = ""
        cumulate = 0
        self.full_fct = {}
        for line in self.inp.readlines():
            line = self.norm_line(line)
            line = self.fix_comment_line(line)

            # While inside a declaration, keep appending lines to the record.
            if cumulate == 1:
                record += " " + line
            else:
                record = line

            if self.is_start(line):
                cumulate = 1
                continue
            if not self.is_end(line):
                continue

            # End of a statement: flatten it and check whether it is an API
            # declaration.
            record = self.remove_ret_line(record)
            cumulate = 0
            m = self.is_entry(record)
            if m:
                mycall = m.group(1)
                self.full_fct[mycall] = "void " + m.group(1) + ' (' + m.group(2) + ')'
                self.array.append(mycall)
#############################################################
# API declaration parser class
class API_DeclParser:
    """Splits API declarations into return type, argument list and names.

    For every call in `array` an entry is added to `data`:
      'ret'  -- first text block of the declaration
      'args' -- argument list as a ', '-separated string
      'astr' -- maps argument name to its full "type name" text
      'alst' -- argument names, in order
      'tlst' -- full "type name" texts, in order
    """

    def fatal(self, msg):
        fatal('API_DeclParser', msg)

    def __init__(self, header, array, data, full_fct):
        """Fill `data` for each call in `array`.

        header   -- path of the API header; it must exist
        array    -- names of the calls to process
        data     -- output dict, updated in place; a call already present is
                    a fatal error
        full_fct -- maps call name to its normalised declaration text
        """
        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        self.end_pattern = re.compile(r'\)\s*;\s*$')
        self.data = data

        # Nothing is read from the handle; it is kept as an attribute and
        # closed once processing is done so it is not leaked.
        self.inp = open(header, 'r')
        try:
            for call in array:
                if call in data:
                    self.fatal(call + ' is already found')
                self.parse(call, full_fct)
        finally:
            self.inp.close()

    # check for start record
    def is_start(self, call, record):
        return re.search(r'\s*' + call + r'\s*\(', record)

    # check for API method record
    def is_api(self, call, record):
        return re.match(r'\s*' + call + r'\s*\(', record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.search(record)

    # parse method args
    def get_args(self, record):
        struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
        record = re.sub(r'^\s+', r'', record)
        # Attach '*' to the type: "T * x" / "T *x" -> "T* x".
        record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)

        rind = NextBlock(0, record)
        struct['ret'] = record[0:rind]

        # Isolate the text between the outer parentheses and normalise commas.
        pos = record.find('(')
        end = NextBlock(pos, record)
        args = record[pos:end]
        args = re.sub(r'^\(\s*', r'', args)
        args = re.sub(r'\s*\)$', r'', args)
        args = re.sub(r'\s*,\s*', r',', args)
        struct['args'] = re.sub(r',', r', ', args)

        if args == "void":
            return struct
        if len(args) == 0:
            return struct

        pos = 0
        args = args + ','  # sentinel so every argument ends with a comma
        while pos < len(args):
            ind1 = NextBlock(pos, args)   # type
            ind2 = NextBlock(ind1, args)  # space
            if args[ind2] != '(':
                # Plain argument: the name is the last block before the comma.
                while ind2 < len(args):
                    end = NextBlock(ind2, args)
                    if args[end] == ',':
                        break
                    else:
                        ind2 = end
                name = args[ind2:end]
            else:
                # Function-pointer argument: "(*name)(...)".
                ind3 = NextBlock(ind2, args)  # field
                m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3])
                if not m:
                    self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'")
                name = m.group(1)
                end = NextBlock(ind3, args)  # the rest

            item = args[pos:end]
            struct['astr'][name] = item
            struct['alst'].append(name)
            struct['tlst'].append(item)
            if args[end] != ',':
                self.fatal("no comma '" + args + "'")
            pos = end + 1

        return struct

    # parse given api
    def parse(self, call, full_fct):
        if call in full_fct:
            self.data[call] = self.get_args(full_fct[call])
        else:
            self.data[call] = self.get_args(call)
#############################################################
# API description parser class
class API_DescrParser:
def fatal(self, msg):
    """Report `msg` through the module-level fatal(), tagged with this class's name."""
    fatal('API_DescrParser', msg)
def __init__(self, out_file, kfd_dir, api_headers, license):
    """Parse the KFD API header and build the generated header/source text.

    out_file    -- output header path; used to derive the include-guard macro
    kfd_dir     -- prefix prepended to the header name from `api_headers`
    api_headers -- sequence of (declaration marker, header file) pairs; only
                   the first pair is used
    license     -- license banner placed at the top of both outputs

    Results are left in self.content_h and self.content_cpp.
    """
    out_macro = re.sub(r'[\/\.]', r'_', out_file.upper()) + '_'

    self.content_h = ''
    self.content_cpp = ''
    self.api_names = []
    self.api_calls = {}
    self.api_rettypes = set()
    self.api_id = {}

    api_data = {}
    full_fct = {}
    api_list = []
    ns_calls = []

    # Collect the declarations found in the API header.
    (name, header) = api_headers[0]
    api = API_TableParser(kfd_dir + header, name, full_fct)
    full_fct = api.full_fct
    api_list = api.array
    self.api_names.append(name)
    self.api_calls[name] = api_list

    for call in api_list:
        if call in api_data:
            self.fatal("call '" + call + "' is already found")

    # Split every declaration into return type and arguments.
    API_DeclParser(kfd_dir + header, api_list, api_data, full_fct)

    for call in api_list:
        if not call in api_data:
            # Not-supported functions
            ns_calls.append(call)
        else:
            # API ID map
            self.api_id[call] = 'KFD_API_ID_' + call
            # Return types
            self.api_rettypes.add(api_data[call]['ret'])

    self.api_rettypes.discard('void')
    self.api_data = api_data
    self.ns_calls = ns_calls

    # Generated header.
    self.content_h += "// automatically generated\n\n" + license + '\n'
    self.content_h += "/////////////////////////////////////////////////////////////////////////////\n"
    for call in self.ns_calls:
        self.content_h += '// ' + call + ' was not parsed\n'
    self.content_h += '\n'
    self.content_h += '#ifndef ' + out_macro + '\n'
    self.content_h += '#define ' + out_macro + '\n'
    self.content_h += '\n'
    self.content_h += '#include <dlfcn.h>\n'
    self.content_h += '#include <string.h>\n'
    self.content_h += '#include \"roctracer_kfd.h\"\n'
    self.content_h += '#include \"hsakmt.h\"\n'
    self.content_h += '#include \"cb_table.h\"\n'
    self.content_h += '#define PUBLIC_API __attribute__((visibility(\"default\")))\n'

    self.add_section('API ID enumeration', ' ', self.gen_id_enum)
    self.add_section('API arg structure', ' ', self.gen_arg_struct)

    self.content_h += '\n'
    self.content_h += '#if PROF_API_IMPL\n'
    self.content_h += 'namespace roctracer {\n'
    self.content_h += 'namespace kfd_support {\n'

    self.add_section('API get_name function', ' ', self.gen_get_name)
    self.add_section('API get_code function', ' ', self.gen_get_code)
    self.add_section('API intercepting code', '', self.gen_intercept_decl)
    self.add_section('API intercepting code', '', self.gen_intercept)
    self.add_section('API callback functions', '', self.gen_callbacks)

    self.content_h += '\n};};\n'
    self.content_h += '#endif // PROF_API_IMPL\n'

    # Generated wrapper source.
    self.content_cpp += "// automatically generated\n\n" + license + '\n'
    self.content_cpp += "/////////////////////////////////////////////////////////////////////////////\n\n"
    self.content_cpp += '#define PROF_API_IMPL 1\n'
    self.content_cpp += '#include \"kfd_prof_str.h\"\n'

    self.add_section('API output stream', ' ', self.gen_out_stream)
    self.add_section_cpp('API callback fcts', ' ', self.gen_public_api)

    # out_macro already ends with '_', so the closing comment now names the
    # same macro as the #ifndef above, and the file ends with a newline.
    self.content_h += '#endif // ' + out_macro + '\n'
    self.content_cpp += '}\n'
    self.content_cpp += '\n'
# add code section
def add_section_cpp(self, title, gap, fun):
  """Append one titled section to the generated .cpp text (self.content_cpp).

  title -- text placed after '// section: ' in the section banner.
  gap   -- prefix written before every '// block: <name> API' line; when it
           is the empty string, `fun` is additionally invoked with call '-'
           at the start of every API group except the first one reached
           with a non-zero counter.
  fun   -- generator callback invoked as fun(n, name, call, data):
           once with n == -1 before anything else, once per API call with a
           running counter n, and once at the end as fun(n, '-', '-', {}).
  """
  n = 0
  self.content_cpp += '\n// section: ' + title + '\n\n'
  # Prologue call: lets the generator emit its opening text.
  fun(-1, '-', '-', {})
  for name in self.api_names:
    if n != 0:
      # Group separator; only generators registered with an empty gap get it.
      if gap == '':
        fun(n, name, '-', {})
      self.content_cpp += '\n'
    self.content_cpp += gap + '// block: ' + name + ' API\n'
    for call in self.api_calls[name]:
      fun(n, name, call, self.api_data[call])
      n += 1
  # Epilogue call: n is the total number of calls processed.
  fun(n, '-', '-', {})
def add_section(self, title, gap, fun):
  """Append one titled section to the generated header text (self.content_h).

  title -- text placed after '// section: ' in the section banner.
  gap   -- prefix written before every '// block: <name> API' line; when it
           is the empty string, `fun` is additionally invoked with call '-'
           at the start of every API group except the first one reached
           with a non-zero counter.
  fun   -- generator callback invoked as fun(n, name, call, data):
           once with n == -1 before anything else, once per API call with a
           running counter n, and once at the end as fun(n, '-', '-', {}).
  """
  n = 0
  self.content_h += '\n// section: ' + title + '\n\n'
  # Prologue call: lets the generator emit its opening text.
  fun(-1, '-', '-', {})
  for name in self.api_names:
    if n != 0:
      # Group separator; only generators registered with an empty gap get it.
      if gap == '':
        fun(n, name, '-', {})
      self.content_h += '\n'
    self.content_h += gap + '// block: ' + name + ' API\n'
    for call in self.api_calls[name]:
      fun(n, name, call, self.api_data[call])
      n += 1
  # Epilogue call: n is the total number of calls processed.
  fun(n, '-', '-', {})
# check if it's an array decl
def is_arr(self, record):
  """Match `record` against '<type> <name>[]'.

  Returns the regex match object (group 1: text before the last whitespace
  run, group 2: text up to the '[]') or None when `record` has no '[]' form.
  """
  array_decl = re.compile(r'\s*(.*)\s+(.*)\[\]\s*')
  return array_decl.match(record)
# generate API ID enumeration
def gen_id_enum(self, n, name, call, data):
  """Emit the `kfd_api_id_t` enumeration into self.content_h.

  n == -1 opens the enum; a real `call` adds '<id> = <n>'; a '-' call closes
  the enum after adding KFD_API_ID_NUMBER (= n) and KFD_API_ID_ANY (= n + 1).
  """
  if n == -1:
    text = 'enum kfd_api_id_t {\n'
  elif call == '-':
    text = ''.join([
      '\n',
      ' KFD_API_ID_NUMBER = ' + str(n) + ',\n',
      ' KFD_API_ID_ANY = ' + str(n + 1) + ',\n',
      '};\n',
    ])
  else:
    text = ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
  self.content_h += text
# generate API args structure
def gen_arg_struct(self, n, name, call, struct):
  """Emit the `kfd_api_data_t` structure into self.content_h.

  n == -1 writes the header fields, a union with one '<type>_retval' member
  per entry of self.api_rettypes, and opens the arguments union.
  A real `call` adds an anonymous struct named after the call holding the
  declarations from struct['astr']; '[]' declarations are rewritten as
  pointers. A '-' call closes the union (as `args`) and the structure.
  """
  if n == -1:
    head = 'struct kfd_api_data_t {\n'
    head += ' uint64_t correlation_id;\n'
    head += ' uint32_t phase;\n'
    head += ' union {\n'
    self.content_h += head
    for ret_type in self.api_rettypes:
      self.content_h += ' ' + ret_type + ' ' + ret_type + '_retval;\n'
    self.content_h += ' };\n' + ' union {\n'
    return
  if call == '-':
    self.content_h += ' } args;\n' + '};\n'
    return
  self.content_h += ' struct {\n'
  for (var, decl) in struct['astr'].items():
    arr = self.is_arr(decl)
    # Array parameters cannot be stored by value; keep them as pointers.
    field = (arr.group(1) + '* ' + arr.group(2)) if arr else decl
    self.content_h += ' ' + field + ';\n'
  self.content_h += ' } ' + call + ';\n'
# generate API callbacks
def gen_callbacks(self, n, name, call, struct):
  """Emit the callback table and one tracing wrapper per API call.

  n == -1 writes the `cb_table_t` typedef and the `cb_table` instance.
  For a real `call` a function '<call>_callback' is written to
  self.content_h that: fills `kfd_api_data_t` from the arguments in
  struct['alst'], looks up the registered callback for the call's id,
  invokes it with phase 0, forwards to '<name>_table-><call>_fn', stores
  the result (non-void only), invokes the callback again with phase 1 and
  returns the result.

  Every wrapper starts with a NULL check on '<name>_table' that calls
  intercept_KFDApiTable(); previously only hsaKmtOpenKFD did, so any other
  intercepted entry point called first would dereference a NULL table.
  """
  if n == -1:
    self.content_h += 'typedef CbTable<KFD_API_ID_NUMBER> cb_table_t;\n'
    self.content_h += 'cb_table_t cb_table;\n'
    self.content_h += '\n'
  if call == '-':
    return
  call_id = self.api_id[call]
  ret_type = struct['ret']
  returns_value = ret_type != 'void'
  # Same notification line is emitted before and after the real call.
  notify = ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_KFD_API, ' + call_id + ', &api_data, api_callback_arg);\n'
  out = ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n'
  # Lazy initialisation of the dlsym()-resolved function table.
  out += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n'
  out += ' kfd_api_data_t api_data{};\n'
  for var in struct['alst']:
    # Array arguments are listed as 'x[]'; the variable itself is 'x'.
    plain = var.replace("[]","")
    out += ' api_data.args.' + call + '.' + plain + ' = ' + plain + ';\n'
  out += ' activity_rtapi_callback_t api_callback_fun = NULL;\n'
  out += ' void* api_callback_arg = NULL;\n'
  out += ' cb_table.get(' + call_id + ', &api_callback_fun, &api_callback_arg);\n'
  out += ' api_data.phase = 0;\n'
  out += notify
  if returns_value:
    out += ' ' + ret_type + ' ret = '
  forward = ' ' + name + '_table->' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
  out += forward.replace("[]","")
  if returns_value:
    out += ' api_data.' + ret_type + '_retval = ret;\n'
  out += ' api_data.phase = 1;\n'
  out += notify
  if returns_value:
    out += ' return ret;\n'
  out += '}\n'
  self.content_h += out
# Generates API intercepting table struct definition
def gen_intercept_decl(self, n, name, call, struct):
  """Emit the function-pointer table type into self.content_h.

  A '-' call with n > 0 closes the current struct as `HSAKMTAPI_table_t`;
  the first call (n == 0) or a '-' call carrying a real group name opens a
  new 'typedef struct {'; a real `call` adds a 'decltype(<call>)* <call>_fn'
  member.
  """
  is_separator = (call == '-')
  pieces = []
  if is_separator and n > 0:
    pieces.append('} HSAKMTAPI_table_t;\n') #was HSAKMTAPI_table_t
  if n == 0 or (is_separator and name != '-'):
    pieces.append('typedef struct {\n')
  if not is_separator:
    pieces.append(' decltype(' + call + ')* ' + call + '_fn;\n')
  self.content_h += ''.join(pieces)
# generate API intercepting code
def gen_intercept(self, n, name, call, struct):
  """Emit `intercept_KFDApiTable()` into self.content_h.

  A '-' call with n > 0 closes the function body; the first call (n == 0)
  or a '-' call carrying a real group name writes the '<name>_table'
  pointer and opens the function, which allocates a fresh table; a real
  `call` adds a line resolving '<call>' through dlsym(RTLD_NEXT, ...) into
  '<name>_table-><call>_fn'.
  """
  is_separator = (call == '-')
  table_type = name + '_table_t'
  table_var = name + '_table'
  emitted = ''
  if is_separator and n > 0:
    emitted += '};\n'
  if n == 0 or (is_separator and name != '-'):
    emitted += table_type + '* ' + table_var + ' = NULL;\n'
    emitted += 'void intercept_' + 'KFDApiTable' + '(void) {\n'
    emitted += ' ' + table_var + ' = new ' + table_type + '{}' + ';\n'
  if not is_separator:
    fn_type = call + '_t'
    emitted += ' typedef decltype(' + table_type + '::' + call + '_fn) ' + fn_type + ';\n'
    emitted += ' ' + table_var + '->' + call + '_fn = (' + fn_type + ')' + 'dlsym(RTLD_NEXT,\"' + call + '\");\n'
  self.content_h += emitted
# generate API name function
def gen_get_name(self, n, name, call, struct):
  """Emit `GetApiName(id)` into self.content_h.

  n == -1 opens the function and its switch; a real `call` adds a case
  returning the call's name; a '-' call closes the switch and adds the
  "unknown" fallback.
  """
  if n == -1:
    text = 'const char* GetApiName(const uint32_t& id) {\n' #static
    text += ' switch (id) {\n'
  elif call == '-':
    text = ' }\n' + ' return "unknown";\n' + '}\n'
  else:
    text = ' case ' + self.api_id[call] + ': return "' + call + '";\n'
  self.content_h += text
# generate API code function
def gen_get_code(self, n, name, call, struct):
  """Emit `GetApiCode(str)` into self.content_h.

  n == -1 opens the function; a real `call` adds a strcmp() test returning
  the call's id; a '-' call adds the KFD_API_ID_NUMBER fallback and closes
  the function.
  """
  if n == -1:
    text = 'uint32_t GetApiCode(const char* str) {\n' # static
  elif call == '-':
    text = ' return KFD_API_ID_NUMBER;\n' + '}\n'
  else:
    text = ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n'
  self.content_h += text
# generate stream operator
def gen_out_stream(self, n, name, call, struct):
  """Emit the `operator<<` for (id, kfd_api_data_t) pairs into self.content_h.

  n == -1 opens the operator and its switch on the call id; a real `call`
  adds a case printing '<call>(' followed by each argument of
  struct['alst'] through output_streamer<>::put and then the return value
  (or 'void'); arguments whose name contains 'MemFlags' are skipped. A '-'
  call adds the aborting default case, closes the operator and appends a
  placeholder `operator<<` for HsaMemFlags to self.content_cpp.
  """
  if n == -1:
    self.content_h += ''.join([
      'typedef std::pair<uint32_t, kfd_api_data_t> kfd_api_data_pair_t;\n',
      'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n',
      ' const uint32_t cid = data_pair.first;\n',
      ' const kfd_api_data_t& api_data = data_pair.second;\n',
      ' switch(cid) {\n',
    ])
    return
  if call == '-':
    self.content_h += ''.join([
      ' default:\n',
      ' out << "ERROR: unknown API";\n',
      ' abort();\n',
      ' }\n',
      ' return out;\n',
      '}\n',
    ])
    self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n'
    return
  self.content_h += ' case ' + self.api_id[call] + ': {\n'
  self.content_h += ' out << "' + call + '(";\n'
  arg_list = struct['alst']
  last_ind = len(arg_list) - 1
  for ind, arg_var in enumerate(arg_list):
    if re.search(r'MemFlags',arg_var):
      continue
    # Array arguments are listed as 'x[]'; the stored field is 'x'.
    field = ('api_data.args.' + call + '.' + arg_var).replace("[]","")
    type_alias = 'arg_val_type_t' + str(ind)
    self.content_h += ' typedef decltype(' + field + ') ' + type_alias + ';\n'
    self.content_h += ' roctracer::kfd_support::output_streamer<' + type_alias + '>::put(out, ' + field + ')'
    # The separator decision uses the position in the full argument list.
    self.content_h += ' << ", ";\n' if ind < last_ind else ';\n'
  if struct['ret'] == 'void':
    self.content_h += ' out << ") = void";\n'
  else:
    self.content_h += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n'
  self.content_h += ' break;\n'
  self.content_h += ' }\n'
# generate PUBLIC_API for all API fcts
def gen_public_api(self, n, name, call, struct):
  """Emit the exported C entry points into self.content_cpp.

  n == -1 opens an 'extern "C"' block and writes RegisterApiCallback /
  RemoveApiCallback, which set or clear an entry of
  roctracer::kfd_support::cb_table.
  For a real `call` an exported function with the call's own name is
  written that forwards its arguments (struct['alst'], '[]' stripped) to
  'roctracer::kfd_support::<call>_callback'.

  The wrapper is declared with the call's real return type (struct['ret'])
  and returns what the callback wrapper returns; it used to be declared as
  HSAKMT_STATUS and always return HSAKMT_STATUS_SUCCESS, which hid every
  failure of the wrapped function from the caller.
  """
  if n == -1:
    self.content_cpp += 'extern "C" {\n'
    self.content_cpp += 'PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* user_data) {\n'
    self.content_cpp += ' roctracer::kfd_support::cb_table.set(op, reinterpret_cast<activity_rtapi_callback_t>(callback), user_data);\n'
    self.content_cpp += ' return true;\n'
    self.content_cpp += '}\n'
    self.content_cpp += 'PUBLIC_API bool RemoveApiCallback(uint32_t op) {\n'
    self.content_cpp += ' roctracer::kfd_support::cb_table.set(op, NULL, NULL);\n'
    self.content_cpp += ' return true;\n'
    self.content_cpp += '}\n\n'
  if call == '-':
    return
  ret_type = struct['ret']
  # Array arguments are listed as 'x[]'; the variable itself is 'x'.
  arg_names = ', '.join([arg.replace("[]","") for arg in struct['alst']])
  forward = 'roctracer::kfd_support::' + call + '_callback(' + arg_names + ');'
  if ret_type != 'void':
    # Propagate the wrapped function's result instead of a constant.
    forward = 'return ' + forward
  self.content_cpp += 'PUBLIC_API ' + ret_type + ' ' + call + '(' + struct['args'] + ') { ' + forward + ' }\n'
#############################################################
# main
@@ -22,14 +539,18 @@ else:
ROOT = sys.argv[1] + '/'
KFD_DIR = sys.argv[2] + '/'
descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE)
out_h_file = ROOT + OUT_H
out_c_file = ROOT + OUT_C
print 'Generating: "' + out_h_file + '", ' + out_c_file + '"'
f = open(out_h_file, 'w')
f.write(content_h)
out_file = ROOT + OUT_H
print 'Generating "' + out_file + '"'
f = open(out_file, 'w')
f.write(descr.content_h[:-1])
f.close()
f = open(out_c_file, 'w')
f.write(content_c)
out_file = ROOT + OUT_CPP
print 'Generating "' + out_file + '"'
f = open(out_file, 'w')
f.write(descr.content_cpp[:-1])
f.close()
#############################################################
+2 -2
View File
@@ -10,7 +10,7 @@ set ( LIB_SRC
${LIB_DIR}/util/hsa_rsrc_factory.cpp
)
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} )
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" )
@@ -19,7 +19,7 @@ set ( KFD_LIB_SRC
${LIB_DIR}/kfd/kfd_wrapper.cpp
)
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} )
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
+17 -4
View File
@@ -26,6 +26,7 @@ THE SOFTWARE.
#include "inc/roctracer_roctx.h"
#define PROF_API_IMPL 1
#include "inc/roctracer_hsa.h"
#include "inc/roctracer_kfd.h"
#include <atomic>
#include <mutex>
@@ -633,6 +634,10 @@ PUBLIC_API const char* roctracer_op_string(
return roctracer::HipLoader::Instance().ApiName(op);
break;
}
case ACTIVITY_DOMAIN_KFD_API: {
return roctracer::kfd_support::GetApiName(op);
break;
}
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
@@ -653,6 +658,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
if (kind != NULL) *kind = 0;
break;
}
case ACTIVITY_DOMAIN_KFD_API: {
*op = roctracer::kfd_support::GetApiCode(str);
if (kind != NULL) *kind = 0;
break;
}
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")");
}
@@ -665,6 +675,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) {
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER;
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER;
case ACTIVITY_DOMAIN_EXT_API: return 0;
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
default:
@@ -681,13 +692,11 @@ static void roctracer_enable_callback_impl(
void* user_data)
{
switch (domain) {
#if 0
case ACTIVITY_DOMAIN_KFD_API: {
const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_OPS: break;
case ACTIVITY_DOMAIN_HSA_API: {
roctracer::hsa_support::cb_table.set(op, callback, user_data);
@@ -751,13 +760,11 @@ static void roctracer_disable_callback_impl(
uint32_t op)
{
switch (domain) {
#if 0
case ACTIVITY_DOMAIN_KFD_API: {
const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op);
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error");
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_OPS: break;
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_HCC_OPS: break;
@@ -856,6 +863,7 @@ static void roctracer_enable_activity_impl(
break;
}
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_KFD_API: break;
case ACTIVITY_DOMAIN_HCC_OPS: {
if (roctracer::HccLoader::GetRef() == NULL) {
roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback,
@@ -920,6 +928,7 @@ static void roctracer_disable_activity_impl(
break;
}
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_KFD_API: break;
case ACTIVITY_DOMAIN_HCC_OPS: {
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")");
@@ -1039,6 +1048,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
break;
}
case ACTIVITY_DOMAIN_KFD_API: {
roctracer::kfd_support::intercept_KFDApiTable();
break;
}
case ACTIVITY_DOMAIN_HSA_API: {
// HSA API properties
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
+1 -1
View File
@@ -44,7 +44,7 @@ file( GLOB UTIL_SRC "${HSA_TEST_DIR}/util/*.cpp" )
set ( TEST_LIB "tracer_tool" )
set ( TEST_LIB_SRC ${TEST_DIR}/tool/tracer_tool.cpp ${UTIL_SRC} )
add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} )
target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} )
target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} )
target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt )
## Build HSA test
+1 -1
View File
@@ -59,7 +59,7 @@ eval_test() {
# Standalone test
# rocTracer is used explicitly by test
eval_test "standalone HIP test" ./test/MatrixTranspose_test
eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test"
# Tool test
# rocTracer/tool is loaded by HSA runtime
+55 -2
View File
@@ -31,6 +31,7 @@ THE SOFTWARE.
#include <inc/roctracer_hsa.h>
#include <inc/roctracer_hip.h>
#include <inc/roctracer_hcc.h>
#include <inc/roctracer_kfd.h>
#include <inc/ext/hsa_rt_utils.hpp>
#include <src/core/loader.h>
#include <src/core/trace_buffer.h>
@@ -58,9 +59,11 @@ typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
hsa_rt_utils::Timer* timer = NULL;
thread_local timestamp_t hsa_begin_timestamp = 0;
thread_local timestamp_t hip_begin_timestamp = 0;
thread_local timestamp_t kfd_begin_timestamp = 0;
bool trace_hsa_api = false;
bool trace_hsa_activity = false;
bool trace_hip = false;
bool trace_kfd = false;
LOADER_INSTANTIATE();
@@ -69,6 +72,7 @@ FILE* hsa_api_file_handle = NULL;
FILE* hsa_async_copy_file_handle = NULL;
FILE* hip_api_file_handle = NULL;
FILE* hcc_activity_file_handle = NULL;
FILE* kfd_api_file_handle = NULL;
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
@@ -79,12 +83,31 @@ void fatal(const std::string msg) {
fflush(hsa_async_copy_file_handle);
fflush(hip_api_file_handle);
fflush(hcc_activity_file_handle);
fflush(kfd_api_file_handle);
fflush(stdout);
fprintf(stderr, "%s\n\n", msg.c_str());
fflush(stderr);
abort();
}
// KFD API callback function
void kfd_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
kfd_begin_timestamp = timer->timestamp_fn_ns();
} else {
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
std::ostringstream os;
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
}
}
// C++ symbol demangle
static inline const char* cxx_demangle(const char* symbol) {
size_t funcnamesize;
@@ -296,10 +319,8 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id);
fflush(hcc_activity_file_handle);
} else {
#if 0
fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n",
record->begin_ns, record->end_ns, record->process_id, record->thread_id, name);
#endif
}
ROCTRACER_CALL(roctracer_next_record(record, &record));
}
@@ -394,6 +415,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
}
}
trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0);
// Output file
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
if (output_prefix != NULL) {
@@ -408,6 +431,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
// API trace vector
std::vector<std::string> hsa_api_vec;
std::vector<std::string> kfd_api_vec;
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
// XML input
@@ -437,6 +461,11 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
trace_hsa_api = true;
hsa_api_vec = api_vec;
}
if (name == "KFD") {
found = true;
trace_kfd = true;
kfd_api_vec = api_vec;
}
if (name == "GPU") {
found = true;
trace_hsa_activity = true;
@@ -473,6 +502,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
printf(")\n");
}
if (trace_kfd) {
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
// initialize KFD tracing
roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL);
printf(" KFD-trace(");
if (kfd_api_vec.size() != 0) {
for (unsigned i = 0; i < kfd_api_vec.size(); ++i) {
uint32_t cid = KFD_API_ID_NUMBER;
const char* api = kfd_api_vec[i].c_str();
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid));
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL));
printf(" %s", api);
}
} else {
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL));
}
printf(")\n");
}
if (trace_hsa_activity) {
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
@@ -542,6 +590,11 @@ void tool_unload(bool destruct) {
close_output_file(hip_api_file_handle);
close_output_file(hcc_activity_file_handle);
}
if (trace_kfd) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
fclose(kfd_api_file_handle);
}
if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); }
}