@@ -31,8 +31,9 @@ typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 4, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 5, // ROCTX domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
ACTIVITY_DOMAIN_NUMBER
|
||||
} activity_domain_t;
|
||||
|
||||
|
||||
@@ -0,0 +1,498 @@
|
||||
/*
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef INC_ROCTRACER_KFD_H_
|
||||
#define INC_ROCTRACER_KFD_H_
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
#include <hsa.h>
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "hsakmt.h"
|
||||
|
||||
namespace roctracer {
|
||||
namespace kfd_support {
|
||||
// Generic fallback: a type without a dedicated specialization streams nothing.
template <typename T>
struct output_streamer {
  inline static std::ostream& put(std::ostream& out, const T& v) { return out; }
};

// Scalar specializations. Each writes "<TYPE 0xHEX>" and switches the stream
// back to decimal afterwards.
template <>
struct output_streamer<bool> {
  inline static std::ostream& put(std::ostream& out, bool v) {
    out << std::hex << "<bool " << "0x" << v << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint8_t> {
  inline static std::ostream& put(std::ostream& out, uint8_t v) {
    // Widen before streaming: operator<< treats uint8_t as a character, which
    // would emit a raw byte after the "0x" prefix instead of hex digits.
    out << std::hex << "<uint8_t " << "0x" << static_cast<unsigned>(v) << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint16_t> {
  inline static std::ostream& put(std::ostream& out, uint16_t v) {
    out << std::hex << "<uint16_t " << "0x" << v << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint32_t> {
  inline static std::ostream& put(std::ostream& out, uint32_t v) {
    out << std::hex << "<uint32_t " << "0x" << v << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint64_t> {
  inline static std::ostream& put(std::ostream& out, uint64_t v) {
    out << std::hex << "<uint64_t " << "0x" << v << std::dec << ">";
    return out;
  }
};

// Pointer-to-scalar specializations. They print the pointee in the same
// format as the value forms; a null pointer is reported as "<TYPE NULL>"
// instead of being dereferenced.
template <>
struct output_streamer<bool*> {
  inline static std::ostream& put(std::ostream& out, bool* v) {
    if (v == nullptr) return out << "<bool NULL>";
    out << std::hex << "<bool " << "0x" << *v << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint8_t*> {
  inline static std::ostream& put(std::ostream& out, uint8_t* v) {
    if (v == nullptr) return out << "<uint8_t NULL>";
    // Same widening as the value form, so the byte prints as a number.
    out << std::hex << "<uint8_t " << "0x" << static_cast<unsigned>(*v) << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint16_t*> {
  inline static std::ostream& put(std::ostream& out, uint16_t* v) {
    if (v == nullptr) return out << "<uint16_t NULL>";
    out << std::hex << "<uint16_t " << "0x" << *v << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint32_t*> {
  inline static std::ostream& put(std::ostream& out, uint32_t* v) {
    if (v == nullptr) return out << "<uint32_t NULL>";
    out << std::hex << "<uint32_t " << "0x" << *v << std::dec << ">";
    return out;
  }
};
template <>
struct output_streamer<uint64_t*> {
  inline static std::ostream& put(std::ostream& out, uint64_t* v) {
    if (v == nullptr) return out << "<uint64_t NULL>";
    out << std::hex << "<uint64_t " << "0x" << *v << std::dec << ">";
    return out;
  }
};
|
||||
|
||||
template<>
|
||||
struct output_streamer<hsa_queue_t*> {
|
||||
inline static std::ostream& put(std::ostream& out, hsa_queue_t* v) { out << "<queue " << v << ">"; return out; }
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<hsa_queue_t**> {
|
||||
inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) { out << "<queue " << *v << ">"; return out; }
|
||||
};
|
||||
// begin ostream ops for KFD
|
||||
template<>
|
||||
struct output_streamer<HsaVersionInfo&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaVersionInfo& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.KernelInterfaceMajorVersion);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.KernelInterfaceMinorVersion);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaSystemProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaSystemProperties& v) {
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumNodes);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.PlatformOem);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.PlatformId);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.PlatformRev);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HSA_CAPABILITY&> {
|
||||
inline static std::ostream& put(std::ostream& out, HSA_CAPABILITY& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Value);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HotPluggable);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HSAMMUPresent);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.SharedWithGraphics);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.QueueSizePowerOfTwo);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.QueueSize32bit);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.QueueIdleEvent);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.VALimit);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.WatchPointsSupported);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.WatchPointsTotalBits);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.DoorbellType);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaNodeProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaNodeProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCPUCores);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumFComputeCores);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumMemoryBanks);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCaches);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumIOLinks);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CComputeIdLo);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.FComputeIdLo);
|
||||
roctracer::kfd_support::output_streamer<HSA_CAPABILITY&>::put(out,v.Capability);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxWavesPerSIMD);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.LDSSizeInKB);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.GDSSizeInKB);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.WaveFrontSize);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumShaderBanks);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumArrays);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCUPerArray);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumSIMDPerCU);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxSlotsScratchCU);
|
||||
roctracer::kfd_support::output_streamer<HSA_ENGINE_ID>::put(out,v.EngineId);
|
||||
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.VendorId);
|
||||
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.DeviceId);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.LocationId);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.LocalMemSize);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxEngineClockMhzFCompute);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaxEngineClockMhzCCompute);
|
||||
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.MarketingName[HSA_PUBLIC_NAME_SIZE]);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HSA_MEMORYPROPERTY&> {
|
||||
inline static std::ostream& put(std::ostream& out, HSA_MEMORYPROPERTY& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MemoryProperty);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HotPluggable);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NonVolatile);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaMemoryProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaMemoryProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.HeapType);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SizeInBytes);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.SizeInBytesLow);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.SizeInBytesHigh);
|
||||
roctracer::kfd_support::output_streamer<HSA_MEMORYPROPERTY>::put(out,v.Flags);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Width);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MemoryClockMax);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.VirtualBaseAddress);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCacheType&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCacheType& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Value);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Data);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Instruction);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.CPU);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HSACU);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCacheProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCacheProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ProcessorIdLow);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLevel);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheSize);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLineSize);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLinesPerTag);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheAssociativity);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CacheLatency);
|
||||
roctracer::kfd_support::output_streamer<HsaCacheType>::put(out,v.CacheType);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.SiblingMap[HSA_CPU_SIBLINGS]);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCComputeProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCComputeProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.SiblingMap[HSA_CPU_SIBLINGS]);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HSA_LINKPROPERTY&> {
|
||||
inline static std::ostream& put(std::ostream& out, HSA_LINKPROPERTY& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.LinkProperty);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Override);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NonCoherent);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NoAtomics32bit);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NoAtomics64bit);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaIoLinkProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaIoLinkProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.IoLinkType);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.VersionMajor);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.VersionMinor);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeFrom);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeTo);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Weight);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MinimumLatency);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaximumLatency);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MinimumBandwidth);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.MaximumBandwidth);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.RecTransferSize);
|
||||
roctracer::kfd_support::output_streamer<HSA_LINKPROPERTY&>::put(out,v.Flags);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaMemFlags&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaMemFlags& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NonPaged);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.CachePolicy);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.ReadOnly);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.PageSize);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.HostAccess);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.NoSubstitute);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.GDSMemory);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Scratch);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.AtomicAccessFull);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.AtomicAccessPartial);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.ExecuteAccess);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Value);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaQueueResource&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaQueueResource& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueId);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,*(v.Queue_DoorBell));
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,*(v.Queue_DoorBell_aql));
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueDoorBell);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,*(v.Queue_write_ptr));
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,*(v.Queue_write_ptr_aql));
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueWptrValue);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,*(v.Queue_read_ptr));
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,*(v.Queue_read_ptr_aql));
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueRptrValue);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaQueueReport&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaQueueReport& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.VMID);
|
||||
out << "<void *" << v.QueueAddress << ">";
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.QueueSize);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaDbgWaveMsgAMDGen2&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaDbgWaveMsgAMDGen2& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.Value);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.Reserved2);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaDbgWaveMessageAMD&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaDbgWaveMessageAMD& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<HsaDbgWaveMsgAMDGen2>::put(out,v.WaveMsgInfoGen2);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaDbgWaveMessage&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaDbgWaveMessage& v)
|
||||
{
|
||||
out << "<void* " << v.MemoryVA << ">";
|
||||
roctracer::kfd_support::output_streamer<HsaDbgWaveMessageAMD>::put(out,v.DbgWaveMsg);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaSyncVar&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaSyncVar& v)
|
||||
{
|
||||
out << "<void * " << v.SyncVar.UserData << ">";
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SyncVar.UserDataPtrValue);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SyncVarSize);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaNodeChange&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaNodeChange& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Flags);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaDeviceStateChange&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaDeviceStateChange& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeId);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Device);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Flags);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaAccessAttributeFailure&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaAccessAttributeFailure& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NotPresent);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ReadOnly);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NoExecute);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.GpuAccess);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ECC);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Reserved);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaMemoryAccessFault&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaMemoryAccessFault& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeId);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.VirtualAddress);
|
||||
roctracer::kfd_support::output_streamer<HsaAccessAttributeFailure>::put(out,v. Failure);
|
||||
roctracer::kfd_support::output_streamer<int>::put(out,v.Flags);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaEventData&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaEventData& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.EventType);
|
||||
roctracer::kfd_support::output_streamer<HsaSyncVar>::put(out,v.EventData.SyncVar);
|
||||
roctracer::kfd_support::output_streamer<HsaNodeChange>::put(out,v.EventData.NodeChangeState);
|
||||
roctracer::kfd_support::output_streamer<HsaDeviceStateChange>::put(out,v.EventData.DeviceState);
|
||||
roctracer::kfd_support::output_streamer<HsaMemoryAccessFault>::put(out,v.EventData.MemoryAccessFault);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.HWData1);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.HWData2);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.HWData3);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaEventDescriptor&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaEventDescriptor& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.EventType);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NodeId);
|
||||
roctracer::kfd_support::output_streamer<HsaSyncVar>::put(out,v.SyncVar);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaEvent&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaEvent& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.EventId);
|
||||
roctracer::kfd_support::output_streamer<HsaEventData>::put(out,v.EventData);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaClockCounters&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaClockCounters& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.GPUClockCounter);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.CPUClockCounter);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SystemClockCounter);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.SystemClockFrequencyHz);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HSA_UUID&> {
|
||||
inline static std::ostream& put(std::ostream& out, HSA_UUID& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Data1);
|
||||
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.Data2);
|
||||
roctracer::kfd_support::output_streamer<uint16_t>::put(out,v.Data3);
|
||||
roctracer::kfd_support::output_streamer<uint8_t>::put(out,v.Data4[8]);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCounterFlags&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCounterFlags& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Global);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Resettable);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.ReadOnly);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Stream);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.ui32.Reserved);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out, v.Value);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCounter&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCounter& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.Type);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.CounterId);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.CounterSizeInBits);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.CounterMask);
|
||||
roctracer::kfd_support::output_streamer<HsaCounterFlags>::put(out,v.Flags);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.BlockIndex);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCounterBlockProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCounterBlockProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<HSA_UUID>::put(out,v.BlockId);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumCounters);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumConcurrent);
|
||||
roctracer::kfd_support::output_streamer<HsaCounter>::put(out,v.Counters[1]);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaCounterProperties&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaCounterProperties& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumBlocks);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumConcurrent);
|
||||
roctracer::kfd_support::output_streamer<HsaCounterBlockProperties>::put(out,v.Blocks[1]);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<>
|
||||
struct output_streamer<HsaPmcTraceRoot&> {
|
||||
inline static std::ostream& put(std::ostream& out, HsaPmcTraceRoot& v)
|
||||
{
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.TraceBufferMinSizeBytes);
|
||||
roctracer::kfd_support::output_streamer<uint32_t>::put(out,v.NumberOfPasses);
|
||||
roctracer::kfd_support::output_streamer<uint64_t>::put(out,v.TraceId);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
// end ostream ops for KFD
|
||||
};};
|
||||
|
||||
#include <inc/kfd_prof_str.h>
|
||||
|
||||
#endif // INC_ROCTRACER_KFD_H_
|
||||
@@ -1,16 +1,533 @@
|
||||
#!/usr/bin/python
|
||||
import os, sys, re
|
||||
|
||||
# Output locations of the generated sources and the API header(s) to scan.
OUT_H = 'inc/kfd_prof_str.h'
OUT_C = "src/kfd/kfd_wrapper.cpp"
OUT_CPP = 'src/kfd/kfd_wrapper.cpp'
API_HEADER = "hsakmt.h"
# (declaration marker, header file) pairs.
API_HEADERS_H = (
    ('HSAKMTAPI', 'hsakmt.h'),
)

# Skeleton of the generated header. The trailing comment on '#endif' must use
# '//'; backslashes there are stray tokens, not a C++ comment.
content_h = (
    '#ifndef KFD_PROF_STR_H_\n'
    '#define KFD_PROF_STR_H_\n'
    '#endif // KFD_PROF_STR_H_\n'
)
|
||||
# License banner, as a C block comment.
LICENSE = (
    '/*\n'
    'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n'
    '\n'
    'Permission is hereby granted, free of charge, to any person obtaining a copy\n'
    'of this software and associated documentation files (the "Software"), to deal\n'
    'in the Software without restriction, including without limitation the rights\n'
    'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n'
    'copies of the Software, and to permit persons to whom the Software is\n'
    'furnished to do so, subject to the following conditions:\n'
    '\n'
    'The above copyright notice and this permission notice shall be included in\n'
    'all copies or substantial portions of the Software.\n'
    '\n'
    'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n'
    'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n'
    'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n'
    'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n'
    'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n'
    'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n'
    'THE SOFTWARE.\n'
    '*/\n'
)

# Placeholder body for the generated wrapper source.
content_c = 'namespace kfd { void fun() {}; } // namespace kfd\n'
|
||||
#############################################################
|
||||
# Error handler
|
||||
def fatal(module, msg):
    """Report a fatal error on stderr and terminate with exit status 1.

    module -- name of the component reporting the error
    msg    -- error text, printed inside double quotes

    Raises SystemExit(1) through sys.exit.
    """
    # Write to the stream directly: 'print >>sys.stderr, ...' is Python 2-only
    # syntax and raises TypeError when evaluated by Python 3.
    sys.stderr.write(module + ' Error: "' + msg + '"\n')
    sys.exit(1)
|
||||
|
||||
# Get next text block
|
||||
def NextBlock(pos, record):
    """Return the index just past the text block of `record` starting at `pos`.

    A block is a run of whitespace, a word (word characters and '*',
    optionally followed by '[' / ']' characters), or a balanced parenthesised
    group when record[pos] is '('. An empty record returns `pos` unchanged.
    Anything else is reported through fatal().
    """
    if len(record) == 0:
        return pos

    if record[pos] == '(':
        # Scan forward until the parenthesis opened at `pos` is closed.
        depth = 0
        for index in range(pos, len(record)):
            ch = record[index]
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    index += 1
                    break
        if depth != 0:
            fatal('NextBlock', "count is not zero (" + str(depth) + ")")
        if record[index-1] != ')':
            fatal('NextBlock', "last char is not ')' '" + record[index-1] + "'")
        return index

    # Whitespace takes precedence over a word at the current position.
    found = re.compile(r'(\s+)').match(record, pos)
    if found is None:
        found = re.compile(r'([\w\*]+\[*\]*)').match(record, pos)
    if found is None:
        fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
    else:
        return pos + len(found.group(1))
|
||||
|
||||
#############################################################
|
||||
# API table parser class
|
||||
class API_TableParser:
    """Collects the API declarations found in a header file.

    After construction, `array` lists the text captured before the argument
    list of each matched declaration, in file order, and `full_fct` maps each
    of those to a string of the form "void <text> (<args>)".
    """

    def fatal(self, msg):
        """Report `msg` as a fatal error of this parser."""
        fatal('API_TableParser', msg)

    def __init__(self, header, name, full_fct):
        """Parse `header`.

        header   -- path of the header file to scan
        name     -- regular expression marking the start of a declaration
        full_fct -- stored on the instance. NOTE: parse() replaces it with a
                    fresh dict, so the caller's dict is not filled in.
        """
        self.name = name
        self.full_fct = full_fct

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        self.beg_pattern = re.compile(name)
        # Raw string: '\)' and '\s' are regex escapes, not string escapes.
        self.end_pattern = re.compile(r'.*\)\s*;\s*$')
        self.array = []

        # The handle is only needed while parsing; close it afterwards so it
        # is not leaked for the lifetime of the parser object.
        self.inp = open(header, 'r')
        try:
            self.parse()
        finally:
            self.inp.close()

    # normalizing a line
    def norm_line(self, line):
        """Collapse leading whitespace to a single space."""
        return re.sub(r'^\s+', r' ', line)

    def fix_comment_line(self, line):
        """Strip a trailing '//' comment."""
        return re.sub(r'\/\/.*', r'', line)

    def remove_ret_line(self, line):
        """Remove every newline character."""
        return re.sub(r'\n', r'', line)

    # check for start record
    def is_start(self, record):
        """Match the start pattern at the beginning of `record`."""
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        """Match a record whose end is ')' followed by ';'."""
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        """Match 'HSAKMTAPI <text> ( <args> )'; group 1 is <text>, group 2 <args>."""
        return re.match(r'^\s*HSAKMTAPI\s*(.*)\s*\((.*)\)', record)

    # parse method
    def parse(self):
        """Scan the open header, filling `array` and `full_fct`."""
        record = ""
        cumulate = 0
        self.full_fct = {}
        for line in self.inp.readlines():
            line = self.fix_comment_line(self.norm_line(line))

            # While inside a declaration, keep appending lines to the record.
            if cumulate == 1:
                record += " " + line
            else:
                record = line
            if self.is_start(line):
                cumulate = 1
                continue
            if not self.is_end(line):
                continue

            # End of a declaration: flatten it and try to extract the entry.
            record = self.remove_ret_line(record)
            cumulate = 0
            m = self.is_entry(record)
            if m:
                mycall = m.group(1)
                self.full_fct[mycall] = "void " + mycall + ' (' + m.group(2) + ')'
                self.array.append(mycall)
|
||||
|
||||
#############################################################
|
||||
# API declaration parser class
|
||||
class API_DeclParser:
|
||||
def fatal(self, msg):
    """Report `msg` as a fatal error of the declaration parser."""
    fatal(module='API_DeclParser', msg=msg)
|
||||
|
||||
def __init__(self, header, array, data, full_fct):
  """Fill `data` with the parsed signature of every call listed in `array`.

  header   -- path of the header file; a missing file is reported through
              self.fatal()
  array    -- iterable of call names to process
  data     -- dict updated in place: data[call] receives the structure
              produced by get_args(); a name already present is fatal
  full_fct -- dict mapping call names to full declaration strings, handed
              to parse() for each call
  """
  if not os.path.isfile(header):
    self.fatal("file '" + header + "' not found")

  # No method of this class reads from the header, so the handle is closed
  # right away; opening it still fails early on an unreadable file and
  # keeps the `inp` attribute in place.
  self.inp = open(header, 'r')
  self.inp.close()

  self.end_pattern = re.compile(r'\)\s*;\s*$')
  self.data = data
  for call in array:
    if call in data:
      self.fatal(call + ' is already found')
    self.parse(call, full_fct)
|
||||
|
||||
# check for start record
|
||||
def is_start(self, call, record):
  """Search `record` for `call` followed by `(`, with optional whitespace around it.

  `call` is inserted into the pattern as-is (not escaped). Returns the
  match object or None.
  """
  start_re = re.compile(r'\s*' + call + r'\s*\(')
  return start_re.search(record)
|
||||
|
||||
# check for API method record
|
||||
def is_api(self, call, record):
  """Check that `record` begins (after optional whitespace) with `call` and `(`.

  `call` is inserted into the pattern as-is (not escaped). Returns the
  match object or None.
  """
  api_re = re.compile(r'\s*' + call + r'\s*\(')
  return api_re.match(record)
|
||||
|
||||
|
||||
# check for end record
|
||||
def is_end(self, record):
  """Search `record` for self.end_pattern; returns the match object or None."""
  hit = self.end_pattern.search(record)
  return hit
|
||||
|
||||
# parse method args
|
||||
def get_args(self, record):
  """Split a declaration string into its return type and argument pieces.

  Returns a dict with the keys:
    'ret'  -- record[0:NextBlock(0, record)]
    'args' -- the argument list text with ', ' separators
    'astr' -- argument name -> full argument text
    'alst' -- argument names, in order
    'tlst' -- full argument texts, in order
  An argument list that is empty or exactly "void" leaves the three
  per-argument containers empty. Block boundaries come from the
  module-level NextBlock() helper (defined outside this method).
  """
  struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
  record = re.sub(r'^\s+', r'', record)
  # Attach each run of '*' to the text on its left, followed by one space
  record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)
  ret_end = NextBlock(0, record)
  struct['ret'] = record[0:ret_end]

  open_paren = record.find('(')
  stop = NextBlock(open_paren, record)
  args = record[open_paren:stop]
  args = re.sub(r'^\(\s*', r'', args)
  args = re.sub(r'\s*\)$', r'', args)
  args = re.sub(r'\s*,\s*', r',', args)
  struct['args'] = re.sub(r',', r', ', args)
  if args == "void" or len(args) == 0:
    return struct

  # Trailing comma so that every argument, including the last, ends on ','
  args = args + ','
  cursor = 0
  while cursor < len(args):
    type_end = NextBlock(cursor, args)    # type
    name_beg = NextBlock(type_end, args)  # space
    if args[name_beg] == '(':
      # Function-pointer style argument: the name sits inside "(* name)"
      field_end = NextBlock(name_beg, args)  # field
      m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[name_beg:field_end])
      if not m:
        self.fatal("bad block3 '" + args + "' : '" + args[name_beg:field_end] + "'")
      name = m.group(1)
      stop = NextBlock(field_end, args)  # the rest
    else:
      # Advance block by block; the block right before the comma is the name
      while name_beg < len(args):
        stop = NextBlock(name_beg, args)
        if args[stop] == ',':
          break
        name_beg = stop
      name = args[name_beg:stop]

    item = args[cursor:stop]
    struct['astr'][name] = item
    struct['alst'].append(name)
    struct['tlst'].append(item)
    if args[stop] != ',':
      self.fatal("no comma '" + args + "'")
    cursor = stop + 1

  return struct
|
||||
|
||||
# parse given api
|
||||
def parse(self, call, full_fct):
  """Store the parsed signature of `call` in self.data[call].

  The text given to get_args() is full_fct[call] when `call` is a key of
  `full_fct`, otherwise the bare call name.
  """
  declaration = full_fct[call] if call in full_fct else call
  self.data[call] = self.get_args(declaration)
|
||||
|
||||
#############################################################
|
||||
# API description parser class
|
||||
class API_DescrParser:
|
||||
def fatal(self, msg):
  """Forward `msg` to the module-level fatal() helper, tagged with this class's name."""
  origin = 'API_DescrParser'
  fatal(origin, msg)
|
||||
|
||||
def __init__(self, out_file, kfd_dir, api_headers, license):
  """Parse the first API header and build the generated header/source text.

  out_file    -- output header name; upper-cased with '/' and '.' turned
                 into '_' to form the include-guard macro
  kfd_dir     -- directory prefix prepended to the header file name
  api_headers -- sequence of (name, header) pairs; only element 0 is used
  license     -- license text placed at the top of both outputs

  The results are left in self.content_h and self.content_cpp.
  """
  out_macro = re.sub(r'[\/\.]', r'_', out_file.upper()) + '_'

  self.content_h = ''
  self.content_cpp = ''

  self.api_names = []
  self.api_calls = {}
  self.api_rettypes = set()
  self.api_id = {}

  api_data = {}
  full_fct = {}
  ns_calls = []

  # Collect the call names and their full declarations from the header
  (name, header) = api_headers[0]
  header_path = kfd_dir + header
  api = API_TableParser(header_path, name, full_fct)
  full_fct = api.full_fct
  api_list = api.array
  self.api_names.append(name)
  self.api_calls[name] = api_list

  for call in api_list:
    if call in api_data:
      self.fatal("call '" + call + "' is already found")

  # Fills api_data in place with one parsed signature per call
  API_DeclParser(header_path, api_list, api_data, full_fct)

  for call in api_list:
    if call in api_data:
      # API ID map and set of return types
      self.api_id[call] = 'KFD_API_ID_' + call
      self.api_rettypes.add(api_data[call]['ret'])
    else:
      # Not-supported functions
      ns_calls.append(call)

  self.api_rettypes.discard('void')
  self.api_data = api_data
  self.ns_calls = ns_calls

  # ---- generated header: prologue ----
  self.content_h += "// automatically generated\n\n" + license + '\n'
  self.content_h += "/////////////////////////////////////////////////////////////////////////////\n"
  for call in self.ns_calls:
    self.content_h += '// ' + call + ' was not parsed\n'
  self.content_h += ''.join([
    '\n',
    '#ifndef ' + out_macro + '\n',
    '#define ' + out_macro + '\n',
    '\n',
    '#include <dlfcn.h>\n',
    '#include <string.h>\n',
    '#include "roctracer_kfd.h"\n',
    '#include "hsakmt.h"\n',
    '#include "cb_table.h"\n',
    '#define PUBLIC_API __attribute__((visibility("default")))\n',
  ])

  self.add_section('API ID enumeration', ' ', self.gen_id_enum)
  self.add_section('API arg structure', ' ', self.gen_arg_struct)

  # ---- generated header: implementation part guarded by PROF_API_IMPL ----
  self.content_h += ''.join([
    '\n',
    '#if PROF_API_IMPL\n',
    'namespace roctracer {\n',
    'namespace kfd_support {\n',
  ])

  self.add_section('API get_name function', ' ', self.gen_get_name)
  self.add_section('API get_code function', ' ', self.gen_get_code)

  self.add_section('API intercepting code', '', self.gen_intercept_decl)
  self.add_section('API intercepting code', '', self.gen_intercept)
  self.add_section('API callback functions', '', self.gen_callbacks)

  self.content_h += '\n};};\n'
  self.content_h += '#endif // PROF_API_IMPL\n'

  # ---- generated source: prologue (must precede gen_out_stream, which
  # also appends to content_cpp) ----
  self.content_cpp += "// automatically generated\n\n" + license + '\n'
  self.content_cpp += "/////////////////////////////////////////////////////////////////////////////\n\n"
  self.content_cpp += '#define PROF_API_IMPL 1\n'
  self.content_cpp += '#include "kfd_prof_str.h"\n'

  self.add_section('API output stream', ' ', self.gen_out_stream)
  self.add_section_cpp('API callback fcts', ' ', self.gen_public_api)

  self.content_h += '#endif // ' + out_macro + '_'
  self.content_cpp += '}\n'
  self.content_cpp += '\n'
|
||||
|
||||
# add code section
|
||||
def add_section_cpp(self, title, gap, fun):
  """Emit one titled section into self.content_cpp by driving generator `fun`.

  title -- text placed in the '// section:' banner
  gap   -- prefix of each '// block:' line; when it is the empty string,
           `fun` is also called with call == '-' before every API group
           after the first call has been emitted
  fun   -- callable fun(n, name, call, data); invoked once with n == -1
           before anything else, once per API call with a running index n,
           and once at the end with the total count and call == '-'
  """
  self.content_cpp += '\n// section: ' + title + '\n\n'
  fun(-1, '-', '-', {})

  n = 0
  for name in self.api_names:
    if n != 0:
      if gap == '': fun(n, name, '-', {})
      self.content_cpp += '\n'
    self.content_cpp += gap + '// block: ' + name + ' API\n'
    for call in self.api_calls[name]:
      fun(n, name, call, self.api_data[call])
      n += 1

  fun(n, '-', '-', {})
|
||||
|
||||
def add_section(self, title, gap, fun):
  """Emit one titled section into self.content_h by driving generator `fun`.

  title -- text placed in the '// section:' banner
  gap   -- prefix of each '// block:' line; when it is the empty string,
           `fun` is also called with call == '-' before every API group
           after the first call has been emitted
  fun   -- callable fun(n, name, call, data); invoked once with n == -1
           before anything else, once per API call with a running index n,
           and once at the end with the total count and call == '-'
  """
  self.content_h += '\n// section: ' + title + '\n\n'
  fun(-1, '-', '-', {})

  n = 0
  for name in self.api_names:
    if n != 0:
      if gap == '': fun(n, name, '-', {})
      self.content_h += '\n'
    self.content_h += gap + '// block: ' + name + ' API\n'
    for call in self.api_calls[name]:
      fun(n, name, call, self.api_data[call])
      n += 1

  fun(n, '-', '-', {})
|
||||
|
||||
# check if it's an array decl
|
||||
def is_arr(self, record):
  """Match an array declaration of the form `<type> <name>[]`.

  On success group(1) is the type text and group(2) the name; otherwise
  None is returned.
  """
  array_re = re.compile(r'\s*(.*)\s+(.*)\[\]\s*')
  return array_re.match(record)
|
||||
|
||||
# generate API ID enumeration
|
||||
def gen_id_enum(self, n, name, call, data):
  """Section generator for the `kfd_api_id_t` enumeration.

  n == -1 opens the enum, a real `call` adds one enumerator with value n,
  and call == '-' appends KFD_API_ID_NUMBER (= n), KFD_API_ID_ANY (= n + 1)
  and the closing brace.
  """
  if n == -1:
    self.content_h += 'enum kfd_api_id_t {\n'
  elif call != '-':
    self.content_h += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
  else:
    self.content_h += ''.join([
      '\n',
      ' KFD_API_ID_NUMBER = ' + str(n) + ',\n',
      ' KFD_API_ID_ANY = ' + str(n + 1) + ',\n',
      '};\n',
    ])
|
||||
|
||||
# generate API args structure
|
||||
def gen_arg_struct(self, n, name, call, struct):
  """Section generator for the `kfd_api_data_t` structure.

  n == -1 writes the fixed fields, a union with one `<type>_retval` member
  per entry of self.api_rettypes, and opens the arguments union. A real
  `call` adds a nested struct of its arguments (array arguments `T x[]`
  are declared as `T* x`). call == '-' closes the union as `args` and
  closes the structure.
  """
  if n == -1:
    self.content_h += ''.join([
      'struct kfd_api_data_t {\n',
      ' uint64_t correlation_id;\n',
      ' uint32_t phase;\n',
      ' union {\n',
    ])
    for ret_type in self.api_rettypes:
      self.content_h += ' ' + ret_type + ' ' + ret_type + '_retval;\n'
    self.content_h += ' };\n'
    self.content_h += ' union {\n'
    return

  if call == '-':
    self.content_h += ' } args;\n'
    self.content_h += '};\n'
    return

  self.content_h += ' struct {\n'
  for item in struct['astr'].values():
    arr = self.is_arr(item)
    if arr:
      member = arr.group(1) + '* ' + arr.group(2)
    else:
      member = item
    self.content_h += ' ' + member + ';\n'
  self.content_h += ' } ' + call + ';\n'
|
||||
|
||||
# generate API callbacks
|
||||
def gen_callbacks(self, n, name, call, struct):
  """Section generator for the `<call>_callback` wrapper functions.

  n == -1 emits the callback-table typedef and instance. For a real `call`
  the emitted wrapper copies the arguments into a kfd_api_data_t, invokes
  the registered callback with phase 0, calls the original function through
  `<name>_table`, then invokes the callback again with phase 1. '[]'
  suffixes on argument names are removed wherever a name is used as an
  expression.
  """
  if n == -1:
    self.content_h += 'typedef CbTable<KFD_API_ID_NUMBER> cb_table_t;\n'
    self.content_h += 'cb_table_t cb_table;\n'
    self.content_h += '\n'
  if call == '-':
    return

  call_id = self.api_id[call]
  ret_type = struct['ret']
  has_result = (ret_type != 'void')
  notify = ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_KFD_API, ' + call_id + ', &api_data, api_callback_arg);\n'

  self.content_h += ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' # 'static ' +
  if call == 'hsaKmtOpenKFD':
    # This wrapper installs the intercept table when it is still unset
    self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n'
  self.content_h += ' kfd_api_data_t api_data{};\n'
  for var in struct['alst']:
    plain = var.replace("[]","")
    self.content_h += ' api_data.args.' + call + '.' + plain + ' = ' + plain + ';\n'
  self.content_h += ' activity_rtapi_callback_t api_callback_fun = NULL;\n'
  self.content_h += ' void* api_callback_arg = NULL;\n'
  self.content_h += ' cb_table.get(' + call_id + ', &api_callback_fun, &api_callback_arg);\n'
  self.content_h += ' api_data.phase = 0;\n'
  self.content_h += notify

  if has_result:
    self.content_h += ' ' + ret_type + ' ret = '
  forward = ' ' + name + '_table->' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
  self.content_h += forward.replace("[]","")
  if has_result:
    self.content_h += ' api_data.' + ret_type + '_retval = ret;\n'

  self.content_h += ' api_data.phase = 1;\n'
  self.content_h += notify
  if has_result:
    self.content_h += ' return ret;\n'
  self.content_h += '}\n'
|
||||
|
||||
# Generates API intercepting table struct definition
|
||||
def gen_intercept_decl(self, n, name, call, struct):
  """Section generator for the intercept-table struct definition.

  Opens `typedef struct {` at the first call (n == 0) or at a group
  separator, adds one `decltype(<call>)* <call>_fn;` member per call, and
  closes the struct as `HSAKMTAPI_table_t` whenever call == '-' with n > 0.
  """
  separator = (call == '-')
  if separator and n > 0:
    self.content_h += '} HSAKMTAPI_table_t;\n' #was HSAKMTAPI_table_t
  if n == 0 or (separator and name != '-'):
    self.content_h += 'typedef struct {\n'
  if not separator:
    self.content_h += ' decltype(' + call + ')* ' + call + '_fn;\n'
|
||||
|
||||
# generate API intercepting code
|
||||
def gen_intercept(self, n, name, call, struct):
  """Section generator for `intercept_KFDApiTable()`.

  At the first call (n == 0) or at a group separator it emits the table
  pointer, the function header and the table allocation; each call adds a
  typedef plus a `dlsym(RTLD_NEXT, ...)` lookup stored in the table; a
  call == '-' with n > 0 emits the closing `};`.
  """
  separator = (call == '-')
  if separator and n > 0:
    self.content_h += '};\n'
  if n == 0 or (separator and name != '-'):
    table = name + '_table'
    self.content_h += table + '_t* ' + table + ' = NULL;\n'
    self.content_h += 'void intercept_' + 'KFDApiTable' + '(void) {\n'
    self.content_h += ' ' + table + ' = new ' + table + '_t{}' + ';\n'

  if not separator:
    fn_type = call + '_t'
    self.content_h += ' typedef decltype(' + name + '_table_t::' + call + '_fn) ' + fn_type + ';\n'
    self.content_h += ' ' + name + '_table->' + call + '_fn = (' + fn_type + ')' + 'dlsym(RTLD_NEXT,"' + call + '");\n'
|
||||
|
||||
# generate API name function
|
||||
def gen_get_name(self, n, name, call, struct):
  """Section generator for `GetApiName(id)`.

  n == -1 opens the function and its switch, each call adds a `case`
  returning the call name, and call == '-' closes the switch and returns
  "unknown" for any other id.
  """
  if n == -1:
    self.content_h += 'const char* GetApiName(const uint32_t& id) {\n' #static
    self.content_h += ' switch (id) {\n'
  elif call != '-':
    self.content_h += ' case ' + self.api_id[call] + ': return "' + call + '";\n'
  else:
    self.content_h += ''.join([
      ' }\n',
      ' return "unknown";\n',
      '}\n',
    ])
|
||||
|
||||
# generate API code function
|
||||
def gen_get_code(self, n, name, call, struct):
  """Section generator for `GetApiCode(str)`.

  n == -1 opens the function, each call adds a `strcmp` test returning its
  ID, and call == '-' adds the KFD_API_ID_NUMBER fallback and closes the
  function.
  """
  if n == -1:
    self.content_h += 'uint32_t GetApiCode(const char* str) {\n' # static
  elif call != '-':
    self.content_h += ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n'
  else:
    self.content_h += ' return KFD_API_ID_NUMBER;\n' + '}\n'
|
||||
|
||||
# generate stream operator
|
||||
def gen_out_stream(self, n, name, call, struct):
  """Section generator for `operator<<` on `kfd_api_data_pair_t`.

  n == -1 opens the operator and its switch on the call ID. Each call adds
  a `case` that prints the call name, every argument except those whose
  name contains 'MemFlags', and the return value (or "void"). call == '-'
  adds the aborting `default:` branch, closes the operator, and appends an
  `operator<<` for HsaMemFlags to self.content_cpp.
  """
  if n == -1:
    self.content_h += ''.join([
      'typedef std::pair<uint32_t, kfd_api_data_t> kfd_api_data_pair_t;\n',
      'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n',
      ' const uint32_t cid = data_pair.first;\n',
      ' const kfd_api_data_t& api_data = data_pair.second;\n',
      ' switch(cid) {\n',
    ])
    return

  if call == '-':
    self.content_h += ''.join([
      ' default:\n',
      ' out << "ERROR: unknown API";\n',
      ' abort();\n',
      ' }\n',
      ' return out;\n',
      '}\n',
    ])
    self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n'
    return

  self.content_h += ' case ' + self.api_id[call] + ': {\n'
  self.content_h += ' out << "' + call + '(";\n'
  arg_list = struct['alst']
  last_index = len(arg_list) - 1
  for ind, arg_var in enumerate(arg_list):
    if re.search(r'MemFlags', arg_var):
      continue
    arg_val = ('api_data.args.' + call + '.' + arg_var).replace("[]","")
    val_type = 'arg_val_type_t' + str(ind)
    self.content_h += ' typedef decltype(' + arg_val + ') ' + val_type + ';\n'
    self.content_h += ' roctracer::kfd_support::output_streamer<' + val_type + '>::put(out, ' + arg_val + ')'
    # Every argument but the last is followed by a ", " separator
    self.content_h += ' << ", ";\n' if ind < last_index else ';\n'

  if struct['ret'] == 'void':
    self.content_h += ' out << ") = void";\n'
  else:
    self.content_h += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n'
  self.content_h += ' break;\n'
  self.content_h += ' }\n'
|
||||
|
||||
# generate PUBLIC_API for all API fcts
|
||||
def gen_public_api(self, n, name, call, struct):
  """Section generator for the exported entry points in self.content_cpp.

  n == -1 opens `extern "C"` and emits RegisterApiCallback /
  RemoveApiCallback. Each call emits a PUBLIC_API function with the call's
  own name that forwards its arguments ('[]' suffixes removed) to
  `roctracer::kfd_support::<call>_callback`.
  """
  if n == -1:
    self.content_cpp += ''.join([
      'extern "C" {\n',
      'PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* user_data) {\n',
      ' roctracer::kfd_support::cb_table.set(op, reinterpret_cast<activity_rtapi_callback_t>(callback), user_data);\n',
      ' return true;\n',
      '}\n',
      'PUBLIC_API bool RemoveApiCallback(uint32_t op) {\n',
      ' roctracer::kfd_support::cb_table.set(op, NULL, NULL);\n',
      ' return true;\n',
      '}\n\n',
    ])

  if call != '-':
    forwarded = ', '.join([arg.replace("[]","") for arg in struct['alst']])
    # NOTE(review): the emitted wrapper ignores whatever <call>_callback
    # returns and always reports HSAKMT_STATUS_SUCCESS -- confirm intended.
    self.content_cpp += 'PUBLIC_API HSAKMT_STATUS ' + call + '(' + struct['args'] + ') { roctracer::kfd_support::' + call + '_callback('
    self.content_cpp += forwarded
    self.content_cpp += '); return HSAKMT_STATUS_SUCCESS;} \n'
|
||||
|
||||
#############################################################
|
||||
# main
|
||||
@@ -22,14 +539,18 @@ else:
|
||||
ROOT = sys.argv[1] + '/'
|
||||
KFD_DIR = sys.argv[2] + '/'
|
||||
|
||||
descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE)
|
||||
|
||||
out_h_file = ROOT + OUT_H
|
||||
out_c_file = ROOT + OUT_C
|
||||
print 'Generating: "' + out_h_file + '", ' + out_c_file + '"'
|
||||
f = open(out_h_file, 'w')
|
||||
f.write(content_h)
|
||||
out_file = ROOT + OUT_H
|
||||
print 'Generating "' + out_file + '"'
|
||||
f = open(out_file, 'w')
|
||||
f.write(descr.content_h[:-1])
|
||||
f.close()
|
||||
f = open(out_c_file, 'w')
|
||||
f.write(content_c)
|
||||
|
||||
out_file = ROOT + OUT_CPP
|
||||
print 'Generating "' + out_file + '"'
|
||||
f = open(out_file, 'w')
|
||||
f.write(descr.content_cpp[:-1])
|
||||
f.close()
|
||||
|
||||
#############################################################
|
||||
|
||||
@@ -10,7 +10,7 @@ set ( LIB_SRC
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" )
|
||||
|
||||
@@ -19,7 +19,7 @@ set ( KFD_LIB_SRC
|
||||
${LIB_DIR}/kfd/kfd_wrapper.cpp
|
||||
)
|
||||
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
|
||||
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} )
|
||||
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer_roctx.h"
|
||||
#define PROF_API_IMPL 1
|
||||
#include "inc/roctracer_hsa.h"
|
||||
#include "inc/roctracer_kfd.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
@@ -633,6 +634,10 @@ PUBLIC_API const char* roctracer_op_string(
|
||||
return roctracer::HipLoader::Instance().ApiName(op);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
return roctracer::kfd_support::GetApiName(op);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -653,6 +658,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
*op = roctracer::kfd_support::GetApiCode(str);
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -665,6 +675,7 @@ static inline uint32_t get_op_num(const uint32_t& domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: return hc::HSA_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_EXT_API: return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
|
||||
default:
|
||||
@@ -681,13 +692,11 @@ static void roctracer_enable_callback_impl(
|
||||
void* user_data)
|
||||
{
|
||||
switch (domain) {
|
||||
#if 0
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
roctracer::hsa_support::cb_table.set(op, callback, user_data);
|
||||
@@ -751,13 +760,11 @@ static void roctracer_disable_callback_impl(
|
||||
uint32_t op)
|
||||
{
|
||||
switch (domain) {
|
||||
#if 0
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op);
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
@@ -856,6 +863,7 @@ static void roctracer_enable_activity_impl(
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_KFD_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
if (roctracer::HccLoader::GetRef() == NULL) {
|
||||
roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback,
|
||||
@@ -920,6 +928,7 @@ static void roctracer_disable_activity_impl(
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_KFD_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")");
|
||||
@@ -1039,6 +1048,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
roctracer::kfd_support::intercept_KFDApiTable();
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
// HSA API properties
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
|
||||
|
||||
@@ -44,7 +44,7 @@ file( GLOB UTIL_SRC "${HSA_TEST_DIR}/util/*.cpp" )
|
||||
set ( TEST_LIB "tracer_tool" )
|
||||
set ( TEST_LIB_SRC ${TEST_DIR}/tool/tracer_tool.cpp ${UTIL_SRC} )
|
||||
add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} )
|
||||
target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} )
|
||||
target_include_directories ( ${TEST_LIB} PRIVATE ${HSA_TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HIP_INC_DIR} ${HCC_INC_DIR} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt )
|
||||
|
||||
## Build HSA test
|
||||
|
||||
@@ -59,7 +59,7 @@ eval_test() {
|
||||
|
||||
# Standalone test
|
||||
# rocTracer is used explicitly by the test
|
||||
eval_test "standalone HIP test" ./test/MatrixTranspose_test
|
||||
eval_test "standalone HIP test" "LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_test"
|
||||
|
||||
# Tool test
|
||||
# rocTracer/tool is loaded by HSA runtime
|
||||
|
||||
@@ -31,6 +31,7 @@ THE SOFTWARE.
|
||||
#include <inc/roctracer_hsa.h>
|
||||
#include <inc/roctracer_hip.h>
|
||||
#include <inc/roctracer_hcc.h>
|
||||
#include <inc/roctracer_kfd.h>
|
||||
#include <inc/ext/hsa_rt_utils.hpp>
|
||||
#include <src/core/loader.h>
|
||||
#include <src/core/trace_buffer.h>
|
||||
@@ -58,9 +59,11 @@ typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
|
||||
hsa_rt_utils::Timer* timer = NULL;
|
||||
thread_local timestamp_t hsa_begin_timestamp = 0;
|
||||
thread_local timestamp_t hip_begin_timestamp = 0;
|
||||
thread_local timestamp_t kfd_begin_timestamp = 0;
|
||||
bool trace_hsa_api = false;
|
||||
bool trace_hsa_activity = false;
|
||||
bool trace_hip = false;
|
||||
bool trace_kfd = false;
|
||||
|
||||
LOADER_INSTANTIATE();
|
||||
|
||||
@@ -69,6 +72,7 @@ FILE* hsa_api_file_handle = NULL;
|
||||
FILE* hsa_async_copy_file_handle = NULL;
|
||||
FILE* hip_api_file_handle = NULL;
|
||||
FILE* hcc_activity_file_handle = NULL;
|
||||
FILE* kfd_api_file_handle = NULL;
|
||||
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
@@ -79,12 +83,31 @@ void fatal(const std::string msg) {
|
||||
fflush(hsa_async_copy_file_handle);
|
||||
fflush(hip_api_file_handle);
|
||||
fflush(hcc_activity_file_handle);
|
||||
fflush(kfd_api_file_handle);
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "%s\n\n", msg.c_str());
|
||||
fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
|
||||
// KFD API callback function
|
||||
void kfd_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
kfd_begin_timestamp = timer->timestamp_fn_ns();
|
||||
} else {
|
||||
const timestamp_t end_timestamp = timer->timestamp_fn_ns();
|
||||
std::ostringstream os;
|
||||
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
|
||||
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
|
||||
}
|
||||
}
|
||||
// C++ symbol demangle
|
||||
static inline const char* cxx_demangle(const char* symbol) {
|
||||
size_t funcnamesize;
|
||||
@@ -296,10 +319,8 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id);
|
||||
fflush(hcc_activity_file_handle);
|
||||
} else {
|
||||
#if 0
|
||||
fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n",
|
||||
record->begin_ns, record->end_ns, record->process_id, record->thread_id, name);
|
||||
#endif
|
||||
}
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
@@ -394,6 +415,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
}
|
||||
}
|
||||
|
||||
trace_kfd = (trace_domain == NULL) || (strncmp(trace_domain, "kfd", 3) == 0);
|
||||
|
||||
// Output file
|
||||
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
if (output_prefix != NULL) {
|
||||
@@ -408,6 +431,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
|
||||
// API trace vector
|
||||
std::vector<std::string> hsa_api_vec;
|
||||
std::vector<std::string> kfd_api_vec;
|
||||
|
||||
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
|
||||
// XML input
|
||||
@@ -437,6 +461,11 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
trace_hsa_api = true;
|
||||
hsa_api_vec = api_vec;
|
||||
}
|
||||
if (name == "KFD") {
|
||||
found = true;
|
||||
trace_kfd = true;
|
||||
kfd_api_vec = api_vec;
|
||||
}
|
||||
if (name == "GPU") {
|
||||
found = true;
|
||||
trace_hsa_activity = true;
|
||||
@@ -473,6 +502,25 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
if (trace_kfd) {
|
||||
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
|
||||
// initialize KFD tracing
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL);
|
||||
|
||||
printf(" KFD-trace(");
|
||||
if (kfd_api_vec.size() != 0) {
|
||||
for (unsigned i = 0; i < kfd_api_vec.size(); ++i) {
|
||||
uint32_t cid = KFD_API_ID_NUMBER;
|
||||
const char* api = kfd_api_vec[i].c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL));
|
||||
printf(" %s", api);
|
||||
}
|
||||
} else {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL));
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
if (trace_hsa_activity) {
|
||||
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
|
||||
|
||||
@@ -542,6 +590,11 @@ void tool_unload(bool destruct) {
|
||||
close_output_file(hip_api_file_handle);
|
||||
close_output_file(hcc_activity_file_handle);
|
||||
}
|
||||
|
||||
if (trace_kfd) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
|
||||
fclose(kfd_api_file_handle);
|
||||
}
|
||||
if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); }
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user