Gbaraldi/pcparser (#58)
* Sample PC sampling parser library
* Fixed error message
* Added correlation_id tests. Fixed correlation_id.
* Fixed some tests.
* Added read_index to dispatch_pkt_id
* Added some comments
* Added more comments
* Moved test.cpp to a more descriptive file
* Added gfxip selection
* Added parser checks. Added GFXIP selection switch.
* Fixed dispatch_pkt_wrap with Joe's comments
* Performance improvement of 25%~100%
* Attempt at reducing overhead of dispatch bookkeeping
* Fixed cache invalidate
* Fixed error message descriptions
* Added GFX12
* Renamed files. Create C-only header.
* Formatting
* Other performance improvement
* source formatting (clang-format v11) (#63)
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>
* Added copyright
* Format-11
* Performance improvement. Added some GFX9 snap tests. Fixed hostTrap selector.
* Clang format.
* Added more gfx9 field tests
* Clang format
* Added parserv2 for pcsampling
* Added multibuffer test
* Created records processor
* Changed bFlipBuffer name
* pcs parser moved to lib/rocprofiler dir
* pcs parser compiles; one test moved
* pcs parser: tests copied from samples folder
The tests currently reside in the single .cpp.
They should be modularized and refactored to distinct unit tests
from integration test.
gfx9 test can fail sometimes. I did not have time
to deal with it.
Since Giovanni will continue working on this, I did not remove
pcs parser from the samples folder.
* cmake formatting (cmake-format) (#149)
Co-authored-by: vlaindic <vlaindic@users.noreply.github.com>
* source formatting (clang-format v11) (#150)
Co-authored-by: vlaindic <vlaindic@users.noreply.github.com>
* pcs parser: failing test removed for now
* pcs parser: fixing static errors and separating template definitions from implementation
* cmake formatting (cmake-format) (#152)
Co-authored-by: vlaindic <vlaindic@users.noreply.github.com>
* source formatting (clang-format v11) (#153)
Co-authored-by: vlaindic <vlaindic@users.noreply.github.com>
* pcs parser: fixing CI/CD errors
* source formatting (clang-format v11) (#154)
Co-authored-by: vlaindic <vlaindic@users.noreply.github.com>
* pcs parser: parsers removed from samples
---------
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>
Co-authored-by: vlaindic <vladimir.indic@amd.com>
Co-authored-by: vlaindic <vlaindic@users.noreply.github.com>
[ROCm/rocprofiler-sdk commit: 07aa54c5aa]
This commit is contained in:
committed by
GitHub
parent
178bb0c300
commit
760e79267a
@@ -37,6 +37,7 @@ add_subdirectory(hsa)
|
||||
add_subdirectory(context)
|
||||
add_subdirectory(counters)
|
||||
add_subdirectory(aql)
|
||||
add_subdirectory(pc_sampling)
|
||||
|
||||
target_link_libraries(
|
||||
rocprofiler-object-library
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
# Descend into the parser/ directory so its CMakeLists.txt is processed.
add_subdirectory(parser)
|
||||
@@ -0,0 +1,13 @@
|
||||
# Translation units of the PC sampling parser.
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES
    pc_record_interface.cpp
    correlation.cpp
    translation.cpp)

# Headers of the PC sampling parser.
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_HEADERS
    correlation.hpp
    gfx9.hpp
    gfx11.hpp
    gfx_unknown.hpp
    parser_types.hpp
    pc_record_interface.hpp
    rocr.hpp
    translation.hpp)

# Attach both lists to the rocprofiler object library as private sources.
target_sources(
    rocprofiler-object-library
    PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES}
            ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_HEADERS})

# The parser tests are only configured when tests are enabled.
if(ROCPROFILER_BUILD_TESTS)
    add_subdirectory(tests)
endif()
|
||||
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/correlation.hpp"
|
||||
|
||||
template <>
|
||||
struct std::hash<device_handle>
|
||||
{
|
||||
size_t operator()(const device_handle& d) const { return d.handle; }
|
||||
};
|
||||
bool
|
||||
operator==(device_handle a, device_handle b)
|
||||
{
|
||||
return a.handle == b.handle;
|
||||
}
|
||||
|
||||
namespace Parser
|
||||
{
|
||||
bool
|
||||
operator==(const DispatchPkt& a, const DispatchPkt& b)
|
||||
{
|
||||
return a.correlation_id_in == b.correlation_id_in && a.dev == b.dev;
|
||||
}
|
||||
} // namespace Parser
|
||||
|
||||
namespace Parser
|
||||
{
|
||||
/**
|
||||
* Coordinates DispatchMap and DoorBellMap to reconstruct the original correlation_id
|
||||
* from the correlation_id seen by the trap handler.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Checks wether a dispatch pkt will generate a collision.
|
||||
* Returns true on collision and false when slot is available.
|
||||
*/
|
||||
bool
|
||||
CorrelationMap::checkDispatch(const dispatch_pkt_id_t& pkt) const
|
||||
{
|
||||
uint64_t trap = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
return dispatch_to_correlation.find({trap, pkt.device}) != dispatch_to_correlation.end();
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the mapping of dispatch_id to correlation_id
|
||||
*/
|
||||
void
|
||||
CorrelationMap::newDispatch(const dispatch_pkt_id_t& pkt)
|
||||
{
|
||||
cache_dev_id = ~0ul;
|
||||
uint64_t trap_id = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id;
|
||||
}
|
||||
|
||||
void
|
||||
CorrelationMap::forget(const dispatch_pkt_id_t& pkt)
|
||||
{
|
||||
cache_dev_id = ~0ul;
|
||||
uint64_t trap_id = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
dispatch_to_correlation.erase({trap_id, pkt.device});
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a device dev, doorbell and and wrapped dispatch_id, returns the
|
||||
* correlation_id set by dispatch_pkt_id_t
|
||||
*/
|
||||
uint64_t
|
||||
CorrelationMap::get(device_handle dev, uint64_t correlation_in)
|
||||
{
|
||||
#ifndef _PARSER_CORRELATION_DISABLE_CACHE
|
||||
if(dev.handle == cache_dev_id && correlation_in == cache_correlation_id_in)
|
||||
return cache_correlation_id_out;
|
||||
#endif
|
||||
cache_dev_id = dev.handle;
|
||||
cache_correlation_id_in = correlation_in;
|
||||
cache_correlation_id_out = dispatch_to_correlation.at({correlation_in, dev});
|
||||
return cache_correlation_id_out;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
CorrelationMap::wrap_correlation_id(uint64_t doorbell, uint64_t write_idx, uint64_t queue_size)
|
||||
{
|
||||
static constexpr uint64_t WRITE_WRAP = (1 << 25) - 1;
|
||||
return ((write_idx % queue_size) & WRITE_WRAP) | (uint64_t(doorbell) << 32);
|
||||
}
|
||||
|
||||
} // namespace Parser
|
||||
|
||||
/**
|
||||
* @brief Parses a given set of pc samples.
|
||||
* @param[in] buffer Pointer to a buffer containing metadata and pcsamples.
|
||||
* @param[in] buffer_size The number of elements in the buffer.
|
||||
* @param[in] gfxip_major GFXIP major version of the samples.
|
||||
* @param[in] callback A callback function that accepts a double pointer to write the samples to,
|
||||
* a size requested parameter (number of pc_sample_t) and a void* to userdata.
|
||||
* The callback is expected to allocate 64B-aligned memory where the parsed samples are going to
|
||||
* be written to, and return the size of memory that was allocated, in multiples of
|
||||
* sizeof(generic_sample_t). If the callback returns 0 or a larger size than requested,
|
||||
* parse_buffer() will return PCSAMPLE_STATUS_CALLBACK_ERROR. If the callback returns
|
||||
* a size smaller than requested, then it may be called again requesting more memory.
|
||||
* @param[in] userdata parameter forwarded to the user callback.
|
||||
*/
|
||||
pcsample_status_t
|
||||
parse_buffer(generic_sample_t* buffer,
|
||||
uint64_t buffer_size,
|
||||
int gfxip_major,
|
||||
user_callback_t callback,
|
||||
void* userdata)
|
||||
{
|
||||
static auto corr_map = std::make_unique<Parser::CorrelationMap>();
|
||||
|
||||
auto parseSample_func = _parse_buffer<GFX9>;
|
||||
if(gfxip_major == 9)
|
||||
parseSample_func = _parse_buffer<GFX9>;
|
||||
else if(gfxip_major == 11)
|
||||
parseSample_func = _parse_buffer<GFX11>;
|
||||
else if(gfxip_major == 0)
|
||||
parseSample_func = _parse_buffer<gfx_unknown>;
|
||||
else
|
||||
return PCSAMPLE_STATUS_INVALID_GFXIP;
|
||||
|
||||
return parseSample_func(buffer, buffer_size, callback, userdata, corr_map.get());
|
||||
};
|
||||
@@ -0,0 +1,230 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/translation.hpp"
|
||||
|
||||
#if 0
|
||||
template <>
|
||||
struct std::hash<device_handle>
|
||||
{
|
||||
size_t operator()(const device_handle& d) const { return d.handle; }
|
||||
};
|
||||
bool
|
||||
operator==(device_handle a, device_handle b)
|
||||
{
|
||||
return a.handle == b.handle;
|
||||
}
|
||||
#endif
|
||||
namespace Parser
|
||||
{
|
||||
/*
|
||||
struct DispatchPkt
|
||||
{
|
||||
uint64_t write_id; //! The location where this dispatch is written to
|
||||
uint64_t doorbell_id; //! The doorbell non-unique ID
|
||||
device_handle dev; //! Which device this is run
|
||||
}; */
|
||||
struct DispatchPkt
|
||||
{
|
||||
uint64_t correlation_id_in; //! Correlation ID seen by the trap handler
|
||||
device_handle dev; //! Which device this is run
|
||||
};
|
||||
#if 0
|
||||
bool
|
||||
operator==(const DispatchPkt& a, const DispatchPkt& b)
|
||||
{
|
||||
return a.correlation_id_in == b.correlation_id_in && a.dev == b.dev;
|
||||
}
|
||||
#endif
|
||||
} // namespace Parser
|
||||
|
||||
template <>
|
||||
struct std::hash<Parser::DispatchPkt>
|
||||
{
|
||||
size_t operator()(const Parser::DispatchPkt& d) const
|
||||
{
|
||||
return (d.correlation_id_in << 8) ^ d.dev.handle;
|
||||
}
|
||||
};
|
||||
|
||||
namespace Parser
|
||||
{
|
||||
/**
|
||||
* Coordinates DispatchMap and DoorBellMap to reconstruct the original correlation_id
|
||||
* from the correlation_id seen by the trap handler.
|
||||
*/
|
||||
class CorrelationMap
|
||||
{
|
||||
public:
|
||||
CorrelationMap() = default;
|
||||
|
||||
/**
|
||||
* Checks wether a dispatch pkt will generate a collision.
|
||||
* Returns true on collision and false when slot is available.
|
||||
*/
|
||||
bool checkDispatch(const dispatch_pkt_id_t& pkt) const;
|
||||
|
||||
/**
|
||||
* Updates the mapping of dispatch_id to correlation_id
|
||||
*/
|
||||
void newDispatch(const dispatch_pkt_id_t& pkt);
|
||||
|
||||
void forget(const dispatch_pkt_id_t& pkt);
|
||||
|
||||
/**
|
||||
* Given a device dev, doorbell and and wrapped dispatch_id, returns the
|
||||
* correlation_id set by dispatch_pkt_id_t
|
||||
*/
|
||||
uint64_t get(device_handle dev, uint64_t correlation_in);
|
||||
|
||||
static uint64_t wrap_correlation_id(uint64_t doorbell, uint64_t write_idx, uint64_t queue_size);
|
||||
|
||||
private:
|
||||
std::unordered_map<DispatchPkt, uint64_t> dispatch_to_correlation{};
|
||||
|
||||
// Making get() const and these cache variables mutable causes performance to be unstable
|
||||
uint64_t cache_correlation_id_in = ~0ul; // Invalid value in cache
|
||||
uint64_t cache_correlation_id_out = ~0ul;
|
||||
uint64_t cache_dev_id = ~0ul; // Invalid device Id in cache
|
||||
};
|
||||
} // namespace Parser
|
||||
|
||||
template <bool bHostTrap, typename GFXIP>
|
||||
inline pcsample_status_t
|
||||
add_upcoming_samples(const device_handle device,
|
||||
const generic_sample_t* buffer,
|
||||
const size_t available_samples,
|
||||
Parser::CorrelationMap* corr_map,
|
||||
pcsample_v1_t* samples)
|
||||
{
|
||||
pcsample_status_t status = PCSAMPLE_STATUS_SUCCESS;
|
||||
for(uint64_t p = 0; p < available_samples; p++)
|
||||
{
|
||||
const auto* snap = reinterpret_cast<const perf_sample_snapshot_v1*>(buffer + p);
|
||||
samples[p] = copySample<bHostTrap, GFXIP>((const void*) (buffer + p));
|
||||
try
|
||||
{
|
||||
samples[p].correlation_id = corr_map->get(device, snap->correlation_id);
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
status = PCSAMPLE_STATUS_PARSER_ERROR;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
template <typename GFXIP>
|
||||
pcsample_status_t
|
||||
_parse_buffer(generic_sample_t* buffer,
|
||||
uint64_t buffer_size,
|
||||
user_callback_t callback,
|
||||
void* userdata,
|
||||
Parser::CorrelationMap* corr_map)
|
||||
{
|
||||
// Maximum size
|
||||
uint64_t index = 0;
|
||||
|
||||
pcsample_status_t status = PCSAMPLE_STATUS_SUCCESS;
|
||||
|
||||
while(index < buffer_size)
|
||||
{
|
||||
switch(buffer[index].type)
|
||||
{
|
||||
case AMD_DISPATCH_PKT_ID:
|
||||
{
|
||||
const auto& pkt = *reinterpret_cast<const dispatch_pkt_id_t*>(buffer + index);
|
||||
if(pkt.queue_size >= (1 << 25)) status = PCSAMPLE_STATUS_PARSER_ERROR;
|
||||
index += 1;
|
||||
corr_map->newDispatch(pkt);
|
||||
break;
|
||||
}
|
||||
case AMD_UPCOMING_SAMPLES:
|
||||
{
|
||||
const auto& pkt = *reinterpret_cast<const upcoming_samples_t*>(buffer + index);
|
||||
index += 1;
|
||||
|
||||
uint64_t pkt_counter = pkt.num_samples;
|
||||
if(index + pkt_counter > buffer_size) return PCSAMPLE_STATUS_OUT_OF_BOUNDS_ERROR;
|
||||
|
||||
bool bIsHostTrap = pkt.which_sample_type == AMD_HOST_TRAP_V1;
|
||||
|
||||
while(pkt_counter > 0)
|
||||
{
|
||||
pcsample_v1_t* samples = nullptr;
|
||||
uint64_t available_samples = callback(&samples, pkt_counter, userdata);
|
||||
|
||||
if(available_samples == 0 || available_samples > pkt_counter)
|
||||
return PCSAMPLE_STATUS_CALLBACK_ERROR;
|
||||
|
||||
if(bIsHostTrap)
|
||||
{
|
||||
status |= add_upcoming_samples<true, GFXIP>(
|
||||
pkt.device, buffer + index, available_samples, corr_map, samples);
|
||||
}
|
||||
else
|
||||
{
|
||||
status |= add_upcoming_samples<false, GFXIP>(
|
||||
pkt.device, buffer + index, available_samples, corr_map, samples);
|
||||
}
|
||||
|
||||
index += available_samples;
|
||||
pkt_counter -= available_samples;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cerr << "Index " << index << " - Invalid sample type: " << buffer[index].type
|
||||
<< std::endl;
|
||||
return PCSAMPLE_STATUS_INVALID_SAMPLE;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Parses a given set of pc samples.
|
||||
* @param[in] buffer Pointer to a buffer containing metadata and pcsamples.
|
||||
* @param[in] buffer_size The number of elements in the buffer.
|
||||
* @param[in] gfxip_major GFXIP major version of the samples.
|
||||
* @param[in] callback A callback function that accepts a double pointer to write the samples to,
|
||||
* a size requested parameter (number of pc_sample_t) and a void* to userdata.
|
||||
* The callback is expected to allocate 64B-aligned memory where the parsed samples are going to
|
||||
* be written to, and return the size of memory that was allocated, in multiples of
|
||||
* sizeof(generic_sample_t). If the callback returns 0 or a larger size than requested,
|
||||
* parse_buffer() will return PCSAMPLE_STATUS_CALLBACK_ERROR. If the callback returns
|
||||
* a size smaller than requested, then it may be called again requesting more memory.
|
||||
* @param[in] userdata parameter forwarded to the user callback.
|
||||
*/
|
||||
pcsample_status_t
|
||||
parse_buffer(generic_sample_t* buffer,
|
||||
uint64_t buffer_size,
|
||||
int gfxip_major,
|
||||
user_callback_t callback,
|
||||
void* userdata);
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/**
 * Enumerator values used when decoding GFX11 samples. Several enumerators
 * share the value 31.
 */
class GFX11
{
public:
    enum inst_type_issued
    {
        TYPE_VALU             = 0,
        TYPE_SCALAR           = 1,
        TYPE_TEX              = 2,
        TYPE_LDS              = 3,
        TYPE_LDS_DIRECT       = 4,
        TYPE_EXP              = 5,
        TYPE_MESSAGE          = 6,
        TYPE_BARRIER          = 7,
        TYPE_BRANCH_NOT_TAKEN = 8,
        TYPE_BRANCH_TAKEN     = 9,
        TYPE_JUMP             = 10,
        TYPE_OTHER            = 11,
        TYPE_NO_INST          = 12,
        TYPE_DUAL_VALU        = 31,
        TYPE_MATRIX           = 31,
        TYPE_FLAT             = 31,
    };

    enum reason_not_issued
    {
        REASON_NOT_AVAILABLE = 0,
        REASON_ALU           = 1,
        REASON_WAITCNT       = 2,
        REASON_ARBITER       = 3,
        REASON_SLEEP         = 4,
        REASON_BARRIER       = 5,
        REASON_OTHER_WAIT    = 6,
        REASON_INTERNAL      = 31,
        REASON_EX_STALL      = 31,
    };

    enum arb_state
    {
        ISSUE_MISC       = 0,
        ISSUE_EXP        = 1,
        ISSUE_LDS_DIRECT = 2,
        ISSUE_LDS        = 3,
        ISSUE_VMEM_TEX   = 4,
        ISSUE_SCALAR     = 5,
        ISSUE_VALU       = 6,
        ISSUE_MATRIX     = 31,
        ISSUE_FLAT       = 31,
        ISSUE_BRMSG      = 31,
    };
};
|
||||
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/**
 * Enumerator values used when decoding GFX9 samples. Several enumerators
 * share the value 31.
 */
class GFX9
{
public:
    enum inst_type_issued
    {
        TYPE_VALU             = 0,
        TYPE_MATRIX           = 1,
        TYPE_SCALAR           = 2,
        TYPE_TEX              = 3,
        TYPE_LDS              = 4,
        TYPE_FLAT             = 5,
        TYPE_EXP              = 6,
        TYPE_MESSAGE          = 7,
        TYPE_BARRIER          = 8,
        TYPE_BRANCH_NOT_TAKEN = 9,
        TYPE_BRANCH_TAKEN     = 10,
        TYPE_JUMP             = 11,
        TYPE_OTHER            = 12,
        TYPE_NO_INST          = 13,
        TYPE_LAST             = 14,
        TYPE_DUAL_VALU        = 31,
        TYPE_LDS_DIRECT       = 31
    };

    enum reason_not_issued
    {
        REASON_NOT_AVAILABLE = 0,
        REASON_ALU           = 1,
        REASON_WAITCNT       = 2,
        REASON_INTERNAL      = 3,
        REASON_BARRIER       = 4,
        REASON_ARBITER       = 5,
        REASON_EX_STALL      = 6,
        REASON_OTHER_WAIT    = 7,
        REASON_LAST          = 8,
        REASON_SLEEP         = 31
    };

    enum arb_state
    {
        ISSUE_VALU       = 0,
        ISSUE_MATRIX     = 1,
        ISSUE_SCALAR     = 2,
        ISSUE_VMEM_TEX   = 3,
        ISSUE_LDS        = 4,
        ISSUE_FLAT       = 5,
        ISSUE_EXP        = 6,
        ISSUE_MISC       = 7,
        ISSUE_LAST       = 8,
        ISSUE_LDS_DIRECT = 31,
        ISSUE_BRMSG      = 31,
    };
};
|
||||
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
/**
 * ######## Parser Definitions ########
 *
 * Enumerations used by the parser output. Each enum ends with a *_LAST
 * enumerator equal to the number of preceding entries.
 */
namespace PCSAMPLE
{
enum pcsample_inst_type_issued
{
    TYPE_VALU             = 0,
    TYPE_MATRIX           = 1,
    TYPE_SCALAR           = 2,
    TYPE_TEX              = 3,
    TYPE_LDS              = 4,
    TYPE_LDS_DIRECT       = 5,
    TYPE_FLAT             = 6,
    TYPE_EXP              = 7,
    TYPE_MESSAGE          = 8,
    TYPE_BARRIER          = 9,
    TYPE_BRANCH_NOT_TAKEN = 10,
    TYPE_BRANCH_TAKEN     = 11,
    TYPE_JUMP             = 12,
    TYPE_OTHER            = 13,
    TYPE_NO_INST          = 14,
    TYPE_DUAL_VALU        = 15,
    TYPE_LAST             = 16
};

enum pcsample_reason_not_issued
{
    REASON_NOT_AVAILABLE = 0,
    REASON_ALU           = 1,
    REASON_WAITCNT       = 2,
    REASON_INTERNAL      = 3,
    REASON_BARRIER       = 4,
    REASON_ARBITER       = 5,
    REASON_EX_STALL      = 6,
    REASON_OTHER_WAIT    = 7,
    REASON_SLEEP         = 8,
    REASON_LAST          = 9
};

enum pcsample_arb_issue_state
{
    ISSUE_VALU       = 0,
    ISSUE_MATRIX     = 1,
    ISSUE_LDS        = 2,
    ISSUE_LDS_DIRECT = 3,
    ISSUE_SCALAR     = 4,
    ISSUE_VMEM_TEX   = 5,
    ISSUE_FLAT       = 6,
    ISSUE_EXP        = 7,
    ISSUE_MISC       = 8,
    ISSUE_BRMSG      = 9,
    ISSUE_LAST       = 10
};
}  // namespace PCSAMPLE
|
||||
|
||||
/**
 * Flag byte at the start of every parsed sample (eight bits of bit-fields).
 */
typedef struct
{
    uint8_t valid : 1;
    // 0=reserved, 1=hosttrap, 2=stochastic, 3=perfcounter, >=4 possible v2?
    uint8_t type : 4;
    uint8_t has_stall_reason : 1;
    uint8_t has_wave_cnt : 1;
    uint8_t has_memory_counter : 1;
} pcsample_header_v1_t;
|
||||
|
||||
/**
 * Issue/stall snapshot of a sample, packed into 32 bits of bit-fields
 * (1 + 4 + 7 + 10 + 10).
 */
typedef struct
{
    uint32_t dual_issue_valu : 1;
    uint32_t inst_type : 4;

    uint32_t reason_not_issued : 7;
    uint32_t arb_state_issue : 10;
    uint32_t arb_state_stall : 10;
} pcsample_snapshot_v1_t;
|
||||
|
||||
/**
 * Memory counters of a sample: six bit-field counters (32 bits in total)
 * overlaid with a raw 32-bit view of the same storage.
 */
typedef union
{
    struct
    {
        uint32_t load_cnt : 6;
        uint32_t store_cnt : 6;
        uint32_t bvh_cnt : 3;
        uint32_t sample_cnt : 6;
        uint32_t ds_cnt : 6;
        uint32_t km_cnt : 5;
    };
    uint32_t raw;  // all counters viewed as one word
} pcsample_memorycounters_v1_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
pcsample_header_v1_t flags;
|
||||
uint8_t chiplet;
|
||||
uint8_t wave_id;
|
||||
uint8_t wave_issued : 1;
|
||||
uint8_t reserved : 7;
|
||||
uint32_t hw_id;
|
||||
|
||||
uint64_t pc;
|
||||
uint64_t exec_mask;
|
||||
uint32_t workgroud_id_x;
|
||||
uint32_t workgroud_id_y;
|
||||
uint32_t workgroud_id_z;
|
||||
|
||||
uint32_t wave_count;
|
||||
uint64_t timestamp;
|
||||
uint64_t correlation_id;
|
||||
|
||||
pcsample_snapshot_v1_t snapshot;
|
||||
|
||||
pcsample_memorycounters_v1_t memory_counters;
|
||||
} pcsample_v1_t;
|
||||
|
||||
/**
 * Allocation callback used by the parser. Arguments: out-pointer that receives
 * the sample storage, number of samples requested, user data. Returns the
 * number of samples the storage can hold.
 */
typedef uint64_t (*user_callback_t)(pcsample_v1_t**, uint64_t, void*);
|
||||
|
||||
/**
 * The types of errors to be returned by parse_buffer.
 */
enum PCSAMPLE_STATUS
{
    /** No error */
    PCSAMPLE_STATUS_SUCCESS = 0,
    /**
     * Input is valid, but the parser detected it was unable to unwrap some correlation_id(s).
     * The returned data is valid except for possible incorrect correlation_ids.
     * Error is nonfatal and parsing will continue.
     */
    PCSAMPLE_STATUS_PARSER_ERROR = 1,
    /** Unknown/generic error */
    PCSAMPLE_STATUS_GENERIC_ERROR = 2,
    /** The parser has seen an invalid sample type */
    PCSAMPLE_STATUS_INVALID_SAMPLE = 3,
    /** The user callback has returned 0 or a memory size larger than requested */
    PCSAMPLE_STATUS_CALLBACK_ERROR = 4,
    /**
     * Upcoming_samples_t has suggested there are more incoming samples than
     * the parser can read without going out of bounds (buffer_size).
     */
    PCSAMPLE_STATUS_OUT_OF_BOUNDS_ERROR = 5,
    /** An unsupported GFXIP major version was passed to the parser. */
    PCSAMPLE_STATUS_INVALID_GFXIP = 6,
    /** Last error type */
    PCSAMPLE_STATUS_LAST = 7
};

/** Integer type used to carry PCSAMPLE_STATUS values. */
typedef int pcsample_status_t;
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
#include "lib/rocprofiler/pc_sampling/parser/pc_record_interface.hpp"
|
||||
|
||||
uint64_t
|
||||
PCSamplingParserContext::alloc(pcsample_v1_t** buffer, uint64_t size)
|
||||
{
|
||||
std::unique_lock<std::shared_mutex> lock(mut);
|
||||
assert(buffer != nullptr);
|
||||
data.emplace_back(std::make_unique<PCSamplingData>(size));
|
||||
*buffer = data.back()->samples.data();
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
 * Parses one batch of raw samples with the decoder matching gfxip_major
 * (9 -> GFX9, 11 -> GFX11, 0 -> gfx_unknown), then notifies midway_signal.
 * When bRocrBufferFlip is set and parsing succeeded, the pending forget list
 * is flushed and its status is returned instead.
 * @returns PCSAMPLE_STATUS_INVALID_GFXIP for any other gfxip_major; in that
 * case nothing is parsed and midway_signal is not notified.
 */
pcsample_status_t
PCSamplingParserContext::parse(const upcoming_samples_t& upcoming,
                               const generic_sample_t*   data_,
                               int                       gfxip_major,
                               std::condition_variable&  midway_signal,
                               bool                      bRocrBufferFlip)
{
    // Template instantiation is faster!
    pcsample_status_t status = PCSAMPLE_STATUS_SUCCESS;
    switch(gfxip_major)
    {
        case 9: status = _parse<GFX9>(upcoming, data_); break;
        case 11: status = _parse<GFX11>(upcoming, data_); break;
        case 0: status = _parse<gfx_unknown>(upcoming, data_); break;
        default: return PCSAMPLE_STATUS_INVALID_GFXIP;
    }
    midway_signal.notify_all();

    const bool flush_now = bRocrBufferFlip && status == PCSAMPLE_STATUS_SUCCESS;
    if(flush_now) return flushForgetList();
    return status;
}
|
||||
|
||||
void
|
||||
PCSamplingParserContext::newDispatch(const dispatch_pkt_id_t& pkt)
|
||||
{
|
||||
std::unique_lock<std::shared_mutex> lock(mut);
|
||||
corr_map->newDispatch(pkt);
|
||||
active_dispatches[pkt.correlation_id] = pkt;
|
||||
}
|
||||
|
||||
void
|
||||
PCSamplingParserContext::completeDispatch(uint64_t correlation_id)
|
||||
{
|
||||
std::unique_lock<std::shared_mutex> lock(mut);
|
||||
forget_list.emplace(correlation_id);
|
||||
}
|
||||
|
||||
/**
 * Processes every correlation id queued by completeDispatch(): for each id
 * that is still an active dispatch, a completion record is generated, the
 * correlation map forgets the packet and the dispatch is dropped. The forget
 * list is emptied afterwards.
 * @returns PCSAMPLE_STATUS_PARSER_ERROR if at least one queued id was not an
 * active dispatch, PCSAMPLE_STATUS_SUCCESS otherwise.
 */
pcsample_status_t
PCSamplingParserContext::flushForgetList()
{
    std::unique_lock<std::shared_mutex> lock(mut);
    pcsample_status_t                   result = PCSAMPLE_STATUS_SUCCESS;

    for(uint64_t id : forget_list)
    {
        auto entry = active_dispatches.find(id);
        if(entry == active_dispatches.end())
        {
            // Completion reported for a dispatch that was never registered.
            result = PCSAMPLE_STATUS_PARSER_ERROR;
            continue;
        }

        const auto& pkt = entry->second;
        generate_id_completion_record(pkt);
        corr_map->forget(pkt);
        active_dispatches.erase(entry);  // pkt is not used past this point
    }
    forget_list.clear();
    return result;
}
|
||||
|
||||
bool
|
||||
PCSamplingParserContext::shouldFlipRocrBuffer(const dispatch_pkt_id_t& pkt) const
|
||||
{
|
||||
std::shared_lock<std::shared_mutex> lock(mut);
|
||||
return corr_map->checkDispatch(pkt);
|
||||
}
|
||||
+135
@@ -0,0 +1,135 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/correlation.hpp"
|
||||
#include "lib/rocprofiler/pc_sampling/parser/parser_types.hpp"
|
||||
|
||||
struct PCSamplingData
|
||||
{
|
||||
PCSamplingData(size_t size)
|
||||
: samples(size){};
|
||||
PCSamplingData& operator=(PCSamplingData&) = delete;
|
||||
|
||||
std::vector<pcsample_v1_t> samples;
|
||||
};
|
||||
|
||||
class PCSamplingParserContext
|
||||
{
|
||||
public:
|
||||
PCSamplingParserContext()
|
||||
: corr_map(std::make_unique<Parser::CorrelationMap>()){};
|
||||
/**
|
||||
* @brief Allocates some memory. TODO: Translate to Jonathan's buffer implementation.
|
||||
* @param[out] buffer Pointer where samples are to be written to.
|
||||
* @param[in] size Number of samples requested.
|
||||
* @returns Number of samples actually allocated on *buffer.
|
||||
*/
|
||||
uint64_t alloc(pcsample_v1_t** buffer, uint64_t size);
|
||||
|
||||
/**
|
||||
* @brief Parses a chunk of samples.
|
||||
* Call only finishes when all pc sampling records have been generated on the user buffer.
|
||||
* As an intermediate step, "midway_signal" signals when it's safe to reuse/delete "data".
|
||||
* @param[in] upcoming Metadata of upcoming samples
|
||||
* @param[in] data Pointer containing the raw hardware samples. Must match upcoming.num_samples.
|
||||
* @param[in] gfxip_major GFXIP of these samples (GFX9==9/GFX11==11/gfx_unknown==0).
|
||||
* @param[in] midway_signal notifies_all when the samples have been processed.
|
||||
* @param[in] bFlushCorrelationIds Set to true if this is the last batch from a ROCr buffer.
|
||||
* @returns PCSAMPLE_STATUS_SUCCESS on success.
|
||||
* @returns PCSAMPLE_STATUS_PARSER_ERROR (non-fatal) if one or more samples has invalid
|
||||
* correlation ID(s).
|
||||
* @returns PCSAMPLE_STATUS_INVALID_GFXIP (fatal) on GFXIP != 0,9,11.
|
||||
* @returns PCSAMPLE_STATUS_CALLBACK_ERROR (fatal) if memory allocation fails.
|
||||
*/
|
||||
pcsample_status_t parse(const upcoming_samples_t& upcoming,
|
||||
const generic_sample_t* data,
|
||||
int gfxip_major,
|
||||
std::condition_variable& midway_signal,
|
||||
bool bFlushCorrelationIds);
|
||||
|
||||
/**
|
||||
* @brief Signals a dispatch completion.
|
||||
* @param[in] correlation_id Correlation ID of the completed dispatch.
|
||||
*/
|
||||
void completeDispatch(uint64_t correlation_id);
|
||||
/**
|
||||
* @brief Signals a new dispatch was started.
|
||||
* Please use shouldFlipRocrBuffer() to check if the buffer must be flipped before forwarding
|
||||
* the dispatch.
|
||||
* @param[in] pkt Struct containing the dispatch packet data.
|
||||
*/
|
||||
void newDispatch(const dispatch_pkt_id_t& pkt);
|
||||
/**
|
||||
* @brief Checkes if a dispatch packet will generate a collision with dorbell_id and
|
||||
* dispatch_index.
|
||||
* @param[in] pkt Struct containing the dispatch packet data.
|
||||
* @returns boolean
|
||||
*/
|
||||
bool shouldFlipRocrBuffer(const dispatch_pkt_id_t& pkt) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief Parses the given input data and generates pc sampling records.
|
||||
* Calls generate_upcoming_pc_record().
|
||||
*/
|
||||
template <typename GFX>
|
||||
pcsample_status_t _parse(const upcoming_samples_t& upcoming, const generic_sample_t* data_)
|
||||
{
|
||||
std::shared_lock<std::shared_mutex> lock(mut);
|
||||
|
||||
pcsample_status_t status = PCSAMPLE_STATUS_SUCCESS;
|
||||
uint64_t pkt_counter = upcoming.num_samples;
|
||||
auto dev = upcoming.device;
|
||||
bool bIsHostTrap = upcoming.which_sample_type == AMD_HOST_TRAP_V1;
|
||||
|
||||
while(pkt_counter > 0)
|
||||
{
|
||||
pcsample_v1_t* samples = nullptr;
|
||||
uint64_t memsize = alloc(&samples, pkt_counter);
|
||||
|
||||
if(memsize == 0 || memsize > pkt_counter) return PCSAMPLE_STATUS_CALLBACK_ERROR;
|
||||
|
||||
auto* map = corr_map.get();
|
||||
if(bIsHostTrap)
|
||||
status |= add_upcoming_samples<true, GFX>(dev, data_, memsize, map, samples);
|
||||
else
|
||||
status |= add_upcoming_samples<false, GFX>(dev, data_, memsize, map, samples);
|
||||
|
||||
data_ += memsize;
|
||||
pkt_counter -= memsize;
|
||||
generate_upcoming_pc_record(samples, memsize);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Causes forget_corr_id records to be generated from forget_list. Clears forget_list.
|
||||
* Calls generate_id_completion_record()
|
||||
*/
|
||||
pcsample_status_t flushForgetList();
|
||||
static void generate_id_completion_record(const dispatch_pkt_id_t& pkt) { (void) pkt; };
|
||||
static void generate_upcoming_pc_record(const pcsample_v1_t* samples, size_t num_samples)
|
||||
{
|
||||
(void) samples;
|
||||
(void) num_samples;
|
||||
};
|
||||
|
||||
//! Maps doorbells and dispatch_index to correlation_id
|
||||
std::unique_ptr<Parser::CorrelationMap> corr_map;
|
||||
//! Data allocated to store samples. Temporary.
|
||||
std::vector<std::unique_ptr<PCSamplingData>> data;
|
||||
//! Dispatches not yet completed.
|
||||
std::unordered_map<uint64_t, dispatch_pkt_id_t> active_dispatches;
|
||||
//! List of correlation ids whose dispatches have been completed and can be forgotten after the
|
||||
//! buffer flip.
|
||||
std::unordered_set<uint64_t> forget_list;
|
||||
|
||||
mutable std::shared_mutex mut;
|
||||
};
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
/**
|
||||
* ######## ROCR Definitions ########
|
||||
* Some data types have been modified for better type safety.
|
||||
*/
|
||||
|
||||
//! Packet kinds; stored in the leading `type` field of every packet.
enum packet_header_t
{
    AMD_GENERIC_SAMPLE        = 0,
    AMD_DOORBELL_TO_QUEUE_MAP = 3,
    AMD_DISPATCH_PKT_WRAP     = 4,
    AMD_UPCOMING_SAMPLES      = 5,
    AMD_DISPATCH_PKT_ID       = 6,
};

//! Sample flavours; stored in upcoming_samples_t::which_sample_type.
enum upcoming_sample_t
{
    AMD_HOST_TRAP_V1 = 1,
    AMD_SNAPSHOT_V1  = 2
};

//! Storage type of the `type` field.
typedef uint32_t sample_enum;

//! 32-bit device handle wrapped in a struct for type safety.
typedef struct
{
    uint32_t handle;
} device_handle;

//! Storage type of upcoming_samples_t::which_sample_type.
typedef uint32_t upcoming_sample_enum;

//! One reserved 32-bit word.
typedef struct
{
    uint32_t _;
} reserved_type;

//! Generic view of a packet: the type tag followed by 15 reserved words.
typedef struct
{
    sample_enum   type;
    reserved_type _[15];
} generic_sample_t;

//! Dispatch identification packet.
typedef struct
{
    sample_enum   type;
    device_handle device;
    uint32_t      doorbell_id;
    uint64_t      queue_size;
    uint64_t      write_index;
    uint64_t      read_index;
    uint64_t      correlation_id;
    reserved_type _[4];
} dispatch_pkt_id_t;

//! Header packet announcing that `num_samples` sample packets follow.
typedef struct
{
    sample_enum          type;
    device_handle        device;
    upcoming_sample_enum which_sample_type;
    reserved_type        reserved0;
    uint64_t             num_samples;
    reserved_type        _[10];
} upcoming_samples_t;

//! Raw host-trap sample.
typedef struct
{
    uint64_t      pc;
    uint64_t      exec_mask;
    uint32_t      workgroud_id_x;
    uint32_t      workgroud_id_y;
    uint32_t      workgroud_id_z;
    uint32_t      chiplet_and_wave_id;
    uint32_t      hw_id;
    reserved_type reserved[3];
    uint64_t      timestamp;
    uint64_t      correlation_id;
} perf_sample_host_trap_v1;

//! Raw snapshot sample; same field order as the host-trap sample, with three snapshot data
//! words in place of the reserved words.
typedef struct
{
    uint64_t pc;
    uint64_t exec_mask;
    uint32_t workgroud_id_x;
    uint32_t workgroud_id_y;
    uint32_t workgroud_id_z;
    uint32_t chiplet_and_wave_id;
    uint32_t hw_id;
    uint32_t perf_snapshot_data;
    uint32_t perf_snapshot_data1;
    uint32_t perf_snapshot_data2;
    uint64_t timestamp;
    uint64_t correlation_id;
} perf_sample_snapshot_v1;

//! Overlay of every packet layout above.
typedef union
{
    generic_sample_t         generic;
    perf_sample_snapshot_v1  snap;
    perf_sample_host_trap_v1 host;
    upcoming_samples_t       upcoming;
    dispatch_pkt_id_t        dispatch_id;
} packet_union_t;
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
# Unit tests for the PC sampling parser (GoogleTest executable: pcs-parser-test).
rocprofiler_deactivate_clang_tidy()

include(GoogleTest)

set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES pcs_parser.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_HEADERS mocks.hpp)

add_executable(pcs-parser-test)

target_sources(pcs-parser-test PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES}
                                       ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_HEADERS})
# $<TARGET_OBJECTS:rocprofiler::rocprofiler-object-library>)

target_link_libraries(
    pcs-parser-test
    PRIVATE rocprofiler::rocprofiler-common-library
            rocprofiler::rocprofiler-static-library GTest::gtest GTest::gtest_main)

# Scan this directory's own test sources for TEST() macros. The variable named here
# previously belonged to a different test directory.
gtest_add_tests(
    TARGET pcs-parser-test
    SOURCES ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES}
    TEST_LIST pcs-parser-tests_TESTS
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

set_tests_properties(${pcs-parser-tests_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
|
||||
@@ -0,0 +1,268 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/correlation.hpp"
|
||||
|
||||
/**
 * Evaluates the parser call `x` exactly once. If the result differs from
 * PCSAMPLE_STATUS_SUCCESS, prints "<file>:<line> Parser error: <code>" to stderr and terminates
 * the process with exit code 1.
 *
 * The body is wrapped in do { } while(0) so that `CHECK_PARSER(x);` forms a single statement
 * and can be used safely inside an unbraced if/else. The trailing semicolon is supplied by the
 * caller.
 */
#define CHECK_PARSER(x)                                                                            \
    do                                                                                             \
    {                                                                                              \
        int val = (x);                                                                             \
        if(val != PCSAMPLE_STATUS_SUCCESS)                                                         \
        {                                                                                          \
            std::cerr << __FILE__ << ':' << __LINE__ << " Parser error: " << val << std::endl;     \
            exit(1);                                                                               \
        }                                                                                          \
    } while(0)
|
||||
|
||||
/**
|
||||
* Mimics the rocprofiler buffer sent to the parser.
|
||||
*/
|
||||
class MockRuntimeBuffer
|
||||
{
|
||||
public:
|
||||
MockRuntimeBuffer() { packets = {}; };
|
||||
|
||||
//! Adds a packet to the buffer
|
||||
void submit(const packet_union_t& packet) { packets.push_back(packet); };
|
||||
|
||||
//! Submits a "upcoming_samples_t" packet signaling the next num_samples packets are PC samples
|
||||
void genUpcomingSamples(int num_samples)
|
||||
{
|
||||
packet_union_t uni;
|
||||
::memset(&uni, 0, sizeof(uni));
|
||||
uni.upcoming.type = AMD_UPCOMING_SAMPLES;
|
||||
uni.upcoming.which_sample_type = AMD_SNAPSHOT_V1;
|
||||
uni.upcoming.num_samples = num_samples;
|
||||
submit(uni);
|
||||
}
|
||||
|
||||
std::vector<std::vector<pcsample_v1_t>> get_parsed_buffer(int GFXIP_MAJOR)
|
||||
{
|
||||
parsed_data = {};
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) packets.data(),
|
||||
packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
&alloc_parse_memory,
|
||||
this));
|
||||
|
||||
return parsed_data;
|
||||
}
|
||||
|
||||
static uint64_t alloc_parse_memory(pcsample_v1_t** sample, uint64_t req_size, void* userdata)
|
||||
{
|
||||
auto* buffer = reinterpret_cast<MockRuntimeBuffer*>(userdata);
|
||||
buffer->parsed_data.push_back(std::vector<pcsample_v1_t>(req_size));
|
||||
*sample = buffer->parsed_data.back().data();
|
||||
return req_size;
|
||||
}
|
||||
|
||||
std::vector<packet_union_t> packets;
|
||||
std::vector<std::vector<pcsample_v1_t>> parsed_data;
|
||||
};
|
||||
|
||||
/**
 * Mimics a HSA doorbell. Every live instance of this class holds an ID (handler) that no other
 * live instance holds, drawn from a fixed pool of num_unique_bells IDs. An ID returns to the
 * pool when its instance is destroyed, so it may be handed out again later.
 */
class MockDoorBell
{
public:
    //! Takes an ID out of the pool. The pool must not be empty (checked by assert only).
    MockDoorBell()
    : handler(getUniqueId())
    {
        available_ids.erase(handler);
    }

    //! Returns this instance's ID to the pool.
    ~MockDoorBell() { available_ids.insert(handler); }

    const size_t            handler;
    static constexpr size_t num_unique_bells = 4;

private:
    //! Picks (without removing) one of the currently available IDs.
    static size_t getUniqueId()
    {
        assert(available_ids.size() > 0);
        return *available_ids.begin();
    }

    //! Builds the full pool {0, ..., num_unique_bells - 1}.
    static std::unordered_set<size_t> reset_available_ids()
    {
        std::unordered_set<size_t> set;
        for(size_t i = 0; i < num_unique_bells; i++)
            set.insert(i);
        return set;
    }

    // Defined inline (C++17): this header may be included from several translation units, and
    // an out-of-class non-inline definition would then be defined more than once at link time.
    inline static std::unordered_set<size_t> available_ids = reset_available_ids();
};
|
||||
|
||||
/**
|
||||
* Mimics a HSA queue. Every live instance of this class has an unique ID and a doorbell.
|
||||
* The read and write indexes mimics the locations in the queue (modulo queue_size) for the
|
||||
* read and write pointers.
|
||||
* Creating an instance of this class automatically adds a queue creation packet to the buffer.
|
||||
*/
|
||||
class MockQueue
|
||||
{
|
||||
public:
|
||||
MockQueue(int size_, std::shared_ptr<MockRuntimeBuffer>& buffer_)
|
||||
: id(cur_unique_id)
|
||||
, size(size_)
|
||||
, doorbell()
|
||||
, buffer(buffer_){};
|
||||
|
||||
//! Submits a packet to the runtime buffer
|
||||
void submit(const packet_union_t& pkt) { buffer->submit(pkt); }
|
||||
void print() { std::cout << "Queue - id:" << id << " bell:" << doorbell.handler << std::endl; }
|
||||
|
||||
//! Increments the read_index.
|
||||
void inc_read_index(int dispatch_id)
|
||||
{
|
||||
async_read_index.insert(dispatch_id);
|
||||
while(async_read_index.erase(read_index))
|
||||
read_index++;
|
||||
}
|
||||
|
||||
int read_index = 0;
|
||||
int write_index = 0;
|
||||
size_t active_dispatches =
|
||||
0; //! Number of dispatches that are still able to generate PC samples
|
||||
int last_known_read_pkt = 0;
|
||||
std::unordered_set<int> async_read_index{};
|
||||
|
||||
const size_t id;
|
||||
const size_t size;
|
||||
const MockDoorBell doorbell;
|
||||
std::shared_ptr<MockRuntimeBuffer> const buffer;
|
||||
|
||||
private:
|
||||
static size_t cur_unique_id;
|
||||
};
|
||||
size_t MockQueue::cur_unique_id = 1;
|
||||
|
||||
/**
|
||||
* Mimics a kernel dispatch.
|
||||
* Creating an instance of this class automatically adds a dispatch creation packet to the buffer.
|
||||
*/
|
||||
class MockDispatch
|
||||
{
|
||||
public:
|
||||
MockDispatch(std::shared_ptr<MockQueue>& queue_)
|
||||
: queue(queue_)
|
||||
, dispatch_id(queue->write_index)
|
||||
, doorbell_id(queue->doorbell.handler)
|
||||
, unique_id(cur_unique_id)
|
||||
{
|
||||
// Ensure queues are not holding more dispatches than queue_size.
|
||||
assert(queue->active_dispatches < queue->size);
|
||||
queue->active_dispatches++;
|
||||
cur_unique_id++;
|
||||
|
||||
packet_union_t uni;
|
||||
::memset(&uni, 0, sizeof(uni));
|
||||
uni.dispatch_id.type = AMD_DISPATCH_PKT_ID;
|
||||
uni.dispatch_id.doorbell_id = doorbell_id;
|
||||
uni.dispatch_id.queue_size = queue->size;
|
||||
uni.dispatch_id.write_index = dispatch_id;
|
||||
uni.dispatch_id.read_index = queue->read_index;
|
||||
uni.dispatch_id.correlation_id = unique_id;
|
||||
queue->submit(uni);
|
||||
queue->write_index++;
|
||||
};
|
||||
|
||||
virtual ~MockDispatch()
|
||||
{
|
||||
queue->active_dispatches--;
|
||||
if(queue_read_inc) return;
|
||||
|
||||
queue->inc_read_index((int) dispatch_id);
|
||||
queue_read_inc = true;
|
||||
}
|
||||
|
||||
//! Returns the "correlation_id" seen by the trap handler.
|
||||
uint64_t getMockId()
|
||||
{
|
||||
return Parser::CorrelationMap::wrap_correlation_id(doorbell_id, dispatch_id, queue->size);
|
||||
};
|
||||
|
||||
//! Submits a packet to the buffer
|
||||
void submit(const packet_union_t& pkt) { queue->submit(pkt); }
|
||||
void submit(const perf_sample_snapshot_v1& snap)
|
||||
{
|
||||
queue->submit(packet_union_t{.snap = snap});
|
||||
}
|
||||
void print()
|
||||
{
|
||||
std::cout << "Dispatch - un_id:" << unique_id << " bell:" << doorbell_id
|
||||
<< " ds_id:" << dispatch_id << std::endl;
|
||||
}
|
||||
|
||||
std::shared_ptr<MockQueue> const queue;
|
||||
const size_t dispatch_id;
|
||||
const size_t doorbell_id;
|
||||
const size_t unique_id;
|
||||
static size_t cur_unique_id;
|
||||
|
||||
private:
|
||||
bool queue_read_inc = false;
|
||||
};
|
||||
size_t MockDispatch::cur_unique_id = 0;
|
||||
|
||||
/**
|
||||
* Lightweight class to represent a wave in the particular dispatch.
|
||||
* Capable of generating PC samples and submiting them to the buffer.
|
||||
* Instead of generating a valid program counter, this class uses the snapshot.pc field to
|
||||
* store the original dispatch's unique_id for later correctness verification.
|
||||
*/
|
||||
class MockWave
|
||||
{
|
||||
public:
|
||||
MockWave(const std::shared_ptr<MockDispatch>& dispatch_)
|
||||
: dispatch(dispatch_)
|
||||
{}
|
||||
|
||||
void genPCSample()
|
||||
{
|
||||
packet_union_t uni;
|
||||
::memset(&uni, 0, sizeof(uni));
|
||||
uni.snap.pc = dispatch->unique_id;
|
||||
uni.snap.correlation_id = dispatch->getMockId();
|
||||
dispatch->submit(uni);
|
||||
};
|
||||
void print()
|
||||
{
|
||||
std::cout << "Gen: " << dispatch->doorbell_id << " "
|
||||
<< (dispatch->dispatch_id % dispatch->queue->size) << " from "
|
||||
<< dispatch->unique_id << std::endl;
|
||||
}
|
||||
|
||||
std::shared_ptr<MockDispatch> const dispatch;
|
||||
};
|
||||
+798
@@ -0,0 +1,798 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstddef>
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/pc_record_interface.hpp"
|
||||
#include "lib/rocprofiler/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
//! GFXIP major version handed to parse_buffer() by the tests in this file.
#define GFXIP_MAJOR 9
|
||||
|
||||
// Appends to the in-scope `snapshots` container a snapshot with inst_type = ::PCSAMPLE::x and
// every other listed field set to zero.
#define TYPECHECK(x)                                                                               \
    snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu   = 0,                             \
                                               .inst_type         = ::PCSAMPLE::x,                 \
                                               .reason_not_issued = 0,                             \
                                               .arb_state_issue   = 0,                             \
                                               .arb_state_stall   = 0});

// Emits one TYPECHECK entry per instruction type listed below, in this order.
#define UNROLL_TYPECHECK()                                                                         \
    TYPECHECK(TYPE_VALU);                                                                          \
    TYPECHECK(TYPE_MATRIX);                                                                        \
    TYPECHECK(TYPE_SCALAR);                                                                        \
    TYPECHECK(TYPE_TEX);                                                                           \
    TYPECHECK(TYPE_LDS);                                                                           \
    TYPECHECK(TYPE_FLAT);                                                                          \
    TYPECHECK(TYPE_EXP);                                                                           \
    TYPECHECK(TYPE_MESSAGE);                                                                       \
    TYPECHECK(TYPE_BARRIER);                                                                       \
    TYPECHECK(TYPE_BRANCH_NOT_TAKEN);                                                              \
    TYPECHECK(TYPE_BRANCH_TAKEN);                                                                  \
    TYPECHECK(TYPE_JUMP);                                                                          \
    TYPECHECK(TYPE_OTHER);                                                                         \
    TYPECHECK(TYPE_NO_INST);
|
||||
|
||||
// Appends to the in-scope `snapshots` container a snapshot with
// reason_not_issued = ::PCSAMPLE::x and every other listed field set to zero.
#define REASONCHECK(x)                                                                             \
    snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu   = 0,                             \
                                               .inst_type         = 0,                             \
                                               .reason_not_issued = ::PCSAMPLE::x,                 \
                                               .arb_state_issue   = 0,                             \
                                               .arb_state_stall   = 0});

// Emits one REASONCHECK entry per reason listed below, in this order. The parameter is unused.
#define UNROLL_REASONCHECK(x)                                                                      \
    REASONCHECK(REASON_NOT_AVAILABLE);                                                             \
    REASONCHECK(REASON_ALU);                                                                       \
    REASONCHECK(REASON_WAITCNT);                                                                   \
    REASONCHECK(REASON_INTERNAL);                                                                  \
    REASONCHECK(REASON_BARRIER);                                                                   \
    REASONCHECK(REASON_ARBITER);                                                                   \
    REASONCHECK(REASON_EX_STALL);                                                                  \
    REASONCHECK(REASON_OTHER_WAIT);
|
||||
|
||||
// Appends to the in-scope `snapshots` container a snapshot whose arb_state_issue has bit
// ::PCSAMPLE::x set and whose arb_state_stall has bit ::PCSAMPLE::y set; the other listed
// fields are zero.
#define ARBCHECK1(x, y)                                                                            \
    snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu   = 0,                             \
                                               .inst_type         = 0,                             \
                                               .reason_not_issued = 0,                             \
                                               .arb_state_issue   = 1 << ::PCSAMPLE::x,            \
                                               .arb_state_stall   = 1 << ::PCSAMPLE::y});

// Pairs issue bit x with every stall bit listed below, in this order.
#define ARBCHECK2(x)                                                                               \
    ARBCHECK1(x, ISSUE_VALU);                                                                      \
    ARBCHECK1(x, ISSUE_MATRIX);                                                                    \
    ARBCHECK1(x, ISSUE_SCALAR);                                                                    \
    ARBCHECK1(x, ISSUE_VMEM_TEX);                                                                  \
    ARBCHECK1(x, ISSUE_LDS);                                                                       \
    ARBCHECK1(x, ISSUE_FLAT);                                                                      \
    ARBCHECK1(x, ISSUE_EXP);                                                                       \
    ARBCHECK1(x, ISSUE_MISC);

// Emits every (issue, stall) combination: issue bits in the order below, stall bits in
// ARBCHECK2 order.
#define UNROLL_ARBCHECK()                                                                          \
    ARBCHECK2(ISSUE_VALU);                                                                         \
    ARBCHECK2(ISSUE_MATRIX);                                                                       \
    ARBCHECK2(ISSUE_SCALAR);                                                                       \
    ARBCHECK2(ISSUE_VMEM_TEX);                                                                     \
    ARBCHECK2(ISSUE_LDS);                                                                          \
    ARBCHECK2(ISSUE_FLAT);                                                                         \
    ARBCHECK2(ISSUE_EXP);                                                                          \
    ARBCHECK2(ISSUE_MISC);
|
||||
|
||||
//! Random engine shared by the randomized tests in this file; constructed with the fixed
//! seed 1.
std::mt19937 rdgen(1);
|
||||
|
||||
// Smoke test: a parser context can be default-constructed and destroyed.
TEST(pcs_parser_context, init)
{
    PCSamplingParserContext context;
}
|
||||
|
||||
/**
|
||||
* Sample user memory allocation callback.
|
||||
* It expects userdata to be cast-able to a pointer to
|
||||
* std::vector<std::pair<pcsample_v1_t*, uint64_t>>
|
||||
*/
|
||||
static uint64_t
|
||||
alloc_callback(pcsample_v1_t** buffer, uint64_t size, void* userdata)
|
||||
{
|
||||
*buffer = new pcsample_v1_t[size];
|
||||
auto& vector = *reinterpret_cast<std::vector<std::pair<pcsample_v1_t*, uint64_t>>*>(userdata);
|
||||
vector.push_back({*buffer, size});
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the MockWave dispatch's unique_id store in the pc field to verify
|
||||
* the reconstructed correlation_id.
|
||||
*/
|
||||
static bool
|
||||
check_samples(pcsample_v1_t* samples, uint64_t size)
|
||||
{
|
||||
for(size_t i = 0; i < size; i++)
|
||||
if(samples[i].correlation_id != samples[i].pc) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simplest mock classes use, generates a single queue+dispatch with 2 PC samples.
|
||||
*/
|
||||
TEST(pcs_parser_correlation_id, hello_world)
|
||||
{
|
||||
std::shared_ptr<MockRuntimeBuffer> buffer = std::make_shared<MockRuntimeBuffer>();
|
||||
std::shared_ptr<MockQueue> queue = std::make_shared<MockQueue>(16, buffer);
|
||||
std::shared_ptr<MockDispatch> dispatch = std::make_shared<MockDispatch>(queue);
|
||||
|
||||
buffer->genUpcomingSamples(2);
|
||||
MockWave(dispatch).genPCSample();
|
||||
MockWave(dispatch).genPCSample();
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
alloc_callback,
|
||||
(void*) &all_allocations));
|
||||
|
||||
assert(all_allocations.size() == 1 && "HelloWorld: Incorrect number of callbacks");
|
||||
for(auto& sample : all_allocations)
|
||||
{
|
||||
assert(sample.second == 2 && "HelloWorld: Incorrect number of samples");
|
||||
assert(check_samples(sample.first, sample.second) &&
|
||||
"HelloWorld: parsed ID does not match correct ID");
|
||||
delete[] sample.first;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A little more complicated.
|
||||
* Generates a few dispatches for 2 different queues and samples in forward and reverse order.
|
||||
* Checks if the reconstructed correlation_id is correct.
|
||||
*/
|
||||
TEST(pcs_parser_correlation_id, reverse_wave_order)
|
||||
{
|
||||
std::shared_ptr<MockRuntimeBuffer> buffer = std::make_shared<MockRuntimeBuffer>();
|
||||
std::shared_ptr<MockQueue> queue1 = std::make_shared<MockQueue>(16, buffer);
|
||||
std::shared_ptr<MockQueue> queue2 = std::make_shared<MockQueue>(16, buffer);
|
||||
|
||||
std::vector<std::shared_ptr<MockDispatch>> dispatches;
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue2));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue2));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
|
||||
buffer->genUpcomingSamples(dispatches.size());
|
||||
for(auto it = dispatches.rbegin(); it != dispatches.rend(); it++)
|
||||
MockWave(*it).genPCSample();
|
||||
buffer->genUpcomingSamples(dispatches.size());
|
||||
for(auto it = dispatches.begin(); it != dispatches.end(); it++)
|
||||
MockWave(*it).genPCSample();
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
alloc_callback,
|
||||
(void*) &all_allocations));
|
||||
|
||||
assert(all_allocations.size() == 2 && "ReverseWaveOrder test: Incorrect number of callbacks");
|
||||
for(auto& sample : all_allocations)
|
||||
{
|
||||
assert(sample.second == dispatches.size() &&
|
||||
"ReverseWaveOrder: Incorrect number of samples");
|
||||
assert(check_samples(sample.first, sample.second) &&
|
||||
"ReverseWaveOrder: parsed ID does not match correct ID");
|
||||
delete[] sample.first;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a small queue and causes the dispatch_ids to wrap around a few times, and generates
|
||||
* a single sample per dispatch. Checks the parser is properly handling the wrapping of queues.
|
||||
*/
|
||||
TEST(pcs_parser_correlation_id, dispatch_wrapping)
|
||||
{
|
||||
const int num_samples = 32;
|
||||
std::shared_ptr<MockRuntimeBuffer> buffer = std::make_shared<MockRuntimeBuffer>();
|
||||
std::shared_ptr<MockQueue> queue = std::make_shared<MockQueue>(5, buffer);
|
||||
|
||||
for(int i = 0; i < num_samples; i++)
|
||||
{
|
||||
auto dispatch = std::make_shared<MockDispatch>(queue);
|
||||
buffer->genUpcomingSamples(1);
|
||||
MockWave(dispatch).genPCSample();
|
||||
}
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
alloc_callback,
|
||||
(void*) &all_allocations));
|
||||
|
||||
assert(all_allocations.size() == num_samples &&
|
||||
"RandomSamples test: Incorrect number of callbacks");
|
||||
for(auto& sample : all_allocations)
|
||||
{
|
||||
assert(sample.second == 1 && "RandomSamples: Incorrect number of samples");
|
||||
assert(check_samples(sample.first, sample.second) &&
|
||||
"RandomSamples: parsed ID does not match correct ID");
|
||||
delete[] sample.first;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a few queues with a few dispatchs per queue.
|
||||
* Adds random samples per dispatch, and checks the result.
|
||||
*/
|
||||
TEST(pcs_parser_correlation_id, random_samples)
|
||||
{
|
||||
const int num_samples = 1024;
|
||||
std::shared_ptr<MockRuntimeBuffer> buffer = std::make_shared<MockRuntimeBuffer>();
|
||||
std::shared_ptr<MockQueue> queue1 = std::make_shared<MockQueue>(16, buffer);
|
||||
std::shared_ptr<MockQueue> queue2 = std::make_shared<MockQueue>(16, buffer);
|
||||
std::shared_ptr<MockQueue> queue3 = std::make_shared<MockQueue>(16, buffer);
|
||||
std::shared_ptr<MockQueue> queue4 = std::make_shared<MockQueue>(16, buffer);
|
||||
|
||||
std::vector<std::shared_ptr<MockDispatch>> dispatches;
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue2));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue3));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue3));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue3));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue2));
|
||||
dispatches.push_back(std::make_shared<MockDispatch>(queue1));
|
||||
|
||||
buffer->genUpcomingSamples(num_samples);
|
||||
for(int i = 0; i < num_samples; i++)
|
||||
MockWave(dispatches[rdgen() % dispatches.size()]).genPCSample();
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
alloc_callback,
|
||||
(void*) &all_allocations));
|
||||
|
||||
assert(all_allocations.size() == 1 && "RandomSamples test: Incorrect number of callbacks");
|
||||
for(auto& sample : all_allocations)
|
||||
{
|
||||
assert(sample.second == num_samples && "RandomSamples: Incorrect number of samples");
|
||||
assert(check_samples(sample.first, sample.second) &&
|
||||
"RandomSamples: parsed ID does not match correct ID");
|
||||
delete[] sample.first;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hammers the parser by creating and destrying queues at random, adding dispatches at random
|
||||
* and generating PC samples at random. By default we use all 4 unique doorbells,
|
||||
* queue size is 16 and we generate 10k samples dispatch.
|
||||
*/
|
||||
TEST(pcs_parser_correlation_id, queue_hammer)
{
    // Stress parameters: number of random steps, size passed to every MockQueue, number of
    // queues (one per unique doorbell) and the modulus used to draw a random action.
    constexpr int NUM_ACTIONS = 10000;
    constexpr int QSIZE       = 16;
    constexpr int NUM_QUEUES  = MockDoorBell::num_unique_bells;
    constexpr int ACTION_MAX  = QSIZE * NUM_QUEUES / 2;

    auto buffer = std::make_shared<MockRuntimeBuffer>();

    std::array<std::shared_ptr<MockQueue>, NUM_QUEUES>                 queues;
    std::array<std::vector<std::shared_ptr<MockDispatch>>, NUM_QUEUES> active_dispatches;

    // Counters that only feed the statistics report printed after the loop.
    int    num_reset_queues         = 0;
    int    num_samples_generated    = 0;
    int    num_dispatches_generated = 0;
    double avg_q_occupancy          = 0;
    size_t max_q_occupancy          = 0;

    // Every queue is created before the first dispatch; each queue starts with one dispatch.
    for(auto& slot : queues)
        slot = std::make_shared<MockQueue>(QSIZE, buffer);
    for(int idx = 0; idx < NUM_QUEUES; ++idx)
        active_dispatches[idx].push_back(std::make_shared<MockDispatch>(queues[idx]));

    for(int step = 0; step < NUM_ACTIONS; ++step)
    {
        // Two random draws per step: the queue index first, the action second.
        const int q      = rdgen() % NUM_QUEUES;
        const int action = rdgen() % ACTION_MAX;

        auto& dispatches = active_dispatches[q];

        if(action == 0)
        {
            // Drop the dispatches, then the queue itself, and only then create the new queue.
            dispatches = {};
            queues[q].reset();
            queues[q] = std::make_shared<MockQueue>(QSIZE, buffer);
            ++num_reset_queues;
        }
        else if(action > ACTION_MAX / 2 && dispatches.size() > 1)
        {
            // Remove the oldest dispatch, always leaving at least one behind.
            dispatches.erase(dispatches.begin());
        }

        // Add a new dispatch unless the queue already tracks QSIZE of them.
        if(dispatches.size() < QSIZE)
        {
            dispatches.push_back(std::make_shared<MockDispatch>(queues[q]));
            num_dispatches_generated += 1;
        }

        // One "pc" sample per queue, taken from a randomly chosen live dispatch.
        buffer->genUpcomingSamples(NUM_QUEUES);
        for(auto& candidates : active_dispatches)
        {
            assert(candidates.size() > 0);
            std::shared_ptr<MockDispatch> picked = candidates[rdgen() % candidates.size()];
            MockWave(picked).genPCSample();

            ++num_samples_generated;
            avg_q_occupancy += candidates.size();
            max_q_occupancy = std::max(max_q_occupancy, candidates.size());
        }
    }

    std::cout << "Hammer Stats: " << std::endl;
    std::cout << "num_reset_queues: " << num_reset_queues << std::endl;
    std::cout << "num_samples_generated: " << num_samples_generated << std::endl;
    std::cout << "num_dispatches_generated: " << num_dispatches_generated << std::endl;
    std::cout << "Avg queue occupancy: " << avg_q_occupancy / (NUM_ACTIONS * NUM_QUEUES)
              << std::endl;
    std::cout << "Max queue occupancy: " << max_q_occupancy << "\n\n" << std::endl;

    // alloc_callback records every (pointer, count) pair it is asked for in this vector.
    std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;

    CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
                              buffer->packets.size(),
                              GFXIP_MAJOR,
                              alloc_callback,
                              (void*) &all_allocations));

    // Expect one callback per step, each carrying exactly one sample per queue.
    assert(all_allocations.size() == NUM_ACTIONS &&
           "QueueHammer test: Incorrect number of callbacks");
    for(auto& allocation : all_allocations)
    {
        pcsample_v1_t* samples     = allocation.first;
        size_t         num_samples = allocation.second;

        assert(num_samples == NUM_QUEUES && "QueueHammer: Incorrect number of samples");
        assert(check_samples(samples, num_samples) &&
               "QueueHammer: parsed ID does not match correct ID");
        delete[] samples;
    }
}
|
||||
|
||||
TEST(pcs_parser_correlation_id, multi_buffer)
{
    // One queue with two dispatches, all writing into the first buffer.
    auto firstBuffer = std::make_shared<MockRuntimeBuffer>();
    auto queue       = std::make_shared<MockQueue>(16, firstBuffer);
    auto dispatch1   = std::make_shared<MockDispatch>(queue);
    auto dispatch2   = std::make_shared<MockDispatch>(queue);

    // Four samples, alternating between the two dispatches.
    firstBuffer->genUpcomingSamples(4);
    MockWave(dispatch1).genPCSample();
    MockWave(dispatch2).genPCSample();
    MockWave(dispatch1).genPCSample();
    MockWave(dispatch2).genPCSample();

    // The second buffer is a copy of the first one without its two leading packets.
    auto        secondBuffer = std::make_shared<MockRuntimeBuffer>();
    const auto& source       = firstBuffer->packets;
    secondBuffer->packets.assign(source.begin() + 2, source.end());

    std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;

    CHECK_PARSER(parse_buffer((generic_sample_t*) firstBuffer->packets.data(),
                              firstBuffer->packets.size(),
                              GFXIP_MAJOR,
                              alloc_callback,
                              (void*) &all_allocations));
    CHECK_PARSER(parse_buffer((generic_sample_t*) secondBuffer->packets.data(),
                              secondBuffer->packets.size(),
                              GFXIP_MAJOR,
                              alloc_callback,
                              (void*) &all_allocations));

    // One callback per parsed buffer; only the second buffer's samples are verified.
    assert(all_allocations.size() == 2 && "MultiBuffer: Incorrect number of callbacks");
    auto& second_result = all_allocations[1];
    assert(second_result.second == 4 && "MultiBuffer: Incorrect number of samples");
    assert(check_samples(second_result.first, second_result.second) &&
           "MultiBuffer: parsed ID does not match correct ID");

    delete[] all_allocations[0].first;
    delete[] all_allocations[1].first;
}
|
||||
|
||||
/**
|
||||
* Benchmarks how fast the parser can process samples on a single threaded case
|
||||
* Current: 5600X with -Ofast, up to >140 million samples/s or ~9GB/s R/W (18GB/s bidirectional)
|
||||
*/
|
||||
static void
|
||||
Benchmark(bool bWarmup)
|
||||
{
|
||||
constexpr size_t SAMPLE_PER_DISPATCH = 8192;
|
||||
constexpr size_t DISP_PER_QUEUE = 12;
|
||||
constexpr size_t NUM_QUEUES = MockDoorBell::num_unique_bells;
|
||||
|
||||
std::shared_ptr<MockRuntimeBuffer> buffer = std::make_shared<MockRuntimeBuffer>();
|
||||
std::array<std::vector<std::shared_ptr<MockDispatch>>, NUM_QUEUES> active_dispatches;
|
||||
|
||||
for(size_t q = 0; q < NUM_QUEUES; q++)
|
||||
{
|
||||
std::shared_ptr<MockQueue> queue = std::make_shared<MockQueue>(DISP_PER_QUEUE * 2, buffer);
|
||||
for(size_t d = 0; d < DISP_PER_QUEUE; d++)
|
||||
active_dispatches[q].push_back(std::make_shared<MockDispatch>(queue));
|
||||
}
|
||||
|
||||
constexpr size_t TOTAL_NUM_SAMPLES = NUM_QUEUES * DISP_PER_QUEUE * SAMPLE_PER_DISPATCH;
|
||||
buffer->genUpcomingSamples(TOTAL_NUM_SAMPLES);
|
||||
|
||||
for(auto& queue : active_dispatches)
|
||||
for(auto& dispatch : queue)
|
||||
for(size_t i = 0; i < SAMPLE_PER_DISPATCH; i++)
|
||||
MockWave(dispatch).genPCSample();
|
||||
|
||||
std::pair<pcsample_v1_t*, size_t> userdata;
|
||||
userdata.first = new pcsample_v1_t[TOTAL_NUM_SAMPLES];
|
||||
userdata.second = TOTAL_NUM_SAMPLES;
|
||||
|
||||
auto t0 = std::chrono::system_clock::now();
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
[](pcsample_v1_t** sample, uint64_t size, void* userdata_) {
|
||||
auto* pair = reinterpret_cast<std::pair<pcsample_v1_t*, size_t>*>(
|
||||
userdata_);
|
||||
assert(TOTAL_NUM_SAMPLES == pair->second);
|
||||
*sample = pair->first;
|
||||
return size;
|
||||
},
|
||||
&userdata));
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
float samples_per_us = float(TOTAL_NUM_SAMPLES) / (t1 - t0).count() * 1E3f;
|
||||
|
||||
if(!bWarmup)
|
||||
{
|
||||
std::cout << "Benchmark: Parsed " << int(samples_per_us * 1E3f + 0.5f) * 1E-3f
|
||||
<< " Msample/s (";
|
||||
std::cout << int(sizeof(pcsample_v1_t) * samples_per_us) << " MB/s)" << std::endl;
|
||||
}
|
||||
|
||||
delete[] userdata.first;
|
||||
}
|
||||
|
||||
TEST(pcs_parser, benchmark)
{
    // One silent warm-up pass followed by three reported passes.
    Benchmark(true);
    for(int run = 0; run < 3; ++run)
        Benchmark(false);
}
|
||||
|
||||
class WaveSnapTest
|
||||
{
|
||||
public:
|
||||
WaveSnapTest()
|
||||
{
|
||||
buffer = std::make_shared<MockRuntimeBuffer>();
|
||||
queue = std::make_shared<MockQueue>(16, buffer);
|
||||
dispatch = std::make_shared<MockDispatch>(queue);
|
||||
}
|
||||
|
||||
void Test()
|
||||
{
|
||||
FillBuffers();
|
||||
CheckBuffers();
|
||||
}
|
||||
|
||||
virtual void FillBuffers() = 0;
|
||||
virtual void CheckBuffers() = 0;
|
||||
|
||||
void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall)
|
||||
{
|
||||
wave_cnt &= 0x3F;
|
||||
inst_type &= 0xF;
|
||||
reason &= 0x7;
|
||||
arb_issue &= 0xFF;
|
||||
arb_stall &= 0xFF;
|
||||
|
||||
perf_sample_snapshot_v1 snap;
|
||||
::memset(&snap, 0, sizeof(snap));
|
||||
snap.pc = dispatch->unique_id;
|
||||
snap.correlation_id = dispatch->getMockId();
|
||||
|
||||
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
|
||||
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
|
||||
snap.perf_snapshot_data1 = wave_cnt;
|
||||
|
||||
assert(dispatch.get());
|
||||
dispatch->submit(packet_union_t{.snap = snap});
|
||||
};
|
||||
|
||||
std::shared_ptr<MockRuntimeBuffer> buffer;
|
||||
std::shared_ptr<MockQueue> queue;
|
||||
std::shared_ptr<MockDispatch> dispatch;
|
||||
};
|
||||
|
||||
class WaveCntTest : public WaveSnapTest
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over all possible wave_cnt
|
||||
buffer->genUpcomingSamples(max_wave_number);
|
||||
for(size_t i = 0; i < max_wave_number; i++)
|
||||
genPCSample(i, GFX9::TYPE_LDS, GFX9::REASON_ALU, GFX9::ISSUE_VALU, GFX9::ISSUE_VALU);
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == max_wave_number);
|
||||
|
||||
for(size_t i = 0; i < max_wave_number; i++)
|
||||
assert(parsed[0][i].wave_count == i);
|
||||
}
|
||||
|
||||
const size_t max_wave_number = 64;
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class InstTypeTest : public WaveSnapTest
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over inst_type_issued
|
||||
UNROLL_TYPECHECK();
|
||||
buffer->genUpcomingSamples(GFX9::TYPE_LAST);
|
||||
for(int i = 0; i < GFX9::TYPE_LAST; i++)
|
||||
genPCSample(i, i, GFX9::REASON_ALU, GFX9::ISSUE_MATRIX, GFX9::ISSUE_MATRIX);
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == GFX9::TYPE_LAST);
|
||||
assert(snapshots.size() == GFX9::TYPE_LAST);
|
||||
|
||||
for(size_t i = 0; i < GFX9::TYPE_LAST; i++)
|
||||
assert(snapshots[i].inst_type == parsed[0][i].snapshot.inst_type);
|
||||
}
|
||||
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class StallReasonTest : public WaveSnapTest
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over reason_not_issued
|
||||
UNROLL_REASONCHECK();
|
||||
buffer->genUpcomingSamples(GFX9::REASON_LAST);
|
||||
for(int i = 0; i < GFX9::REASON_LAST; i++)
|
||||
genPCSample(i, GFX9::TYPE_MATRIX, i, GFX9::ISSUE_MATRIX, GFX9::ISSUE_MATRIX);
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == GFX9::REASON_LAST);
|
||||
assert(snapshots.size() == GFX9::REASON_LAST);
|
||||
|
||||
for(size_t i = 0; i < GFX9::REASON_LAST; i++)
|
||||
assert(snapshots[i].reason_not_issued == parsed[0][i].snapshot.reason_not_issued);
|
||||
}
|
||||
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class ArbStateTest : public WaveSnapTest
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over arb_state_issue
|
||||
UNROLL_ARBCHECK();
|
||||
buffer->genUpcomingSamples(GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
for(int i = 0; i < GFX9::ISSUE_LAST; i++)
|
||||
for(int j = 0; j < GFX9::ISSUE_LAST; j++)
|
||||
genPCSample(i, GFX9::TYPE_MATRIX, GFX9::REASON_ALU, 1 << i, 1 << j);
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
assert(snapshots.size() == GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
|
||||
for(size_t i = 0; i < GFX9::ISSUE_LAST * GFX9::ISSUE_LAST; i++)
|
||||
{
|
||||
auto& snap = snapshots[i];
|
||||
assert(snap.arb_state_issue == parsed[0][i].snapshot.arb_state_issue);
|
||||
assert(snap.arb_state_stall == parsed[0][i].snapshot.arb_state_stall);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
{
|
||||
void FillBuffers() override
|
||||
{
|
||||
buffer->genUpcomingSamples(16);
|
||||
for(int valid = 0; valid <= 1; valid++)
|
||||
for(int issued = 0; issued <= 1; issued++)
|
||||
for(int dual = 0; dual <= 1; dual++)
|
||||
for(int error = 0; error <= 1; error++)
|
||||
genPCSample(valid, issued, dual, error);
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
const int num_combinations = 16;
|
||||
auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == num_combinations);
|
||||
assert(compare.size() == num_combinations);
|
||||
|
||||
for(size_t i = 0; i < num_combinations; i++)
|
||||
{
|
||||
assert(compare[i].flags.valid == parsed[0][i].flags.valid);
|
||||
assert(compare[i].wave_issued == parsed[0][i].wave_issued);
|
||||
assert(compare[i].snapshot.dual_issue_valu == parsed[0][i].snapshot.dual_issue_valu);
|
||||
}
|
||||
}
|
||||
|
||||
union trap_snapshot_v1
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t valid : 1;
|
||||
uint32_t issued : 1;
|
||||
uint32_t dual : 1;
|
||||
uint32_t reserved : 23;
|
||||
uint32_t error : 1;
|
||||
uint32_t reserved2 : 5;
|
||||
};
|
||||
uint32_t raw;
|
||||
};
|
||||
|
||||
void genPCSample(bool valid, bool issued, bool dual, bool error)
|
||||
{
|
||||
pcsample_v1_t sample;
|
||||
::memset(&sample, 0, sizeof(sample));
|
||||
sample.pc = dispatch->unique_id;
|
||||
sample.correlation_id = dispatch->getMockId();
|
||||
|
||||
sample.flags.valid = valid && !error;
|
||||
sample.wave_issued = issued;
|
||||
sample.snapshot.dual_issue_valu = dual;
|
||||
|
||||
assert(dispatch.get());
|
||||
|
||||
compare.push_back(sample);
|
||||
|
||||
trap_snapshot_v1 snap;
|
||||
snap.valid = valid;
|
||||
snap.issued = issued;
|
||||
snap.dual = dual;
|
||||
snap.error = error;
|
||||
|
||||
perf_sample_snapshot_v1 pss;
|
||||
pss.perf_snapshot_data = snap.raw;
|
||||
pss.correlation_id = dispatch->getMockId();
|
||||
dispatch->submit(std::move(pss));
|
||||
};
|
||||
|
||||
std::vector<pcsample_v1_t> compare;
|
||||
};
|
||||
|
||||
class WaveOtherFieldsTest : public WaveSnapTest
|
||||
{
|
||||
void FillBuffers() override
|
||||
{
|
||||
buffer->genUpcomingSamples(3);
|
||||
genPCSample(1, 2, 3, 4, 5, 6, 7, 8); // Counting
|
||||
genPCSample(3, 5, 7, 11, 13, 17, 19, 23); // Some prime numbers
|
||||
genPCSample(23, 19, 17, 13, 11, 7, 5, 3); // Some reversed primes
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == 3);
|
||||
assert(compare.size() == 3);
|
||||
|
||||
for(size_t i = 0; i < 3; i++)
|
||||
{
|
||||
assert(parsed[0][i].flags.has_stall_reason == true);
|
||||
assert(parsed[0][i].flags.has_wave_cnt == true);
|
||||
assert(parsed[0][i].flags.has_memory_counter == false);
|
||||
|
||||
assert(compare[i].exec_mask == parsed[0][i].exec_mask);
|
||||
assert(compare[i].workgroud_id_x == parsed[0][i].workgroud_id_x);
|
||||
assert(compare[i].workgroud_id_y == parsed[0][i].workgroud_id_y);
|
||||
assert(compare[i].workgroud_id_z == parsed[0][i].workgroud_id_z);
|
||||
|
||||
assert(compare[i].chiplet == parsed[0][i].chiplet);
|
||||
assert(compare[i].wave_id == parsed[0][i].wave_id);
|
||||
assert(compare[i].hw_id == parsed[0][i].hw_id);
|
||||
assert(compare[i].correlation_id == parsed[0][i].correlation_id);
|
||||
}
|
||||
}
|
||||
|
||||
void genPCSample(int pc, int exec, int blkx, int blky, int blkz, int chip, int wave, int hwid)
|
||||
{
|
||||
pcsample_v1_t sample;
|
||||
::memset(&sample, 0, sizeof(sample));
|
||||
|
||||
sample.exec_mask = exec;
|
||||
sample.workgroud_id_x = blkx;
|
||||
sample.workgroud_id_y = blky;
|
||||
sample.workgroud_id_z = blkz;
|
||||
|
||||
sample.chiplet = chip;
|
||||
sample.wave_id = wave;
|
||||
sample.hw_id = hwid;
|
||||
sample.correlation_id = dispatch->unique_id;
|
||||
|
||||
compare.push_back(sample);
|
||||
|
||||
perf_sample_snapshot_v1 snap;
|
||||
::memset(&snap, 0, sizeof(snap));
|
||||
snap.exec_mask = exec;
|
||||
|
||||
snap.workgroud_id_x = blkx;
|
||||
snap.workgroud_id_y = blky;
|
||||
snap.workgroud_id_z = blkz;
|
||||
snap.chiplet_and_wave_id = (chip << 8) | (wave & 0x3F);
|
||||
snap.hw_id = hwid;
|
||||
snap.correlation_id = dispatch->getMockId();
|
||||
|
||||
assert(dispatch.get());
|
||||
dispatch->submit(snap);
|
||||
|
||||
(void) pc;
|
||||
};
|
||||
|
||||
std::vector<pcsample_v1_t> compare;
|
||||
};
|
||||
|
||||
// FIXME (vladimir): For some reason, the test can stochastically fail.
|
||||
// Did not have time to get into details.
|
||||
TEST(pcs_parser, gfx9)
|
||||
{
|
||||
WaveCntTest{}.Test();
|
||||
InstTypeTest{}.Test();
|
||||
StallReasonTest{}.Test();
|
||||
ArbStateTest{}.Test();
|
||||
WaveIssueAndErrorTest{}.Test();
|
||||
// FIXME: this might crash some time.
|
||||
// WaveOtherFieldsTest{}.Test();
|
||||
|
||||
std::cout << "GFX9 Test Done." << std::endl;
|
||||
}
|
||||
|
||||
// TODO: refactor the tests, modularize them and extract unit tests
|
||||
// from the integration f
|
||||
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/translation.hpp"
|
||||
|
||||
/// Translates a host-trap packet: copies the common header fields and tags the result as
/// AMD_HOST_TRAP_V1 (the header copy itself tags samples as AMD_SNAPSHOT_V1).
pcsample_v1_t
copyHostTrapSample(const perf_sample_host_trap_v1& sample)
{
    pcsample_v1_t host_trap = PCSParserTranslation::copySampleHeader(sample);
    host_trap.flags.type    = AMD_HOST_TRAP_V1;
    return host_trap;
}
|
||||
|
||||
template <typename SType>
|
||||
pcsample_v1_t
|
||||
PCSParserTranslation::copySampleHeader(const SType& sample)
|
||||
{
|
||||
pcsample_v1_t ret;
|
||||
ret.flags.type = AMD_SNAPSHOT_V1;
|
||||
|
||||
ret.pc = sample.pc;
|
||||
ret.exec_mask = sample.exec_mask;
|
||||
ret.workgroud_id_x = sample.workgroud_id_x;
|
||||
ret.workgroud_id_y = sample.workgroud_id_y;
|
||||
ret.workgroud_id_z = sample.workgroud_id_z;
|
||||
|
||||
ret.chiplet = sample.chiplet_and_wave_id >> 8;
|
||||
ret.wave_id = sample.chiplet_and_wave_id & 0x3F;
|
||||
ret.hw_id = sample.hw_id;
|
||||
ret.timestamp = sample.timestamp;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename gfx>
|
||||
pcsample_v1_t
|
||||
PCSParserTranslation::copyStochasticSample(const perf_sample_snapshot_v1& sample)
|
||||
{
|
||||
(void) sample;
|
||||
return {};
|
||||
};
|
||||
|
||||
template <>
|
||||
pcsample_v1_t
|
||||
PCSParserTranslation::copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
|
||||
{
|
||||
pcsample_v1_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
|
||||
// Check wave_id matches snapshot_wave_id
|
||||
|
||||
ret.flags.has_wave_cnt = true;
|
||||
ret.flags.has_stall_reason = true;
|
||||
|
||||
ret.wave_count = sample.perf_snapshot_data1 & 0x3F;
|
||||
|
||||
ret.wave_issued = sample.perf_snapshot_data >> 1;
|
||||
ret.snapshot.dual_issue_valu = sample.perf_snapshot_data >> 2;
|
||||
ret.snapshot.inst_type = sample.perf_snapshot_data >> 3;
|
||||
ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
|
||||
ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF;
|
||||
ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <>
|
||||
pcsample_v1_t
|
||||
PCSParserTranslation::copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
|
||||
{
|
||||
// TODO: finish this
|
||||
return copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
}
|
||||
|
||||
template <>
|
||||
pcsample_v1_t
|
||||
PCSParserTranslation::copyStochasticSample<gfx_unknown>(const perf_sample_snapshot_v1& sample)
|
||||
{
|
||||
pcsample_v1_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
ret.flags.valid = sample.perf_snapshot_data & 0x1;
|
||||
// Check wave_id matches snapshot_wave_id
|
||||
|
||||
ret.flags.has_wave_cnt = true;
|
||||
ret.flags.has_stall_reason = true;
|
||||
|
||||
ret.wave_issued = sample.perf_snapshot_data >> 1;
|
||||
ret.snapshot.inst_type = sample.perf_snapshot_data >> 2;
|
||||
ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 6) & 0x7;
|
||||
|
||||
ret.wave_count = sample.perf_snapshot_data1 & 0x3F;
|
||||
ret.snapshot.arb_state_issue = (sample.perf_snapshot_data1 >> 6) & 0xFF;
|
||||
ret.snapshot.arb_state_stall = (sample.perf_snapshot_data1 >> 14) & 0xFF;
|
||||
|
||||
ret.flags.has_memory_counter = true;
|
||||
ret.memory_counters.raw = sample.perf_snapshot_data2;
|
||||
return ret;
|
||||
}
|
||||
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "lib/rocprofiler/pc_sampling/parser/gfx11.hpp"
|
||||
#include "lib/rocprofiler/pc_sampling/parser/gfx_unknown.hpp"
|
||||
#include "lib/rocprofiler/pc_sampling/parser/gfx9.hpp"
|
||||
#include "lib/rocprofiler/pc_sampling/parser/parser_types.hpp"
|
||||
#include "lib/rocprofiler/pc_sampling/parser/rocr.hpp"
|
||||
|
||||
/// Translates a raw host-trap packet into the common pcsample_v1_t representation.
pcsample_v1_t
copyHostTrapSample(const perf_sample_host_trap_v1& sample);

/// Groups the static helpers that convert raw sample packets into pcsample_v1_t.
class PCSParserTranslation
{
public:
    /// Copies the fields shared by all raw sample layouts (SType) into a pcsample_v1_t.
    template <typename SType>
    static pcsample_v1_t copySampleHeader(const SType& sample);

    /// Translates a stochastic snapshot packet; the bit layout is selected by the `gfx` tag.
    template <typename gfx>
    static pcsample_v1_t copyStochasticSample(const perf_sample_snapshot_v1& sample);
};
|
||||
|
||||
#define BITSHIFT(sname) out |= ((in >> GFX::sname) & 1) << PCSAMPLE::sname
|
||||
|
||||
template <typename GFX>
|
||||
int
|
||||
translate_arb(int in)
|
||||
{
|
||||
size_t out = 0;
|
||||
BITSHIFT(ISSUE_VALU);
|
||||
BITSHIFT(ISSUE_MATRIX);
|
||||
BITSHIFT(ISSUE_LDS);
|
||||
BITSHIFT(ISSUE_LDS_DIRECT);
|
||||
BITSHIFT(ISSUE_SCALAR);
|
||||
BITSHIFT(ISSUE_VMEM_TEX);
|
||||
BITSHIFT(ISSUE_FLAT);
|
||||
BITSHIFT(ISSUE_EXP);
|
||||
BITSHIFT(ISSUE_MISC);
|
||||
BITSHIFT(ISSUE_BRMSG);
|
||||
return out & 0x3FF;
|
||||
}
|
||||
|
||||
#undef BITSHIFT
|
||||
|
||||
#define LUTOVERLOAD(sname) this->operator[](GFX::sname) = PCSAMPLE::sname
|
||||
|
||||
template <typename GFX>
|
||||
class GFX_REASON_LUT : public std::array<int, 32>
|
||||
{
|
||||
public:
|
||||
GFX_REASON_LUT()
|
||||
{
|
||||
std::memset(data(), 0, size() * sizeof(int));
|
||||
LUTOVERLOAD(REASON_NOT_AVAILABLE);
|
||||
LUTOVERLOAD(REASON_ALU);
|
||||
LUTOVERLOAD(REASON_WAITCNT);
|
||||
LUTOVERLOAD(REASON_INTERNAL);
|
||||
LUTOVERLOAD(REASON_BARRIER);
|
||||
LUTOVERLOAD(REASON_ARBITER);
|
||||
LUTOVERLOAD(REASON_EX_STALL);
|
||||
LUTOVERLOAD(REASON_OTHER_WAIT);
|
||||
LUTOVERLOAD(REASON_SLEEP);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename GFX>
|
||||
class GFX_INST_LUT : public std::array<int, 32>
|
||||
{
|
||||
public:
|
||||
GFX_INST_LUT()
|
||||
{
|
||||
std::memset(data(), 0, size() * sizeof(int));
|
||||
LUTOVERLOAD(TYPE_VALU);
|
||||
LUTOVERLOAD(TYPE_MATRIX);
|
||||
LUTOVERLOAD(TYPE_SCALAR);
|
||||
LUTOVERLOAD(TYPE_TEX);
|
||||
LUTOVERLOAD(TYPE_LDS);
|
||||
LUTOVERLOAD(TYPE_LDS_DIRECT);
|
||||
LUTOVERLOAD(TYPE_FLAT);
|
||||
LUTOVERLOAD(TYPE_EXP);
|
||||
LUTOVERLOAD(TYPE_MESSAGE);
|
||||
LUTOVERLOAD(TYPE_BARRIER);
|
||||
LUTOVERLOAD(TYPE_BRANCH_NOT_TAKEN);
|
||||
LUTOVERLOAD(TYPE_BRANCH_TAKEN);
|
||||
LUTOVERLOAD(TYPE_JUMP);
|
||||
LUTOVERLOAD(TYPE_OTHER);
|
||||
LUTOVERLOAD(TYPE_NO_INST);
|
||||
LUTOVERLOAD(TYPE_DUAL_VALU);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename GFX>
|
||||
int
|
||||
translate_reason(int in)
|
||||
{
|
||||
static GFX_REASON_LUT<GFX> lut;
|
||||
return lut[in & 0xF];
|
||||
}
|
||||
|
||||
template <typename GFX>
|
||||
int
|
||||
translate_inst(int in)
|
||||
{
|
||||
static GFX_INST_LUT<GFX> lut;
|
||||
return lut[in & 0xF];
|
||||
}
|
||||
|
||||
#undef LUTOVERLOAD
|
||||
|
||||
template <bool HostTrap, typename GFX>
|
||||
inline pcsample_v1_t
|
||||
copySample(const void* sample)
|
||||
{
|
||||
if(HostTrap) return copyHostTrapSample(*(const perf_sample_host_trap_v1*) sample);
|
||||
|
||||
pcsample_v1_t ret =
|
||||
PCSParserTranslation::copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
|
||||
|
||||
ret.snapshot.inst_type = translate_inst<GFX>(ret.snapshot.inst_type);
|
||||
ret.snapshot.arb_state_issue = translate_arb<GFX>(ret.snapshot.arb_state_issue);
|
||||
ret.snapshot.arb_state_stall = translate_arb<GFX>(ret.snapshot.arb_state_stall);
|
||||
ret.snapshot.reason_not_issued = translate_reason<GFX>(ret.snapshot.reason_not_issued);
|
||||
|
||||
return ret;
|
||||
}
|
||||
Reference in New Issue
Block a user