Thread trace and Trace Decoder API tests and samples (#416)
* Adding test and samples to decoder
* Fix sample
* Formatting
* Fix multi test
* Disable sample
* Fix tests
* Format
* Version fix
* Locking the decoder
* Add atomic
* Review comments
* Format
* Adding readme
* merge conflict and adding PCS+ATT test
* Review comments
* Properly disable PCS test
* Update tests/rocprofv3/advanced-thread-trace/CMakeLists.txt
* Adding back env var test
* Name fix
* Preload sample
* Addressing review comments
* Update docs
---------
Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
[ROCm/rocprofiler-sdk commit: e898079a13]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
0ff0ffffa2
Коммит
4ca156e572
@@ -47,3 +47,4 @@ add_subdirectory(code_object_isa_decode)
|
||||
add_subdirectory(external_correlation_id_request)
|
||||
add_subdirectory(pc_sampling)
|
||||
add_subdirectory(openmp_target)
|
||||
add_subdirectory(thread_trace)
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
mark_as_advanced(amdclangpp_EXECUTABLE)
|
||||
|
||||
if(amdclangpp_EXECUTABLE)
|
||||
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(rocprofiler-sdk-samples-thread-trace LANGUAGES CXX HIP)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
find_package(rocprofiler-sdk-roctx REQUIRED)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_HIP_STANDARD 17)
|
||||
set(CMAKE_HIP_EXTENSIONS OFF)
|
||||
set(CMAKE_HIP_STANDARD_REQUIRED ON)
|
||||
|
||||
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
|
||||
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
|
||||
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set_source_files_properties(
|
||||
main.cpp PROPERTIES LANGUAGE HIP COMPILE_FLAGS "${CMAKE_HIP_FLAGS_RELWITHDEBINFO}")
|
||||
|
||||
add_executable(thread-trace-sample)
|
||||
target_sources(thread-trace-sample PRIVATE main.cpp agent.cpp)
|
||||
|
||||
target_link_libraries(
|
||||
thread-trace-sample
|
||||
PUBLIC rocprofiler-sdk::samples-build-flags
|
||||
PRIVATE rocprofiler-sdk::rocprofiler-sdk
|
||||
rocprofiler-sdk::samples-common-library
|
||||
rocprofiler-sdk::rocprofiler-sdk-dw
|
||||
rocprofiler-sdk-roctx::rocprofiler-sdk-roctx
|
||||
rocprofiler-sdk::rocprofiler-sdk-amd-comgr)
|
||||
|
||||
add_test(NAME thread-trace-sample COMMAND $<TARGET_FILE:thread-trace-sample>)
|
||||
|
||||
find_library(
|
||||
attdecoder_LIBRARY
|
||||
NAMES rocprof-trace-decoder
|
||||
HINTS ${ROCM_PATH}
|
||||
PATHS ${ROCM_PATH}
|
||||
PATH_SUFFIXES lib)
|
||||
|
||||
if(attdecoder_LIBRARY)
|
||||
cmake_path(GET attdecoder_LIBRARY PARENT_PATH attdecoder_LIB_DIR)
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args(attdecoder REQUIRED_VARS attdecoder_LIB_DIR
|
||||
attdecoder_LIBRARY)
|
||||
|
||||
set(IS_DISABLED ON)
|
||||
if(attdecoder_FOUND)
|
||||
set(IS_DISABLED OFF)
|
||||
endif()
|
||||
|
||||
rocprofiler_samples_get_preload_env(PRELOAD_ENV)
|
||||
list(APPEND PRELOAD_ENV "ROCPROFILER_TRACE_DECODER_LIB_PATH=${attdecoder_LIB_DIR}")
|
||||
|
||||
set_tests_properties(
|
||||
thread-trace-sample PROPERTIES TIMEOUT 60 ENVIRONMENT ${PRELOAD_ENV} LABELS
|
||||
"samples;thread-trace" DISABLED ${IS_DISABLED})
|
||||
@@ -0,0 +1,26 @@
|
||||
# Thread Trace and ROCprof Trace Decoder
|
||||
|
||||
## Services
|
||||
|
||||
- Thread trace in device profiling mode
|
||||
- ROCprof Trace Decoder decodes the received thread trace data
|
||||
- Thread trace start/stop using roctx
|
||||
|
||||
## Properties
|
||||
|
||||
### [agent.cpp](agent.cpp):
|
||||
|
||||
- Configures thread trace in all GPU agents found with `rocprofiler_configure_device_thread_trace_service`
|
||||
- Waits until `roctxProfilerResume` is called to start thread trace
|
||||
- Stops tracing at `roctxProfilerPause`
|
||||
- Receives the trace data in `shader_data_callback` and calls `rocprofiler_trace_decode` to decode the data
|
||||
- `rocprofiler_trace_decode` calls `parse` (a lambda)
|
||||
- `parse` receives the dedecoded data and increments hitcount/latencies by pc address
|
||||
- At application end, `tool_fini` calls `gen_output_stream` to write the top hotspots into `thread_trace.log`
|
||||
|
||||
### [main.cpp](main.cpp):
|
||||
|
||||
- Defines a few different kernels and runs them
|
||||
- The first loop iteration warms up the kernels
|
||||
- The second iteration calls `roctxProfilerResume` to start thread trace
|
||||
- After the loop ends, `roctxProfilerPause` is called to stop tracing
|
||||
@@ -0,0 +1,391 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// undefine NDEBUG so asserts are implemented
|
||||
#ifdef NDEBUG
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/experimental/thread_trace.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define ROCPROFILER_CALL(result, msg) \
|
||||
if(auto ec = (result); ec != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::cerr << "rocprofiler-sdk error at " << __FILE__ << ":" << __LINE__ \
|
||||
<< " :: " << #result << std::endl; \
|
||||
std::cerr << "rocprofiler-sdk error code " << ec << ": " \
|
||||
<< rocprofiler_get_status_string(ec) << " :: " << msg << std::endl; \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
#define DECODER_CALL(result) \
|
||||
if(auto ec = (result); ec != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::cerr << "Decoder error at " << __FILE__ << ":" << __LINE__ << std::endl; \
|
||||
std::cerr << "rocprofiler-sdk error code " << ec << ": " \
|
||||
<< rocprofiler_get_status_string(ec) << std::endl; \
|
||||
}
|
||||
|
||||
#define CHECK_NOTNULL(x) \
|
||||
if(!(x)) \
|
||||
{ \
|
||||
abort(); \
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr uint64_t TARGET_CU = 1; // CU (gfx9) or WGP (gfx10+)
|
||||
constexpr uint64_t SHADER_MASK = 0x1; // Only enable SE=0
|
||||
constexpr uint64_t BUFFER_SIZE = 0x10000000; // 256MB
|
||||
}; // namespace
|
||||
|
||||
namespace Results
|
||||
{
|
||||
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
|
||||
|
||||
struct address_sort_t
|
||||
{
|
||||
bool operator()(const pcinfo_t& a, const pcinfo_t& b) const
|
||||
{
|
||||
if(a.marker_id == b.marker_id) return a.addr < b.addr;
|
||||
return a.marker_id < b.marker_id;
|
||||
}
|
||||
};
|
||||
|
||||
struct Latency
|
||||
{
|
||||
uint64_t latency{0};
|
||||
uint64_t hitcount{0};
|
||||
};
|
||||
|
||||
// Maps address to latency
|
||||
using LatencyTable = std::map<rocprofiler_thread_trace_decoder_pc_t, Latency, address_sort_t>;
|
||||
// Used to disassemble instructions at (id, vaddr) pair
|
||||
using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate;
|
||||
|
||||
AddressTable* table{nullptr};
|
||||
LatencyTable* latencies{nullptr};
|
||||
|
||||
// used to calculate mean wave lifetime
|
||||
int64_t wave_lifetime = 0;
|
||||
int64_t waves_started = 0;
|
||||
int64_t waves_ended = 0;
|
||||
|
||||
void
|
||||
gen_output_stream()
|
||||
{
|
||||
CHECK_NOTNULL(Results::latencies);
|
||||
CHECK_NOTNULL(Results::table);
|
||||
|
||||
const char* OUTPUT_OFSTREAM = "thread_trace.log";
|
||||
std::ofstream file(OUTPUT_OFSTREAM);
|
||||
|
||||
if(!file.is_open())
|
||||
std::cout << "Could not open log file: " << OUTPUT_OFSTREAM << ", writing to stdout\n";
|
||||
else
|
||||
std::cout << "Writing log to: " << OUTPUT_OFSTREAM << std::endl;
|
||||
|
||||
std::ostream& output = file.is_open() ? file : std::cout;
|
||||
|
||||
// Sort map by instruction cost
|
||||
using Element = std::pair<pcinfo_t, Latency>;
|
||||
|
||||
std::vector<Element> sorted(latencies->begin(), latencies->end());
|
||||
std::stable_sort(sorted.begin(), sorted.end(), [](const Element& a, const Element& b) {
|
||||
return a.second.latency > b.second.latency;
|
||||
});
|
||||
|
||||
output << "Top 50 hotspots for trace (cycles):\n";
|
||||
for(size_t i = 0; i < sorted.size() && i < 50; i++)
|
||||
{
|
||||
auto& addr = sorted.at(i).first;
|
||||
auto& latency = sorted.at(i).second;
|
||||
auto inst = table->get(addr.marker_id, addr.addr);
|
||||
|
||||
auto comment = inst->comment;
|
||||
size_t pos = comment.rfind('/');
|
||||
if(pos != std::string::npos && pos + 1 < comment.size()) comment = comment.substr(pos + 1);
|
||||
|
||||
output << "Latency:" << latency.latency << "\tHit:" << latency.hitcount << " \t"
|
||||
<< inst->inst << " [" << comment << "]\n";
|
||||
}
|
||||
|
||||
if(waves_started != waves_ended)
|
||||
std::cerr << "Error: Some waves have not ended!" << std::endl;
|
||||
else if(waves_started == 0)
|
||||
std::cerr << "Error: No waves started!" << std::endl;
|
||||
else
|
||||
output << "\nMean wave lifetime: " << wave_lifetime / waves_started << " cycles";
|
||||
|
||||
output << "\nWaves started: " << waves_started << "\nWaves ended: " << waves_ended << "\n";
|
||||
};
|
||||
} // namespace Results
|
||||
|
||||
namespace Decoder
|
||||
{
|
||||
rocprofiler_thread_trace_decoder_id_t decoder{};
|
||||
|
||||
void
|
||||
tool_codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* /* user_data */,
|
||||
void* /* userdata */)
|
||||
{
|
||||
if(record.kind != ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT) return;
|
||||
if(record.operation != ROCPROFILER_CODE_OBJECT_LOAD) return;
|
||||
|
||||
CHECK_NOTNULL(Results::table);
|
||||
auto* data = static_cast<rocprofiler_callback_tracing_code_object_load_data_t*>(record.payload);
|
||||
|
||||
if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE)
|
||||
{
|
||||
Results::table->addDecoder(
|
||||
data->uri, data->code_object_id, data->load_delta, data->load_size);
|
||||
return;
|
||||
}
|
||||
|
||||
auto* memorybase = reinterpret_cast<const void*>(data->memory_base);
|
||||
CHECK_NOTNULL(memorybase);
|
||||
|
||||
DECODER_CALL(rocprofiler_thread_trace_decoder_codeobj_load(decoder,
|
||||
data->code_object_id,
|
||||
data->load_delta,
|
||||
data->load_size,
|
||||
memorybase,
|
||||
data->memory_size));
|
||||
|
||||
Results::table->addDecoder(
|
||||
memorybase, data->memory_size, data->code_object_id, data->load_delta, data->load_size);
|
||||
}
|
||||
|
||||
void
|
||||
shader_data_callback(rocprofiler_agent_id_t /* agent */,
|
||||
int64_t /* se_id */,
|
||||
void* se_data,
|
||||
size_t data_size,
|
||||
rocprofiler_user_data_t /* userdata */)
|
||||
{
|
||||
CHECK_NOTNULL(Results::latencies);
|
||||
|
||||
auto parse = [](rocprofiler_thread_trace_decoder_record_type_t record_type_id,
|
||||
void* events,
|
||||
uint64_t num_events,
|
||||
void* /* userdata */) {
|
||||
if(record_type_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY)
|
||||
{
|
||||
for(size_t i = 0; i < num_events; i++)
|
||||
{
|
||||
auto& event = static_cast<rocprofiler_thread_trace_decoder_occupancy_t*>(events)[i];
|
||||
|
||||
if(event.start)
|
||||
{
|
||||
Results::wave_lifetime -= static_cast<int64_t>(event.time);
|
||||
Results::waves_started++;
|
||||
}
|
||||
else
|
||||
{
|
||||
Results::wave_lifetime += static_cast<int64_t>(event.time);
|
||||
Results::waves_ended++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(record_type_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return;
|
||||
|
||||
for(size_t w = 0; w < num_events; w++)
|
||||
{
|
||||
auto* wave = static_cast<rocprofiler_thread_trace_decoder_wave_t*>(events);
|
||||
for(size_t i = 0; i < wave->instructions_size; i++)
|
||||
{
|
||||
auto& inst = wave->instructions_array[i];
|
||||
auto& latency = (*Results::latencies)[inst.pc];
|
||||
latency.latency += inst.duration;
|
||||
latency.hitcount += 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
DECODER_CALL(rocprofiler_trace_decode(decoder, parse, se_data, data_size, nullptr));
|
||||
}
|
||||
|
||||
} // namespace Decoder
|
||||
|
||||
namespace ThreadTracer
|
||||
{
|
||||
rocprofiler_client_id_t* client_id = nullptr;
|
||||
rocprofiler_context_id_t agent_ctx = {};
|
||||
rocprofiler_context_id_t tracing_ctx = {};
|
||||
|
||||
rocprofiler_status_t
|
||||
query_available_agents(rocprofiler_agent_version_t /* version */,
|
||||
const void** agents,
|
||||
size_t num_agents,
|
||||
void* user_data)
|
||||
{
|
||||
rocprofiler_user_data_t user{};
|
||||
user.ptr = user_data;
|
||||
|
||||
for(size_t idx = 0; idx < num_agents; idx++)
|
||||
{
|
||||
const auto* agent = static_cast<const rocprofiler_agent_v0_t*>(agents[idx]);
|
||||
if(agent->type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
||||
|
||||
auto parameters = std::vector<rocprofiler_thread_trace_parameter_t>{};
|
||||
parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, TARGET_CU});
|
||||
parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, BUFFER_SIZE});
|
||||
parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, SHADER_MASK});
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_device_thread_trace_service(agent_ctx,
|
||||
agent->id,
|
||||
parameters.data(),
|
||||
parameters.size(),
|
||||
Decoder::shader_data_callback,
|
||||
user),
|
||||
"thread trace service configure");
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* /* user_data */,
|
||||
void* /* cb_data */)
|
||||
{
|
||||
if(record.kind != ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API) return;
|
||||
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
|
||||
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_stop_context(agent_ctx), "stopping context");
|
||||
}
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
|
||||
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(agent_ctx), "starting context");
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */)
|
||||
{
|
||||
Results::latencies = new Results::LatencyTable{};
|
||||
Results::table = new Results::AddressTable{};
|
||||
|
||||
// This is set by ctests: TODO: move to client.cpp
|
||||
// If nullptr, searches rocprofiler-sdk install location
|
||||
const char* lib_path = std::getenv("ROCPROFILER_TRACE_DECODER_LIB_PATH");
|
||||
|
||||
DECODER_CALL(rocprofiler_thread_trace_decoder_create(&Decoder::decoder, lib_path));
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&tracing_ctx), "context creation");
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&agent_ctx), "context creation");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service(tracing_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
|
||||
nullptr,
|
||||
0,
|
||||
Decoder::tool_codeobj_tracing_callback,
|
||||
nullptr),
|
||||
"code object tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
||||
tracing_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
|
||||
nullptr,
|
||||
0,
|
||||
cntrl_tracing_callback,
|
||||
nullptr),
|
||||
"marker tracing callback service configure");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
|
||||
&query_available_agents,
|
||||
sizeof(rocprofiler_agent_t),
|
||||
nullptr),
|
||||
"Failed to find GPU agents");
|
||||
|
||||
int valid_ctx = 0;
|
||||
ROCPROFILER_CALL(rocprofiler_context_is_valid(agent_ctx, &valid_ctx), "validity check");
|
||||
assert(valid_ctx != 0);
|
||||
ROCPROFILER_CALL(rocprofiler_context_is_valid(tracing_ctx, &valid_ctx), "validity check");
|
||||
assert(valid_ctx != 0);
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(tracing_ctx), "context start");
|
||||
|
||||
// no errors
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_fini(void* /* tool_data */)
|
||||
{
|
||||
rocprofiler_thread_trace_decoder_destroy(Decoder::decoder);
|
||||
|
||||
Results::gen_output_stream();
|
||||
|
||||
delete Results::latencies;
|
||||
delete Results::table;
|
||||
}
|
||||
|
||||
} // namespace ThreadTracer
|
||||
|
||||
extern "C" rocprofiler_tool_configure_result_t*
|
||||
rocprofiler_configure(uint32_t /* version */,
|
||||
const char* /* runtime_version */,
|
||||
uint32_t priority,
|
||||
rocprofiler_client_id_t* id)
|
||||
{
|
||||
// only activate if main tool
|
||||
if(priority > 0) return nullptr;
|
||||
|
||||
// set the client name
|
||||
id->name = "Thread Trace Sample";
|
||||
|
||||
// store client info
|
||||
ThreadTracer::client_id = id;
|
||||
|
||||
// create configure data
|
||||
static auto cfg =
|
||||
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
|
||||
&ThreadTracer::tool_init,
|
||||
&ThreadTracer::tool_fini,
|
||||
nullptr};
|
||||
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// undefine NDEBUG so asserts are implemented
|
||||
#ifdef NDEBUG
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include <rocprofiler-sdk-roctx/roctx.h>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
// Two waves per SIMD on MI300
|
||||
#define DATA_SIZE (304 * 64 * 4 * 2)
|
||||
#define HIP_API_CALL(CALL) \
|
||||
if((CALL) != hipSuccess) \
|
||||
{ \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
#define LDS_SIZE 1024
|
||||
|
||||
__global__ void
|
||||
divide_kernel(float* a, const float* b, const float* c, int /* unused */)
|
||||
{
|
||||
int index = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if(index >= DATA_SIZE) return;
|
||||
|
||||
a[index] = (b[index] - c[index]) / abs(c[index] + b[index]) + 1;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
looping_lds_kernel(float* a, const float* b, const float* c, int loopcount)
|
||||
{
|
||||
__shared__ float interm[LDS_SIZE];
|
||||
|
||||
size_t index = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
for(size_t i = index; i < DATA_SIZE; i += blockDim.x * gridDim.x)
|
||||
interm[threadIdx.x % LDS_SIZE] = b[index] + threadIdx.x;
|
||||
|
||||
for(int it = 0; it < loopcount; it++)
|
||||
{
|
||||
__syncthreads();
|
||||
float value = interm[(it + threadIdx.x + LDS_SIZE / 2) % LDS_SIZE];
|
||||
__syncthreads();
|
||||
interm[threadIdx.x % LDS_SIZE] += value;
|
||||
}
|
||||
|
||||
a[index] = interm[threadIdx.x % LDS_SIZE] + c[index];
|
||||
}
|
||||
|
||||
__global__ void
|
||||
fifo_kernel(float* /* a */, const float* /* b */, const float* /* c */, int loops)
|
||||
{
|
||||
using _float4 = __attribute__((__vector_size__(4 * sizeof(float)))) float;
|
||||
|
||||
__shared__ _float4 lds[LDS_SIZE];
|
||||
lds[threadIdx.x] = _float4{float(threadIdx.x)};
|
||||
lds[threadIdx.x + 512] = _float4{float(threadIdx.x)};
|
||||
|
||||
__syncthreads();
|
||||
|
||||
_float4 dst[16];
|
||||
|
||||
float res1 = 0, res2 = 0;
|
||||
|
||||
for(int l = 0; l < loops; l++)
|
||||
{
|
||||
#pragma unroll 16
|
||||
for(int i = 0; i < 16; i++)
|
||||
dst[i] = lds[threadIdx.x + i * 8];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
#pragma unroll 16
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
res1 += dst[i][0] + dst[i][1];
|
||||
res2 += dst[i][2] + dst[i][3];
|
||||
}
|
||||
asm volatile("v_add_f32 %0, %1, %2" : "=v"(res1) : "v"(res1), "v"(res2));
|
||||
}
|
||||
};
|
||||
|
||||
class hipMemory
|
||||
{
|
||||
public:
|
||||
hipMemory(size_t size = DATA_SIZE)
|
||||
{
|
||||
HIP_API_CALL(hipMalloc(&ptr, size * sizeof(float)));
|
||||
HIP_API_CALL(hipMemset(ptr, 0, size * sizeof(float)));
|
||||
}
|
||||
~hipMemory()
|
||||
{
|
||||
if(ptr) HIP_API_CALL(hipFree(ptr));
|
||||
}
|
||||
hipMemory(hipMemory&& other)
|
||||
{
|
||||
ptr = other.ptr;
|
||||
other.ptr = nullptr;
|
||||
}
|
||||
float* ptr = nullptr;
|
||||
};
|
||||
|
||||
class HipStream
|
||||
{
|
||||
public:
|
||||
HipStream() { HIP_API_CALL(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); }
|
||||
~HipStream() { HIP_API_CALL(hipStreamDestroy(stream)); }
|
||||
|
||||
hipStream_t stream;
|
||||
|
||||
hipMemory src1{};
|
||||
hipMemory src2{};
|
||||
hipMemory dst{};
|
||||
};
|
||||
|
||||
#define Launch(kernel, stream, arglast) \
|
||||
hipLaunchKernelGGL( \
|
||||
kernel, DATA_SIZE / 512, 512, 0, 0, stream.dst.ptr, stream.src1.ptr, stream.src2.ptr, 6);
|
||||
|
||||
int
|
||||
main(int /*argc*/, char** /*argv*/)
|
||||
{
|
||||
std::array<HipStream, 3> streams{};
|
||||
std::vector<decltype(divide_kernel)*> kernels{};
|
||||
|
||||
kernels.push_back(divide_kernel);
|
||||
kernels.push_back(looping_lds_kernel);
|
||||
kernels.push_back(fifo_kernel);
|
||||
|
||||
for(size_t i = 0; i < streams.size() * kernels.size(); i++)
|
||||
{
|
||||
// Warmup then start
|
||||
if(i == streams.size())
|
||||
{
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
roctxProfilerResume(0);
|
||||
}
|
||||
|
||||
auto& stream = streams.at(i % streams.size());
|
||||
auto& kernel = kernels.at(i % kernels.size());
|
||||
|
||||
Launch(kernel, stream, 3);
|
||||
HIP_API_CALL(hipGetLastError());
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
roctxProfilerPause(0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -188,7 +188,7 @@ To decode the raw thread trace data, create and initialize a Trace Decoder:
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
rocprofiler_thread_trace_decoder_handle_t decoder{};
|
||||
rocprofiler_thread_trace_decoder_id_t decoder{};
|
||||
|
||||
// Create the Trace Decoder with the path to the decoder library
|
||||
ROCPROFILER_CALL(
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
#include <rocprofiler-sdk/internal_threading.h>
|
||||
@@ -70,6 +71,7 @@ ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(hsa_signal_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(hsa_executable_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(hsa_region_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER(rocprofiler_thread_trace_decoder_id_t)
|
||||
|
||||
#undef ROCPROFILER_CXX_SPECIALIZE_HANDLE_HASHER
|
||||
} // namespace std
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/counters.h>
|
||||
#include <rocprofiler-sdk/defines.h>
|
||||
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa.h>
|
||||
#include <rocprofiler-sdk/internal_threading.h>
|
||||
@@ -148,6 +149,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_inf
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_instance_info_t&)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_dimension_info_t&)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_version_triplet_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t)
|
||||
|
||||
// definitions of operator==
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
@@ -164,6 +166,7 @@ ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(hsa_signal_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(hsa_executable_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(hsa_region_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_thread_trace_decoder_id_t)
|
||||
|
||||
inline bool
|
||||
operator==(const rocprofiler_agent_v0_t& lhs, const rocprofiler_agent_v0_t& rhs)
|
||||
@@ -241,6 +244,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_dim3_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_region_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_version_triplet_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_id_t)
|
||||
|
||||
// definitions of operator<
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
@@ -257,6 +261,7 @@ ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(hsa_signal_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(hsa_executable_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(hsa_region_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_thread_trace_decoder_id_t)
|
||||
|
||||
inline bool
|
||||
operator<(const rocprofiler_counter_record_dimension_info_t& lhs,
|
||||
@@ -334,6 +339,7 @@ ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_dim3_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_region_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_version_triplet_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t)
|
||||
|
||||
// cleanup defines
|
||||
#undef ROCPROFILER_CXX_DECLARE_OPERATORS
|
||||
|
||||
+13
-13
@@ -36,10 +36,10 @@ ROCPROFILER_EXTERN_C_INIT
|
||||
/**
|
||||
* @brief Handle containing a loaded rocprof-trace-decoder and a decoder state.
|
||||
*/
|
||||
typedef struct rocprofiler_thread_trace_decoder_handle_t
|
||||
typedef struct rocprofiler_thread_trace_decoder_id_t
|
||||
{
|
||||
uint64_t handle;
|
||||
} rocprofiler_thread_trace_decoder_handle_t;
|
||||
} rocprofiler_thread_trace_decoder_id_t;
|
||||
|
||||
/**
|
||||
* @brief Initializes Trace Decoder library with a library search path
|
||||
@@ -51,7 +51,7 @@ typedef struct rocprofiler_thread_trace_decoder_handle_t
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Handle created
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_t* handle,
|
||||
rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_id_t* handle,
|
||||
const char* path) ROCPROFILER_API ROCPROFILER_NONNULL(1, 2);
|
||||
|
||||
/**
|
||||
@@ -59,7 +59,7 @@ rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_
|
||||
* @param[in] handle Handle to destroy
|
||||
*/
|
||||
void
|
||||
rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle_t handle)
|
||||
rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_id_t handle)
|
||||
ROCPROFILER_API;
|
||||
|
||||
/**
|
||||
@@ -83,11 +83,11 @@ rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Code object loaded
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
uint64_t load_id,
|
||||
uint64_t load_addr,
|
||||
uint64_t load_size,
|
||||
const void* data,
|
||||
rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
uint64_t load_id,
|
||||
uint64_t load_addr,
|
||||
uint64_t load_size,
|
||||
const void* data,
|
||||
uint64_t size) ROCPROFILER_API ROCPROFILER_NONNULL(5);
|
||||
|
||||
/**
|
||||
@@ -100,7 +100,7 @@ rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_h
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Code object unloaded
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
uint64_t load_id) ROCPROFILER_API;
|
||||
|
||||
/**
|
||||
@@ -135,7 +135,7 @@ typedef void (*rocprofiler_thread_trace_decoder_callback_t)(
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS on success
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
rocprofiler_thread_trace_decoder_callback_t callback,
|
||||
void* data,
|
||||
uint64_t size,
|
||||
@@ -148,8 +148,8 @@ rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
* @retval null terminated string as description of "info".
|
||||
*/
|
||||
const char*
|
||||
rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
rocprofiler_thread_trace_decoder_info_t info)
|
||||
rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
rocprofiler_thread_trace_decoder_info_t info)
|
||||
ROCPROFILER_API;
|
||||
|
||||
/** @} */
|
||||
|
||||
@@ -41,9 +41,9 @@ namespace rocprofiler
|
||||
{
|
||||
namespace att_wrapper
|
||||
{
|
||||
ATTFileMgr::ATTFileMgr(Fspath _dir,
|
||||
std::vector<std::string> _counters,
|
||||
rocprofiler_thread_trace_decoder_handle_t _decoder)
|
||||
ATTFileMgr::ATTFileMgr(Fspath _dir,
|
||||
std::vector<std::string> _counters,
|
||||
rocprofiler_thread_trace_decoder_id_t _decoder)
|
||||
: dir(std::move(_dir))
|
||||
, decoder(_decoder)
|
||||
{
|
||||
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
bool valid() const;
|
||||
|
||||
protected:
|
||||
rocprofiler_thread_trace_decoder_handle_t decoder{};
|
||||
rocprofiler_thread_trace_decoder_id_t decoder{};
|
||||
};
|
||||
|
||||
class ATTFileMgr
|
||||
@@ -79,9 +79,9 @@ class ATTFileMgr
|
||||
using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate;
|
||||
|
||||
public:
|
||||
ATTFileMgr(Fspath _dir,
|
||||
std::vector<std::string> _counters,
|
||||
rocprofiler_thread_trace_decoder_handle_t _decoder);
|
||||
ATTFileMgr(Fspath _dir,
|
||||
std::vector<std::string> _counters,
|
||||
rocprofiler_thread_trace_decoder_id_t _decoder);
|
||||
~ATTFileMgr();
|
||||
|
||||
void addDecoder(const char* filepath, uint64_t id, uint64_t load_addr, uint64_t memsize);
|
||||
@@ -95,7 +95,7 @@ public:
|
||||
std::shared_ptr<AddressTable> table{nullptr};
|
||||
std::map<size_t, std::vector<occupancy_t>> occupancy{};
|
||||
std::vector<uint64_t> codeobjs_to_delete{};
|
||||
rocprofiler_thread_trace_decoder_handle_t decoder{};
|
||||
rocprofiler_thread_trace_decoder_id_t decoder{};
|
||||
|
||||
std::array<std::shared_ptr<class WstatesFile>, ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST>
|
||||
wstates;
|
||||
|
||||
@@ -103,9 +103,9 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
|
||||
C_API_END
|
||||
}
|
||||
|
||||
ToolData::ToolData(std::vector<char>& _data,
|
||||
WaveConfig& _config,
|
||||
rocprofiler_thread_trace_decoder_handle_t _decoder)
|
||||
ToolData::ToolData(std::vector<char>& _data,
|
||||
WaveConfig& _config,
|
||||
rocprofiler_thread_trace_decoder_id_t _decoder)
|
||||
: cfile(_config.code)
|
||||
, config(_config)
|
||||
, decoder(_decoder)
|
||||
|
||||
@@ -60,9 +60,9 @@ using SymbolInfo = rocprofiler::sdk::codeobj::disassembly::SymbolInfo;
|
||||
|
||||
struct ToolData
|
||||
{
|
||||
ToolData(std::vector<char>& data,
|
||||
WaveConfig& config,
|
||||
rocprofiler_thread_trace_decoder_handle_t decoder);
|
||||
ToolData(std::vector<char>& data,
|
||||
WaveConfig& config,
|
||||
rocprofiler_thread_trace_decoder_id_t decoder);
|
||||
~ToolData();
|
||||
|
||||
CodeLine& get(pcinfo_t pc);
|
||||
@@ -73,7 +73,7 @@ struct ToolData
|
||||
std::vector<char> shader_data{};
|
||||
size_t num_waves = 0;
|
||||
|
||||
rocprofiler_thread_trace_decoder_handle_t decoder{};
|
||||
rocprofiler_thread_trace_decoder_id_t decoder{};
|
||||
};
|
||||
|
||||
} // namespace att_wrapper
|
||||
|
||||
+62
-55
@@ -21,6 +21,7 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
@@ -29,15 +30,19 @@
|
||||
|
||||
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>
|
||||
#include <rocprofiler-sdk/experimental/thread_trace.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
namespace
|
||||
{
|
||||
using DL = rocprofiler::thread_trace::DL;
|
||||
using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate;
|
||||
using LockedTable = rocprofiler::common::Synchronized<AddressTable>;
|
||||
|
||||
class DecoderInstance
|
||||
{
|
||||
@@ -46,72 +51,72 @@ public:
|
||||
: dl(std::move(_dl))
|
||||
{}
|
||||
|
||||
std::unique_ptr<DL> dl{nullptr};
|
||||
AddressTable table{};
|
||||
const std::unique_ptr<const DL> dl{nullptr};
|
||||
|
||||
LockedTable table{};
|
||||
};
|
||||
|
||||
std::mutex map_mut;
|
||||
using DecoderMap =
|
||||
std::unordered_map<rocprofiler_thread_trace_decoder_id_t, std::shared_ptr<DecoderInstance>>;
|
||||
using LockedMap = rocprofiler::common::Synchronized<DecoderMap>;
|
||||
|
||||
auto&
|
||||
get_dlopens()
|
||||
get_dlmap()
|
||||
{
|
||||
static auto*& _v = rocprofiler::common::static_object<
|
||||
std::unordered_map<uint64_t, std::shared_ptr<DecoderInstance>>>::construct();
|
||||
static auto*& _v = rocprofiler::common::static_object<LockedMap>::construct();
|
||||
return *CHECK_NOTNULL(_v);
|
||||
}
|
||||
|
||||
std::shared_ptr<DecoderInstance>
|
||||
get_dl(rocprofiler_thread_trace_decoder_handle_t handle)
|
||||
get_dl(rocprofiler_thread_trace_decoder_id_t handle)
|
||||
{
|
||||
auto lk = std::unique_lock{map_mut};
|
||||
auto it = get_dlopens().find(handle.handle);
|
||||
if(it == get_dlopens().end()) return nullptr;
|
||||
|
||||
return it->second;
|
||||
return get_dlmap().rlock([&](const DecoderMap& map) -> std::shared_ptr<DecoderInstance> {
|
||||
if(auto it = map.find(handle); it != map.end()) return it->second;
|
||||
return nullptr;
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
rocprofiler_status_t
|
||||
rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_handle_t* handle,
|
||||
const char* path)
|
||||
rocprofiler_thread_trace_decoder_create(rocprofiler_thread_trace_decoder_id_t* handle,
|
||||
const char* path)
|
||||
{
|
||||
auto dl = std::make_unique<DL>(path);
|
||||
if(dl->handle == nullptr) return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
if(!dl->valid()) return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI;
|
||||
|
||||
auto lk = std::unique_lock{map_mut};
|
||||
static uint64_t count = 1;
|
||||
static std::atomic<uint64_t> count{1};
|
||||
handle->handle = count.fetch_add(1);
|
||||
|
||||
auto instance = std::make_shared<DecoderInstance>(std::move(dl));
|
||||
|
||||
handle->handle = count++;
|
||||
get_dlopens()[handle->handle] = std::move(instance);
|
||||
get_dlmap().wlock(
|
||||
[&](DecoderMap& map) { map[*handle] = std::make_shared<DecoderInstance>(std::move(dl)); });
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_handle_t handle)
|
||||
rocprofiler_thread_trace_decoder_destroy(rocprofiler_thread_trace_decoder_id_t handle)
|
||||
{
|
||||
auto lk = std::unique_lock{map_mut};
|
||||
get_dlopens().erase(handle.handle);
|
||||
get_dlmap().wlock([&](DecoderMap& map) { map.erase(handle); });
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
uint64_t load_id,
|
||||
uint64_t load_addr,
|
||||
uint64_t load_size,
|
||||
const void* data,
|
||||
uint64_t size)
|
||||
rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
uint64_t load_id,
|
||||
uint64_t load_addr,
|
||||
uint64_t load_size,
|
||||
const void* data,
|
||||
uint64_t size)
|
||||
{
|
||||
auto decoder = get_dl(handle);
|
||||
if(decoder == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
try
|
||||
{
|
||||
decoder->table.addDecoder(data, size, load_id, load_addr, load_size);
|
||||
decoder->table.wlock([&](AddressTable& table) {
|
||||
table.addDecoder(data, size, load_id, load_addr, load_size);
|
||||
});
|
||||
} catch(...)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
@@ -120,15 +125,17 @@ rocprofiler_thread_trace_decoder_codeobj_load(rocprofiler_thread_trace_decoder_h
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
uint64_t load_id)
|
||||
rocprofiler_thread_trace_decoder_codeobj_unload(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
uint64_t load_id)
|
||||
{
|
||||
auto decoder = get_dl(handle);
|
||||
if(decoder == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
try
|
||||
{
|
||||
if(decoder->table.removeDecoder(load_id)) return ROCPROFILER_STATUS_SUCCESS;
|
||||
bool result =
|
||||
decoder->table.wlock([&](AddressTable& table) { return table.removeDecoder(load_id); });
|
||||
if(result) return ROCPROFILER_STATUS_SUCCESS;
|
||||
} catch(std::exception&)
|
||||
{}
|
||||
|
||||
@@ -169,32 +176,32 @@ isa_callback(char* isa_instruction,
|
||||
void* userdata)
|
||||
{
|
||||
ROCP_FATAL_IF(userdata == nullptr) << "Userdata is null!";
|
||||
auto& table = static_cast<trace_data_t*>(userdata)->decoder->table;
|
||||
|
||||
std::unique_ptr<Instruction> instruction{nullptr};
|
||||
auto decoder = static_cast<trace_data_t*>(userdata)->decoder;
|
||||
ROCP_FATAL_IF(decoder == nullptr) << "decoder is null";
|
||||
|
||||
try
|
||||
{
|
||||
instruction = table.get(pc.marker_id, pc.addr);
|
||||
auto instruction = decoder->table.wlock(
|
||||
[&](AddressTable& table) { return table.get(pc.marker_id, pc.addr); });
|
||||
|
||||
if(!instruction) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
{
|
||||
size_t tmp_isa_size = *isa_size;
|
||||
*isa_size = instruction->inst.size();
|
||||
|
||||
if(*isa_size > tmp_isa_size)
|
||||
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
memcpy(isa_instruction, instruction->inst.data(), *isa_size);
|
||||
*isa_memory_size = instruction->size;
|
||||
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
ROCP_WARNING << pc.marker_id << ":" << pc.addr << ' ' << e.what();
|
||||
ROCP_CI_LOG(INFO) << pc.marker_id << ":" << pc.addr << ' ' << e.what();
|
||||
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if(!instruction) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
{
|
||||
size_t tmp_isa_size = *isa_size;
|
||||
*isa_size = instruction->inst.size();
|
||||
|
||||
if(*isa_size > tmp_isa_size)
|
||||
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
memcpy(isa_instruction, instruction->inst.data(), *isa_size);
|
||||
*isa_memory_size = instruction->size;
|
||||
|
||||
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -214,7 +221,7 @@ trace_callback(rocprofiler_thread_trace_decoder_record_type_t record_type_id,
|
||||
|
||||
extern "C" {
|
||||
rocprofiler_status_t
|
||||
rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
rocprofiler_thread_trace_decoder_callback_t user_callback,
|
||||
void* data,
|
||||
uint64_t size,
|
||||
@@ -249,8 +256,8 @@ rocprofiler_trace_decode(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
}
|
||||
|
||||
const char*
|
||||
rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_handle_t handle,
|
||||
rocprofiler_thread_trace_decoder_info_t info)
|
||||
rocprofiler_thread_trace_decoder_info_string(rocprofiler_thread_trace_decoder_id_t handle,
|
||||
rocprofiler_thread_trace_decoder_info_t info)
|
||||
{
|
||||
auto decoder = get_dl(handle);
|
||||
if(decoder == nullptr) return nullptr;
|
||||
|
||||
+83
-79
@@ -42,8 +42,6 @@ rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.p
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
set(IS_DISABLED ON)
|
||||
|
||||
find_library(
|
||||
attdecoder_LIBRARY
|
||||
NAMES rocprof-trace-decoder
|
||||
@@ -58,43 +56,43 @@ endif()
|
||||
find_package_handle_standard_args(attdecoder REQUIRED_VARS attdecoder_LIB_DIR
|
||||
attdecoder_LIBRARY)
|
||||
|
||||
set(COMMON_PARAMS_NO_LIB ${PRELOAD_ARGS} --att --output-format json --log-level env -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/)
|
||||
|
||||
set(IS_DISABLED ON)
|
||||
if(attdecoder_FOUND)
|
||||
set(IS_DISABLED OFF)
|
||||
set(LIB_PATH_ENV "ROCPROF_ATT_LIBRARY_PATH=${attdecoder_LIB_DIR}")
|
||||
set(COMMON_PARAMS --att-library-path ${attdecoder_LIB_DIR} ${COMMON_PARAMS_NO_LIB})
|
||||
endif()
|
||||
|
||||
# hsa multiqueue dependency test with lib path
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
|
||||
NAME rocprofv3-test-att-hsa-multiqueue-cmd-env-att-lib-path-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
|
||||
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
|
||||
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-serialize-all 1 -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json
|
||||
${PRELOAD_ARGS} --att-library-path ${attdecoder_LIB_DIR} --att-activity 8 --
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input
|
||||
--att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
|
||||
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-serialize-all 1 -o out --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
|
||||
rocprofv3-test-att-hsa-multiqueue-cmd-env-att-lib-path-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED})
|
||||
|
||||
# hsa multiqueue dependency test with json input
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-json-execute
|
||||
NAME rocprofv3-test-att-hsa-multiqueue-json-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --att-library-path
|
||||
${attdecoder_LIB_DIR} -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.json ${PRELOAD_ARGS} --
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/json_input -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.json --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-json-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT
|
||||
"${LIB_PATH_ENV}")
|
||||
rocprofv3-test-att-hsa-multiqueue-json-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED})
|
||||
|
||||
# validate output
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-cmd-validate
|
||||
NAME rocprofv3-test-att-hsa-multiqueue-cmd-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hsa_code_object_testapp-trace/cmd_input/out_results.json
|
||||
@@ -102,7 +100,7 @@ add_test(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hsa_code_object_testapp-trace/cmd_input)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-json-validate
|
||||
NAME rocprofv3-test-att-hsa-multiqueue-json-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hsa_code_object_testapp-trace/json_input/out_results.json
|
||||
@@ -110,46 +108,46 @@ add_test(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hsa_code_object_testapp-trace/json_input)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-cmd-validate
|
||||
rocprofv3-test-att-hsa-multiqueue-cmd-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
"rocprofv3-test-hsa-multiqueue-att-cmd-ld-lib-path-execute"
|
||||
"rocprofv3-test-att-hsa-multiqueue-cmd-ld-lib-path-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
${IS_DISABLED})
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-json-validate
|
||||
rocprofv3-test-att-hsa-multiqueue-json-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
"rocprofv3-test-hsa-multiqueue-att-json-execute"
|
||||
"rocprofv3-test-att-hsa-multiqueue-json-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
${IS_DISABLED})
|
||||
|
||||
function(configure_att_input _FILENAME _OUTDIR)
|
||||
set(LIBRARY_OUTPUT_DIR ${_OUTDIR})
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/att_input.yml.in
|
||||
function(configure_att_input _FILENAME)
|
||||
set(LIB_PATH_LOC "${attdecoder_LIB_DIR}")
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}//${_FILENAME}.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${_FILENAME} @ONLY)
|
||||
endfunction()
|
||||
|
||||
configure_att_input(att_input.yml "${attdecoder_LIB_DIR}")
|
||||
configure_att_input(att_input_will_fail.yml "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
|
||||
configure_att_input(att_input.yml)
|
||||
configure_att_input(att_input_will_fail.yml)
|
||||
|
||||
# test yaml input
|
||||
add_test(
|
||||
NAME rocprofv3-test-att-yaml-input
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo --
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS_NO_LIB}/yaml -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --echo --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
@@ -166,7 +164,7 @@ add_test(
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-yaml-input-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED True)
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED})
|
||||
|
||||
add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path .
|
||||
@@ -174,11 +172,12 @@ add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-cmd-line-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED True)
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED})
|
||||
|
||||
add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --log-level env --echo
|
||||
-- $<TARGET_FILE:hsa_code_object_testapp>)
|
||||
add_test(
|
||||
NAME rocprofv3-test-att-library-path-env-var-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS_NO_LIB}/envfail
|
||||
--att-library-path . --echo -- $<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-env-var-will-fail
|
||||
@@ -187,65 +186,70 @@ set_tests_properties(
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"ROCPROF_ATT_LIBRARY_PATH=."
|
||||
"ROCPROF_ATT_LIBRARY_PATH=${attdecoder_LIB_DIR}"
|
||||
WILL_FAIL
|
||||
ON
|
||||
DISABLED
|
||||
True)
|
||||
${IS_DISABLED})
|
||||
|
||||
add_test(NAME rocprofv3-test-att-env-var
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS_NO_LIB}/envvar
|
||||
-- $<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-env-var
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
"ROCPROF_ATT_LIBRARY_PATH=${attdecoder_LIB_DIR}" DISABLED ${IS_DISABLED})
|
||||
|
||||
#
|
||||
# Uses ATT and Counter Collection at the same time
|
||||
#
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --pmc SQ_WAVES
|
||||
--advanced-thread-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
|
||||
out --output-format json ${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
|
||||
add_test(NAME rocprofv3-test-att-hsa-multiqueue-plus-pmc-execute
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input
|
||||
--pmc SQ_WAVES -o out -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT
|
||||
"${LIB_PATH_ENV}")
|
||||
rocprofv3-test-att-hsa-multiqueue-plus-pmc-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED})
|
||||
|
||||
# Check for conflict PMC + activity
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-activity-pmc-will-fail
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --pmc SQ_WAVES
|
||||
--advanced-thread-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
|
||||
out --att-activity 8 ${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
|
||||
add_test(NAME rocprofv3-test-att-hsa-multiqueue-activity-pmc-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input
|
||||
--pmc SQ_WAVES -o out --att-activity 8 -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-activity-pmc-will-fail
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DISABLED
|
||||
${IS_DISABLED}
|
||||
ENVIRONMENT
|
||||
"${LIB_PATH_ENV}"
|
||||
WILL_FAIL
|
||||
ON)
|
||||
rocprofv3-test-att-hsa-multiqueue-activity-pmc-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} WILL_FAIL ON)
|
||||
|
||||
# Check for conflict Perfcounters + activity
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-activity-perf-will-fail
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --att
|
||||
--att-perfcounter-ctrl 8 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
|
||||
out --att-activity 8 ${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
|
||||
NAME rocprofv3-test-att-hsa-multiqueue-activity-perf-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input
|
||||
--att-perfcounter-ctrl 8 -o out --att-activity 8 -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-activity-perf-will-fail
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DISABLED
|
||||
${IS_DISABLED}
|
||||
ENVIRONMENT
|
||||
"${LIB_PATH_ENV}"
|
||||
WILL_FAIL
|
||||
ON)
|
||||
rocprofv3-test-att-hsa-multiqueue-activity-perf-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} WILL_FAIL ON)
|
||||
|
||||
# ATT plus PC sampling
|
||||
|
||||
rocprofiler_sdk_pc_sampling_stochastic_disabled(IS_PC_SAMPLING_STOCHASTIC_DISABLED)
|
||||
|
||||
set(ATT_PLUS_PCS_DISABLE ${IS_DISABLED})
|
||||
if(${IS_PC_SAMPLING_STOCHASTIC_DISABLED})
|
||||
set(ATT_PLUS_PCS_DISABLE ON)
|
||||
endif()
|
||||
if(${ROCPROFILER_DISABLE_UNSTABLE_CTESTS})
|
||||
set(ATT_PLUS_PCS_DISABLE ON)
|
||||
endif()
|
||||
set(PCS_ARGS --pc-sampling-beta-enabled 1 --pc-sampling-unit cycles
|
||||
--pc-sampling-interval 16384 --pc-sampling-method stochastic)
|
||||
|
||||
add_test(NAME rocprofv3-test-att-plus-pc-sampling
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> ${COMMON_PARAMS}/cmd_input
|
||||
${PCS_ARGS} -o out -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-plus-pc-sampling
|
||||
PROPERTIES TIMEOUT 90 LABELS "integration-tests;thread-trace;pc-sampling" DISABLED
|
||||
${ATT_PLUS_PCS_DISABLE})
|
||||
|
||||
+3
-3
@@ -23,9 +23,9 @@
|
||||
jobs:
|
||||
- advanced_thread_trace: True
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@/att
|
||||
- @LIBRARY_OUTPUT_DIR@
|
||||
- @LIB_PATH_LOC@/att
|
||||
- @LIB_PATH_LOC@
|
||||
|
||||
- advanced_thread_trace: True
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@
|
||||
- @LIB_PATH_LOC@
|
||||
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
jobs:
|
||||
- advanced_thread_trace: True
|
||||
att_library_path:
|
||||
- @LIB_PATH_LOC@/att
|
||||
@@ -29,17 +29,6 @@
|
||||
|
||||
#include <set>
|
||||
|
||||
#define C_API_BEGIN \
|
||||
try \
|
||||
{
|
||||
#define C_API_END \
|
||||
} \
|
||||
catch(std::exception & e) \
|
||||
{ \
|
||||
std::cerr << "Error in " << __FILE__ << ':' << __LINE__ << ' ' << e.what() << std::endl; \
|
||||
} \
|
||||
catch(...) { std::cerr << "Error in " << __FILE__ << ':' << __LINE__ << std::endl; }
|
||||
|
||||
namespace ATTTest
|
||||
{
|
||||
namespace Agent
|
||||
@@ -135,9 +124,10 @@ query_available_agents(rocprofiler_agent_version_t /* version */,
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */)
|
||||
{
|
||||
(void) fini_func;
|
||||
Callbacks::init();
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&tracing_ctx), "context creation");
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&agent_ctx), "context creation");
|
||||
|
||||
@@ -147,7 +137,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
nullptr,
|
||||
0,
|
||||
Callbacks::tool_codeobj_tracing_callback,
|
||||
tool_data),
|
||||
nullptr),
|
||||
"code object tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
@@ -156,13 +146,13 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
nullptr,
|
||||
0,
|
||||
dispatch_tracing_callback,
|
||||
tool_data),
|
||||
nullptr),
|
||||
"dispatch tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
|
||||
&query_available_agents,
|
||||
sizeof(rocprofiler_agent_t),
|
||||
tool_data),
|
||||
nullptr),
|
||||
"Failed to find GPU agents");
|
||||
|
||||
int valid_ctx = 0;
|
||||
@@ -177,13 +167,6 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_fini(void* tool_data)
|
||||
{
|
||||
Callbacks::finalize_json(tool_data);
|
||||
delete static_cast<Callbacks::ToolData*>(tool_data);
|
||||
}
|
||||
|
||||
} // namespace Agent
|
||||
} // namespace ATTTest
|
||||
|
||||
@@ -206,8 +189,8 @@ rocprofiler_configure(uint32_t /* version */,
|
||||
static auto cfg =
|
||||
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
|
||||
&ATTTest::Agent::tool_init,
|
||||
&ATTTest::Agent::tool_fini,
|
||||
new Callbacks::ToolData{"att_agent_test/"}};
|
||||
&Callbacks::finalize,
|
||||
nullptr};
|
||||
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
|
||||
@@ -48,15 +48,17 @@ dispatch_callback(rocprofiler_agent_id_t /* agent */,
|
||||
static std::atomic<size_t> count{0};
|
||||
if(count.fetch_add(1) > NUM_KERNELS) return ROCPROFILER_THREAD_TRACE_CONTROL_NONE;
|
||||
|
||||
assert(userdata && "Dispatch callback passed null!");
|
||||
assert(dispatch_userdata && "Dispatch callback passed null!");
|
||||
dispatch_userdata->ptr = userdata;
|
||||
|
||||
return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP;
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */)
|
||||
{
|
||||
Callbacks::init();
|
||||
|
||||
static rocprofiler_context_id_t client_ctx = {0};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation");
|
||||
@@ -67,7 +69,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
nullptr,
|
||||
0,
|
||||
Callbacks::tool_codeobj_tracing_callback,
|
||||
tool_data),
|
||||
nullptr),
|
||||
"code object tracing service configure");
|
||||
|
||||
std::vector<rocprofiler_thread_trace_parameter_t> params{};
|
||||
@@ -100,7 +102,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
params.size(),
|
||||
dispatch_callback,
|
||||
Callbacks::shader_data_callback,
|
||||
tool_data),
|
||||
nullptr),
|
||||
"thread trace service configure");
|
||||
}
|
||||
|
||||
@@ -121,13 +123,6 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_fini(void* tool_data)
|
||||
{
|
||||
Callbacks::finalize_json(tool_data);
|
||||
delete static_cast<Callbacks::ToolData*>(tool_data);
|
||||
}
|
||||
|
||||
} // namespace Multi
|
||||
} // namespace ATTTest
|
||||
|
||||
@@ -147,11 +142,11 @@ rocprofiler_configure(uint32_t /* version */,
|
||||
ATTTest::Multi::client_id = id;
|
||||
|
||||
// create configure data
|
||||
static auto cfg = rocprofiler_tool_configure_result_t{
|
||||
sizeof(rocprofiler_tool_configure_result_t),
|
||||
&ATTTest::Multi::tool_init,
|
||||
&ATTTest::Multi::tool_fini,
|
||||
reinterpret_cast<void*>(new Callbacks::ToolData{"att_multi_test/"})};
|
||||
static auto cfg =
|
||||
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
|
||||
&ATTTest::Multi::tool_init,
|
||||
&Callbacks::finalize,
|
||||
nullptr};
|
||||
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
|
||||
@@ -41,35 +41,20 @@ dispatch_callback(rocprofiler_agent_id_t /* agent */,
|
||||
rocprofiler_async_correlation_id_t /* correlation_id */,
|
||||
rocprofiler_kernel_id_t kernel_id,
|
||||
rocprofiler_dispatch_id_t /* dispatch_id */,
|
||||
void* userdata,
|
||||
rocprofiler_user_data_t* dispatch_userdata)
|
||||
void* /* userdata */,
|
||||
rocprofiler_user_data_t* /* dispatch_userdata */)
|
||||
{
|
||||
C_API_BEGIN
|
||||
assert(userdata && "Dispatch callback passed null!");
|
||||
auto& tool = *reinterpret_cast<Callbacks::ToolData*>(userdata);
|
||||
dispatch_userdata->ptr = userdata;
|
||||
static rocprofiler_kernel_id_t target_kernel_id = kernel_id;
|
||||
if(target_kernel_id == kernel_id) return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP;
|
||||
|
||||
static std::string_view desired_func_name = "branching_kernel";
|
||||
|
||||
try
|
||||
{
|
||||
auto& kernel_name = tool.kernel_id_to_kernel_name.at(kernel_id);
|
||||
if(kernel_name.find(desired_func_name) == std::string::npos)
|
||||
return ROCPROFILER_THREAD_TRACE_CONTROL_NONE;
|
||||
|
||||
return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP;
|
||||
} catch(...)
|
||||
{
|
||||
std::cerr << "Could not find kernel id: " << kernel_id << std::endl;
|
||||
}
|
||||
|
||||
C_API_END
|
||||
return ROCPROFILER_THREAD_TRACE_CONTROL_NONE;
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */)
|
||||
{
|
||||
Callbacks::init();
|
||||
|
||||
static rocprofiler_context_id_t client_ctx = {0};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "context creation");
|
||||
@@ -80,7 +65,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
nullptr,
|
||||
0,
|
||||
Callbacks::tool_codeobj_tracing_callback,
|
||||
tool_data),
|
||||
nullptr),
|
||||
"code object tracing service configure");
|
||||
|
||||
std::vector<rocprofiler_agent_id_t> agents{};
|
||||
@@ -110,7 +95,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
0,
|
||||
dispatch_callback,
|
||||
Callbacks::shader_data_callback,
|
||||
tool_data),
|
||||
nullptr),
|
||||
"thread trace service configure");
|
||||
}
|
||||
|
||||
@@ -131,13 +116,6 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
tool_fini(void* tool_data)
|
||||
{
|
||||
Callbacks::finalize_json(tool_data);
|
||||
delete static_cast<Callbacks::ToolData*>(tool_data);
|
||||
}
|
||||
|
||||
} // namespace Single
|
||||
} // namespace ATTTest
|
||||
|
||||
@@ -157,11 +135,11 @@ rocprofiler_configure(uint32_t /* version */,
|
||||
ATTTest::Single::client_id = id;
|
||||
|
||||
// create configure data
|
||||
static auto cfg = rocprofiler_tool_configure_result_t{
|
||||
sizeof(rocprofiler_tool_configure_result_t),
|
||||
&ATTTest::Single::tool_init,
|
||||
&ATTTest::Single::tool_fini,
|
||||
reinterpret_cast<void*>(new Callbacks::ToolData{"att_single_test/"})};
|
||||
static auto cfg =
|
||||
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
|
||||
&ATTTest::Single::tool_init,
|
||||
&Callbacks::finalize,
|
||||
nullptr};
|
||||
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
|
||||
@@ -26,11 +26,6 @@
|
||||
#endif
|
||||
|
||||
#include "trace_callbacks.hpp"
|
||||
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
|
||||
|
||||
#ifdef ENABLE_ATT_FILES
|
||||
# include <nlohmann/json.hpp>
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
@@ -38,135 +33,78 @@
|
||||
|
||||
namespace Callbacks
|
||||
{
|
||||
using code_obj_load_data_t = rocprofiler_callback_tracing_code_object_load_data_t;
|
||||
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
rocprofiler_thread_trace_decoder_id_t decoder{};
|
||||
std::atomic<size_t> latency{0};
|
||||
|
||||
void
|
||||
tool_codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* /* user_data */,
|
||||
void* userdata)
|
||||
void* /* userdata */)
|
||||
{
|
||||
C_API_BEGIN
|
||||
if(record.kind != ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT) return;
|
||||
if(record.phase != ROCPROFILER_CALLBACK_PHASE_LOAD) return;
|
||||
|
||||
assert(userdata && "Dispatch callback passed null!");
|
||||
auto& tool = *reinterpret_cast<Callbacks::ToolData*>(userdata);
|
||||
|
||||
if(record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
|
||||
{
|
||||
auto* data = static_cast<kernel_symbol_data_t*>(record.payload);
|
||||
tool.kernel_id_to_kernel_name.emplace(data->kernel_id, data->kernel_name);
|
||||
}
|
||||
|
||||
if(record.operation != ROCPROFILER_CODE_OBJECT_LOAD) return;
|
||||
|
||||
auto* data = static_cast<code_obj_load_data_t*>(record.payload);
|
||||
auto* data = static_cast<rocprofiler_callback_tracing_code_object_load_data_t*>(record.payload);
|
||||
if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE) return;
|
||||
|
||||
static std::atomic<int> filecnt{0};
|
||||
std::string name = "codeobj_" + std::to_string(filecnt.fetch_add(1)) + ".out";
|
||||
|
||||
#ifdef ENABLE_ATT_FILES
|
||||
if(std::string_view(data->uri).find("file:///") == 0)
|
||||
if(record.phase != ROCPROFILER_CALLBACK_PHASE_LOAD)
|
||||
{
|
||||
rocprofiler::sdk::codeobj::disassembly::CodeObjectBinary binary(data->uri);
|
||||
|
||||
std::ofstream file(tool.out_dir + name, std::ios::binary);
|
||||
assert(file.is_open() && "Could not open codeobj file for writing");
|
||||
file.write((char*) binary.buffer.data(), binary.buffer.size());
|
||||
DECODER_CALL(
|
||||
rocprofiler_thread_trace_decoder_codeobj_unload(decoder, data->code_object_id));
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ofstream file(tool.out_dir + name, std::ios::binary);
|
||||
file.write((char*) data->memory_base, data->memory_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
auto _lk = std::unique_lock{tool.mut};
|
||||
tool.codeobjs.push_back(
|
||||
{data->load_delta, data->load_size, data->code_object_id, name, data->uri});
|
||||
DECODER_CALL(rocprofiler_thread_trace_decoder_codeobj_load(
|
||||
decoder,
|
||||
data->code_object_id,
|
||||
data->load_delta,
|
||||
data->load_size,
|
||||
reinterpret_cast<const void*>(data->memory_base),
|
||||
data->memory_size));
|
||||
}
|
||||
|
||||
C_API_END
|
||||
typedef void (*rocprofiler_thread_trace_decoder_callback_t)(
|
||||
rocprofiler_thread_trace_decoder_record_type_t record_type_id,
|
||||
void* trace_events,
|
||||
uint64_t trace_size,
|
||||
void* userdata);
|
||||
|
||||
void
|
||||
shader_data_callback(rocprofiler_agent_id_t /* agent */,
|
||||
int64_t /* se_id */,
|
||||
void* se_data,
|
||||
size_t data_size,
|
||||
rocprofiler_user_data_t /* userdata */)
|
||||
{
|
||||
auto parse = [](rocprofiler_thread_trace_decoder_record_type_t record_type_id,
|
||||
void* trace_events,
|
||||
uint64_t trace_size,
|
||||
void* /* userdata */) {
|
||||
if(record_type_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return;
|
||||
|
||||
for(size_t w = 0; w < trace_size; w++)
|
||||
{
|
||||
auto* wave = static_cast<rocprofiler_thread_trace_decoder_wave_t*>(trace_events);
|
||||
for(size_t i = 0; i < wave->instructions_size; i++)
|
||||
latency += wave->instructions_array[i].duration;
|
||||
}
|
||||
};
|
||||
DECODER_CALL(rocprofiler_trace_decode(decoder, parse, se_data, data_size, nullptr));
|
||||
}
|
||||
|
||||
void
|
||||
shader_data_callback(rocprofiler_agent_id_t agent,
|
||||
int64_t se_id,
|
||||
void* se_data,
|
||||
size_t data_size,
|
||||
rocprofiler_user_data_t userdata)
|
||||
init()
|
||||
{
|
||||
C_API_BEGIN
|
||||
|
||||
assert(userdata.ptr && "Dispatch callback passed null!");
|
||||
auto& tool = *reinterpret_cast<Callbacks::ToolData*>(userdata.ptr);
|
||||
|
||||
std::string name = "agent_" + std::to_string(agent.handle) + "_shader_engine_" +
|
||||
std::to_string(se_id) + "_" + std::to_string(agent.handle) + ".att";
|
||||
|
||||
#ifdef ENABLE_ATT_FILES
|
||||
{
|
||||
std::ofstream file(tool.out_dir + name, std::ios::binary);
|
||||
assert(file.is_open() && "Could not open ATT file for writing");
|
||||
file.write((char*) se_data, data_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(se_data);
|
||||
assert(data_size);
|
||||
|
||||
auto _lk = std::unique_lock{tool.mut};
|
||||
tool.att_files.push_back(name);
|
||||
|
||||
C_API_END
|
||||
// const char* decoder_lib = std::getenv("ROCPROF_TRACE_DECODER_PATH");
|
||||
DECODER_CALL(rocprofiler_thread_trace_decoder_create(&decoder, "/opt/rocm/lib"));
|
||||
}
|
||||
|
||||
void
|
||||
finalize_json(void* userdata)
|
||||
finalize(void* /* tool_data */)
|
||||
{
|
||||
assert(userdata && "Dispatch callback passed null!");
|
||||
rocprofiler_thread_trace_decoder_destroy(decoder);
|
||||
|
||||
auto& tool = *reinterpret_cast<Callbacks::ToolData*>(userdata);
|
||||
auto _lk = std::unique_lock{tool.mut};
|
||||
assert(!tool.att_files.empty());
|
||||
|
||||
#ifdef ENABLE_ATT_FILES
|
||||
nlohmann::json att_json;
|
||||
for(auto& file : tool.att_files)
|
||||
att_json.push_back(file);
|
||||
|
||||
nlohmann::json codeobj_json;
|
||||
nlohmann::json snapshot_json;
|
||||
for(auto& file : tool.codeobjs)
|
||||
{
|
||||
nlohmann::json codeobj;
|
||||
codeobj["code_object_id"] = file.id;
|
||||
codeobj["load_delta"] = file.addr;
|
||||
codeobj["load_size"] = file.size;
|
||||
codeobj["uri"] = file.uri;
|
||||
codeobj["filename"] = file.filename;
|
||||
codeobj_json.push_back(codeobj);
|
||||
|
||||
nlohmann::json pair_json;
|
||||
pair_json["key"] = file.id;
|
||||
pair_json["value"] = file.filename;
|
||||
snapshot_json.push_back(pair_json);
|
||||
}
|
||||
nlohmann::json tool_json;
|
||||
tool_json["strings"]["att_files"] = att_json;
|
||||
tool_json["code_objects"] = codeobj_json;
|
||||
tool_json["strings"]["code_object_snapshot_files"] = snapshot_json;
|
||||
|
||||
nlohmann::json array;
|
||||
array.push_back(tool_json);
|
||||
|
||||
nlohmann::json sdk_json;
|
||||
sdk_json["rocprofiler-sdk-tool"] = array;
|
||||
|
||||
std::ofstream json_file(tool.out_dir + (std::to_string(getpid()) + "_results.json"));
|
||||
assert(json_file.is_open() && "Could not open json file for writing!");
|
||||
json_file << sdk_json;
|
||||
#endif
|
||||
if(latency.load() == 0) std::cerr << "Error: No latency was assigned to the trace!";
|
||||
}
|
||||
|
||||
} // namespace Callbacks
|
||||
|
||||
@@ -40,60 +40,21 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#define ROCPROFILER_VAR_NAME_COMBINE(X, Y) X##Y
|
||||
#define ROCPROFILER_VARIABLE(X, Y) ROCPROFILER_VAR_NAME_COMBINE(X, Y)
|
||||
|
||||
#define C_API_BEGIN \
|
||||
try \
|
||||
{
|
||||
#define C_API_END \
|
||||
} \
|
||||
catch(std::exception & e) \
|
||||
{ \
|
||||
std::cerr << "Error in " << __FILE__ << ':' << __LINE__ << ' ' << e.what() << std::endl; \
|
||||
} \
|
||||
catch(...) { std::cerr << "Error in " << __FILE__ << ':' << __LINE__ << std::endl; }
|
||||
|
||||
#define ROCPROFILER_CALL(result, msg) \
|
||||
if((result) != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
|
||||
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
|
||||
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
|
||||
<< std::endl; \
|
||||
std::stringstream errmsg{}; \
|
||||
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
|
||||
<< status_msg << ")"; \
|
||||
throw std::runtime_error(errmsg.str()); \
|
||||
} \
|
||||
std::cerr << "Error: " << msg << std::endl; \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
#define DECODER_CALL(result) \
|
||||
if((result) != ROCPROFILER_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::cerr << "Error: Generic decoder error" << std::endl; \
|
||||
}
|
||||
|
||||
namespace Callbacks
|
||||
{
|
||||
struct CodeobjInfo
|
||||
{
|
||||
int64_t addr = 0;
|
||||
size_t size = 0;
|
||||
size_t id = 0;
|
||||
std::string filename{};
|
||||
std::string uri{};
|
||||
};
|
||||
|
||||
struct ToolData
|
||||
{
|
||||
ToolData(const char* out)
|
||||
: out_dir(out){};
|
||||
|
||||
std::string out_dir{};
|
||||
std::mutex mut{};
|
||||
std::vector<CodeobjInfo> codeobjs{};
|
||||
std::vector<std::string> att_files{};
|
||||
|
||||
std::unordered_map<uint64_t, std::string> kernel_id_to_kernel_name = {};
|
||||
};
|
||||
|
||||
void
|
||||
tool_codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t*,
|
||||
@@ -107,6 +68,9 @@ shader_data_callback(rocprofiler_agent_id_t agent,
|
||||
rocprofiler_user_data_t userdata);
|
||||
|
||||
void
|
||||
finalize_json(void* userdata);
|
||||
init();
|
||||
|
||||
void
|
||||
finalize(void* /* tool_data */);
|
||||
|
||||
} // namespace Callbacks
|
||||
|
||||
Ссылка в новой задаче
Block a user