Adding source snapshot and partial serialization (#99)

* Adding source snapshot

* Adding option to serialize only on target kernel

* Fix for tidy

* Formatting

* Testing the new flag

---------

Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>

[ROCm/rocprofiler-sdk commit: a2fa188e14]
Этот коммит содержится в:
Baraldi, Giovanni
2025-01-11 00:43:06 +01:00
коммит произвёл GitHub
родитель e226e2a11a
Коммит 8abb65b166
7 изменённых файлов: 85 добавлений и 26 удалений
+3 -2
Просмотреть файл
@@ -42,8 +42,9 @@ typedef enum
ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, ///< Bitmask of shader engines.
ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT
ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs
ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL,
ROCPROFILER_ATT_PARAMETER_PERFCOUNTER,
ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, ///< Period [1,32] or disable (0) perfmon
ROCPROFILER_ATT_PARAMETER_PERFCOUNTER, ///< Perfmon ID and SIMD mask
ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, ///< Serializes kernels not under thread trace
ROCPROFILER_ATT_PARAMETER_LAST
} rocprofiler_att_parameter_type_t;
+43
Просмотреть файл
@@ -27,6 +27,7 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <unordered_set>
namespace rocprofiler
{
@@ -34,6 +35,19 @@ namespace att_wrapper
{
#define ATT_CSV_NAME "att_output.csv"
// Inserts "filename" into a nested json file tree keyed by the elements of "path".
// Every element except the last selects (through operator[]) the child node to descend
// into; the last element becomes the key whose value is set to "filename".
// An empty "path" leaves "json" untouched. Elements are erased from "path" as they are
// consumed, so at most the final element remains when the function returns.
void
navigate(nlohmann::json& json, std::vector<std::string>& path, const std::string& filename)
{
    auto* node = &json;

    // Walk down one level per leading element, dropping each element once it was used.
    while(path.size() > 1)
    {
        node = &(*node)[path.front()];
        path.erase(path.begin());
    }

    // Only the leaf key is left (if anything): attach the file name to it.
    if(!path.empty()) (*node)[path.front()] = filename;
}
CodeFile::CodeFile(const Fspath& _dir, std::shared_ptr<AddressTable> _table)
: dir(_dir)
, filename(_dir / "code.json")
@@ -90,6 +104,8 @@ CodeFile::~CodeFile()
nlohmann::json jcode;
std::unordered_set<std::string> snapshots{};
for(auto& line : vec)
{
auto& isa = *line.second;
@@ -110,6 +126,14 @@ CodeFile::~CodeFile()
<< ", " << isa.hitcount << ", " << isa.latency << "]";
jcode.push_back(nlohmann::json::parse(code.str()));
size_t lineref = isa.code_line->comment.rfind(':');
if(lineref == 0 || lineref == std::string::npos) continue;
auto source_ref = isa.code_line->comment.substr(0, lineref);
if(!source_ref.empty() && snapshots.find(source_ref) == snapshots.end())
snapshots.insert(std::move(source_ref));
}
nlohmann::json json;
@@ -117,6 +141,25 @@ CodeFile::~CodeFile()
json["version"] = TOOL_VERSION;
OutputFile(filename) << json;
nlohmann::json jsnapfiletree;
size_t num_snap = 0;
for(auto& source_ref : snapshots)
{
if(rocprofiler::common::filesystem::exists(source_ref))
{
Fspath filepath(source_ref);
std::stringstream newfile;
newfile << "source_" << (num_snap++) << '_' << filepath.filename().string();
std::vector<std::string> path_elements(filepath.begin(), filepath.end());
navigate(jsnapfiletree, path_elements, newfile.str());
rocprofiler::common::filesystem::copy(filepath, dir / newfile.str());
}
}
if(num_snap != 0) OutputFile(dir / "snapshots.json") << jsnapfiletree;
}
} // namespace att_wrapper
+28 -21
Просмотреть файл
@@ -355,7 +355,7 @@ DispatchThreadTracer::pre_kernel_call(const hsa::Queue& queue,
if(control_flags == ROCPROFILER_ATT_CONTROL_NONE)
{
auto empty = std::make_unique<hsa::EmptyAQLPacket>();
maybe_add_serialization(empty);
if(params.bSerialize) maybe_add_serialization(empty);
return empty;
}
@@ -396,7 +396,8 @@ void
DispatchThreadTracer::post_kernel_call(DispatchThreadTracer::inst_pkt_t& aql,
const hsa::Queue::queue_info_session_t& session)
{
SignalSerializerExit signal(session);
std::unique_ptr<SignalSerializerExit> signal{nullptr};
if(params.bSerialize) signal = std::make_unique<SignalSerializerExit>(session);
if(post_move_data.load() < 1) return;
@@ -413,6 +414,8 @@ DispatchThreadTracer::post_kernel_call(DispatchThreadTracer::inst_pkt_t& a
auto it = agents.find(pkt->GetAgent());
if(it != agents.end() && it->second != nullptr)
it->second->iterate_data(pkt->GetHandle(), session.user_data);
if(!signal) std::make_unique<SignalSerializerExit>(session);
}
}
@@ -420,28 +423,33 @@ void
DispatchThreadTracer::start_context()
{
using corr_id_map_t = hsa::Queue::queue_info_session_t::external_corr_id_map_t;
CHECK_NOTNULL(hsa::get_queue_controller())->enable_serialization();
// Only one thread should be attempting to enable/disable this context
client.wlock([&](auto& client_id) {
if(client_id) return;
client_id = hsa::get_queue_controller()->add_callback(
std::nullopt,
[=](const hsa::Queue& q,
const hsa::rocprofiler_packet& /* kern_pkt */,
rocprofiler_kernel_id_t kernel_id,
rocprofiler_dispatch_id_t dispatch_id,
rocprofiler_user_data_t* user_data,
const corr_id_map_t& /* extern_corr_ids */,
const context::correlation_id* corr_id) {
return this->pre_kernel_call(q, kernel_id, dispatch_id, user_data, corr_id);
},
[=](const hsa::Queue& /* q */,
hsa::rocprofiler_packet /* kern_pkt */,
std::shared_ptr<hsa::Queue::queue_info_session_t>& session,
inst_pkt_t& aql,
kernel_dispatch::profiling_time) { this->post_kernel_call(aql, *session); });
client_id =
CHECK_NOTNULL(hsa::get_queue_controller())
->add_callback(
std::nullopt,
[=](const hsa::Queue& q,
const hsa::rocprofiler_packet& /* kern_pkt */,
rocprofiler_kernel_id_t kernel_id,
rocprofiler_dispatch_id_t dispatch_id,
rocprofiler_user_data_t* user_data,
const corr_id_map_t& /* extern_corr_ids */,
const context::correlation_id* corr_id) {
return this->pre_kernel_call(q, kernel_id, dispatch_id, user_data, corr_id);
},
[=](const hsa::Queue& /* q */,
hsa::rocprofiler_packet /* kern_pkt */,
std::shared_ptr<hsa::Queue::queue_info_session_t>& session,
inst_pkt_t& aql,
kernel_dispatch::profiling_time) {
this->post_kernel_call(aql, *session);
});
});
}
@@ -452,12 +460,11 @@ DispatchThreadTracer::stop_context() // NOLINT(readability-convert-member-funct
if(!client_id) return;
// Remove our callbacks from HSA's queue controller
hsa::get_queue_controller()->remove_callback(*client_id);
CHECK_NOTNULL(hsa::get_queue_controller())->remove_callback(*client_id);
client_id = std::nullopt;
});
auto* controller = hsa::get_queue_controller();
if(controller) controller->disable_serialization();
CHECK_NOTNULL(hsa::get_queue_controller())->disable_serialization();
}
void
+5 -3
Просмотреть файл
@@ -67,6 +67,8 @@ struct thread_trace_parameter_pack
uint64_t shader_engine_mask = DEFAULT_SE_MASK;
uint64_t buffer_size = DEFAULT_BUFFER_SIZE;
bool bSerialize = false;
// GFX9 Only
std::vector<std::pair<uint32_t, uint32_t>> perfcounters;
@@ -155,12 +157,12 @@ public:
void post_kernel_call(inst_pkt_t& aql, const hsa::queue_info_session& session);
std::unordered_map<hsa_agent_t, std::unique_ptr<ThreadTracerQueue>> agents;
std::unordered_map<hsa_agent_t, std::unique_ptr<ThreadTracerQueue>> agents{};
std::shared_mutex agents_map_mut;
std::shared_mutex agents_map_mut{};
std::atomic<int> post_move_data{0};
thread_trace_parameter_pack params;
thread_trace_parameter_pack params{};
};
class AgentThreadTracer
+4
Просмотреть файл
@@ -85,6 +85,7 @@ rocprofiler_configure_dispatch_thread_trace_service(
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
pack.perfcounter_ctrl = param.value;
break;
case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL: pack.bSerialize = param.value != 0; break;
case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
}
}
@@ -144,6 +145,9 @@ rocprofiler_configure_agent_thread_trace_service(
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
pack.perfcounter_ctrl = param.value;
break;
case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL:
if(param.value != 0) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
break;
case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
}
}
+1
Просмотреть файл
@@ -120,6 +120,7 @@ query_available_agents(rocprofiler_agent_version_t /* version */,
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, 0xF});
parameters.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, 0x6000000});
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, 0x11});
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 0});
ROCPROFILER_CALL(
rocprofiler_configure_agent_thread_trace_service(agent_ctx,
+1
Просмотреть файл
@@ -71,6 +71,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
"code object tracing service configure");
std::vector<rocprofiler_att_parameter_t> params{};
params.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 1});
ROCPROFILER_CALL(
rocprofiler_configure_dispatch_thread_trace_service(client_ctx,