Adding source snapshot and partial serialization (#99)
* Adding source snapshot
* Adding option to serialize only on target kernel
* Fix for tidy
* Formatting
* Testing the new flag
---------
Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
[ROCm/rocprofiler-sdk commit: a2fa188e14]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
e226e2a11a
Коммит
8abb65b166
+3
-2
@@ -42,8 +42,9 @@ typedef enum
|
||||
ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, ///< Bitmask of shader engines.
|
||||
ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT
|
||||
ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs
|
||||
ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL,
|
||||
ROCPROFILER_ATT_PARAMETER_PERFCOUNTER,
|
||||
ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, ///< Period [1,32] or disable (0) perfmon
|
||||
ROCPROFILER_ATT_PARAMETER_PERFCOUNTER, ///< Perfmon ID and SIMD mask
|
||||
ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, ///< Serializes kernels not under thread trace
|
||||
ROCPROFILER_ATT_PARAMETER_LAST
|
||||
} rocprofiler_att_parameter_type_t;
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -34,6 +35,19 @@ namespace att_wrapper
|
||||
{
|
||||
#define ATT_CSV_NAME "att_output.csv"
|
||||
|
||||
// Builds a json filetree by recursively inserting "path" into the json object.
|
||||
void
|
||||
navigate(nlohmann::json& json, std::vector<std::string>& path, const std::string& filename)
|
||||
{
|
||||
if(path.size() == 1) json[path.at(0)] = filename;
|
||||
|
||||
if(path.size() <= 1) return;
|
||||
|
||||
auto& j = json[path.at(0)];
|
||||
path.erase(path.begin());
|
||||
navigate(j, path, filename);
|
||||
}
|
||||
|
||||
CodeFile::CodeFile(const Fspath& _dir, std::shared_ptr<AddressTable> _table)
|
||||
: dir(_dir)
|
||||
, filename(_dir / "code.json")
|
||||
@@ -90,6 +104,8 @@ CodeFile::~CodeFile()
|
||||
|
||||
nlohmann::json jcode;
|
||||
|
||||
std::unordered_set<std::string> snapshots{};
|
||||
|
||||
for(auto& line : vec)
|
||||
{
|
||||
auto& isa = *line.second;
|
||||
@@ -110,6 +126,14 @@ CodeFile::~CodeFile()
|
||||
<< ", " << isa.hitcount << ", " << isa.latency << "]";
|
||||
|
||||
jcode.push_back(nlohmann::json::parse(code.str()));
|
||||
|
||||
size_t lineref = isa.code_line->comment.rfind(':');
|
||||
if(lineref == 0 || lineref == std::string::npos) continue;
|
||||
|
||||
auto source_ref = isa.code_line->comment.substr(0, lineref);
|
||||
|
||||
if(!source_ref.empty() && snapshots.find(source_ref) == snapshots.end())
|
||||
snapshots.insert(std::move(source_ref));
|
||||
}
|
||||
|
||||
nlohmann::json json;
|
||||
@@ -117,6 +141,25 @@ CodeFile::~CodeFile()
|
||||
json["version"] = TOOL_VERSION;
|
||||
|
||||
OutputFile(filename) << json;
|
||||
|
||||
nlohmann::json jsnapfiletree;
|
||||
size_t num_snap = 0;
|
||||
|
||||
for(auto& source_ref : snapshots)
|
||||
{
|
||||
if(rocprofiler::common::filesystem::exists(source_ref))
|
||||
{
|
||||
Fspath filepath(source_ref);
|
||||
std::stringstream newfile;
|
||||
newfile << "source_" << (num_snap++) << '_' << filepath.filename().string();
|
||||
|
||||
std::vector<std::string> path_elements(filepath.begin(), filepath.end());
|
||||
navigate(jsnapfiletree, path_elements, newfile.str());
|
||||
rocprofiler::common::filesystem::copy(filepath, dir / newfile.str());
|
||||
}
|
||||
}
|
||||
|
||||
if(num_snap != 0) OutputFile(dir / "snapshots.json") << jsnapfiletree;
|
||||
}
|
||||
|
||||
} // namespace att_wrapper
|
||||
|
||||
+28
-21
@@ -355,7 +355,7 @@ DispatchThreadTracer::pre_kernel_call(const hsa::Queue& queue,
|
||||
if(control_flags == ROCPROFILER_ATT_CONTROL_NONE)
|
||||
{
|
||||
auto empty = std::make_unique<hsa::EmptyAQLPacket>();
|
||||
maybe_add_serialization(empty);
|
||||
if(params.bSerialize) maybe_add_serialization(empty);
|
||||
return empty;
|
||||
}
|
||||
|
||||
@@ -396,7 +396,8 @@ void
|
||||
DispatchThreadTracer::post_kernel_call(DispatchThreadTracer::inst_pkt_t& aql,
|
||||
const hsa::Queue::queue_info_session_t& session)
|
||||
{
|
||||
SignalSerializerExit signal(session);
|
||||
std::unique_ptr<SignalSerializerExit> signal{nullptr};
|
||||
if(params.bSerialize) signal = std::make_unique<SignalSerializerExit>(session);
|
||||
|
||||
if(post_move_data.load() < 1) return;
|
||||
|
||||
@@ -413,6 +414,8 @@ DispatchThreadTracer::post_kernel_call(DispatchThreadTracer::inst_pkt_t& a
|
||||
auto it = agents.find(pkt->GetAgent());
|
||||
if(it != agents.end() && it->second != nullptr)
|
||||
it->second->iterate_data(pkt->GetHandle(), session.user_data);
|
||||
|
||||
if(!signal) std::make_unique<SignalSerializerExit>(session);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -420,28 +423,33 @@ void
|
||||
DispatchThreadTracer::start_context()
|
||||
{
|
||||
using corr_id_map_t = hsa::Queue::queue_info_session_t::external_corr_id_map_t;
|
||||
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())->enable_serialization();
|
||||
|
||||
// Only one thread should be attempting to enable/disable this context
|
||||
client.wlock([&](auto& client_id) {
|
||||
if(client_id) return;
|
||||
|
||||
client_id = hsa::get_queue_controller()->add_callback(
|
||||
std::nullopt,
|
||||
[=](const hsa::Queue& q,
|
||||
const hsa::rocprofiler_packet& /* kern_pkt */,
|
||||
rocprofiler_kernel_id_t kernel_id,
|
||||
rocprofiler_dispatch_id_t dispatch_id,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
const corr_id_map_t& /* extern_corr_ids */,
|
||||
const context::correlation_id* corr_id) {
|
||||
return this->pre_kernel_call(q, kernel_id, dispatch_id, user_data, corr_id);
|
||||
},
|
||||
[=](const hsa::Queue& /* q */,
|
||||
hsa::rocprofiler_packet /* kern_pkt */,
|
||||
std::shared_ptr<hsa::Queue::queue_info_session_t>& session,
|
||||
inst_pkt_t& aql,
|
||||
kernel_dispatch::profiling_time) { this->post_kernel_call(aql, *session); });
|
||||
client_id =
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())
|
||||
->add_callback(
|
||||
std::nullopt,
|
||||
[=](const hsa::Queue& q,
|
||||
const hsa::rocprofiler_packet& /* kern_pkt */,
|
||||
rocprofiler_kernel_id_t kernel_id,
|
||||
rocprofiler_dispatch_id_t dispatch_id,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
const corr_id_map_t& /* extern_corr_ids */,
|
||||
const context::correlation_id* corr_id) {
|
||||
return this->pre_kernel_call(q, kernel_id, dispatch_id, user_data, corr_id);
|
||||
},
|
||||
[=](const hsa::Queue& /* q */,
|
||||
hsa::rocprofiler_packet /* kern_pkt */,
|
||||
std::shared_ptr<hsa::Queue::queue_info_session_t>& session,
|
||||
inst_pkt_t& aql,
|
||||
kernel_dispatch::profiling_time) {
|
||||
this->post_kernel_call(aql, *session);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -452,12 +460,11 @@ DispatchThreadTracer::stop_context() // NOLINT(readability-convert-member-funct
|
||||
if(!client_id) return;
|
||||
|
||||
// Remove our callbacks from HSA's queue controller
|
||||
hsa::get_queue_controller()->remove_callback(*client_id);
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())->remove_callback(*client_id);
|
||||
client_id = std::nullopt;
|
||||
});
|
||||
|
||||
auto* controller = hsa::get_queue_controller();
|
||||
if(controller) controller->disable_serialization();
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())->disable_serialization();
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
+5
-3
@@ -67,6 +67,8 @@ struct thread_trace_parameter_pack
|
||||
uint64_t shader_engine_mask = DEFAULT_SE_MASK;
|
||||
uint64_t buffer_size = DEFAULT_BUFFER_SIZE;
|
||||
|
||||
bool bSerialize = false;
|
||||
|
||||
// GFX9 Only
|
||||
std::vector<std::pair<uint32_t, uint32_t>> perfcounters;
|
||||
|
||||
@@ -155,12 +157,12 @@ public:
|
||||
|
||||
void post_kernel_call(inst_pkt_t& aql, const hsa::queue_info_session& session);
|
||||
|
||||
std::unordered_map<hsa_agent_t, std::unique_ptr<ThreadTracerQueue>> agents;
|
||||
std::unordered_map<hsa_agent_t, std::unique_ptr<ThreadTracerQueue>> agents{};
|
||||
|
||||
std::shared_mutex agents_map_mut;
|
||||
std::shared_mutex agents_map_mut{};
|
||||
std::atomic<int> post_move_data{0};
|
||||
|
||||
thread_trace_parameter_pack params;
|
||||
thread_trace_parameter_pack params{};
|
||||
};
|
||||
|
||||
class AgentThreadTracer
|
||||
|
||||
@@ -85,6 +85,7 @@ rocprofiler_configure_dispatch_thread_trace_service(
|
||||
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
|
||||
pack.perfcounter_ctrl = param.value;
|
||||
break;
|
||||
case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL: pack.bSerialize = param.value != 0; break;
|
||||
case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
@@ -144,6 +145,9 @@ rocprofiler_configure_agent_thread_trace_service(
|
||||
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
|
||||
pack.perfcounter_ctrl = param.value;
|
||||
break;
|
||||
case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL:
|
||||
if(param.value != 0) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
break;
|
||||
case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,6 +120,7 @@ query_available_agents(rocprofiler_agent_version_t /* version */,
|
||||
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, 0xF});
|
||||
parameters.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, 0x6000000});
|
||||
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, 0x11});
|
||||
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 0});
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_agent_thread_trace_service(agent_ctx,
|
||||
|
||||
@@ -71,6 +71,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data)
|
||||
"code object tracing service configure");
|
||||
|
||||
std::vector<rocprofiler_att_parameter_t> params{};
|
||||
params.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 1});
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_dispatch_thread_trace_service(client_ctx,
|
||||
|
||||
Ссылка в новой задаче
Block a user