SWDEV-389161:Adding fix for tensorflow app hang with rocprofv2

Change-Id: I48ade2e853468940aa5e1fca58ba22f686d89f26
Этот коммит содержится в:
Sriraksha Nagaraj
2023-05-11 20:28:31 +00:00
коммит произвёл Ammar ELWazir
родитель b817742d89
Коммит cdc74d31db
12 изменённых файлов: 660 добавлений и 222 удалений
+26 -3
Просмотреть файл
@@ -114,15 +114,38 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler
rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id);
typedef struct {
/**
* @brief tracer extra data
*
* This is applicable only if ::ACTIVITY_DOMAIN_HSA or ACTIVITY_DOMAIN_HIP is the domain of the tracer record
*/
const char* function_name;
/**
* @brief tracer extra data
*
* This is applicable only if ::ACTIVITY_DOMAIN_HIP is the domain of the tracer record
*/
union {
const char* kernel_name;
};
} rocprofiler_plugin_tracer_extra_data_t;
/**
* Report Synchronous Record.
*
* @param[in] record Pointer to the Synchronous Tracer record.
* @param[in] session_id Session ID
* @param[in] record Synchronous Tracer record.
* @param[in] data : api_data
* @param[in] tracer_data :Tracer record extra data such as function name and kernel name
* @return Returns 0 on success and -1 on error.
*/
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
rocprofiler_session_id_t session_id);
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data);
/** @} */
+1 -1
Просмотреть файл
@@ -22,5 +22,5 @@
add_subdirectory(file)
add_subdirectory(perfetto)
add_subdirectory(ctf)
#add_subdirectory(ctf)
add_subdirectory(att)
+4 -3
Просмотреть файл
@@ -55,7 +55,7 @@ class att_plugin_t {
bool is_valid_{true};
inline bool att_file_exists(const std::string& name) {
struct stat buffer;
struct stat buffer;
return stat(name.c_str(), &buffer) == 0;
}
@@ -189,8 +189,9 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler
return att_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
rocprofiler_session_id_t session_id) {
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(
rocprofiler_record_tracer_t record,
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data) {
if (!att_plugin || !att_plugin->IsValid()) return -1;
if (record.header.id.handle == 0) return 0;
return 0;
+1 -1
Просмотреть файл
@@ -33,7 +33,7 @@ set_target_properties(ctf_plugin PROPERTIES
LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")
set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf")
target_compile_definitions(ctf_plugin PRIVATE
target_compile_definitions(ctf_plugin PUBLIC AMD_INTERNAL_BUILD PRIVATE
HIP_PROF_HIP_API_STRING=1
__HIP_PLATFORM_HCC__=1
CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata")
+4 -3
Просмотреть файл
@@ -88,8 +88,9 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
return 0;
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record,
const rocprofiler_session_id_t session_id) {
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(
const rocprofiler_record_tracer_t record, const void* data,
rocprofiler_plugin_trace_record_data_t tracer_data) {
assert(the_plugin);
if (record.header.id.handle == 0) {
@@ -97,7 +98,7 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_
}
try {
the_plugin->HandleTracerRecord(record, session_id);
the_plugin->HandleTracerRecord(record, rocprofiler_session_id_t{0}, tracer_data, data);
} catch (const std::exception& exc) {
std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl;
return -1;
+75 -14
Просмотреть файл
@@ -137,6 +137,11 @@ class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
: TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
id_{QueryId(record, session_id)},
msg_{QueryMsg(record, session_id)} {}
explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record, uint64_t roctx_id,
std::string roctx_msg)
: TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
id_{roctx_id},
msg_{roctx_msg} {}
void Write(barectf_roctx_ctx& barectf_ctx) const override {
barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
@@ -209,7 +214,9 @@ class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
: TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
api_data_{QueryApiData(record, session_id)} {}
explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
const std::uint64_t clock_val, hsa_api_data_t& api_data)
: TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val}, api_data_(api_data) {}
const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }
private:
@@ -246,6 +253,10 @@ class HsaApiEventRecordBegin final : public HsaApiEventRecord {
explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
hsa_api_data_t& api_data)
: HsaApiEventRecord{record, GetRecordBeginClockVal(record),
api_data} {}
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
@@ -259,6 +270,9 @@ class HsaApiEventRecordEnd final : public HsaApiEventRecord {
explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
hsa_api_data_t& api_data)
: HsaApiEventRecord{record, GetRecordBeginClockVal(record), api_data} {}
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
@@ -270,11 +284,17 @@ class HsaApiEventRecordEnd final : public HsaApiEventRecord {
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
protected:
explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
const rocprofiler_session_id_t session_id,
const std::uint64_t clock_val)
: TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
api_data_{QueryApiData(record, session_id)},
kernel_name_{QueryKernelName(record, session_id)} {}
explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
const std::uint64_t clock_val, hip_api_data_t& api_data,
std::string kernel_name)
: TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
api_data_{api_data},
kernel_name_{kernel_name} {}
const hip_api_data_t& GetApiData() const noexcept { return api_data_; }
const std::string& GetKernelName() const noexcept { return kernel_name_; }
@@ -339,6 +359,9 @@ class HipApiEventRecordBegin final : public HipApiEventRecord {
explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
hip_api_data_t& api_data, std::string kernel_name)
: HipApiEventRecord{record, GetRecordBeginClockVal(record), api_data, kernel_name} {}
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
@@ -352,6 +375,9 @@ class HipApiEventRecordEnd final : public HipApiEventRecord {
explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record, hip_api_data_t& api_data,
std::string kernel_name)
: HipApiEventRecord{record, GetRecordBeginClockVal(record), api_data, kernel_name} {}
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
@@ -686,27 +712,60 @@ Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
}
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
const rocprofiler_session_id_t session_id,
rocprofiler_plugin_trace_record_data_t tracer_data,
const void* data) {
std::lock_guard<std::mutex> lock{lock_};
// Depending on the domain, create and add an event record to the
// corresponding tracer.
switch (record.domain) {
case ACTIVITY_DOMAIN_ROCTX:
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
/*If data is nullptr then the call is asynchromous*/
if (data == nullptr)
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
else {
const char* roctx_message = reinterpret_cast<const char*>(data);
std::string roctx_msg(roctx_message);
roctx_tracer_.AddEventRecord(
std::make_shared<const RocTxEventRecord>(record, tracer_data.roctx_id, roctx_msg));
}
break;
case ACTIVITY_DOMAIN_HSA_API: {
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
/*If data is nullptr then the call is asynchromous*/
if (data == nullptr) {
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
} else {
hsa_api_data_t hsa_api_data = *reinterpret_cast<const hsa_api_data_t*>(data);
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordBegin>(record, hsa_api_data));
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordEnd>(record, hsa_api_data));
}
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
/*If data is nullptr then the call is asynchromous*/
if (data == nullptr) {
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
} else {
std::string kernel_name;
hip_api_data_t hip_api_data = *reinterpret_cast<const hip_api_data_t*>(data);
if (tracer_data.kernel_name != nullptr)
kernel_name = rocmtools::cxx_demangle(std::string(tracer_data.kernel_name));
else
kernel_name = "";
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordBegin>(record, hip_api_data, kernel_name));
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordEnd>(record, hip_api_data, kernel_name));
}
break;
}
case ACTIVITY_DOMAIN_HSA_OPS:
@@ -738,7 +797,9 @@ void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_buffer_id_t buffer_id) {
while (begin && begin < end) {
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
rocprofiler_plugin_trace_record_data_t tracer_data = {};
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id,
tracer_data);
} else {
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
+4 -1
Просмотреть файл
@@ -54,7 +54,10 @@ class Plugin final {
// Handles a tracer record.
void HandleTracerRecord(const rocprofiler_record_tracer_t& record,
rocprofiler_session_id_t session_id);
rocprofiler_session_id_t session_id,
rocprofiler_plugin_trace_record_data_t tracer_data,
const void* data = nullptr);
// Handles a profiler record.
void HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
+36 -84
Просмотреть файл
@@ -198,7 +198,8 @@ class file_plugin_t {
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data,
rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}){
std::lock_guard<std::mutex> lock(writing_lock);
std::string kernel_name;
std::string function_name;
@@ -211,81 +212,29 @@ class file_plugin_t {
reinterpret_cast<const char*>(tracer_record.api_data_handle.handle));
}
if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
size_t function_name_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
char* function_name_c = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_c));
if (function_name_c) function_name = std::string(function_name_c);
}
if (tracer_extra_data.function_name)
function_name = std::string(tracer_extra_data.function_name);
}
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
size_t function_name_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
char* function_name_c = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_c));
if (function_name_c) function_name = std::string(function_name_c);
}
size_t kernel_name_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_size));
if (kernel_name_size > 1) {
char* kernel_name_str = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_str));
if (kernel_name_str) {
kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
free(kernel_name_str);
// TODO: Change how this API returns a string.
}
}
if (tracer_extra_data.kernel_name)
kernel_name = rocmtools::cxx_demangle(std::string(tracer_extra_data.kernel_name));
if(tracer_extra_data.function_name)
function_name = std::string(tracer_extra_data.function_name);
}
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
size_t roctx_message_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_size));
if (roctx_message_size > 1) {
[[maybe_unused]] char* roctx_message_str =
static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_str));
if (roctx_message_str)
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
}
size_t roctx_id_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_size));
if (roctx_id_size > 1) {
[[maybe_unused]] char* roctx_id_str =
static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_str));
if (roctx_id_str) {
roctx_id = std::stoll(std::string(strdup(roctx_id_str)));
free(roctx_id_str);
}
}
const char* roctx_msg = reinterpret_cast<const char*>(tracer_record.api_data_handle.handle);
if(roctx_msg)
roctx_message = rocmtools::cxx_demangle(std::string(roctx_msg));
roctx_id = tracer_record.operation_id.id;
}
// return;
output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
*output_file << "Record(" << tracer_record.header.id.handle << "), Domain("
<< GetDomainName(tracer_record.domain) << "),";
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_id >= 0) *output_file << " ROCTX_ID(" << roctx_id << "),";
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_message.size() > 1) *output_file << " ROCTX_Message(" << roctx_message << "),";
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_id >= 0)
*output_file << " ROCTX_ID(" << roctx_id << "),";
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_message.size() > 1)
*output_file << " ROCTX_Message(" << roctx_message << "),";
if (function_name.size() > 1) *output_file << " Function(" << function_name << "),";
if (kernel_name.size() > 1) *output_file << " Kernel_Name(" << kernel_name.c_str() << "),";
if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS || tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) {
@@ -304,15 +253,17 @@ class file_plugin_t {
}
}
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
*output_file << " Begin(" << tracer_record.timestamps.begin.value
<< "), End(" << tracer_record.timestamps.end.value << "),";
*output_file << " Begin(" << tracer_record.timestamps.begin.value << "), End("
<< tracer_record.timestamps.end.value << "),";
} else {
if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX){
if (tracer_record.phase == ROCPROFILER_PHASE_ENTER &&
tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) {
rocprofiler_timestamp_t timestamp;
rocprofiler_get_timestamp(&timestamp);
*output_file << " Begin(" << timestamp.value << "),";
}
if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
if (tracer_record.phase == ROCPROFILER_PHASE_ENTER &&
tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
rocprofiler_timestamp_t timestamp;
rocprofiler_get_timestamp(&timestamp);
*output_file << " timestamp(" << timestamp.value << "),";
@@ -418,7 +369,6 @@ class file_plugin_t {
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
<< "se(" << sample.se << ')' << std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
@@ -432,9 +382,11 @@ class file_plugin_t {
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data;
tracer_extra_data.function_name = nullptr;
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id, buffer_id);
FlushTracerRecord(*tracer_record, session_id, tracer_extra_data, buffer_id);
break;
}
case ROCPROFILER_ATT_TRACER_RECORD: {
@@ -454,16 +406,16 @@ class file_plugin_t {
return 0;
}
bool is_valid() const { return valid_; }
bool is_valid() const { return valid_; }
private:
bool valid_{false};
private:
bool valid_{false};
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
output_file_{"results.txt"};
};
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
output_file_{"results.txt"};
};
file_plugin_t* file_plugin = nullptr;
@@ -500,9 +452,9 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
rocprofiler_session_id_t session_id) {
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data){
if (!file_plugin || !file_plugin->is_valid()) return -1;
if (record.header.id.handle == 0) return 0;
file_plugin->FlushTracerRecord(record, session_id);
file_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0}, tracer_extra_data, rocprofiler_buffer_id_t{0});
return 0;
}
+53 -102
Просмотреть файл
@@ -340,12 +340,13 @@ class perfetto_plugin_t {
return 0;
}
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id) {
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id,
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data) {
std::lock_guard<std::mutex> lock(writing_lock);
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
std::string kernel_name;
char* function_name;
const char* function_name;
char* activity_name;
std::string roctx_message;
uint64_t roctx_id = 0;
@@ -424,35 +425,11 @@ class perfetto_plugin_t {
}
}
auto& roctx_track = roctx_track_it->second;
size_t roctx_message_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_size));
if (roctx_message_size > 1) {
char* roctx_message_str = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_str));
if (roctx_message_str)
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
}
size_t roctx_id_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_size));
if (roctx_id_size > 1) {
char* roctx_id_str = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_str));
if (roctx_id_str) {
roctx_id = std::stoll(std::string(strdup(roctx_id_str)));
free(roctx_id_str);
}
}
if(tracer_record.phase == ROCPROFILER_PHASE_NONE) {
roctx_id = tracer_record.operation_id.id;
if (reinterpret_cast<const char*>(tracer_record.api_data_handle.handle))
roctx_message = std::string(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle));
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
if (tracer_record.operation_id.id == 1) {
perfetto::StaticString roctx_message_pft(
(!roctx_message.empty() ? roctx_message.c_str() : ""));
@@ -465,19 +442,18 @@ class perfetto_plugin_t {
roctx_track_entries_--;
}
} else {
rocprofiler_timestamp_t timestamp;
rocprofiler_timestamp_t timestamp;
rocprofiler_get_timestamp(&timestamp);
if (tracer_record.operation_id.id == 1) {
perfetto::StaticString roctx_message_pft(
(!roctx_message.empty() ? roctx_message.c_str() : ""));
TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track,
timestamp.value, "Timestamp(ns)",
timestamp.value, "RocTx ID", roctx_id);
roctx_track_entries_++;
} else {
TRACE_EVENT_END("ROCTX_API", roctx_track, timestamp.value);
roctx_track_entries_--;
}
perfetto::StaticString roctx_message_pft(
(!roctx_message.empty() ? roctx_message.c_str() : ""));
TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track, timestamp.value,
"Timestamp(ns)", timestamp.value, "RocTx ID", roctx_id);
roctx_track_entries_++;
} else {
TRACE_EVENT_END("ROCTX_API", roctx_track, timestamp.value);
roctx_track_entries_--;
}
}
break;
}
@@ -505,23 +481,15 @@ class perfetto_plugin_t {
}
}
auto& hsa_track = hsa_track_it->second;
size_t function_name_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
function_name = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name));
}
function_name = tracer_extra_data.function_name;
rocprofiler_timestamp_t timestamp;
rocprofiler_get_timestamp(&timestamp);
if(tracer_record.phase == ROCPROFILER_PHASE_ENTER)
if (tracer_record.phase == ROCPROFILER_PHASE_ENTER)
TRACE_EVENT_BEGIN("HSA_API", perfetto::StaticString(function_name), hsa_track,
timestamp.value,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
if(tracer_record.phase == ROCPROFILER_PHASE_EXIT)
timestamp.value,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
if (tracer_record.phase == ROCPROFILER_PHASE_EXIT)
TRACE_EVENT_END("HSA_API", hsa_track, timestamp.value);
break;
}
@@ -550,34 +518,13 @@ class perfetto_plugin_t {
}
}
auto& hip_track = hip_track_it->second;
size_t function_name_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
function_name = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name));
}
size_t kernel_name_size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_size));
char* kernel_name_str;
if (kernel_name_size > 1) {
kernel_name_str = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_str));
if (kernel_name_str) {
kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
free(kernel_name_str);
}
}
function_name = tracer_extra_data.function_name;
if (tracer_extra_data.kernel_name != nullptr)
kernel_name = std::string(tracer_extra_data.kernel_name);
rocprofiler_timestamp_t timestamp;
rocprofiler_get_timestamp(&timestamp);
if(tracer_record.phase == ROCPROFILER_PHASE_ENTER) {
if (tracer_record.phase == ROCPROFILER_PHASE_ENTER) {
if (kernel_name.size() > 0) {
TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track,
timestamp.value, "Kernel Name", kernel_name,
@@ -588,10 +535,10 @@ class perfetto_plugin_t {
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
}
}
if(tracer_record.phase == ROCPROFILER_PHASE_EXIT) {
if (tracer_record.phase == ROCPROFILER_PHASE_EXIT) {
TRACE_EVENT_END("HIP_API", hip_track, timestamp.value);
}
if(tracer_record.phase == ROCPROFILER_PHASE_NONE) {
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
if (kernel_name.size() > 0) {
TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track,
tracer_record.timestamps.begin.value, "Kernel Name", kernel_name,
@@ -616,13 +563,14 @@ class perfetto_plugin_t {
// size_t stream_id_str_size = 0;
// char* stream_id_str;
// CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
// session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
// rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
// session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr,
// 0}, rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
// &stream_id_str_size));
// if (stream_id_str_size > 1) {
// stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
// CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
// session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
// session_id, ROCPROFILER_HIP_STREAM_ID,
// rocprofiler_tracer_api_data_handle_t{nullptr, 0},
// rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
// &stream_id_str));
// if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str);
@@ -671,20 +619,20 @@ class perfetto_plugin_t {
} else {
activity_name = const_cast<char*>(std::string("N/A").c_str());
}
if(tracer_record.phase == ROCPROFILER_PHASE_NONE)
if (tracer_record.phase == ROCPROFILER_PHASE_NONE)
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track,
tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
else if(tracer_record.phase == ROCPROFILER_PHASE_ENTER)
tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
else if (tracer_record.phase == ROCPROFILER_PHASE_ENTER)
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track,
timestamp.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
timestamp.value, "Agent ID", tracer_record.agent_id.handle,
"Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
}
if(tracer_record.phase == ROCPROFILER_PHASE_NONE)
if (tracer_record.phase == ROCPROFILER_PHASE_NONE)
TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
else if(tracer_record.phase == ROCPROFILER_PHASE_EXIT)
else if (tracer_record.phase == ROCPROFILER_PHASE_EXIT)
TRACE_EVENT_END("HIP_OPS", stream_track, timestamp.value);
break;
}
@@ -754,9 +702,11 @@ class perfetto_plugin_t {
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data;
tracer_extra_data.function_name = nullptr;
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id);
FlushTracerRecord(*tracer_record, session_id, tracer_extra_data);
break;
}
default:
@@ -837,10 +787,11 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler
return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
rocprofiler_session_id_t session_id) {
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(
rocprofiler_record_tracer_t record,
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data) {
if (!perfetto_plugin || !perfetto_plugin->IsValid()) return -1;
if (record.header.id.handle == 0) return 0;
perfetto_plugin->FlushTracerRecord(record, session_id);
perfetto_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0}, tracer_extra_data);
return 0;
}
+1
Просмотреть файл
@@ -21,6 +21,7 @@ target_include_directories(rocprofiler_tool
${PROJECT_SOURCE_DIR}/src)
target_compile_definitions(rocprofiler_tool
PUBLIC AMD_INTERNAL_BUILD
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)
if(ASAN)
+167 -10
Просмотреть файл
@@ -22,6 +22,11 @@
#include <dlfcn.h>
#include <fcntl.h>
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include "hsa_prof_str.h"
#include <hip/hip_runtime.h>
#include <hip/amd_detail/hip_prof_str.h>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include <sys/syscall.h>
@@ -52,6 +57,7 @@
#include <chrono>
#include "utils/helper.h"
#include "trace_buffer.h"
namespace fs = std::experimental::filesystem;
@@ -60,7 +66,7 @@ namespace fs = std::experimental::filesystem;
do { \
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCProfiler API Call Error!"); \
} while (false)
TRACE_BUFFER_INSTANTIATE();
namespace {
struct shmd_t {
@@ -93,15 +99,12 @@ class rocprofiler_plugin_t {
reinterpret_cast<decltype(rocprofiler_plugin_write_buffer_records)*>(
dlsym(plugin_handle_, "rocprofiler_plugin_write_buffer_records"));
if (!rocprofiler_plugin_write_buffer_records_) return;
rocprofiler_plugin_write_record_ = reinterpret_cast<decltype(rocprofiler_plugin_write_record)*>(
dlsym(plugin_handle_, "rocprofiler_plugin_write_record"));
if (!rocprofiler_plugin_write_record_) return;
rocprofiler_plugin_finalize_ = reinterpret_cast<decltype(rocprofiler_plugin_finalize)*>(
dlsym(plugin_handle_, "rocprofiler_plugin_finalize"));
if (!rocprofiler_plugin_finalize_) return;
if (auto* initialize = reinterpret_cast<decltype(rocprofiler_plugin_initialize)*>(
dlsym(plugin_handle_, "rocprofiler_plugin_initialize"));
initialize != nullptr)
@@ -123,8 +126,7 @@ class rocprofiler_plugin_t {
assert(is_valid());
return rocprofiler_plugin_write_buffer_records_(std::forward<Args>(args)...);
}
private:
private:
bool valid_{false};
void* plugin_handle_;
@@ -135,6 +137,77 @@ class rocprofiler_plugin_t {
std::optional<rocprofiler_plugin_t> plugin;
struct hsa_api_trace_entry_t {
std::atomic<uint32_t> valid;
rocprofiler_record_tracer_t record;
const char* function_name;
hsa_api_trace_entry_t(rocprofiler_record_tracer_t tracer_record, const char* function_name_str,
const hsa_api_data_t& data)
: valid(rocprofiler::TRACE_ENTRY_INIT) {
record = tracer_record;
function_name = function_name_str ? strdup(function_name_str) : nullptr;
record.api_data_handle.handle = &data;
}
~hsa_api_trace_entry_t() { if (function_name != nullptr) free(const_cast<char*>(function_name)); }
};
struct roctx_trace_entry_t {
std::atomic<rocprofiler::TraceEntryState> valid;
rocprofiler_record_tracer_t record;
roctx_trace_entry_t(rocprofiler_record_tracer_t tracer_record, const char* roctx_message_str)
: valid(rocprofiler::TRACE_ENTRY_INIT) {
record = tracer_record;
roctx_message_str? record.api_data_handle.handle=strdup(roctx_message_str):nullptr;
}
~roctx_trace_entry_t() { }
};
struct hip_api_trace_entry_t {
std::atomic<uint32_t> valid;
rocprofiler_record_tracer_t record;
const char* function_name;
const char* kernel_name;
hip_api_trace_entry_t(rocprofiler_record_tracer_t tracer_record, const char* kernel_name_str,
const char* function_name_str, const hip_api_data_t& data)
: valid(rocprofiler::TRACE_ENTRY_INIT) {
record = tracer_record;
kernel_name = kernel_name_str ? strdup(kernel_name_str) : nullptr;
function_name = function_name_str ? strdup(function_name_str) : nullptr;
record.api_data_handle.handle=&data;
}
~hip_api_trace_entry_t() {
if (function_name != nullptr) free(const_cast<char*>(function_name));
if (kernel_name != nullptr) free(const_cast<char*>(kernel_name));
}
};
rocprofiler::TraceBuffer<hip_api_trace_entry_t> hip_api_buffer(
"HIP API", 0x200000, [](hip_api_trace_entry_t* entry) {
assert(plugin && "plugin is not initialized");
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data;
tracer_extra_data.function_name = entry->function_name;
tracer_extra_data.kernel_name = entry->kernel_name;
plugin->write_callback_record(entry->record, tracer_extra_data);
});
rocprofiler::TraceBuffer<hsa_api_trace_entry_t> hsa_api_buffer(
"HSA API", 0x200000, [](hsa_api_trace_entry_t* entry) {
assert(plugin && "plugin is not initialized");
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data;
tracer_extra_data.function_name = entry->function_name;
plugin->write_callback_record(entry->record,
tracer_extra_data);
});
rocprofiler::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer(
"rocTX API", 0x200000, [](roctx_trace_entry_t* entry) {
assert(plugin && "plugin is not initialized");
rocprofiler_plugin_tracer_extra_data_t tracer_extra_data;
tracer_extra_data.function_name = nullptr;
plugin->write_callback_record(
entry->record, tracer_extra_data);
});
} // namespace
uint64_t getFlushIntervalFromEnv() {
@@ -325,10 +398,94 @@ void plugins_load() {
}
}
}
/*
* A callback function for synchronous trace records.
* This function queries the api infoemation and populates the
* api_trace buffer and adds it to the trace buffer.
*/
void sync_api_trace_callback(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id) {
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
size_t kernel_name_size = 0, function_name_size = 0;
char *function_name_c = nullptr, *kernel_name_c = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
//function_name_c = (char*)malloc(function_name_size);
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_c));
}
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_size));
if (kernel_name_size > 1) {
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_c));
}
char* data = nullptr;
size_t size = 0;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &size));
if(size > 0)
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &data));
hip_api_data_t hip_api_data = *reinterpret_cast<hip_api_data_t*>(data);
//std::cout << "in api calback" << strlen(function_name_c) << "\t" << function_name_c << std::endl;
hip_api_trace_entry_t& entry = hip_api_buffer.Emplace(
tracer_record,
(const char*)kernel_name_c ? strdup(kernel_name_c) : nullptr,
(const char*)(function_name_c) ? strdup(function_name_c):nullptr,
hip_api_data);
entry.valid.store(rocprofiler::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
size_t function_name_size = 0;
char *function_name_c = nullptr;
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
void plugin_write_record(rocprofiler_record_tracer_t record, rocprofiler_session_id_t session_id) {
if (plugin) plugin->write_callback_record(record, session_id);
}
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_c));
}
char* data = nullptr;
size_t size=0;
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &size));
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &data));
hsa_api_data_t hsa_api_data = *reinterpret_cast<hsa_api_data_t*>(data);
hsa_api_trace_entry_t& entry = hsa_api_buffer.Emplace(
tracer_record,
(const char*)(function_name_c),
hsa_api_data);
entry.valid.store(rocprofiler::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
size_t roctx_message_size = 0;
char *roctx_message_str = nullptr;
uint64_t roctx_id=0;
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_size));
if (roctx_message_size > 1) {
roctx_message_str =(char*)malloc(roctx_message_size * sizeof(char));
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_str));
if (roctx_message_str)
roctx_message_str ? rocmtools::cxx_demangle(std::string(strdup(roctx_message_str))).c_str() : nullptr;
}
roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace(
tracer_record,
roctx_message_str);
entry.valid.store(rocprofiler::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
}
void wait_for_amdsys() {
while (amd_sys_handler.load(std::memory_order_relaxed)) {
@@ -542,7 +699,7 @@ ROCPROFILER_EXPORT bool OnLoad(void* table, uint64_t runtime_version,
apis_requested.size(), &filter_id, property));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
CHECK_ROCPROFILER(
rocprofiler_set_api_trace_sync_callback(session_id, filter_id, plugin_write_record));
rocprofiler_set_api_trace_sync_callback(session_id, filter_id, sync_api_trace_callback));
filter_ids.emplace_back(filter_id);
break;
}
+288
Просмотреть файл
@@ -0,0 +1,288 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef TOOL_TRACE_BUFFER_H_
#define TOOL_TRACE_BUFFER_H_
#include <atomic>
#include <cassert>
#include <condition_variable>
#include <functional>
#include <future>
#include <iostream>
#include <list>
#include <mutex>
#include <optional>
#include <sstream>
#include <string>
#include <thread>
namespace rocprofiler {
class TraceBufferBase {
public:
static void FlushAll() {
std::lock_guard lock(mutex_);
for (auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_)
trace_buffer->Flush();
}
static void Register(TraceBufferBase* elem) {
std::lock_guard lock(mutex_);
auto** prev_ptr = &head_;
while (*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_)
prev_ptr = &(*prev_ptr)->next_;
elem->next_ = *prev_ptr;
*prev_ptr = elem;
}
static void Unregister(TraceBufferBase* elem) {
std::lock_guard lock(mutex_);
auto** prev_ptr = &head_;
while (*prev_ptr != nullptr && *prev_ptr != elem) prev_ptr = &(*prev_ptr)->next_;
assert(*prev_ptr != nullptr && "elem is not in the list");
*prev_ptr = elem->next_;
}
TraceBufferBase(std::string name, int priority)
: name_(std::move(name)), priority_(priority), next_(nullptr) {}
TraceBufferBase(const TraceBufferBase&) = delete;
TraceBufferBase& operator=(const TraceBufferBase&) = delete;
virtual ~TraceBufferBase() { Unregister(this); }
virtual void Flush() = 0;
std::string name() && { return std::move(name_); }
const std::string& name() const& { return name_; }
private:
const std::string name_;
const int priority_;
TraceBufferBase* next_;
static TraceBufferBase* head_;
static std::mutex mutex_;
};
enum TraceEntryState { TRACE_ENTRY_INVALID = 0, TRACE_ENTRY_INIT = 1, TRACE_ENTRY_COMPLETE = 2 };
template <typename Entry, typename Allocator = std::allocator<Entry>>
class TraceBuffer : protected TraceBufferBase {
public:
using callback_t = std::function<void(Entry*)>;
TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0)
: TraceBufferBase(std::move(name), priority),
flush_callback_(std::move(flush_callback)),
size_(size) {
assert(size_ != 0 && "cannot create an empty trace buffer");
Entry* write_buffer = allocator_.allocate(size_);
assert(write_buffer != nullptr);
buffer_list_.push_back(write_buffer);
read_index_ = 0;
write_index_ = {0, write_buffer};
AllocateFreeBuffer();
// Add this instance to the link list of all trace buffers in the process.
Register(this);
}
~TraceBuffer() override {
// Flush the remaining records. After flushing, there should not be any records left in the
// trace buffer.
Flush();
assert(read_index_ == write_index_.load().index);
// Acquire both the writer and worker lock as we are accessing shared variables they protect.
std::unique_lock writer_lock(write_mutex_, std::defer_lock);
std::unique_lock worker_lock(worker_mutex_, std::defer_lock);
std::lock(writer_lock, worker_lock);
// Deallocate the buffers.
allocator_.deallocate(write_index_.load().buffer, size_);
allocator_.deallocate(free_buffer_, size_);
// Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional
// after waking up, and exits if it does not have a value.
if (worker_thread_) {
std::thread worker_thread = std::move(worker_thread_.value());
{
// Tell the worker thread loop to exit.
worker_thread_.reset();
free_buffer_ = nullptr;
worker_cond_.notify_one();
}
// Release the worker lock to allow the worker thread to exit.
worker_lock.unlock();
worker_thread.join();
}
}
// Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
// monotonically increasing indices, with read_pointer % size always indexing inside the first
// buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
// the next invocation after changing its state to 'complete'.
void Flush() override {
std::lock_guard lock(write_mutex_);
auto write_index = write_index_.load(std::memory_order_relaxed);
for (auto it = buffer_list_.begin(); it != buffer_list_.end();) {
auto end_of_buffer = read_index_ - read_index_ % size_ + size_;
while (read_index_ < std::min(write_index.index, end_of_buffer)) {
Entry* entry = &(*it)[read_index_ % size_];
// The entry is not yet complete, stop flushing here.
if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
flush_callback_(entry);
entry->~Entry();
++read_index_;
}
// The buffer is still in use or the read pointer did not reach the end of the buffer.
if (*it == write_index.buffer || read_index_ != end_of_buffer) return;
// All entries in the current buffer are now processed. Destroy the buffer and move onto the
// next buffer in the list.
allocator_.deallocate(*it, size_);
it = buffer_list_.erase(it);
}
}
template <typename... Args> Entry& Emplace(Args... args) {
return *new (GetEntry()) Entry(std::forward<Args>(args)...);
}
private:
Entry* GetEntry() {
auto current = write_index_.load(std::memory_order_relaxed);
while (true) {
// If the pointer is at the end of the current buffer, switch to the available free buffer and
// notify the worker thread to allocate a new buffer.
if (current.index != 0 && current.index % size_ == 0) {
std::lock_guard lock(write_mutex_);
// If the worker thread wasn't already started, start it now. This avoids starting a new
// thread when the trace buffer is created.
if (!worker_thread_) {
std::promise<void> ready;
auto future = ready.get_future();
{
std::lock_guard worker_lock(worker_mutex_);
worker_thread_.emplace(&TraceBuffer::WorkerThreadLoop, this, std::move(ready));
}
future.wait();
}
// Re-check the pointer overflow under the writer lock, another thread could have beaten us
// to it and already bumped the write_index_.
current = write_index_.load(std::memory_order_relaxed);
if (current.index % size_ == 0) {
std::unique_lock worker_lock(worker_mutex_);
// Wait for the free buffer to become available.
worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; });
current.buffer = free_buffer_;
buffer_list_.push_back(current.buffer);
write_index_.store({current.index + 1, current.buffer}, std::memory_order_relaxed);
// Tell the worker thread to allocate a new free buffer.
free_buffer_ = nullptr;
worker_cond_.notify_one();
// We successfully allocated a new buffer, return the first element.
return &current.buffer[0];
}
}
if (write_index_.compare_exchange_weak(current, {current.index + 1, current.buffer},
std::memory_order_relaxed))
return &current.buffer[current.index % size_];
}
}
void AllocateFreeBuffer() {
assert(free_buffer_ == nullptr);
free_buffer_ = allocator_.allocate(size_);
assert(free_buffer_ != nullptr);
for (size_t i = 0; i < size_; ++i)
free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed);
}
void WorkerThreadLoop(std::promise<void> ready) {
std::unique_lock lock(worker_mutex_);
// This worker thread is now ready to accept work.
ready.set_value();
while (true) {
worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; });
if (!worker_thread_) break;
AllocateFreeBuffer();
worker_cond_.notify_one();
}
}
// The WriteIndex is used to store both the index and the buffer associated with that index (the
// buffer contains the trace buffer records at [index - index % size, index - index % size_t +
// size_ - 1]) in a single atomic variable.
struct WriteIndex {
uint64_t index;
Entry* buffer;
};
const callback_t flush_callback_;
const uint64_t size_;
uint64_t read_index_; // The index of the next record to flush.
std::atomic<WriteIndex> write_index_; // The index of the next record that could be written.
Entry* free_buffer_{nullptr}; // The next available free buffer.
std::optional<std::thread> worker_thread_;
std::mutex worker_mutex_;
std::condition_variable worker_cond_;
std::mutex write_mutex_;
std::list<Entry*> buffer_list_;
Allocator allocator_;
};
} // namespace rocprofiler
#define TRACE_BUFFER_INSTANTIATE() \
rocprofiler::TraceBufferBase* rocprofiler::TraceBufferBase::head_ = nullptr; \
std::mutex rocprofiler::TraceBufferBase::mutex_;
#endif // TOOL_TRACE_BUFFER_H_