Updates/fixes for CI, docs, tests, samples, and common library (#528)

- .github/workflows/continuous_integration.yml
  - apt-get update before apt-get install
  - remove libgtest-dev
  - actions-comment-pull-request: v2.4.3 -> v2.5.0
- .github/workflows/formatting.yml
  - create-pull-request: v5 -> v6
- cmake/rocprofiler_options.cmake
  - remove unused ROCPROFILER_DEBUG_TRACE and ROCPROFILER_LD_AQLPROFILE options
- samples/counter_collection/callback_client.cpp
  - corr_id field renamed to correlation_id
- samples/counter_collection/client.cpp
  - corr_id field renamed to correlation_id
- include/rocprofiler-sdk/fwd.h
  - In rocprofiler_record_counter_t: rename corr_id field to correlation_id
  - doxygen fixes
- lib/common/utility.*
  - remove get_accurate_clock_id_impl
  - timestamp_ns() defaults to CLOCK_BOOTTIME
- lib/rocprofiler-sdk/counters/core.cpp
  - fix spelling mistake: extrenal -> external
  - corr_id field renamed to correlation_id
- lib/rocprofiler-sdk-tool/tool.cpp
  - fix destruction of static tool::output_file before finalization
- scripts/update-docs.sh
  - define PROJECT_NAME
- tests/async-copy-tracing/validate.py
  - init_time and fini_time checks
  - add hip_api_traces and marker_api_traces checks
- tests/common/serialization.hpp
  - fix save function for rocprofiler_record_counter_t following rename of corr_id to correlation_id
- tests/kernel-tracing/validate.py
  - init_time and fini_time checks
  - relax test_total_runtime range
- tests/rocprofv3/tracing/CMakeLists.txt
  - remove -M from rocprofv3-test-systrace-execute
  - exclude test_hsa_api_trace in rocprofv3-test-systrace-validate due to HIP API tracing
- tests/rocprofv3/tracing/validate.py
  - update test_kernel_trace to accept either the mangled or the demangled kernel name
- tests/tools/json-tool.cpp
  - remove use of GLOG
  - include init_time and fini_time
  - move JSON output out of tool_fini into a separate write_json(...) function
Этот коммит содержится в:
Jonathan R. Madsen
2024-02-22 00:16:43 -06:00
коммит произвёл GitHub
родитель 7adffd5b22
Коммит 0d939edbba
17 изменённых файлов: 292 добавлений и 233 удалений
+6 -4
Просмотреть файл
@@ -89,7 +89,7 @@ jobs:
run: |
git config --global --add safe.directory '*'
apt-get update
apt-get install -y cmake clang-tidy-11 g++-11 g++-12 libgtest-dev python3-pip
apt-get install -y cmake clang-tidy-11 g++-11 g++-12 python3-pip
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 10
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12
@@ -257,7 +257,8 @@ jobs:
shell: bash
run: |
git config --global --add safe.directory '*'
apt-get install -y cmake libgtest-dev python3-pip gcovr wkhtmltopdf xvfb xfonts-base xfonts-75dpi xfonts-100dpi xfonts-utils xfonts-encodings libfontconfig
apt-get update
apt-get install -y cmake python3-pip gcovr wkhtmltopdf xvfb xfonts-base xfonts-75dpi xfonts-100dpi xfonts-utils xfonts-encodings libfontconfig
python3 -m pip install -r requirements.txt
python3 -m pip install pytest pycobertura
@@ -356,7 +357,7 @@ jobs:
- name: Write Code Coverage Comment
if: github.event_name == 'pull_request'
timeout-minutes: 5
uses: thollander/actions-comment-pull-request@v2.4.3
uses: thollander/actions-comment-pull-request@v2.5.0
with:
comment_tag: codecov-report
filePath: .codecov/report.md
@@ -471,7 +472,8 @@ jobs:
shell: bash
run: |
git config --global --add safe.directory '*'
apt-get install -y cmake libgtest-dev python3-pip libasan8 libtsan2 software-properties-common
apt-get update
apt-get install -y cmake python3-pip libasan8 libtsan2 software-properties-common
python3 -m pip install -r requirements.txt
python3 -m pip install pytest
add-apt-repository ppa:ubuntu-toolchain-r/test
+3 -3
Просмотреть файл
@@ -48,7 +48,7 @@ jobs:
- name: Create pull request
if: failure()
uses: peter-evans/create-pull-request@v5
uses: peter-evans/create-pull-request@v6
with:
commit-message: "cmake formatting (cmake-format)"
branch: ${{ steps.extract_branch.outputs.branch }}-cmake-format
@@ -90,7 +90,7 @@ jobs:
- name: Create pull request
if: failure()
uses: peter-evans/create-pull-request@v5
uses: peter-evans/create-pull-request@v6
with:
commit-message: "source formatting (clang-format v11)"
branch: ${{ steps.extract_branch.outputs.branch }}-clang-format
@@ -137,7 +137,7 @@ jobs:
- name: Create pull request
if: failure()
uses: peter-evans/create-pull-request@v5
uses: peter-evans/create-pull-request@v6
with:
commit-message: "python formatting (black)"
branch: ${{ steps.extract_branch.outputs.branch }}-python-format
-3
Просмотреть файл
@@ -56,9 +56,6 @@ if(ROCPROFILER_BUILD_TESTS)
"Enable building gtest (Google testing) library internally" ON ADVANCED)
endif()
rocprofiler_add_option(ROCPROFILER_DEBUG_TRACE "Enable debug tracing" OFF ADVANCED)
rocprofiler_add_option(ROCPROFILER_LD_AQLPROFILE
"Enable direct loading of AQL-profile HSA extension" OFF ADVANCED)
rocprofiler_add_option(ROCPROFILER_ENABLE_CLANG_TIDY "Enable clang-tidy checks" OFF
ADVANCED)
+1 -1
Просмотреть файл
@@ -79,7 +79,7 @@ record_callback(rocprofiler_queue_id_t,
for(size_t i = 0; i < record_count; ++i)
{
ss << "(Id: " << record_data[i].id << " Value [D]: " << record_data[i].counter_value
<< " Corr_Id: " << record_data[i].corr_id.internal << "),";
<< " Corr_Id: " << record_data[i].correlation_id.internal << "),";
}
auto* output_stream = static_cast<std::ostream*>(callback_data_args);
if(!output_stream) throw std::runtime_error{"nullptr to output stream"};
+1 -1
Просмотреть файл
@@ -99,7 +99,7 @@ buffered_callback(rocprofiler_context_id_t,
// Print the returned counter data.
auto* record = static_cast<rocprofiler_record_counter_t*>(header->payload);
ss << "(Id: " << record->id << " Value [D]: " << record->counter_value
<< " Corr_Id: " << record->corr_id.internal << "),";
<< " Corr_Id: " << record->correlation_id.internal << "),";
}
}
+14 -9
Просмотреть файл
@@ -344,8 +344,8 @@ typedef uint64_t rocprofiler_counter_dimension_id_t;
*/
typedef union rocprofiler_user_data_t
{
uint64_t value;
void* ptr;
uint64_t value; ///< usage example: set to process id, thread id, etc.
void* ptr; ///< usage example: set to address of data allocation
} rocprofiler_user_data_t;
//--------------------------------------------------------------------------------------//
@@ -504,10 +504,12 @@ rocprofiler_record_header_compute_hash(uint32_t category, uint32_t kind)
*/
typedef struct
{
const char* name;
size_t instance_size;
rocprofiler_counter_dimension_id_t
id; //<< Id for this dimension used by @ref rocprofiler_query_record_dimension_position
const char* name;
size_t instance_size;
rocprofiler_counter_dimension_id_t id;
/// @var id
/// @brief Id for this dimension used by @ref rocprofiler_query_record_dimension_position
} rocprofiler_record_dimension_info_t;
/**
@@ -515,9 +517,12 @@ typedef struct
*/
typedef struct
{
rocprofiler_counter_instance_id_t id;
double counter_value; //<< counter value
rocprofiler_correlation_id_t corr_id;
rocprofiler_counter_instance_id_t id; ///< counter identifier
double counter_value; ///< counter value
rocprofiler_correlation_id_t correlation_id;
/// @var correlation_id
/// @brief Used to correlate the kernel data to an API call
} rocprofiler_record_counter_t;
/**
+1 -26
Просмотреть файл
@@ -62,35 +62,10 @@ get_clock_name(clockid_t _id)
default: break;
}
return "CLOCK_UNKNOWN";
#undef CLOCK_NAME_CASE_STATEMENT
}
} // namespace
/// Select the clock id based on the running kernel's release string.
///
/// Returns CLOCK_MONOTONIC_RAW when uname() succeeds and the release string parses as
/// "<major>.<minor>..." with a version of at least 4.4; returns CLOCK_MONOTONIC in every
/// other case (uname() failure, unparsable release string, older kernel).
clockid_t
get_accurate_clock_id_impl()
{
    utsname uts_info;

    // without kernel information there is nothing to inspect: use the non-RAW clock
    if(uname(&uts_info) != 0) return CLOCK_MONOTONIC;

    try
    {
        const std::string release   = uts_info.release;
        size_t            major_len = 0;

        // std::stoi reports how many characters the major number consumed; the minor
        // number is parsed from the text following the separator after it
        const int major = std::stoi(release, &major_len);
        const int minor = std::stoi(release.substr(major_len + 1));

        const bool at_least_4_4 = (major > 4) || (major == 4 && minor >= 4);
        if(at_least_4_4) return CLOCK_MONOTONIC_RAW;
    } catch(...)
    {
        // release string does not follow the "<major>.<minor>" pattern:
        // fall through to the non-RAW clock
    }

    return CLOCK_MONOTONIC;
}
uint64_t
get_clock_period_ns_impl(clockid_t _clk_id)
{
+7 -31
Просмотреть файл
@@ -48,9 +48,6 @@ namespace rocprofiler
{
namespace common
{
clockid_t
get_accurate_clock_id_impl();
uint64_t
get_clock_period_ns_impl(clockid_t _clk_id);
@@ -62,20 +59,6 @@ get_tid()
return _v;
}
inline clockid_t
get_accurate_clock_id()
{
static auto clk_id = get_accurate_clock_id_impl();
return clk_id;
}
inline uint64_t
get_accurate_clock_period_ns()
{
static auto clk_period = get_clock_period_ns_impl(get_accurate_clock_id());
return clk_period;
}
inline uint64_t
get_ticks(clockid_t clk_id_v) noexcept
{
@@ -92,24 +75,17 @@ get_ticks(clockid_t clk_id_v) noexcept
return (static_cast<uint64_t>(ts.tv_sec) * nanosec) + static_cast<uint64_t>(ts.tv_nsec);
}
// this equates to HSA-runtime library implementation of os::ReadAccurateClock()
// CLOCK_MONOTONIC_RAW equates to HSA-runtime library implementation of os::ReadAccurateClock()
// CLOCK_BOOTTIME equates to HSA-runtime library implementation of os::ReadSystemClock()
template <int ClockT = CLOCK_BOOTTIME>
inline uint64_t
timestamp_ns()
{
auto&& clk_period = get_accurate_clock_period_ns();
if(ROCPROFILER_LIKELY(clk_period == 1)) return get_ticks(get_accurate_clock_id());
return get_ticks(get_accurate_clock_id()) / clk_period;
}
constexpr auto _clk = ClockT;
static auto _clk_period = get_clock_period_ns_impl(_clk);
// this equates to HSA-runtime library implementation of os::ReadSystemClock()
inline uint64_t
system_timestamp_ns()
{
constexpr auto boottime_clk = CLOCK_BOOTTIME;
static auto boottime_clk_period = get_clock_period_ns_impl(boottime_clk);
if(ROCPROFILER_LIKELY(boottime_clk_period == 1)) return get_ticks(boottime_clk);
return get_ticks(boottime_clk) / boottime_clk_period;
if(ROCPROFILER_LIKELY(_clk_period == 1)) return get_ticks(_clk);
return get_ticks(_clk) / _clk_period;
}
std::vector<std::string>
+155 -102
Просмотреть файл
@@ -52,104 +52,154 @@
#include <unordered_set>
#include <vector>
namespace common = ::rocprofiler::common;
namespace tool = ::rocprofiler::tool;
static const uint32_t lds_block_size = 128 * 4;
namespace common = ::rocprofiler::common;
namespace tool = ::rocprofiler::tool;
namespace
{} // namespace
{
constexpr uint32_t lds_block_size = 128 * 4;
auto&
auto destructors = new std::vector<std::function<void()>>{};
/// Convert a pointer into a reference to the pointed-to object.
///
/// The pointer is first passed through CHECK_NOTNULL (presumably the glog macro, which
/// is expected to terminate on a null pointer -- confirm against the logging header in
/// use) and the pointer it yields is then dereferenced.
template <typename Tp>
Tp&
get_dereference(Tp* ptr)
{
    auto* _checked = CHECK_NOTNULL(ptr);
    return *_checked;
}
/// Mutex guarding appends to the shared `destructors` list.
///
/// This lives in a non-template function on purpose: a function-local static declared
/// inside the add_destructor template would be instantiated once per `Tp`, so different
/// instantiations would lock different mutexes while appending to the same vector.
std::mutex&
get_destructors_mutex()
{
    static auto _mutex = std::mutex{};
    return _mutex;
}

/// Register a callback which deletes the object referenced by @p ptr and resets
/// @p ptr to nullptr.
///
/// @param ptr Reference to the owning pointer. The callback captures it by reference,
///            so the pointer variable itself must outlive the registered callback.
///
/// If the `destructors` list is already null (tool_fini deletes it and sets it to
/// nullptr after running the callbacks) nothing is registered and the object is
/// simply never deleted, instead of dereferencing a null list.
template <typename Tp>
void
add_destructor(Tp*& ptr)
{
    auto _lk = std::unique_lock<std::mutex>{get_destructors_mutex()};
    if(!destructors) return;
    destructors->emplace_back([&ptr]() {
        delete ptr;
        ptr = nullptr;
    });
}
// Registers PTR with add_destructor() at most once per expansion site: each expansion
// declares its own static std::once_flag, and std::call_once performs the registration
// only on the first execution of that site. The lambda has an empty capture list, so
// PTR has to name a variable that can be used without capture (every use in this file
// passes a function-local static pointer).
#define ADD_DESTRUCTOR(PTR) \
{ \
static auto _once = std::once_flag{}; \
std::call_once(_once, []() { add_destructor(PTR); }); \
}
tool::output_file*&
get_hsa_api_file()
{
static auto _v = tool::output_file{"hsa_api_trace",
tool::csv::api_csv_encoder{},
{"Domain",
"Function",
"Process_Id",
"Thread_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
static auto* _v = new tool::output_file{"hsa_api_trace",
tool::csv::api_csv_encoder{},
{"Domain",
"Function",
"Process_Id",
"Thread_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
ADD_DESTRUCTOR(_v);
return _v;
}
auto&
tool::output_file*&
get_hip_api_file()
{
static auto _v = tool::output_file{"hip_api_trace",
tool::csv::api_csv_encoder{},
{"Domain",
"Function",
"Process_Id",
"Thread_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
static auto* _v = new tool::output_file{"hip_api_trace",
tool::csv::api_csv_encoder{},
{"Domain",
"Function",
"Process_Id",
"Thread_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
ADD_DESTRUCTOR(_v);
return _v;
}
auto&
tool::output_file*&
get_kernel_trace_file()
{
static auto _v = tool::output_file{"kernel_trace",
tool::csv::kernel_trace_csv_encoder{},
{"Kind",
"Agent_Id",
"Queue_Id",
"Kernel_Id",
"Kernel_Name",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp",
"Private_Segment_Size",
"Group_Segment_Size",
"Workgroup_Size_X",
"Workgroup_Size_Y",
"Workgroup_Size_Z",
"Grid_Size_X",
"Grid_Size_Y",
"Grid_Size_Z"}};
static auto* _v = new tool::output_file{"kernel_trace",
tool::csv::kernel_trace_csv_encoder{},
{"Kind",
"Agent_Id",
"Queue_Id",
"Kernel_Id",
"Kernel_Name",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp",
"Private_Segment_Size",
"Group_Segment_Size",
"Workgroup_Size_X",
"Workgroup_Size_Y",
"Workgroup_Size_Z",
"Grid_Size_X",
"Grid_Size_Y",
"Grid_Size_Z"}};
ADD_DESTRUCTOR(_v);
return _v;
}
auto&
tool::output_file*&
get_counter_collection_file()
{
static auto _v = tool::output_file{"counter_collection",
tool::csv::counter_collection_csv_encoder{},
{"Counter_Id",
"Agent_Id",
"Queue_Id",
"Process_Id",
"Thread_Id",
"Grid_Size",
"Kernel-Name",
"Workgroup_Size",
"LDS_Block_Size",
"Scratch_Size",
"VGPR_Count",
"SGPR_Count",
"Counter_Name",
"Counter_Value"}};
static auto* _v = new tool::output_file{"counter_collection",
tool::csv::counter_collection_csv_encoder{},
{"Counter_Id",
"Agent_Id",
"Queue_Id",
"Process_Id",
"Thread_Id",
"Grid_Size",
"Kernel-Name",
"Workgroup_Size",
"LDS_Block_Size",
"Scratch_Size",
"VGPR_Count",
"SGPR_Count",
"Counter_Name",
"Counter_Value"}};
ADD_DESTRUCTOR(_v);
return _v;
}
auto&
tool::output_file*&
get_memory_copy_trace_file()
{
static auto _v = tool::output_file{"memory_copy_trace",
tool::csv::memory_copy_csv_encoder{},
{"Kind",
"Direction",
"Source_Agent_Id",
"Destination_Agent_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
static auto* _v = new tool::output_file{"memory_copy_trace",
tool::csv::memory_copy_csv_encoder{},
{"Kind",
"Direction",
"Source_Agent_Id",
"Destination_Agent_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
ADD_DESTRUCTOR(_v);
return _v;
}
// Returns a reference to the function-local static pointer holding the
// "marker_api_trace" output file. The file object is heap-allocated on the first call
// and the pointer is registered through ADD_DESTRUCTOR, whose callback deletes the
// object and resets this pointer to nullptr; callers therefore must not assume the
// returned pointer is non-null.
tool::output_file*&
get_marker_api_file()
{
// the string list is presumably the set of column headers for the output -- confirm
// against the tool::output_file constructor
static auto* _v = new tool::output_file{"marker_api_trace",
tool::csv::marker_csv_encoder{},
{"Domain",
"Function",
"Process_Id",
"Thread_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
// executed on every call, but the registration itself only happens once
ADD_DESTRUCTOR(_v);
return _v;
}
#undef ADD_DESTRUCTOR
struct marker_entry
{
uint64_t cid = 0;
@@ -159,21 +209,6 @@ struct marker_entry
std::string message = {};
};
auto&
get_marker_api_file()
{
static auto _v = tool::output_file{"marker_api_trace",
tool::csv::marker_csv_encoder{},
{"Domain",
"Function",
"Process_Id",
"Thread_Id",
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
return _v;
}
struct buffer_ids
{
rocprofiler_buffer_id_t hsa_api_trace = {};
@@ -283,7 +318,7 @@ cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record,
record.correlation_id.internal,
user_data->value,
ts);
get_marker_api_file() << ss.str();
get_dereference(get_marker_api_file()) << ss.str();
}
}
}
@@ -319,7 +354,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
record.correlation_id.internal,
ts,
ts);
get_marker_api_file() << ss.str();
get_dereference(get_marker_api_file()) << ss.str();
}
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA)
@@ -349,7 +384,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
auto ss = std::stringstream{};
tool::csv::marker_csv_encoder::write_row(
ss, kind_name, val.message, val.pid, val.tid, val.cid, val.data.value, ts);
get_marker_api_file() << ss.str();
get_dereference(get_marker_api_file()) << ss.str();
}
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)
@@ -384,7 +419,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
_entry.cid,
_entry.data.value,
ts);
get_marker_api_file() << ss.str();
get_dereference(get_marker_api_file()) << ss.str();
global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id);
}
@@ -408,7 +443,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
record.correlation_id.internal,
user_data->value,
ts);
get_marker_api_file() << ss.str();
get_dereference(get_marker_api_file()) << ss.str();
}
}
}
@@ -504,7 +539,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
record->grid_size.y,
record->grid_size.z);
get_kernel_trace_file() << kernel_trace_ss.str();
get_dereference(get_kernel_trace_file()) << kernel_trace_ss.str();
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API ||
@@ -525,7 +560,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
record->start_timestamp,
record->end_timestamp);
get_hsa_api_file() << hsa_trace_ss.str();
get_dereference(get_hsa_api_file()) << hsa_trace_ss.str();
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
{
@@ -543,7 +578,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
record->start_timestamp,
record->end_timestamp);
get_memory_copy_trace_file() << memory_copy_trace_ss.str();
get_dereference(get_memory_copy_trace_file()) << memory_copy_trace_ss.str();
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API)
@@ -562,7 +597,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
record->start_timestamp,
record->end_timestamp);
get_hip_api_file() << hip_trace_ss.str();
get_dereference(get_hip_api_file()) << hip_trace_ss.str();
}
else
{
@@ -575,7 +610,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
{
auto* profiler_record = static_cast<rocprofiler_record_counter_t*>(header->payload);
rocprofiler_tool_kernel_properties_t kernel_properties =
GetKernelProperties(profiler_record->corr_id.internal);
GetKernelProperties(profiler_record->correlation_id.internal);
rocprofiler_counter_id_t counter_id;
size_t pos;
rocprofiler_counter_info_v0_t version;
@@ -606,7 +641,7 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
fmt::format("{}[{}]", version.name, pos),
profiler_record->counter_value);
get_counter_collection_file() << counter_collection_ss.str();
get_dereference(get_counter_collection_file()) << counter_collection_ss.str();
}
}
}
@@ -925,27 +960,45 @@ tool_fini(void* tool_data)
flush();
rocprofiler_stop_context(get_client_ctx());
if(destructors)
{
for(const auto& itr : *destructors)
itr();
delete destructors;
destructors = nullptr;
}
(void) (tool_data);
}
} // namespace
extern "C" rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t /*version*/,
const char* /*runtime_version*/,
rocprofiler_configure(uint32_t version,
const char* runtime_version,
uint32_t priority,
rocprofiler_client_id_t* id)
{
common::init_logging("ROCPROF_LOG_LEVEL");
FLAGS_colorlogtostderr = true;
// only activate if main tool
if(priority > 0) return nullptr;
// set the client name
id->name = "rocprofiler-tool";
id->name = "rocprofv3";
// store client info
client_identifier = id;
// note that rocprofv3 is not the primary tool
LOG_IF(WARNING, priority > 0) << id->name << " has a priority of " << priority
<< " (not primary tool)";
// compute major/minor/patch version info
uint32_t major = version / 10000;
uint32_t minor = (version % 10000) / 100;
uint32_t patch = version % 100;
LOG(INFO) << id->name << " is using rocprofiler-sdk v" << major << "." << minor << "." << patch
<< " (" << runtime_version << ")";
// create configure data
static auto cfg = rocprofiler_tool_configure_result_t{
sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr};
+5 -5
Просмотреть файл
@@ -265,10 +265,10 @@ queue_cb(const std::shared_ptr<counter_callback_info>& info,
if(const auto* _corr_id = correlation_id)
{
_corr_id_v.internal = _corr_id->internal;
if(const auto* extrenal =
if(const auto* external =
rocprofiler::common::get_val(extern_corr_ids, info->internal_context))
{
_corr_id_v.external = *extrenal;
_corr_id_v.external = *external;
}
}
@@ -380,10 +380,10 @@ completed_cb(const std::shared_ptr<counter_callback_info>& info,
if(const auto* _corr_id = session.correlation_id)
{
_corr_id_v.internal = _corr_id->internal;
if(const auto* extrenal =
if(const auto* external =
rocprofiler::common::get_val(session.extern_corr_ids, info->internal_context))
{
_corr_id_v.external = *extrenal;
_corr_id_v.external = *external;
}
}
@@ -396,7 +396,7 @@ completed_cb(const std::shared_ptr<counter_callback_info>& info,
for(auto& val : *ret)
{
val.corr_id = _corr_id_v;
val.correlation_id = _corr_id_v;
if(buf)
buf->emplace(ROCPROFILER_BUFFER_CATEGORY_COUNTERS, 0, val);
else
+1 -1
Просмотреть файл
@@ -21,7 +21,7 @@ message "Changing directory to ${WORK_DIR}"
cd ${WORK_DIR}
message "Generating rocprofiler-sdk.dox"
cmake -DSOURCE_DIR=${SOURCE_DIR} -P ${WORK_DIR}/generate-doxyfile.cmake
cmake -DSOURCE_DIR=${SOURCE_DIR} -DPROJECT_NAME="Rocprofiler SDK" -P ${WORK_DIR}/generate-doxyfile.cmake
message "Generating doxygen xml files"
doxygen rocprofiler-sdk.dox
+29 -16
Просмотреть файл
@@ -8,7 +8,8 @@ import pytest
def node_exists(name, data, min_len=1):
assert name in data
assert data[name] is not None
assert len(data[name]) >= min_len
if isinstance(data[name], (list, tuple, dict, set)):
assert len(data[name]) >= min_len
def test_data_structure(input_data):
@@ -19,6 +20,12 @@ def test_data_structure(input_data):
sdk_data = data["rocprofiler-sdk-json-tool"]
node_exists("metadata", sdk_data)
node_exists("pid", sdk_data["metadata"])
node_exists("main_tid", sdk_data["metadata"])
node_exists("init_time", sdk_data["metadata"])
node_exists("fini_time", sdk_data["metadata"])
node_exists("agents", sdk_data)
node_exists("call_stack", sdk_data)
node_exists("callback_records", sdk_data)
@@ -28,11 +35,15 @@ def test_data_structure(input_data):
node_exists("code_objects", sdk_data["callback_records"])
node_exists("kernel_symbols", sdk_data["callback_records"])
node_exists("hsa_api_traces", sdk_data["callback_records"])
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
node_exists("marker_api_traces", sdk_data["callback_records"])
node_exists("names", sdk_data["buffer_records"])
node_exists("kernel_dispatches", sdk_data["buffer_records"])
node_exists("memory_copies", sdk_data["buffer_records"], 4)
node_exists("hsa_api_traces", sdk_data["buffer_records"])
node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
node_exists("marker_api_traces", sdk_data["buffer_records"])
def test_timestamps(input_data):
@@ -41,7 +52,7 @@ def test_timestamps(input_data):
cb_start = {}
cb_end = {}
for titr in ["hsa_api_traces", "marker_api_traces"]:
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["callback_records"][titr]:
cid = itr["record"]["correlation_id"]["internal"]
phase = itr["record"]["phase"]
@@ -56,18 +67,20 @@ def test_timestamps(input_data):
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for itr in sdk_data["buffer_records"]["memory_copies"]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for titr in ["kernel_dispatches", "memory_copies"]:
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
api_start = cb_start[itr["correlation_id"]["internal"]]
api_end = cb_end[itr["correlation_id"]["internal"]]
assert api_start < itr["start_timestamp"]
assert api_end <= itr["end_timestamp"]
api_start = cb_start[itr["correlation_id"]["internal"]]
api_end = cb_end[itr["correlation_id"]["internal"]]
assert api_start < itr["start_timestamp"]
assert api_end <= itr["end_timestamp"]
def test_internal_correlation_ids(input_data):
@@ -75,7 +88,7 @@ def test_internal_correlation_ids(input_data):
sdk_data = data["rocprofiler-sdk-json-tool"]
api_corr_ids = []
for titr in ["hsa_api_traces", "marker_api_traces"]:
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["callback_records"][titr]:
api_corr_ids.append(itr["record"]["correlation_id"]["internal"])
@@ -101,7 +114,7 @@ def test_external_correlation_ids(input_data):
sdk_data = data["rocprofiler-sdk-json-tool"]
extern_corr_ids = []
for titr in ["hsa_api_traces", "marker_api_traces"]:
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["callback_records"][titr]:
assert itr["record"]["correlation_id"]["external"] > 0
assert (
@@ -110,7 +123,7 @@ def test_external_correlation_ids(input_data):
extern_corr_ids.append(itr["record"]["correlation_id"]["external"])
extern_corr_ids = list(set(sorted(extern_corr_ids)))
for titr in ["hsa_api_traces", "marker_api_traces"]:
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["buffer_records"][titr]:
assert itr["correlation_id"]["external"] > 0
assert itr["thread_id"] == itr["correlation_id"]["external"]
+4 -4
Просмотреть файл
@@ -205,9 +205,9 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_record_t data)
{
SAVE_DATA_FIELD(context_id);
SAVE_DATA_FIELD(thread_id);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(operation);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(phase);
}
@@ -217,8 +217,8 @@ save_buffer_tracing_api_record(ArchiveT& ar, Tp data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(operation);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(start_timestamp);
SAVE_DATA_FIELD(end_timestamp);
SAVE_DATA_FIELD(thread_id);
@@ -237,7 +237,7 @@ save(ArchiveT& ar, rocprofiler_record_counter_t data)
{
SAVE_DATA_FIELD(id);
SAVE_DATA_FIELD(counter_value);
SAVE_DATA_FIELD(corr_id);
SAVE_DATA_FIELD(correlation_id);
}
template <typename ArchiveT>
@@ -278,8 +278,8 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(operation);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(start_timestamp);
SAVE_DATA_FIELD(end_timestamp);
SAVE_DATA_FIELD(dst_agent_id);
+23 -14
Просмотреть файл
@@ -8,7 +8,8 @@ import pytest
def node_exists(name, data, min_len=1):
assert name in data
assert data[name] is not None
assert len(data[name]) >= min_len
if isinstance(data[name], (list, tuple, dict, set)):
assert len(data[name]) >= min_len
def test_data_structure(input_data):
@@ -19,6 +20,12 @@ def test_data_structure(input_data):
sdk_data = data["rocprofiler-sdk-json-tool"]
node_exists("metadata", sdk_data)
node_exists("pid", sdk_data["metadata"])
node_exists("main_tid", sdk_data["metadata"])
node_exists("init_time", sdk_data["metadata"])
node_exists("fini_time", sdk_data["metadata"])
node_exists("agents", sdk_data)
node_exists("call_stack", sdk_data)
node_exists("callback_records", sdk_data)
@@ -60,18 +67,20 @@ def test_timestamps(input_data):
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for itr in sdk_data["buffer_records"]["memory_copies"]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for titr in ["kernel_dispatches", "memory_copies"]:
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
api_start = cb_start[itr["correlation_id"]["internal"]]
api_end = cb_end[itr["correlation_id"]["internal"]]
assert api_start < itr["start_timestamp"]
assert api_end <= itr["end_timestamp"]
api_start = cb_start[itr["correlation_id"]["internal"]]
api_end = cb_end[itr["correlation_id"]["internal"]]
assert api_start < itr["start_timestamp"]
assert api_end <= itr["end_timestamp"]
def test_total_runtime(input_data):
@@ -84,8 +93,8 @@ def test_total_runtime(input_data):
expected_runtime = 1.0e3 # one second in milliseconds
assert (sum(runtime_data) * 1.0e-6) >= (0.9 * expected_runtime)
assert (sum(runtime_data) * 1.0e-6) <= (1.1 * expected_runtime)
assert (sum(runtime_data) * 1.0e-6) >= (0.8 * expected_runtime)
assert (sum(runtime_data) * 1.0e-6) <= (1.2 * expected_runtime)
def test_internal_correlation_ids(input_data):
+3 -2
Просмотреть файл
@@ -84,7 +84,7 @@ set_tests_properties(
add_test(
NAME rocprofv3-test-systrace-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -M --sys-trace -d
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --sys-trace -d
${CMAKE_CURRENT_BINARY_DIR}/%argt%-systrace -o out
$<TARGET_FILE:simple-transpose>)
@@ -102,7 +102,8 @@ set_tests_properties(
add_test(
NAME rocprofv3-test-systrace-validate
COMMAND
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --hsa-input
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k
"not test_hsa_api_trace" --hsa-input
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-systrace/out_hsa_api_trace.csv
--kernel-input
${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-systrace/out_kernel_trace.csv
+7 -3
Просмотреть файл
@@ -2,7 +2,6 @@
import sys
import pytest
import re
def test_hsa_api_trace(hsa_input_data):
@@ -37,13 +36,18 @@ def test_hsa_api_trace(hsa_input_data):
def test_kernel_trace(kernel_input_data):
mangled_kernel_name = "_Z15matrixTransposePfS_i.kd"
valid_kernel_names = (
"_Z15matrixTransposePfS_i.kd",
"matrixTranspose(float*, float*, int)",
)
assert len(kernel_input_data) == 1
for row in kernel_input_data:
assert row["Kind"] == "KERNEL_DISPATCH"
assert int(row["Agent_Id"]) > 0
assert int(row["Queue_Id"]) > 0
assert int(row["Kernel_Id"]) > 0
assert row["Kernel_Name"] == mangled_kernel_name
assert row["Kernel_Name"] in valid_kernel_names
assert int(row["Correlation_Id"]) > 0
assert int(row["Workgroup_Size_X"]) == 4
assert int(row["Workgroup_Size_Y"]) == 4
+32 -8
Просмотреть файл
@@ -43,8 +43,6 @@
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <glog/logging.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
@@ -432,7 +430,7 @@ dispatch_callback(rocprofiler_queue_id_t, /*queue_id*/
// Counters we want to collect (here its SQ_WAVES_sum)
auto* counters_env = getenv("ROCPROF_COUNTERS");
if(std::string(counters_env) != "SQ_WAVES_sum")
LOG(FATAL) << "Counter not supported in the test tool";
throw std::runtime_error{"Counter not supported in the test tool"};
std::set<std::string> counters_to_collect = {"SQ_WAVES_sum"};
// GPU Counter IDs
@@ -728,9 +726,16 @@ auto buffers = std::array<rocprofiler_buffer_id_t*, 6>{&hsa_api_buffered_buffer,
auto agents = std::vector<rocprofiler_agent_t>{};
rocprofiler_timestamp_t init_time = 0;
rocprofiler_timestamp_t fini_time = 0;
rocprofiler_thread_id_t main_tid = 0;
int
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
rocprofiler_get_timestamp(&init_time);
rocprofiler_get_thread_id(&main_tid);
assert(tool_data != nullptr);
rocprofiler_available_agents_cb_t iterate_cb =
@@ -997,6 +1002,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
return 0;
}
void
write_json(call_stack_t* _call_stack);
void
tool_fini(void* tool_data)
{
@@ -1006,6 +1014,8 @@ tool_fini(void* tool_data)
stop();
flush();
rocprofiler_get_timestamp(&fini_time);
std::cerr << "[" << getpid() << "][" << __FUNCTION__
<< "] Finalizing... agents=" << agents.size()
<< ", code_object_callback_records=" << code_object_records.size()
@@ -1027,6 +1037,17 @@ tool_fini(void* tool_data)
_call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
}
write_json(_call_stack);
std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalization complete.\n"
<< std::flush;
delete _call_stack;
}
void
write_json(call_stack_t* _call_stack)
{
auto ofname = std::string{"rocprofiler-tool-results.json"};
if(auto* eofname = getenv("ROCPROFILER_TOOL_OUTPUT_FILE")) ofname = eofname;
@@ -1068,6 +1089,14 @@ tool_fini(void* tool_data)
json_ar.setNextName("rocprofiler-sdk-json-tool");
json_ar.startNode();
json_ar.setNextName("metadata");
json_ar.startNode();
json_ar(cereal::make_nvp("pid", getpid()));
json_ar(cereal::make_nvp("main_tid", main_tid));
json_ar(cereal::make_nvp("init_time", init_time));
json_ar(cereal::make_nvp("fini_time", fini_time));
json_ar.finishNode();
json_ar(cereal::make_nvp("agents", agents));
if(_call_stack) json_ar(cereal::make_nvp("call_stack", *_call_stack));
@@ -1114,11 +1143,6 @@ tool_fini(void* tool_data)
*ofs << std::flush;
if(cleanup) cleanup(ofs);
std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalization complete.\n"
<< std::flush;
delete _call_stack;
}
void