roctx annotation in mandelbrot benchmark (#1084)

Этот коммит содержится в:
Jonathan R. Madsen
2025-10-21 11:21:28 -05:00
коммит произвёл GitHub
родитель 6b0f87c3a6
Коммит 55ca813ded
2 изменённых файлов: 36 добавлений и 0 удалений
+3
Просмотреть файл
@@ -29,6 +29,8 @@ set(CMAKE_HIP_STANDARD 17)
# Disable compiler-specific language extensions for HIP sources and make the
# requested HIP language standard mandatory instead of a best-effort preference.
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)
# Locate the ROCTx annotation package; REQUIRED stops configuration with an
# error when the package cannot be found.
find_package(rocprofiler-sdk-roctx REQUIRED)
# Treat these .cpp files as HIP sources so they are built by the HIP compiler
# rather than the plain C++ compiler.
set_source_files_properties(mandelbrot.cpp PROPERTIES LANGUAGE HIP)
set_source_files_properties(utils.cpp PROPERTIES LANGUAGE HIP)
@@ -37,6 +39,7 @@ target_sources(mandelbrot PRIVATE mandelbrot.cpp utils.cpp)
# Strict warning set with warnings promoted to errors (-Werror);
# -ffp-contract=fast lets the compiler contract floating-point expressions
# (e.g. into fused multiply-add).
target_compile_options(mandelbrot PRIVATE -W -Wall -Wextra -Wpedantic -Werror
-ffp-contract=fast)
# Make headers next to this CMakeLists.txt (e.g. utils.hpp) findable.
target_include_directories(mandelbrot PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
# Link the imported ROCTx target located by find_package(rocprofiler-sdk-roctx);
# mandelbrot.cpp includes <rocprofiler-sdk-roctx/roctx.h> and calls the
# roctxRangePush/roctxRangePop functions.
target_link_libraries(mandelbrot PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx)
install(
TARGETS mandelbrot
+33
Просмотреть файл
@@ -25,6 +25,8 @@
#include "utils.hpp"
#include <rocprofiler-sdk-roctx/roctx.h>
#include <hip/hip_vector_types.h>
#include <hip/math_functions.h>
#include <omp.h>
@@ -473,6 +475,19 @@ hipPerfMandelBrot::printResults()
std::cout << std::endl;
}
/// Scoped ROCTx range marker.
///
/// Construction streams all arguments, in order, into a single label and
/// calls roctxRangePush() with it; destruction calls roctxRangePop(). Declare
/// an instance as a local variable so the pop happens when the enclosing
/// scope exits, e.g.
///
///     auto _range = roctx_range{__FUNCTION__, "(kernels=", kernelCnt, ")"};
///
/// The type is neither copyable nor movable: every instance must correspond
/// to exactly one push/pop pair, and a copy would issue a second pop for a
/// single push. Initializing from a prvalue as shown above still compiles
/// under C++17 guaranteed copy elision.
struct roctx_range
{
    /// Builds the range label by inserting each argument into a string stream
    /// with operator<< (so every argument must be streamable), then pushes
    /// the resulting text as a new range.
    template <typename... Args>
    roctx_range(Args&&... args)
    {
        auto _ss = std::stringstream{};
        ((_ss << args), ...);
        // roctxRangePush is called while the temporary std::string returned
        // by str() is still alive (it lives until the end of this statement).
        roctxRangePush(_ss.str().c_str());
    }

    /// Pops the range pushed by the constructor.
    ~roctx_range() { roctxRangePop(); }

    // One object == one push/pop pair; forbid duplicating or transferring it
    // so the push and pop calls can never become unbalanced.
    roctx_range(const roctx_range&) = delete;
    roctx_range& operator=(const roctx_range&) = delete;
    roctx_range(roctx_range&&)                 = delete;
    roctx_range& operator=(roctx_range&&)      = delete;
};
// Wrappers for the kernel launches
void
hipPerfMandelBrot::float_mad(uint* out,
@@ -487,6 +502,9 @@ hipPerfMandelBrot::float_mad(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};
int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mad_kernel<float>,
dim3(blocks),
@@ -515,6 +533,9 @@ hipPerfMandelBrot::float_mandel_unroll(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};
int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mandel_unroll_kernel<float>,
dim3(blocks),
@@ -543,6 +564,9 @@ hipPerfMandelBrot::double_mad(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};
int streamCnt = getNumStreams();
hipLaunchKernelGGL(double_mad_kernel<double>,
dim3(blocks),
@@ -571,6 +595,9 @@ hipPerfMandelBrot::double_mandel_unroll(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};
int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mandel_unroll_kernel<double>,
dim3(blocks),
@@ -589,6 +616,8 @@ hipPerfMandelBrot::double_mandel_unroll(uint* out,
void
hipPerfMandelBrot::run(unsigned int testCase, unsigned int /* deviceId */)
{
auto _run_range = roctx_range{__FUNCTION__, "(testCase=", testCase, ")"};
unsigned int numStreams = getNumStreams();
coordIdx = testCase % numCoords;
@@ -667,6 +696,8 @@ hipPerfMandelBrot::run(unsigned int testCase, unsigned int /* deviceId */)
for(unsigned int k = 0; k < numLoops; k++)
{
auto _loop_range = roctx_range{__FUNCTION__, "(testCase=", testCase, ") :: loop #", k};
if((testCase == 0 || testCase == 1 || testCase == 2 || testCase == 5 || testCase == 6 ||
testCase == 7 || testCase == 10 || testCase == 11 || testCase == 12))
{
@@ -805,6 +836,8 @@ hipPerfMandelBrot::checkData(uint* ptr)
int
main(int argc, char* argv[])
{
auto _range = roctx_range{argv[0]};
// Default values for kernels and streams
unsigned int numStreamsWarmup = 1, numKernelsWarmup = 1;
unsigned int numStreamsSync = 1, numKernelsSync = 1;