Remove OMPT category and fix certain preprocessor checks (#1165)

* Part 1: Remove OMPT Category
* Part 2: Properly remove backend choices
* Part 3: Ensure preprocessor checks also treat a variable the user defined as OFF (0) as disabled, not just an undefined one
This commit is contained in:
Kian Cossettini
2025-10-02 21:08:18 -04:00
کامیت شده توسط GitHub
والد c0f8627e7f
کامیت edfda63701
6 فایلهای تغییر یافته به همراه 20 افزوده شده و 15 حذف شده
@@ -752,18 +752,20 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
"mutex-locks", "spin-locks", "rw-locks",
"rocm" };
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
#if(!defined(ROCPROFSYS_USE_MPI) || ROCPROFSYS_USE_MPI == 0) && \
(!defined(ROCPROFSYS_USE_MPI_HEADERS) || ROCPROFSYS_USE_MPI_HEADERS == 0)
_backend_choices.erase("mpip");
#endif
#if !defined(ROCPROFSYS_USE_OMPT)
#if !defined(ROCPROFSYS_USE_OMPT) || ROCPROFSYS_USE_OMPT == 0
_backend_choices.erase("ompt");
#endif
#if !defined(ROCPROFSYS_USE_ROCM)
#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0
_backend_choices.erase("rocm");
_backend_choices.erase("amd-smi");
_backend_choices.erase("rcclp");
_backend_choices.erase("ompt");
#endif
parser.start_group("BACKEND OPTIONS",
@@ -576,18 +576,20 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
"rcclp", "amd-smi", "rocm", "mutex-locks",
"spin-locks", "rw-locks" };
#if !defined(ROCPROFSYS_USE_MPI) && !defined(ROCPROFSYS_USE_MPI_HEADERS)
#if(!defined(ROCPROFSYS_USE_MPI) || ROCPROFSYS_USE_MPI == 0) && \
(!defined(ROCPROFSYS_USE_MPI_HEADERS) || ROCPROFSYS_USE_MPI_HEADERS == 0)
_backend_choices.erase("mpip");
#endif
#if !defined(ROCPROFSYS_USE_OMPT)
#if !defined(ROCPROFSYS_USE_OMPT) || ROCPROFSYS_USE_OMPT == 0
_backend_choices.erase("ompt");
#endif
#if !defined(ROCPROFSYS_USE_ROCM)
#if !defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0
_backend_choices.erase("amd-smi");
_backend_choices.erase("rocm");
_backend_choices.erase("rcclp");
_backend_choices.erase("ompt");
#endif
if(gpu::device_count() == 0)
@@ -596,6 +598,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
_backend_choices.erase("rcclp");
_backend_choices.erase("amd-smi");
_backend_choices.erase("rocm");
_backend_choices.erase("ompt");
#if defined(ROCPROFSYS_USE_ROCM)
update_env(_data, "ROCPROFSYS_USE_AMD_SMI", false);
@@ -119,7 +119,6 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "
ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions")
ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions")
ROCPROFSYS_DEFINE_CATEGORY(category, mpi, ROCPROFSYS_CATEGORY_MPI, "mpi", "MPI regions")
ROCPROFSYS_DEFINE_CATEGORY(category, ompt, ROCPROFSYS_CATEGORY_OMPT, "ompt", "OpenMP tools regions")
ROCPROFSYS_DEFINE_CATEGORY(category, process_sampling, ROCPROFSYS_CATEGORY_PROCESS_SAMPLING, "process_sampling", "Process-level data")
ROCPROFSYS_DEFINE_CATEGORY(category, comm_data, ROCPROFSYS_CATEGORY_COMM_DATA, "comm_data", "MPI/RCCL counters for tracking amount of data sent or received")
ROCPROFSYS_DEFINE_CATEGORY(category, causal, ROCPROFSYS_CATEGORY_CAUSAL, "causal", "Causal profiling data")
@@ -192,7 +191,6 @@ using name = perfetto_category<Tp...>;
ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \
ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \
ROCPROFSYS_PERFETTO_CATEGORY(category::mpi), \
ROCPROFSYS_PERFETTO_CATEGORY(category::ompt), \
ROCPROFSYS_PERFETTO_CATEGORY(category::sampling), \
ROCPROFSYS_PERFETTO_CATEGORY(category::process_sampling), \
ROCPROFSYS_PERFETTO_CATEGORY(category::comm_data), \
@@ -107,17 +107,18 @@ struct functors;
} // namespace component
} // namespace rocprofsys
#if !defined(ROCPROFSYS_USE_RCCL)
#if !defined(ROCPROFSYS_USE_RCCL) || ROCPROFSYS_USE_RCCL == 0
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, category::rocm_rccl, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::rcclp_handle, false_type)
#endif
#if !defined(ROCPROFSYS_USE_RCCL) && !defined(ROCPROFSYS_USE_MPI)
#if(!defined(ROCPROFSYS_USE_RCCL) || ROCPROFSYS_USE_RCCL == 0) && \
(!defined(ROCPROFSYS_USE_MPI) || ROCPROFSYS_USE_MPI == 0)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::comm_data_tracker_t, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::comm_data, false_type)
#endif
#if !defined(TIMEMORY_USE_LIBUNWIND)
#if(!defined(TIMEMORY_USE_LIBUNWIND) || TIMEMORY_USE_LIBUNWIND == 0)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, category::sampling, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::backtrace, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::backtrace_metrics, false_type)
@@ -127,7 +128,8 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_cpu_clock, fa
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_percent, false_type)
#endif
#if !defined(TIMEMORY_USE_LIBUNWIND) || !defined(ROCPROFSYS_USE_ROCM)
#if(!defined(TIMEMORY_USE_LIBUNWIND) || TIMEMORY_USE_LIBUNWIND == 0) || \
(!defined(ROCPROFSYS_USE_ROCM) || ROCPROFSYS_USE_ROCM == 0)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_gfx,
false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy_umc,
@@ -72,7 +72,6 @@ extern "C"
ROCPROFSYS_CATEGORY_PTHREAD,
ROCPROFSYS_CATEGORY_KOKKOS,
ROCPROFSYS_CATEGORY_MPI,
ROCPROFSYS_CATEGORY_OMPT,
ROCPROFSYS_CATEGORY_PROCESS_SAMPLING,
ROCPROFSYS_CATEGORY_COMM_DATA,
ROCPROFSYS_CATEGORY_CAUSAL,
@@ -69,8 +69,9 @@ using tracing_count_categories_t =
// convert these categories to throughput points
using causal_throughput_categories_t =
type_list<category::host, category::kokkos, category::ompt, category::rocm_hip_api,
category::rocm_hsa_api, category::rocm_rccl, category::rocm_marker_api>;
type_list<category::host, category::kokkos, category::rocm_ompt_api,
category::rocm_hip_api, category::rocm_hsa_api, category::rocm_rccl,
category::rocm_marker_api>;
// define this outside of category region functions so that the
// static thread_local is global instead of per-template instantiation