Shared Library Constructor (rocprofv3 deadlock fix) (#599)
* Moved tests/apps to tests/bin
* Renamed cmake project in tests/bin
* Update samples
  - Use ROCPROFILER_DEFAULT_FAIL_REGEX
  - tweaks to stdout messages
* Update tests
  - Use ROCPROFILER_DEFAULT_FAIL_REGEX
* Add tests/lib
  - libraries with HIP code
* Update PTL submodule
  - remove atexit delete of thread_id_map
* Update cmake/rocprofiler_options.cmake
  - Set ROCPROFILER_DEFAULT_FAIL_REGEX
* Update common lib: env + logging
  - improved customization of logging settings
  - default to disabling logging to files
  - install failure handler for rocprofv3
  - set_env support in environment.*
* Add lib/rocprofiler-sdk/shared_library.cpp
  - shared library constructor
* Update lib/rocprofiler-sdk-tool/tool.cpp
  - destructor thread safety
  - convert callback_name_info and buffered_name_info to pointers
  - install failure handler for logging
* Add tests/bin/hip-in-libraries
  - hip-in-libraries is an exe which uses two shared libraries where each shared library contains HIP kernels
  - used for testing deadlocking within __hipRegisterFatBinary
* Update bin/rocprofv3
  - reorganized the env variables
  - use exec to launch command
  - set ROCPROFILER_LIBRARY_CTOR=1
* Add tests/rocprofv3/tracing-hip-in-libraries
  - uses hip-in-libraries exe for exe which uses shared libraries to launch HIP kernels
* Update bin/rocprofv3
  - fix counter collection (no exec)
* Update lib/rocprofiler-sdk-tool/tool.cpp
  - replace "Kernel-Name" with "Kernel_Name"
* Update lib/rocprofiler-sdk/registration.cpp
  - Use RTLD_LOCAL instead of RTLD_GLOBAL for env libraries
* Update tests/rocprofv3
  - replace "Kernel-Name" with "Kernel_Name"
* Update tests
  - vector-ops (bin) stream syncs + runs with 4 queues per device
  - improve counter-collection/input1 validation
  - rocprofv3/tracing-hip-in-libraries does not do sys-trace
  - improved validation script for tracing-hip-in-libraries
  - updated dispatch_callback in json-tool.cpp following reworking of prototypes for counter collection
* Update samples/counter_collection
  - updated dispatch_callback(s) and record_callback(s) following reworking of prototypes
* Update bin/rocprofv3
  - reorganized help menu
  - added options for sub-HSA tables
  - added --hip-runtime-trace
  - changed --hip-trace to include --hip-compiler-trace
* Update lib/rocprofiler-sdk-tool
  - improved kernel filtering
  - removed arch_vgpr, accum_vgpr, sgpr code (in rocprofiler-sdk)
  - fixed issue with counter-collection w/o tracing
  - added support for fine grained HSA API tracing
  - removed directly linking to HSA-runtime
* Update lib/rocprofiler-sdk/agent.cpp
  - rocp_agents != hsa_agents is non-fatal when ROCPROFILER_BUILD_CI=OFF (CMake option)
* GPR (vector and scalar) info in kernel symbol data
  - rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t contains general purpose register info
* Header include order fix
  - Include repo headers first
  - Third party library headers next
  - standard library headers last
* Update dispatch profiling public API
  - introduce rocprofiler_profile_counting_dispatch_data_t
  - change signature of rocprofiler_profile_counting_dispatch_callback_t and rocprofiler_profile_counting_record_callback_t
  - provide rocprofiler_user_data_t pointer in dispatch callback
  - provide rocprofiler_user_data_t value (from dispatch cb) in record callback
* Update tests/bin/CMakeLists.txt
  - fix add_subdirectory(hip-in-libraries) order
* Update VERSION
  - bump to 0.2.0 in prep for AFAR
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
665c546e65
Коммит
7b6d3c70bd
@@ -145,7 +145,7 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
if(argc > 3) nsync = atoll(argv[3]);
|
||||
|
||||
auto_lock_t _lk{print_lock};
|
||||
std::cout << "[" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl;
|
||||
std::cout << "[transpose][" << rank << "][" << tid << "] M: " << M << " N: " << N << std::endl;
|
||||
_lk.unlock();
|
||||
|
||||
std::default_random_engine _engine{std::random_device{}() * (rank + 1) * (tid + 1)};
|
||||
@@ -173,7 +173,7 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
dim3 block(32, 32, 1); // transpose_a
|
||||
|
||||
print_lock.lock();
|
||||
printf("[%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n",
|
||||
printf("[transpose][%i][%i] grid=(%i,%i,%i), block=(%i,%i,%i)\n",
|
||||
rank,
|
||||
tid,
|
||||
grid.x,
|
||||
@@ -198,8 +198,10 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
|
||||
float GB = (float) size * nitr * 2 / (1 << 30);
|
||||
|
||||
print_lock.lock();
|
||||
std::cout << "[" << rank << "][" << tid << "] Runtime of transpose is " << time << " sec\n"
|
||||
<< "The average performance of transpose is " << GB / time << " GBytes/sec"
|
||||
std::cout << "[transpose][" << rank << "][" << tid << "] Runtime of transpose is " << time
|
||||
<< " sec\n";
|
||||
std::cout << "[transpose][" << rank << "][" << tid
|
||||
<< "] The average performance of transpose is " << GB / time << " GBytes/sec"
|
||||
<< std::endl;
|
||||
print_lock.unlock();
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user