diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f47812af6..32eeb9b717 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -299,5 +299,5 @@ Example for file plugin output: ### Added - Updated supported GPU architectures in README with profiler versions -- Automatic ISA dumping for ATT -- CSV mode for ATT +- Automatic ISA dumping for ATT. See README. +- CSV mode for ATT. See README. diff --git a/plugin/att/att.cpp b/plugin/att/att.cpp index a11b3a8a12..7689d19c0c 100644 --- a/plugin/att/att.cpp +++ b/plugin/att/att.cpp @@ -83,7 +83,7 @@ class att_plugin_t { if (!att_tracer_record) return ROCPROFILER_STATUS_ERROR; - std::string kernel_name_mangled{}; + std::string kernel_name_mangled; // Found problem with rocprofiler API for invalid kernel_ids; if (att_tracer_record->kernel_id.handle != TEST_INVALID_KERNEL) { size_t name_length; diff --git a/plugin/att/att_to_csv.py b/plugin/att/att_to_csv.py index fbd12aa0b8..acc0b3f966 100755 --- a/plugin/att/att_to_csv.py +++ b/plugin/att/att_to_csv.py @@ -15,4 +15,3 @@ def dump_csv(code): writer = csv.writer(f) writer.writerow(['Line', 'Instruction', 'Hitcount', 'Cycles', 'Addr', 'C++ Reference']) [writer.writerow([m[5], m[0], m[7], m[8], hex(m[6]), m[3]]) for m in code] - #[writer.writerow(m) for m in code] diff --git a/plugin/att/stitch.py b/plugin/att/stitch.py index b471edb4df..e1ada4e680 100644 --- a/plugin/att/stitch.py +++ b/plugin/att/stitch.py @@ -159,7 +159,6 @@ class PCTranslator: def swappc(self, line, line_num, inst_index): try: loc = self.addrmap[self.insts[inst_index+1][2]] - #print('Jumping to:', loc, self.code[loc]) return loc except: print('SWAPPC: Could not find addr', self.insts[inst_index+1][2], 'for', line) @@ -167,7 +166,6 @@ class PCTranslator: def setpc(self, line, inst_index): try: loc = self.addrmap[self.insts[inst_index+1][2]] - #print('Jumping to:', loc, self.code[loc]) return loc except: print('SETPC: Could not find addr', self.insts[inst_index+1][2], 'for', line) @@ -212,14 +210,11 @@ def 
move_down_fork(fork, insts, i): #(fork : Fork, insts : list, i : int): and insts[i][1] == fork.insts[i+1].inst_type: i += 2 else: - #print('Failed at', i, insts[i]) return False, i if len(fork.insts) != len(insts): - #print('Failed at the end at', i, insts[i]) return False, i - #print('Reached end of ', fork.name) return True, i FORK_TREE = Fork() @@ -490,9 +485,6 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): print('Parsing terminated at:', as_line) break - #print(matched, as_line) - #print([WaveInstCategory[insts[i+k][1]] for k in range(10) if i+k < len(insts)]) - if matched: result.append(inst + (reverse_map[line],)) i += 1 diff --git a/src/core/isa_capture/code_object_track.cpp b/src/core/isa_capture/code_object_track.cpp index 0ea0a24eb6..7ffe2b0846 100644 --- a/src/core/isa_capture/code_object_track.cpp +++ b/src/core/isa_capture/code_object_track.cpp @@ -242,7 +242,7 @@ void codeobj_record::stop_capture(rocprofiler_record_id_t id) { } rocprofiler_codeobj_symbols_t codeobj_record::get_capture(rocprofiler_record_id_t id) { - std::atomic_thread_fence(std::memory_order_acquire); // Fencing the state of the map + std::lock_guard lock(mutex); // Fencing the state of the map auto& pair = record_id_map.at(id.handle); return pair.second->get(pair.first); } diff --git a/tests-v2/featuretests/profiler/profiler_gtest.cpp b/tests-v2/featuretests/profiler/profiler_gtest.cpp index 56d56b6d56..b0848b2003 100644 --- a/tests-v2/featuretests/profiler/profiler_gtest.cpp +++ b/tests-v2/featuretests/profiler/profiler_gtest.cpp @@ -72,6 +72,7 @@ void __attribute__((constructor)) globalsetting() { std::stringstream gfx_path; gfx_path << app_path << metrics_path; setenv("ROCPROFILER_METRICS_PATH", gfx_path.str().c_str(), true); + setenv("ROCPROFILER_MAX_ATT_PROFILES", "2", 1); } /** @@ -613,7 +614,7 @@ class ATTCollection : public ::testing::Test { for (int i = 0; i < se_num; i++) { if (!att_tracer_record->shader_engine_data) continue; auto se_att_trace = 
att_tracer_record->shader_engine_data[i]; - if (!se_att_trace.buffer_ptr || !se_att_trace.buffer_size) continue; + if (!se_att_trace.buffer_ptr || se_att_trace.buffer_size < 8192) continue; bCollected = true; } } @@ -623,7 +624,7 @@ class ATTCollection : public ::testing::Test { }; bool ATTCollection::bCollected = false; -TEST_F(ATTCollection, WhenRunningATTItCollectsTraceData) { +TEST_F(ATTCollection, WhenRunningATTItCollectsTraceDataWithOldAPI) { int result = ROCPROFILER_STATUS_ERROR; // inititalize ROCProfiler @@ -635,8 +636,6 @@ TEST_F(ATTCollection, WhenRunningATTItCollectsTraceData) { std::vector<rocprofiler_att_parameter_t> parameters; parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_COMPUTE_UNIT, 0}); parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_SE_MASK, 0xF}); - //parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_SIMD_SELECT, 0x3}); // Replace below tests once aqlprofile passes - //parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_BUFFER_SIZE, 0x1000000}); // Replace below tests once aqlprofile passes parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_MASK, 0x0F00}); parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_TOKEN_MASK, 0x344B}); parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_TOKEN_MASK2, 0xFFFF}); @@ -690,6 +689,71 @@ TEST_F(ATTCollection, WhenRunningATTItCollectsTraceData) { EXPECT_EQ(bCollected, true); } +// New API +TEST_F(ATTCollection, WhenRunningATTItCollectsTraceDataWithNewAPI) { + int result = ROCPROFILER_STATUS_ERROR; + + // initialize ROCProfiler + result = rocprofiler_initialize(); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // Att trace collection parameters + rocprofiler_session_id_t session_id; + std::vector<rocprofiler_att_parameter_t> parameters; + parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_COMPUTE_UNIT, 0}); + parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_SE_MASK, 0xF}); +
parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_SIMD_SELECT, 0x3}); // Replace below tests once aqlprofile passes + parameters.emplace_back(rocprofiler_att_parameter_t{ROCPROFILER_ATT_BUFFER_SIZE, 0x1000000}); // Replace below tests once aqlprofile passes + + // create a session + result = rocprofiler_create_session(ROCPROFILER_NONE_REPLAY_MODE, &session_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // create a buffer to hold att trace records for each kernel launch + rocprofiler_buffer_id_t buffer_id; + result = rocprofiler_create_buffer(session_id, FlushCallback, 0x9999, &buffer_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // create a filter for collecting att traces + rocprofiler_filter_id_t filter_id; + rocprofiler_filter_property_t property = {}; + result = rocprofiler_create_filter(session_id, ROCPROFILER_ATT_TRACE_COLLECTION, + rocprofiler_filter_data_t{.att_parameters = &parameters[0]}, + parameters.size(), &filter_id, property); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // set buffer for the filter + result = rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // activating att tracing session + result = rocprofiler_start_session(session_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // Launch a kernel + LaunchVectorAddKernel(); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // deactivate att tracing session + result = rocprofiler_terminate_session(session_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // dump att tracing data + result = rocprofiler_flush_data(session_id, buffer_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // destroy session + result = rocprofiler_destroy_session(session_id); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // finalize att tracing by destroying rocprofiler object + result = rocprofiler_finalize(); + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, result); + + // check if
we got data from any shader engine + EXPECT_EQ(bCollected, true); +} + /* * ################################################### * ############ MultiThreaded API Tests ################