diff --git a/projects/rocprofiler-sdk/samples/pc_sampling/pcs.cpp b/projects/rocprofiler-sdk/samples/pc_sampling/pcs.cpp index 24167715e5..3ad808ac42 100644 --- a/projects/rocprofiler-sdk/samples/pc_sampling/pcs.cpp +++ b/projects/rocprofiler-sdk/samples/pc_sampling/pcs.cpp @@ -319,6 +319,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, << "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), " << "wave_id: " << std::setw(2) << static_cast(pc_sample->wave_id) << ", " + << "chiplet: " << std::setw(2) << static_cast(pc_sample->chiplet) + << ", " << "cu_id: " << pc_sample->hw_id << ", " << "correlation: {internal=" << std::setw(7) << pc_sample->correlation_id.internal << ", " diff --git a/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.cpp b/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.cpp index 0632f7ac64..c4143af2a7 100644 --- a/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.cpp +++ b/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.cpp @@ -107,7 +107,7 @@ KernelObject::KernelObject(uint64_t code_object_id, uint64_t vaddr = begin_address; while(vaddr < end_address) { - auto inst = translator.get(vaddr); + auto inst = translator.get(code_object_id, vaddr); vaddr += inst->size; this->add_instruction(std::move(inst)); } @@ -143,6 +143,10 @@ dump_flat_profile() { _sample_instruction->process([&](const SampleInstruction& sample_instruction) { ss << sample_instruction.sample_count(); + // Each instruction should be visited exactly once. + // Otherwise, code object loading/unloading and relocations + // are not handled properly. + assert(visited_instructions.count(sample_instruction.inst()) == 0); // Assure that each instruction is counted once. 
if(visited_instructions.count(sample_instruction.inst()) == 0) { diff --git a/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.hpp b/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.hpp index 1426fcfe83..5e572f8c03 100644 --- a/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.hpp +++ b/projects/rocprofiler-sdk/tests/pc_sampling/address_translation.hpp @@ -40,6 +40,27 @@ namespace address_translation { using Instruction = rocprofiler::codeobj::disassembly::Instruction; using CodeobjAddressTranslate = rocprofiler::codeobj::disassembly::CodeobjAddressTranslate; +using marker_id_t = rocprofiler::codeobj::disassembly::marker_id_t; + +/** + * @brief Pair (code_object_id, pc_addr) uniquely identifies an instruction. + */ +struct inst_id_t +{ + marker_id_t code_object_id; + uint64_t pc_addr; + + bool operator==(const inst_id_t& b) const + { + return this->pc_addr == b.pc_addr && this->code_object_id == b.code_object_id; + } + + bool operator<(const inst_id_t& b) const + { + if(this->code_object_id == b.code_object_id) return this->pc_addr < b.pc_addr; + return this->code_object_id < b.code_object_id; + } +}; class KernelObject { @@ -207,8 +228,9 @@ public: { auto lock = std::unique_lock{mut}; - auto inst_id = get_instruction_id(*instruction); - auto itr = samples.find(inst_id); + inst_id_t inst_id = {.code_object_id = instruction->codeobj_id, + .pc_addr = instruction->ld_addr}; + auto itr = samples.find(inst_id); if(itr == samples.end()) { // Add new instruction @@ -225,28 +247,18 @@ public: { auto lock = std::shared_lock{mut}; - auto inst_id = get_instruction_id(inst); - auto itr = samples.find(inst_id); + // TODO: Avoid creating a new instance of `inst_id_t` whenever querying + // sampled instructions. 
+ inst_id_t inst_id = {.code_object_id = inst.codeobj_id, .pc_addr = inst.ld_addr}; + auto itr = samples.find(inst_id); if(itr == samples.end()) return nullptr; return itr->second.get(); } private: - // For the sake of this test, we use `ld_addr` as the instruction identifier. - // TODO: To cover code object loading/unloading and relocations, - // use `(code_object_id + ld_addr)` as the unique identifier. - // This assumes the decoder chage to return code_object_id as part - // of the `LoadedCodeobjDecoder::get(uint64_t ld_addr)` method. - using instrution_id_t = uint64_t; - instrution_id_t get_instruction_id(const Instruction& instruction) const - { - // Ensure the decoder determined the `ld_addr`. - assert(instruction.ld_addr > 0); - return instruction.ld_addr; - } - - std::unordered_map> samples; - mutable std::shared_mutex mut; + // TODO: optimize to use unordered_map (requires a hash for `inst_id_t`) + std::map> samples; + mutable std::shared_mutex mut; }; std::mutex& diff --git a/projects/rocprofiler-sdk/tests/pc_sampling/codeobj.cpp b/projects/rocprofiler-sdk/tests/pc_sampling/codeobj.cpp index a9cd688ee5..04a1441e71 100644 --- a/projects/rocprofiler-sdk/tests/pc_sampling/codeobj.cpp +++ b/projects/rocprofiler-sdk/tests/pc_sampling/codeobj.cpp @@ -185,7 +185,7 @@ codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record, // extract symbols from code object auto& kernel_object_map = client::address_translation::get_kernel_object_map(); - auto symbolmap = translator.getSymbolMap(); + auto symbolmap = translator.getSymbolMap(data->code_object_id); for(auto& [vaddr, symbol] : symbolmap) { kernel_object_map.add_kernel( diff --git a/projects/rocprofiler-sdk/tests/pc_sampling/pcs.cpp b/projects/rocprofiler-sdk/tests/pc_sampling/pcs.cpp index dbd6f0bae9..98101e0788 100644 --- a/projects/rocprofiler-sdk/tests/pc_sampling/pcs.cpp +++ b/projects/rocprofiler-sdk/tests/pc_sampling/pcs.cpp @@ -348,6 +348,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t 
/*context_id*/, << "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), " << "wave_id: " << std::setw(2) << static_cast(pc_sample->wave_id) << ", " + << "chiplet: " << std::setw(2) + << static_cast(pc_sample->chiplet) << ", " << "cu_id: " << pc_sample->hw_id << ", " << "correlation: {internal=" << std::setw(7) << pc_sample->correlation_id.internal << ", "