diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index 6624142ad0..9d3efbb193 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -33,6 +33,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -68,8 +69,7 @@ hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) // returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but // no pool was found meeting the requirements. If an error is encountered, we // return that error. -static hsa_status_t -FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { +static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { hsa_status_t err; hsa_amd_segment_t segment; uint32_t flag; @@ -78,21 +78,18 @@ FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, - &segment); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; - if ((karg_st == 0 && kern_arg) || - (karg_st != 0 && !kern_arg)) { + if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) { return HSA_STATUS_SUCCESS; } @@ -152,39 +149,35 @@ HsaRsrcFactory::~HsaRsrcFactory() { } hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { - void* handle = dlopen(kAqlProfileLib, RTLD_NOW); - if (handle == NULL) { - fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); - return HSA_STATUS_ERROR; - } - dlerror(); /* Clear any existing error */ + void* handle = dlopen(kAqlProfileLib, RTLD_NOW); + if (handle == NULL) { + fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); + return HSA_STATUS_ERROR; + } + dlerror(); /* Clear any existing error */ - api->hsa_ven_amd_aqlprofile_error_string = - (decltype(::hsa_ven_amd_aqlprofile_error_string)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_error_string"); - api->hsa_ven_amd_aqlprofile_validate_event = - (decltype(::hsa_ven_amd_aqlprofile_validate_event)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event"); - api->hsa_ven_amd_aqlprofile_start = - (decltype(::hsa_ven_amd_aqlprofile_start)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_start"); - api->hsa_ven_amd_aqlprofile_stop = - (decltype(::hsa_ven_amd_aqlprofile_stop)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); + api->hsa_ven_amd_aqlprofile_error_string = + (decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_error_string"); + api->hsa_ven_amd_aqlprofile_validate_event = + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_validate_event"); + api->hsa_ven_amd_aqlprofile_start = + (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); + api->hsa_ven_amd_aqlprofile_stop = + (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); #if AQL_PROFILE_READ_API_ENABLE - api->hsa_ven_amd_aqlprofile_read = - (decltype(::hsa_ven_amd_aqlprofile_read)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_read"); + api->hsa_ven_amd_aqlprofile_read = + (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); #endif // AQL_PROFILE_READ_API_ENABLE - api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = - (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - api->hsa_ven_amd_aqlprofile_get_info = - (decltype(::hsa_ven_amd_aqlprofile_get_info)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_get_info"); - api->hsa_ven_amd_aqlprofile_iterate_data = - (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data"); + api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_get_info"); + api->hsa_ven_amd_aqlprofile_iterate_data = + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_iterate_data"); return HSA_STATUS_SUCCESS; } @@ -226,11 +219,17 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &agent_info->cu_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), &agent_info->waves_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &agent_info->simds_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + &agent_info->cu_num); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + &agent_info->waves_per_cu); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &agent_info->simds_per_cu); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + &agent_info->se_num); + hsa_agent_get_info(agent, + static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), + &agent_info->shader_arrays_per_se); agent_info->cpu_pool = {}; agent_info->kern_arg_pool = {}; @@ -349,7 +348,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; return ptr; } @@ -364,7 +363,7 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* buffer = NULL; if (!cpu_agents_.empty()) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -384,7 +383,7 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; if (!cpu_agents_.empty()) { - status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -395,6 +394,19 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s return ptr; } +// Allocate memory for command buffer. +// @param agent_info Agent from whose memory region to allocate +// @param size Size of memory in terms of bytes +// @return uint8_t* Pointer to buffer, null if allocation fails. +uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) { + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) + ? reinterpret_cast( + mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0)) + : AllocateSysMemory(agent_info, size); + return ptr; +} + // Copy data from GPU to host memory bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) { hsa_status_t status = HSA_STATUS_ERROR; @@ -404,7 +416,8 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src if (status == HSA_STATUS_SUCCESS) { status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); if (status == HSA_STATUS_SUCCESS) { - if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) { + if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, + HSA_WAIT_STATE_BLOCKED) != 0) { status = HSA_STATUS_ERROR; } } @@ -432,7 +445,8 @@ bool HsaRsrcFactory::FreeMemory(void* ptr) { // be used to submit for execution // @return bool true if successful, false otherwise bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, - const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) { + const char* kernel_name, hsa_executable_t* executable, + hsa_executable_symbol_t* code_desc) { hsa_status_t status = HSA_STATUS_ERROR; // Build the code object filename @@ -456,13 +470,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br } // Create executable. - status = hsa_executable_create_alt(HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); + status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, executable); CHECK_STATUS("Error in creating executable object", status); // Load code object. - status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, - code_obj_rdr, NULL, NULL); + status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, + NULL, NULL); CHECK_STATUS("Error in loading executable object", status); // Freeze executable. @@ -504,7 +518,7 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { return true; } -uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { const uint32_t slot_size_b = 0x40; // adevance command queue @@ -515,14 +529,15 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { } uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = (uint32_t*)((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); - uint32_t* slot_data = (uint32_t*)packet; + uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + const uint32_t* slot_data = reinterpret_cast(packet); // Copy buffered commands into the queue slot. // Overwrite the AQL invalid header (first dword) last. // This prevents the slot from being read until it's fully written. memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); - std::atomic* header_atomic_ptr = reinterpret_cast*>(&queue_slot[0]); + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&queue_slot[0]); header_atomic_ptr->store(slot_data[0], std::memory_order_release); // ringdoor bell @@ -530,6 +545,22 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { return write_idx; } +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { + const uint32_t slot_size_b = 0x40; + if ((size_bytes & (slot_size_b - 1)) != 0) { + fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); + abort(); + } + + const char* begin = reinterpret_cast(packet); + const char* end = begin + size_bytes; + uint64_t write_idx = 0; + for (const char* ptr = begin; ptr < end; ptr += slot_size_b) { + write_idx = Submit(queue, ptr); + } + + return write_idx; +} HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index c07798c22c..034bc3dae9 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -22,8 +22,8 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING POSSIBILITY OF SUCH DAMAGE. ********************************************************************/ -#ifndef _HSA_RSRC_FACTORY_H_ -#define _HSA_RSRC_FACTORY_H_ +#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_ +#define SRC_UTIL_HSA_RSRC_FACTORY_H_ #include #include @@ -200,6 +200,12 @@ class HsaRsrcFactory { // @return uint8_t* Pointer to buffer, null if allocation fails. uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); + // Allocate memory for command buffer. + // @param agent_info Agent from whose memory region to allocate + // @param size Size of memory in terms of bytes + // @return uint8_t* Pointer to buffer, null if allocation fails. + uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); + // Copy data from GPU to host memory bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size); bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size); @@ -215,13 +221,14 @@ class HsaRsrcFactory { // be used to submit for execution // @return true if successful, false otherwise bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name, - hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); + hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); // Print the various fields of Hsa Gpu Agents bool PrintGpuAgents(const std::string& header); // Submit AQL packet to given queue - static uint64_t Submit(hsa_queue_t* queue, void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); // Return AqlProfile API table typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; @@ -242,17 +249,20 @@ class HsaRsrcFactory { // Constructor of the class. Will initialize the Hsa Runtime and // query the system topology to get the list of Cpu and Gpu devices - HsaRsrcFactory(bool initialize_hsa); + explicit HsaRsrcFactory(bool initialize_hsa); // Destructor of the class ~HsaRsrcFactory(); - // HSA was initialized - const bool initialize_hsa_; - // Add an instance of AgentInfo representing a Hsa Gpu agent const AgentInfo* AddAgentInfo(const hsa_agent_t agent); + // To mmap command buffer memory + static const bool CMD_MEMORY_MMAP = false; + + // HSA was initialized + const bool initialize_hsa_; + static HsaRsrcFactory* instance_; static mutex_t mutex_; @@ -277,4 +287,4 @@ class HsaRsrcFactory { } // namespace util } // namespace rocprofiler -#endif // _HSA_RSRC_FACTORY_H_ +#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/test/ctrl/test_hsa.cpp b/test/ctrl/test_hsa.cpp index 2a7d1c7ede..ee37a7bee8 100644 --- a/test/ctrl/test_hsa.cpp +++ b/test/ctrl/test_hsa.cpp @@ -112,6 +112,15 @@ bool TestHsa::Setup() { // Start the timer object hsa_timer_.StartTimer(setup_timer_idx_); + // Load and Finalize Kernel Code Descriptor + const char* brig_path = brig_path_obj_.c_str(); + bool suc = hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, name_.c_str(), &hsa_exec_, + &kernel_code_desc_); + if (suc == false) { + std::cerr << "Error in loading and finalizing Kernel" << std::endl; + return false; + } + mem_map_t& mem_map = test_->GetMemMap(); for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) { mem_descr_t& des = it->second; @@ -119,10 +128,25 @@ bool TestHsa::Setup() { case TestKernel::LOCAL_DES_ID: des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size); break; - case TestKernel::KERNARG_DES_ID: - des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, des.size); - if (des.ptr) memset(des.ptr, 0, des.size); + case TestKernel::KERNARG_DES_ID: { + // Check the kernel args size + const size_t kernarg_size = des.size; + size_t size_info = 0; + hsa_executable_symbol_get_info( + kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info); + const bool kernarg_missmatch = (kernarg_size > size_info); + if (kernarg_missmatch) { + std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info + << std::flush << std::endl; + TEST_ASSERT(!kernarg_missmatch); + break; + } + // ALlocate kernarg memory + des.size = size_info; + des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, size_info); + if (des.ptr) memset(des.ptr, 0, size_info); break; + } case TestKernel::SYS_DES_ID: des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size); if (des.ptr) memset(des.ptr, 0, des.size); @@ -132,20 +156,12 @@ bool TestHsa::Setup() { break; default: break; - }; + } TEST_ASSERT(des.ptr != NULL); if (des.ptr == NULL) return false; } test_->Init(); - // Load and Finalize Kernel Code Descriptor - char* brig_path = (char*)brig_path_obj_.c_str(); - bool suc = hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, name_.c_str(), &hsa_exec_, &kernel_code_desc_); - if (suc == false) { - std::cerr << "Error in loading and finalizing Kernel" << std::endl; - return false; - } - // Stop the timer object hsa_timer_.StopTimer(setup_timer_idx_); setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_); @@ -161,7 +177,6 @@ bool TestHsa::Run() { const uint32_t work_grid_size = test_->GetGridSize(); uint32_t group_segment_size = 0; uint32_t private_segment_size = 0; - const size_t kernarg_segment_size = test_->GetKernargSize(); uint64_t code_handle = 0; // Retrieve the amount of group memory needed @@ -173,12 +188,6 @@ bool TestHsa::Run() { HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &private_segment_size); - // Check the kernel args size - size_t size_info = 0; - hsa_executable_symbol_get_info( - kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info); - TEST_ASSERT(kernarg_segment_size == size_info); - if (kernarg_segment_size != size_info) return false; // Retrieve handle of the code block hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, @@ -220,13 +229,8 @@ bool TestHsa::Run() { // Wait on the dispatch signal until the kernel is finished. // Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling - if (hsa_signal_wait_scacquire( - hsa_signal_, - HSA_SIGNAL_CONDITION_LT, - 1, - UINT64_MAX, - HSA_WAIT_STATE_BLOCKED) != 0) - { + if (hsa_signal_wait_scacquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, + HSA_WAIT_STATE_BLOCKED) != 0) { TEST_ASSERT("signal_wait failed"); } @@ -252,7 +256,7 @@ bool TestHsa::VerifyResults() { suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size); if (!suc) std::clog << "> VerifyResults: Memcpy failed" << std::endl << std::flush; } else { - output = test_->GetOutputPtr();; + output = test_->GetOutputPtr(); suc = true; } diff --git a/test/ctrl/test_kernel.h b/test/ctrl/test_kernel.h index 01b06c3433..974e21c673 100644 --- a/test/ctrl/test_kernel.h +++ b/test/ctrl/test_kernel.h @@ -67,8 +67,8 @@ class TestKernel { bool IsLocal(const mem_descr_t& descr) const { return (descr.id == LOCAL_DES_ID); } // Methods to get the kernel attributes - const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; }; - const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; }; + const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; } + const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; } void* GetKernargPtr() { return GetKernargDescr().ptr; } uint32_t GetKernargSize() { return GetKernargDescr().size; } void* GetOutputPtr() { return GetOutputDescr().ptr; } @@ -77,7 +77,7 @@ class TestKernel { virtual uint32_t GetGridSize() const = 0; // Return reference output - void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; }; + void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; } // Print output virtual void PrintOutput(const void* ptr) const = 0; diff --git a/test/simple_convolution/simple_convolution.cpp b/test/simple_convolution/simple_convolution.cpp index d05856bcb5..7665941392 100644 --- a/test/simple_convolution/simple_convolution.cpp +++ b/test/simple_convolution/simple_convolution.cpp @@ -299,16 +299,16 @@ void SimpleConvolution::Init() { mem_descr_t input_des = GetDescr(INPUT_BUF_ID); mem_descr_t mask_des = GetDescr(MASK_BUF_ID); mem_descr_t output_des = GetDescr(LOCAL_BUF_ID); -#if 1 +#if 0 printf("kernarg_des %p 0x%x\n", kernarg_des.ptr, kernarg_des.size); printf("input_des %p 0x%x\n", input_des.ptr, input_des.size); printf("mask_des %p 0x%x\n", mask_des.ptr, mask_des.size); printf("output_des %p 0x%x\n", output_des.ptr, output_des.size); #endif - uint32_t* input = (uint32_t*)input_des.ptr; - uint32_t* output_local = (uint32_t*)output_des.ptr; - float* mask = (float*)mask_des.ptr; - kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr; + uint32_t* input = reinterpret_cast(input_des.ptr); + uint32_t* output_local = reinterpret_cast(output_des.ptr); + float* mask = reinterpret_cast(mask_des.ptr); + kernel_args_t* kernel_args = reinterpret_cast(kernarg_des.ptr); if (randomize_seed_) { // random initialisation of input diff --git a/test/simple_convolution/simple_convolution.h b/test/simple_convolution/simple_convolution.h index a2391b97a2..38424baaed 100644 --- a/test/simple_convolution/simple_convolution.h +++ b/test/simple_convolution/simple_convolution.h @@ -64,7 +64,6 @@ class SimpleConvolution : public TestKernel { uint32_t arg41; uint32_t arg5; uint32_t arg51; - uint64_t pad[6]; }; // Reference CPU implementation of Simple Convolution diff --git a/test/tool/gfx_metrics.xml b/test/tool/gfx_metrics.xml index 1e05cefc27..ce949f851e 100644 --- a/test/tool/gfx_metrics.xml +++ b/test/tool/gfx_metrics.xml @@ -40,9 +40,6 @@ - - - @@ -86,7 +83,4 @@ - - - diff --git a/test/tool/input.xml b/test/tool/input.xml index 9ba6165082..cd22e03e50 100644 --- a/test/tool/input.xml +++ b/test/tool/input.xml @@ -10,7 +10,7 @@ # List of metrics # SQTT trace with parameters diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp index c8537b3272..4a386e7ecc 100644 --- a/test/util/hsa_rsrc_factory.cpp +++ b/test/util/hsa_rsrc_factory.cpp @@ -67,8 +67,7 @@ hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) // returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but // no pool was found meeting the requirements. If an error is encountered, we // return that error. -static hsa_status_t -FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { +static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { hsa_status_t err; hsa_amd_segment_t segment; uint32_t flag; @@ -77,21 +76,18 @@ FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, - &segment); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = hsa_amd_memory_pool_get_info(pool, - HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; - if ((karg_st == 0 && kern_arg) || - (karg_st != 0 && !kern_arg)) { + if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) { return HSA_STATUS_SUCCESS; } @@ -151,39 +147,35 @@ HsaRsrcFactory::~HsaRsrcFactory() { } hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { - void* handle = dlopen(kAqlProfileLib, RTLD_NOW); - if (handle == NULL) { - fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); - return HSA_STATUS_ERROR; - } - dlerror(); /* Clear any existing error */ + void* handle = dlopen(kAqlProfileLib, RTLD_NOW); + if (handle == NULL) { + fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); + return HSA_STATUS_ERROR; + } + dlerror(); /* Clear any existing error */ - api->hsa_ven_amd_aqlprofile_error_string = - (decltype(::hsa_ven_amd_aqlprofile_error_string)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_error_string"); - api->hsa_ven_amd_aqlprofile_validate_event = - (decltype(::hsa_ven_amd_aqlprofile_validate_event)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_validate_event"); - api->hsa_ven_amd_aqlprofile_start = - (decltype(::hsa_ven_amd_aqlprofile_start)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_start"); - api->hsa_ven_amd_aqlprofile_stop = - (decltype(::hsa_ven_amd_aqlprofile_stop)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); + api->hsa_ven_amd_aqlprofile_error_string = + (decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_error_string"); + api->hsa_ven_amd_aqlprofile_validate_event = + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_validate_event"); + api->hsa_ven_amd_aqlprofile_start = + (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); + api->hsa_ven_amd_aqlprofile_stop = + (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); #if AQL_PROFILE_READ_API_ENABLE - api->hsa_ven_amd_aqlprofile_read = - (decltype(::hsa_ven_amd_aqlprofile_read)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_read"); + api->hsa_ven_amd_aqlprofile_read = + (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); #endif // AQL_PROFILE_READ_API_ENABLE - api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = - (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); - api->hsa_ven_amd_aqlprofile_get_info = - (decltype(::hsa_ven_amd_aqlprofile_get_info)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_get_info"); - api->hsa_ven_amd_aqlprofile_iterate_data = - (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*) - dlsym(handle, "hsa_ven_amd_aqlprofile_iterate_data"); + api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_get_info"); + api->hsa_ven_amd_aqlprofile_iterate_data = + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_iterate_data"); return HSA_STATUS_SUCCESS; } @@ -225,11 +217,17 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), &agent_info->cu_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), &agent_info->waves_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), &agent_info->simds_per_cu); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), &agent_info->se_num); - hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), &agent_info->shader_arrays_per_se); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + &agent_info->cu_num); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + &agent_info->waves_per_cu); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &agent_info->simds_per_cu); + hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + &agent_info->se_num); + hsa_agent_get_info(agent, + static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), + &agent_info->shader_arrays_per_se); agent_info->cpu_pool = {}; agent_info->kern_arg_pool = {}; @@ -348,7 +346,7 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; return ptr; } @@ -363,7 +361,7 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* buffer = NULL; if (!cpu_agents_.empty()) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(cpu_list_[0]->kern_arg_pool, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -383,7 +381,7 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; if (!cpu_agents_.empty()) { - status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, (void**)&buffer); + status = hsa_amd_memory_pool_allocate(cpu_list_[0]->cpu_pool, size, 0, reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -400,9 +398,10 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s // @return uint8_t* Pointer to buffer, null if allocation fails. uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) ? - reinterpret_cast( mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0)) : - AllocateSysMemory(agent_info, size); + uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) + ? reinterpret_cast( + mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0)) + : AllocateSysMemory(agent_info, size); return ptr; } @@ -415,7 +414,8 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src if (status == HSA_STATUS_SUCCESS) { status = hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); if (status == HSA_STATUS_SUCCESS) { - if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) { + if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, + HSA_WAIT_STATE_BLOCKED) != 0) { status = HSA_STATUS_ERROR; } } @@ -443,7 +443,8 @@ bool HsaRsrcFactory::FreeMemory(void* ptr) { // be used to submit for execution // @return bool true if successful, false otherwise bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, - const char* kernel_name, hsa_executable_t* executable, hsa_executable_symbol_t* code_desc) { + const char* kernel_name, hsa_executable_t* executable, + hsa_executable_symbol_t* code_desc) { hsa_status_t status = HSA_STATUS_ERROR; // Build the code object filename @@ -467,13 +468,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br } // Create executable. - status = hsa_executable_create_alt(HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); + status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, executable); CHECK_STATUS("Error in creating executable object", status); // Load code object. - status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, - code_obj_rdr, NULL, NULL); + status = hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, + NULL, NULL); CHECK_STATUS("Error in loading executable object", status); // Freeze executable. @@ -515,7 +516,7 @@ bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { return true; } -uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { const uint32_t slot_size_b = 0x40; // adevance command queue @@ -526,14 +527,15 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { } uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = (uint32_t*)((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); - uint32_t* slot_data = (uint32_t*)packet; + uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + const uint32_t* slot_data = reinterpret_cast(packet); // Copy buffered commands into the queue slot. // Overwrite the AQL invalid header (first dword) last. // This prevents the slot from being read until it's fully written. memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); - std::atomic* header_atomic_ptr = reinterpret_cast*>(&queue_slot[0]); + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&queue_slot[0]); header_atomic_ptr->store(slot_data[0], std::memory_order_release); // ringdoor bell @@ -541,7 +543,22 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, void* packet) { return write_idx; } +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { + const uint32_t slot_size_b = 0x40; + if ((size_bytes & (slot_size_b - 1)) != 0) { + fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); + abort(); + } + + const char* begin = reinterpret_cast(packet); + const char* end = begin + size_bytes; + uint64_t write_idx = 0; + for (const char* ptr = begin; ptr < end; ptr += slot_size_b) { + write_idx = Submit(queue, ptr); + } + + return write_idx; +} HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; - diff --git a/test/util/hsa_rsrc_factory.h b/test/util/hsa_rsrc_factory.h index fc9b37e071..92109cfc93 100644 --- a/test/util/hsa_rsrc_factory.h +++ b/test/util/hsa_rsrc_factory.h @@ -22,8 +22,8 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING POSSIBILITY OF SUCH DAMAGE. ********************************************************************/ -#ifndef _HSA_RSRC_FACTORY_H_ -#define _HSA_RSRC_FACTORY_H_ +#ifndef TEST_UTIL_HSA_RSRC_FACTORY_H_ +#define TEST_UTIL_HSA_RSRC_FACTORY_H_ #include #include @@ -219,13 +219,14 @@ class HsaRsrcFactory { // be used to submit for execution // @return true if successful, false otherwise bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name, - hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); + hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); // Print the various fields of Hsa Gpu Agents bool PrintGpuAgents(const std::string& header); // Submit AQL packet to given queue - static uint64_t Submit(hsa_queue_t* queue, void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); // Return AqlProfile API table typedef hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_pfn_t; @@ -246,7 +247,7 @@ class HsaRsrcFactory { // Constructor of the class. Will initialize the Hsa Runtime and // query the system topology to get the list of Cpu and Gpu devices - HsaRsrcFactory(bool initialize_hsa); + explicit HsaRsrcFactory(bool initialize_hsa); // Destructor of the class ~HsaRsrcFactory(); @@ -282,4 +283,4 @@ class HsaRsrcFactory { }; -#endif // _HSA_RSRC_FACTORY_H_ +#endif // TEST_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/test/util/xml.h b/test/util/xml.h index 898b305b10..8c93d377de 100644 --- a/test/util/xml.h +++ b/test/util/xml.h @@ -1,5 +1,5 @@ -#ifndef SRC_XML_XML_H_ -#define SRC_XML_XML_H_ +#ifndef TEST_UTIL_XML_H_ +#define TEST_UTIL_XML_H_ #include #include @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace xml { @@ -52,13 +53,13 @@ class Xml { } for (auto* incl : incl_nodes) { const std::string& incl_name = path + incl->opts["file"]; - Xml *ixml = Create(incl_name, xml); + Xml* ixml = Create(incl_name, xml); if (ixml == NULL) { delete xml; xml = NULL; break; } else { - delete(ixml); + delete (ixml); } } if (xml) { @@ -70,7 +71,7 @@ class Xml { return xml; } - static void Destroy(Xml *xml) { delete xml; } + static void Destroy(Xml* xml) { delete xml; } std::string GetName() { return file_name_; } @@ -93,23 +94,25 @@ class Xml { nodes_t GetNodes(const std::string& global_tag) { return (*map_)[global_tag]; } - template - F ForEach(const F& f_i) { + template F ForEach(const F& f_i) { F f = f_i; - if (map_) for (auto& entry : *map_) { - for (auto node : entry.second) { - if (f.fun(entry.first, node) == false) break; + if (map_) { + for (auto& entry : *map_) { + for (auto node : entry.second) { + if (f.fun(entry.first, node) == false) break; + } } } return f; } - template - F ForEach(const F& f_i) const { + template F ForEach(const F& f_i) const { F f = f_i; - if (map_) for (auto& entry : *map_) { - for (auto node : entry.second) { - if (f.fun(entry.first, node) == false) break; + if (map_) { + for (auto& entry : *map_) { + for (auto node : entry.second) { + if (f.fun(entry.first, node) == false) break; + } } } return f; @@ -139,8 +142,7 @@ class Xml { comment_(false), included_(false), level_(NULL), - map_(NULL) - { + map_(NULL) { if (obj != NULL) { map_ = obj->map_; level_ = obj->level_; @@ -165,7 +167,7 @@ class Xml { bool Init() { fd_ = open(file_name_.c_str(), O_RDONLY); if (fd_ == -1) { - //perror((std::string("open XML file ") + file_name_).c_str()); + // perror((std::string("open XML file ") + file_name_).c_str()); return false; } @@ -180,20 +182,19 @@ class Xml { void PreProcess() { uint32_t ind = 0; - const uint32_t buf_size = 128; - char buf[buf_size]; + char buf[kBufSize]; bool error = false; while (1) { const uint32_t pos = lseek(fd_, 0, SEEK_CUR); - uint32_t size = read(fd_, buf, buf_size); + uint32_t size = read(fd_, buf, kBufSize); if (size <= 0) break; buf[size - 1] = '\0'; if (strncmp(buf, "#include \"", 10) == 0) { - for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind); + for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {} if (ind == size) { - fprintf(stderr, "XML PreProcess failed, line size limit %d\n", (int)buf_size); + fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize); error = true; break; } @@ -201,7 +202,7 @@ class Xml { size = ind; lseek(fd_, pos + ind + 1, SEEK_SET); - for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind); + for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {} if (ind == size) { error = true; break; @@ -229,9 +230,9 @@ class Xml { token_t token = (remainder.size()) ? remainder : NextToken(); remainder.clear(); -// token_t token1 = token; -// token1.push_back('\0'); -// std::cout << "> " << &token1[0] << std::endl; + // token_t token1 = token; + // token1.push_back('\0'); + // std::cout << "> " << &token1[0] << std::endl; // End of file if (token.size() == 0) break; @@ -259,8 +260,9 @@ class Xml { else BadFormat(token); token.push_back('\0'); - } else + } else { token[i] = '\0'; + } const char* tag = &token[ind]; if (node_begin) { @@ -272,8 +274,9 @@ class Xml { } UpLevel(); } - } else + } else { BadFormat(token); + } break; case DECL_STATE: if (token[0] == '>') { @@ -300,7 +303,7 @@ class Xml { } bool SpaceCheck() const { - bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == ' ')); + bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t')); return cond; } @@ -325,29 +328,32 @@ class Xml { while (1) { if (data_size_ == 0) { - data_size_ = read(fd_, buffer_, buf_size_); + data_size_ = read(fd_, buffer_, kBufSize); if (data_size_ <= 0) break; } - if (token.empty()) + if (token.empty()) { while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) { ++index_; } + } while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) { const char symb = buffer_[index_]; bool skip_symb = false; switch (symb) { case '\\': - if (special_symb) special_symb = false; - else { + if (special_symb) { + special_symb = false; + } else { special_symb = true; skip_symb = true; } break; case '"': - if (special_symb) special_symb = false; - else { + if (special_symb) { + special_symb = false; + } else { in_string = !in_string; if (!in_string) { buffer_[index_] = ' '; @@ -411,8 +417,8 @@ class Xml { unsigned file_line_; int fd_; - static const unsigned buf_size_ = 256; - char buffer_[buf_size_]; + static const size_t kBufSize = 256; + char buffer_[kBufSize]; unsigned data_size_; unsigned index_; @@ -426,4 +432,4 @@ class Xml { } // namespace xml -#endif // SRC_XML_XML_H_ +#endif // TEST_UTIL_XML_H_